gate.treetagger2
Class TreeTaggerBase

java.lang.Object
  extended by gate.util.AbstractFeatureBearer
      extended by gate.creole.AbstractResource
          extended by gate.creole.AbstractProcessingResource
              extended by gate.creole.AbstractLanguageAnalyser
                  extended by gate.treetagger2.TreeTaggerBase
All Implemented Interfaces:
gate.creole.ANNIEConstants, gate.Executable, gate.LanguageAnalyser, gate.ProcessingResource, gate.Resource, gate.util.FeatureBearer, gate.util.NameBearer, java.io.Serializable
Direct Known Subclasses:
TreeTaggerChunk, TreeTaggerPOS

public abstract class TreeTaggerBase
extends gate.creole.AbstractLanguageAnalyser
implements gate.ProcessingResource

This class is a wrapper for the language-independent TreeTagger POS tagger from the University of Stuttgart, Germany. It passes a GATE document to a TreeTagger shell script. Results are stored in the document's TreeTaggerToken annotations.

See Also:
Serialized Form

Nested Class Summary
 
Nested classes/interfaces inherited from class gate.creole.AbstractProcessingResource
gate.creole.AbstractProcessingResource.InternalStatusListener, gate.creole.AbstractProcessingResource.IntervalProgressListener
 
Field Summary
protected  java.lang.String addToScriptParms
           
protected  java.lang.String annotationSetName
           
protected  boolean debugMode
           
protected  java.lang.String encoding
           
protected  boolean failOnUnmappableChar
           
protected  java.lang.String tokenAnnotationType
           
protected  java.lang.String treeTaggerInvocationScriptParms
           
 
Fields inherited from class gate.creole.AbstractLanguageAnalyser
corpus
 
Fields inherited from class gate.creole.AbstractProcessingResource
interrupted
 
Fields inherited from class gate.creole.AbstractResource
name
 
Fields inherited from class gate.util.AbstractFeatureBearer
features
 
Fields inherited from interface gate.creole.ANNIEConstants
ANNOTATION_COREF_FEATURE_NAME, DATE_ANNOTATION_TYPE, DATE_POSTED_ANNOTATION_TYPE, DOCUMENT_COREF_FEATURE_NAME, JOB_ID_ANNOTATION_TYPE, LOCATION_ANNOTATION_TYPE, LOOKUP_ANNOTATION_TYPE, LOOKUP_CLASS_FEATURE_NAME, LOOKUP_MAJOR_TYPE_FEATURE_NAME, LOOKUP_MINOR_TYPE_FEATURE_NAME, LOOKUP_ONTOLOGY_FEATURE_NAME, MONEY_ANNOTATION_TYPE, ORGANIZATION_ANNOTATION_TYPE, PERSON_ANNOTATION_TYPE, PERSON_GENDER_FEATURE_NAME, PR_NAMES, SENTENCE_ANNOTATION_TYPE, SPACE_TOKEN_ANNOTATION_TYPE, TOKEN_ANNOTATION_TYPE, TOKEN_CATEGORY_FEATURE_NAME, TOKEN_KIND_FEATURE_NAME, TOKEN_LENGTH_FEATURE_NAME, TOKEN_ORTH_FEATURE_NAME, TOKEN_STRING_FEATURE_NAME
 
Constructor Summary
TreeTaggerBase()
           
 
Method Summary
 void execute()
          Run the TreeTagger on the current document.
 java.lang.String getAnnotationSetName()
          Return the annotation set name used for the TreeTaggerTokens.
 java.lang.Boolean getDebugMode()
           
 gate.Document getDocument()
          Return the document being processed.
 java.lang.String getEncoding()
          Get the character encoding used for the temporary files.
 java.lang.Boolean getFailOnUnmappableChar()
          Get the flag for whether we should fail if an unmappable character is found.
protected abstract  void getFeatures4Tokens(java.util.ArrayList lines, java.util.ArrayList tokens)
           
 java.lang.String getTokenAnnotationType()
           
 java.lang.String getTreeTaggerInvocationScriptParms()
          Return the location of the TreeTagger script.
 gate.Resource init()
          Initialize this resource.
 void setAnnotationSetName(java.lang.String annotationSetName)
          Set the name of the annotation set to place the generated TreeTaggerToken annotations in.
 void setDebugMode(java.lang.Boolean value)
           
 void setDocument(gate.Document document)
          Set the document to process.
 void setEncoding(java.lang.String newEncoding)
          Set the character encoding to use for the temporary files.
 void setFailOnUnmappableChar(java.lang.Boolean newValue)
          Set the flag for whether we should fail if an unmappable character is found.
 void setTokenAnnotationType(java.lang.String type)
           
 void setTreeTaggerInvocationScriptParms(java.lang.String treeTaggerInvocationScriptParms)
          Set the location of the TreeTagger script.
 
Methods inherited from class gate.creole.AbstractLanguageAnalyser
getCorpus, setCorpus
 
Methods inherited from class gate.creole.AbstractProcessingResource
addProgressListener, addStatusListener, cleanup, fireProcessFinished, fireProgressChanged, fireStatusChanged, interrupt, isInterrupted, reInit, removeProgressListener, removeStatusListener
 
Methods inherited from class gate.creole.AbstractResource
checkParameterValues, getBeanInfo, getName, getParameterValue, getParameterValue, removeResourceListeners, setName, setParameterValue, setParameterValue, setParameterValues, setParameterValues, setResourceListeners
 
Methods inherited from class gate.util.AbstractFeatureBearer
getFeatures, setFeatures
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 
Methods inherited from interface gate.ProcessingResource
reInit
 
Methods inherited from interface gate.Resource
cleanup, getParameterValue, setParameterValue, setParameterValues
 
Methods inherited from interface gate.util.FeatureBearer
getFeatures, setFeatures
 
Methods inherited from interface gate.util.NameBearer
getName, setName
 
Methods inherited from interface gate.Executable
interrupt, isInterrupted
 

Field Detail

annotationSetName

protected java.lang.String annotationSetName

treeTaggerInvocationScriptParms

protected java.lang.String treeTaggerInvocationScriptParms

addToScriptParms

protected java.lang.String addToScriptParms

encoding

protected java.lang.String encoding

failOnUnmappableChar

protected boolean failOnUnmappableChar

tokenAnnotationType

protected java.lang.String tokenAnnotationType

debugMode

protected boolean debugMode
Constructor Detail

TreeTaggerBase

public TreeTaggerBase()
Method Detail

init

public gate.Resource init()
                   throws gate.creole.ResourceInstantiationException
Initialize this resource. Find the location of the script for invoking the TreeTagger. Check if there is a temporary directory.

Specified by:
init in interface gate.Resource
Overrides:
init in class gate.creole.AbstractProcessingResource
Throws:
gate.creole.ResourceInstantiationException

execute

public void execute()
             throws gate.creole.ExecutionException
Run the TreeTagger on the current document. This writes the document text to a temporary file, runs the tagger and processes its output to produce TreeTaggerToken annotations on the document.

Specified by:
execute in interface gate.Executable
Overrides:
execute in class gate.creole.AbstractProcessingResource
Throws:
gate.creole.ExecutionException

getFeatures4Tokens

protected abstract void getFeatures4Tokens(java.util.ArrayList lines,
                                           java.util.ArrayList tokens)

setDocument

public void setDocument(gate.Document document)
Set the document to process.

Specified by:
setDocument in interface gate.LanguageAnalyser
Overrides:
setDocument in class gate.creole.AbstractLanguageAnalyser

getDocument

public gate.Document getDocument()
Return the document being processed.

Specified by:
getDocument in interface gate.LanguageAnalyser
Overrides:
getDocument in class gate.creole.AbstractLanguageAnalyser

setAnnotationSetName

public void setAnnotationSetName(java.lang.String annotationSetName)
Set the name of the annotation set to place the generated TreeTaggerToken annotations in.


getAnnotationSetName

public java.lang.String getAnnotationSetName()
Return the annotation set name used for the TreeTaggerTokens.


setTreeTaggerInvocationScriptParms

public void setTreeTaggerInvocationScriptParms(java.lang.String treeTaggerInvocationScriptParms)
Set the location of the TreeTagger script.


getTreeTaggerInvocationScriptParms

public java.lang.String getTreeTaggerInvocationScriptParms()
Return the location of the TreeTagger script.


setEncoding

public void setEncoding(java.lang.String newEncoding)
Set the character encoding to use for the temporary files. This must be an encoding that your TreeTagger installation understands.

Throws:
java.nio.charset.IllegalCharsetNameException - if the specified string is not a valid encoding name.

getEncoding

public java.lang.String getEncoding()
Get the character encoding used for the temporary files.


setFailOnUnmappableChar

public void setFailOnUnmappableChar(java.lang.Boolean newValue)
Set the flag for whether we should fail if an unmappable character is found.


getFailOnUnmappableChar

public java.lang.Boolean getFailOnUnmappableChar()
Get the flag for whether we should fail if an unmappable character is found.


setDebugMode

public void setDebugMode(java.lang.Boolean value)

getDebugMode

public java.lang.Boolean getDebugMode()

getTokenAnnotationType

public java.lang.String getTokenAnnotationType()

setTokenAnnotationType

public void setTokenAnnotationType(java.lang.String type)