|
|||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object
  textmaven.application.analyzer.extractor.WordExtractor
ITextHandler
implementation that extracts individual words,
which can finally be written to the output stream.
Field Summary |
Fields inherited from interface textmaven.Globals |
C_ATTR_CLASSNAME, C_DICT_BASE_TYPE, C_DICT_TYPE, C_SERVER_BASE_TYPE, C_STEMMER_BASE_TYPE, C_WRITER_BASE_TYPE, CONFIG_FILE, DEFAULT_SENTENCES, DEFAULT_SEPARATOR, NEW_PARA, T_COL_DICTIONARY, T_COL_HOMONYMID, T_COL_ID, T_COL_LEXKEY, T_COL_TRANSLATION, T_COL_WORDCLASS, VERBOSE |
Constructor Summary | |
WordExtractor()
|
Method Summary | |
void |
addExclusionList(IExclusionList list)
|
void |
endParagraph()
Called when a paragraph ends |
void |
endSentence(java.lang.String sentence,
char punctuation)
Called when the sentence ended. |
void |
endText()
Called when text reading finished. |
java.util.Comparator |
getComparator()
|
IDictionary |
getDictionary()
|
long |
getExcludedWords()
|
IExclusionList |
getExclusionList()
|
IWordDistributionMap |
getIntersectionMap()
|
java.lang.String |
getLanguage()
|
java.util.Vector |
getOccurences()
|
long |
getPrintedWords()
|
java.lang.StringBuffer |
getSentence()
|
java.lang.String |
getSeparator()
|
long |
getThreshold()
|
long |
getTotalWords()
|
IWordDistributionMap |
getWords()
|
void |
heading(int depth,
java.lang.String title)
Called when a heading was read. |
boolean |
isAllOccurrences()
|
boolean |
isKeepContext()
|
boolean |
isPrintDistribution()
|
boolean |
isPrintOccurence()
|
void |
setAllOccurrences(boolean allOccurrences)
|
void |
setComparator(java.util.Comparator comp)
|
void |
setDictionary(IDictionary dictionary)
|
void |
setExcludedWords(long excludedWords)
|
void |
setInitialWordDistribution(IWordDistributionMap map)
|
void |
setIntersectionMap(IWordDistributionMap intersectionMap)
|
void |
setKeepContext(boolean keepContext)
|
void |
setNrOfSentences(int nr)
|
void |
setOccurences(java.util.Vector occurences)
|
void |
setPrintDistribution(boolean inclDistr)
|
void |
setPrintOccurence(boolean inclOcc)
|
void |
setSentence(java.lang.StringBuffer sentence)
|
void |
setSeparator(java.lang.String separator)
|
void |
setThreshold(long threshold)
|
void |
setTotalWords(long totalWords)
|
void |
setWords(IWordDistributionMap words)
|
void |
startParagraph()
Called when a new paragraph starts |
void |
startSentence()
Called when a new sentence starts. |
void |
startText()
Called when text reading starts. |
void |
word(java.lang.String word)
Called when a word was read |
void |
write(java.io.OutputStream out)
Writes all words extracted to the output stream. |
Methods inherited from class java.lang.Object |
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
Constructor Detail |
public WordExtractor()
Method Detail |
public void startText()
ITextHandler
startText
in interface ITextHandler
public void endText()
ITextHandler
endText
in interface ITextHandler
public void startParagraph()
ITextHandler
startParagraph
in interface ITextHandler
public void endParagraph()
ITextHandler
endParagraph
in interface ITextHandler
public void heading(int depth, java.lang.String title)
ITextHandler
heading
in interface ITextHandler
depth
- heading depth
title
- heading title
public void startSentence()
ITextHandler
startSentence
in interface ITextHandler
public void endSentence(java.lang.String sentence, char punctuation)
ITextHandler
endSentence
in interface ITextHandler
sentence
- Sentence read, incl. punctuation char.
punctuation
- Punctuation character terminating the sentence
public void word(java.lang.String word)
ITextHandler
word
in interface ITextHandler
word
- word read
public void write(java.io.OutputStream out)
out
-
public java.lang.String getLanguage()
public IDictionary getDictionary()
public void setDictionary(IDictionary dictionary)
dictionary
- The dictionary to set.
public void addExclusionList(IExclusionList list)
public boolean isKeepContext()
public void setKeepContext(boolean keepContext)
keepContext
- The keepContext to set.
public void setNrOfSentences(int nr)
nr
-
public boolean isAllOccurrences()
public void setAllOccurrences(boolean allOccurrences)
allOccurrences
- The allOccurrences to set.
public long getThreshold()
public void setThreshold(long threshold)
threshold
- The threshold to set.
public void setInitialWordDistribution(IWordDistributionMap map)
public void setPrintDistribution(boolean inclDistr)
inclDistr
-
public void setPrintOccurence(boolean inclOcc)
inclOcc
-
public void setComparator(java.util.Comparator comp)
comp
-
public void setSeparator(java.lang.String separator)
separator
-
public long getExcludedWords()
public void setExcludedWords(long excludedWords)
excludedWords
- The excludedWords to set.
public IExclusionList getExclusionList()
public IWordDistributionMap getIntersectionMap()
public void setIntersectionMap(IWordDistributionMap intersectionMap)
intersectionMap
- The intersectionMap to set.
public java.util.Vector getOccurences()
public void setOccurences(java.util.Vector occurences)
occurences
- The occurences to set.
public long getPrintedWords()
public java.lang.StringBuffer getSentence()
public void setSentence(java.lang.StringBuffer sentence)
sentence
- The sentence to set.
public long getTotalWords()
public void setTotalWords(long totalWords)
totalWords
- The totalWords to set.
public IWordDistributionMap getWords()
public void setWords(IWordDistributionMap words)
words
- The words to set.
public java.util.Comparator getComparator()
public boolean isPrintDistribution()
public boolean isPrintOccurence()
public java.lang.String getSeparator()
|
|||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |