textmaven.application.analyzer.extractor
Class WordExtractor

java.lang.Object
  extended by textmaven.application.analyzer.extractor.WordExtractor
All Implemented Interfaces:
Globals, ITextHandler

public class WordExtractor
extends java.lang.Object
implements ITextHandler, Globals

ITextHandler implementation that extracts individual words, which can finally be written to the output stream.

Author:
krebtho

Field Summary
 
Fields inherited from interface textmaven.Globals
C_ATTR_CLASSNAME, C_DICT_BASE_TYPE, C_DICT_TYPE, C_SERVER_BASE_TYPE, C_STEMMER_BASE_TYPE, C_WRITER_BASE_TYPE, CONFIG_FILE, DEFAULT_SENTENCES, DEFAULT_SEPARATOR, NEW_PARA, T_COL_DICTIONARY, T_COL_HOMONYMID, T_COL_ID, T_COL_LEXKEY, T_COL_TRANSLATION, T_COL_WORDCLASS, VERBOSE
 
Constructor Summary
WordExtractor()
           
 
Method Summary
 void addExclusionList(IExclusionList list)
           
 void endParagraph()
          Called when a paragraph ends
 void endSentence(java.lang.String sentence, char punctuation)
          Called when the sentence ended.
 void endText()
          Called when text reading finished.
 java.util.Comparator getComparator()
           
 IDictionary getDictionary()
           
 long getExcludedWords()
           
 IExclusionList getExclusionList()
           
 IWordDistributionMap getIntersectionMap()
           
 java.lang.String getLanguage()
           
 java.util.Vector getOccurences()
           
 long getPrintedWords()
           
 java.lang.StringBuffer getSentence()
           
 java.lang.String getSeparator()
           
 long getThreshold()
           
 long getTotalWords()
           
 IWordDistributionMap getWords()
           
 void heading(int depth, java.lang.String title)
          Called when a heading was read.
 boolean isAllOccurrences()
           
 boolean isKeepContext()
           
 boolean isPrintDistribution()
           
 boolean isPrintOccurence()
           
 void setAllOccurrences(boolean allOccurrences)
           
 void setComparator(java.util.Comparator comp)
           
 void setDictionary(IDictionary dictionary)
           
 void setExcludedWords(long excludedWords)
           
 void setInitialWordDistribution(IWordDistributionMap map)
           
 void setIntersectionMap(IWordDistributionMap intersectionMap)
           
 void setKeepContext(boolean keepContext)
           
 void setNrOfSentences(int nr)
           
 void setOccurences(java.util.Vector occurences)
           
 void setPrintDistribution(boolean inclDistr)
           
 void setPrintOccurence(boolean inclOcc)
           
 void setSentence(java.lang.StringBuffer sentence)
           
 void setSeparator(java.lang.String separator)
           
 void setThreshold(long threshold)
           
 void setTotalWords(long totalWords)
           
 void setWords(IWordDistributionMap words)
           
 void startParagraph()
          Called when a new paragraph starts
 void startSentence()
          Called when a new sentence starts.
 void startText()
          Called when text reading starts.
 void word(java.lang.String word)
          Called when a word was read
 void write(java.io.OutputStream out)
          Writes all words extracted to the output stream.
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Constructor Detail

WordExtractor

public WordExtractor()
Method Detail

startText

public void startText()
Description copied from interface: ITextHandler
Called when text reading starts.

Specified by:
startText in interface ITextHandler

endText

public void endText()
Description copied from interface: ITextHandler
Called when text reading finished.

Specified by:
endText in interface ITextHandler

startParagraph

public void startParagraph()
Description copied from interface: ITextHandler
Called when a new paragraph starts

Specified by:
startParagraph in interface ITextHandler

endParagraph

public void endParagraph()
Description copied from interface: ITextHandler
Called when a paragraph ends

Specified by:
endParagraph in interface ITextHandler

heading

public void heading(int depth,
                    java.lang.String title)
Description copied from interface: ITextHandler
Called when a heading was read.

Specified by:
heading in interface ITextHandler
Parameters:
depth - heading depth
title - heading title

startSentence

public void startSentence()
Description copied from interface: ITextHandler
Called when a new sentence starts.

Specified by:
startSentence in interface ITextHandler

endSentence

public void endSentence(java.lang.String sentence,
                        char punctuation)
Description copied from interface: ITextHandler
Called when the sentence ended.

Specified by:
endSentence in interface ITextHandler
Parameters:
sentence - Sentence read, incl. punctuation char.
punctuation - Punctuation character terminating the sentence

word

public void word(java.lang.String word)
Description copied from interface: ITextHandler
Called when a word was read

Specified by:
word in interface ITextHandler
Parameters:
word - word read

write

public void write(java.io.OutputStream out)
Writes all words extracted to the output stream. Each word is written on a separate line.

Parameters:
out - the output stream to which the extracted words are written

getLanguage

public java.lang.String getLanguage()
Returns:
Returns the language.

getDictionary

public IDictionary getDictionary()
Returns:
Returns the dictionary.

setDictionary

public void setDictionary(IDictionary dictionary)
Parameters:
dictionary - The dictionary to set.

addExclusionList

public void addExclusionList(IExclusionList list)

isKeepContext

public boolean isKeepContext()
Returns:
Returns the keepContext.

setKeepContext

public void setKeepContext(boolean keepContext)
Parameters:
keepContext - The keepContext to set.

setNrOfSentences

public void setNrOfSentences(int nr)
Parameters:
nr -

isAllOccurrences

public boolean isAllOccurrences()
Returns:
Returns the allOccurrences.

setAllOccurrences

public void setAllOccurrences(boolean allOccurrences)
Parameters:
allOccurrences - The allOccurrences to set.

getThreshold

public long getThreshold()
Returns:
Returns the threshold.

setThreshold

public void setThreshold(long threshold)
Parameters:
threshold - The threshold to set.

setInitialWordDistribution

public void setInitialWordDistribution(IWordDistributionMap map)

setPrintDistribution

public void setPrintDistribution(boolean inclDistr)
Parameters:
inclDistr -

setPrintOccurence

public void setPrintOccurence(boolean inclOcc)
Parameters:
inclOcc -

setComparator

public void setComparator(java.util.Comparator comp)
Parameters:
comp -

setSeparator

public void setSeparator(java.lang.String separator)
Parameters:
separator -

getExcludedWords

public long getExcludedWords()
Returns:
Returns the excludedWords.

setExcludedWords

public void setExcludedWords(long excludedWords)
Parameters:
excludedWords - The excludedWords to set.

getExclusionList

public IExclusionList getExclusionList()
Returns:
Returns the exclusionList.

getIntersectionMap

public IWordDistributionMap getIntersectionMap()
Returns:
Returns the intersectionMap.

setIntersectionMap

public void setIntersectionMap(IWordDistributionMap intersectionMap)
Parameters:
intersectionMap - The intersectionMap to set.

getOccurences

public java.util.Vector getOccurences()
Returns:
Returns the occurences.

setOccurences

public void setOccurences(java.util.Vector occurences)
Parameters:
occurences - The occurences to set.

getPrintedWords

public long getPrintedWords()
Returns:
Returns the printedWords.

getSentence

public java.lang.StringBuffer getSentence()
Returns:
Returns the sentence.

setSentence

public void setSentence(java.lang.StringBuffer sentence)
Parameters:
sentence - The sentence to set.

getTotalWords

public long getTotalWords()
Returns:
Returns the totalWords.

setTotalWords

public void setTotalWords(long totalWords)
Parameters:
totalWords - The totalWords to set.

getWords

public IWordDistributionMap getWords()
Returns:
Returns the words.

setWords

public void setWords(IWordDistributionMap words)
Parameters:
words - The words to set.

getComparator

public java.util.Comparator getComparator()
Returns:
Returns the comparator.

isPrintDistribution

public boolean isPrintDistribution()
Returns:
Returns the printDistribution.

isPrintOccurence

public boolean isPrintOccurence()
Returns:
Returns the printOccurence.

getSeparator

public java.lang.String getSeparator()
Returns:
Returns the separator.


Copyright © 2002-2005 Sourceforge. All Rights Reserved.