Used mainly to search tables and word-indexable fields. Default behaviour in this base class is used if no overriding default object is attached to a table or dbConnection.
Includes a stop-word list and a minimum word length. Doesn't do any stemming or more complex parsing.
A word is generated from the isalnum() chars between isspace() separators. Any non-alnum chars inside a word are skipped.
mpString remains a pointer to the start of the current word. mWord is updated (in theory) with the filtered copy of the word.
|
Public Member Functions |
| | oofWordParser (const char *stringToParse=0, unsigned short minWordLen=3, const char **stopWords=0, unsigned short numStopWords=0) |
| virtual | ~oofWordParser () |
| | dtor.
|
| virtual void | start () |
| | sets the oofWordParser to the start of the string.
|
| virtual void | start (const char *stringToParse) |
| | call to start parsing a different string
|
| virtual void | next (void) |
| | Find the next word, updating the mWord buffer returned by word().
|
| virtual bool | more () const |
| | return true if any words left.
|
| virtual const char * | word () const |
| | returns the current word.
|
| virtual const char * | wordString () const |
| | returns the original string.
|
| virtual unsigned short & | minWordLength () |
| | returns/sets the minimum word length cut-off.
|
| virtual bool | isValidWord (const char *) const |
| virtual void | generateSearchArray () |
| const char ** | generatedStrings () const |
| | return array of pointers to individual words.
|
| unsigned long | generatedCount () const |
| void | extract (std::ostream &) |
| void | incRefs () |
| void | decRefs () |
Protected Types |
| enum | { maxWordLength = 40
} |
Protected Attributes |
| const char * | mpString |
| | pointer to start of words to process, may point to mGeneratedStrings entries
|
| const char * | mpStringStart |
| | pointer to original strings
|
| char | mWord [maxWordLength] |
| unsigned short | mWordLength |
| unsigned short | mMinWordLength |
| const char ** | mStopWords |
| | passed in list of stop words to ignore
|
| unsigned short | mNumStopWords |
| const char ** | mGeneratedStrings |
| | owned strings generated for subsearch call
|
| unsigned long | mGeneratedCount |
| unsigned long | mIterStrings |
| unsigned int | mReferences |