00001 /* -*- C++ -*- 00002 00003 textIR - A fast text document retrieval engine 00004 00005 Copyright (C) 2005-2009 Laurence Park 00006 00007 This program is free software: you can redistribute it and/or modify 00008 it under the terms of the GNU General Public License as published by 00009 the Free Software Foundation, either version 3 of the License, or 00010 (at your option) any later version. 00011 00012 This program is distributed in the hope that it will be useful, 00013 but WITHOUT ANY WARRANTY; without even the implied warranty of 00014 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00015 GNU General Public License for more details. 00016 00017 You should have received a copy of the GNU General Public License 00018 along with this program. If not, see <http://www.gnu.org/licenses/>. 00019 00020 File information: 00021 $Header: /home/staff/lapark/cvsroot/web_search/textIR/src/BuildWordList.h,v 1.5 2009/10/08 06:41:29 lapark Exp $ 00022 */ 00023 00024 #ifndef BUILDWORDLIST_H 00025 #define BUILDWORDLIST_H 00026 00027 #include "StopTerms.h" 00028 #include "BuildList.tcc" 00029 #include "Word.h" 00030 00049 class BuildWordList : public BuildList<Word> { 00050 public: 00051 00053 BuildWordList(void); 00054 ~BuildWordList(void); 00055 00057 bool prepareWordBuffer(char *word); 00058 00060 inline char lowercase_char(char x); 00061 00063 void lowercase_word_buffer(void); 00064 00066 void iterated_stem(void); 00067 00068 private: 00069 /* word buffer has room for word size in byte 0 */ 00070 /* and null terminator */ 00071 char _lwordBuffer[MAXWORD_SIZE + 2]; 00072 StopTerms *_stopList; 00073 }; 00074 00075 00076 #endif