00001 /* -*- C++ -*- 00002 00003 textIR - A fast text document retrieval engine 00004 00005 Copyright (C) 2005-2009 Laurence Park 00006 00007 This program is free software: you can redistribute it and/or modify 00008 it under the terms of the GNU General Public License as published by 00009 the Free Software Foundation, either version 3 of the License, or 00010 (at your option) any later version. 00011 00012 This program is distributed in the hope that it will be useful, 00013 but WITHOUT ANY WARRANTY; without even the implied warranty of 00014 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00015 GNU General Public License for more details. 00016 00017 You should have received a copy of the GNU General Public License 00018 along with this program. If not, see <http://www.gnu.org/licenses/>. 00019 00020 File information: 00021 $Header: /home/staff/lapark/cvsroot/web_search/textIR/src/PLSAFloatThesaurus.h,v 1.3.2.2 2009/11/02 14:35:36 lapark Exp $ 00022 */ 00023 00024 #ifndef PLSAFLOAT_THESAURUS_H 00025 #define PLSAFLOAT_THESAURUS_H 00026 00027 #include "SortedSubIndex.tcc" 00028 #include "Triplef_tfd.h" 00029 #include "ElementFloatIndex.tcc" 00030 #include "FileName.h" 00031 #include "CSCTriple_matrix.tcc" 00032 00033 #include "CSCPlsaMap.tcc" 00034 #include "WeightBM25.tcc" 00035 00056 class PLSAFloatThesaurus : public ElementFloatIndex<SortedSubIndex<WordSubIndex> > 00057 { 00058 public: 00059 00061 PLSAFloatThesaurus(int totalWords, BlockStats *stats, 00062 SortedSubIndex<WordSubIndex> *index, 00063 FileName *fileName, int eigenvalues, int iterations); 00064 ~PLSAFloatThesaurus(void); 00065 00066 bool calculateRow(int currentRealWord); 00067 int listRealPosition(int position); 00068 int indexRealPosition(int position); 00069 Quantise *createQuantiser(float lower, float upper); 00070 00071 00072 00073 private: 00074 int _elements; 00075 int _eigenvalues; 00076 00077 CSCPlsaMap<WeightBM25<TRIPLET> > *_mapping; 00078 ElementList<Word> *_wordList; 00079 }; 00080 00081 #endif