searchengine/cpix/tsrc/cpixunittest/src/pdftests.cpp
changeset 1 6f2c1c46032b
parent 0 671dee74050a
child 3 ae3f1779f6da
equal deleted inserted replaced
0:671dee74050a 1:6f2c1c46032b
    35 
    35 
    36 #include "config.h"
    36 #include "config.h"
    37 #include "testutils.h"
    37 #include "testutils.h"
    38 #include "suggestion.h"
    38 #include "suggestion.h"
    39 
    39 
    40 const char * PdfDocsToIndex[5] = {
    40 const char * PdfDocsToIndex[7] = {
    41 	FILE_TEST_CORPUS_PATH "\\pdf\\ctutor.pdf",
    41     "c:\\data\\cpixunittestcorpus\\pdf\\ctutor.pdf",
    42 //	FILE_TEST_CORPUS_PATH "\\pdf\\geology.pdf",
    42 	"c:\\data\\cpixunittestcorpus\\stem\\pdf\\geology.pdf",
    43 //	FILE_TEST_CORPUS_PATH "\\pdf\\samplepdf.pdf",
    43 	"c:\\data\\cpixunittestcorpus\\stem\\pdf\\samplepdf.pdf",
    44 //	FILE_TEST_CORPUS_PATH "\\pdf\\windjack.pdf",
    44 	"c:\\data\\cpixunittestcorpus\\stem\\pdf\\windjack.pdf",
       
    45 	"c:\\data\\cpixunittestcorpus\\stem\\pdf\\DCTDecode.pdf",
       
    46 	"c:\\data\\cpixunittestcorpus\\stem\\pdf\\Empty.pdf",
    45     NULL
    47     NULL
    46 };
    48 };
    47 
    49 
    48 
    50 
    49 const wchar_t * PdfSearchParameters[5] = {
    51 const wchar_t * PdfSearchParameters[5] = {
    50 	L"inline",
    52 	L"inline",
    51 //	L"CALDEBA",
    53 	L"CALDEBA",
    52 //	L"sample",
    54 	L"sample",
    53 //	L"reset",
    55 	L"reset",
    54     NULL
    56     NULL
    55 };
    57 };
    56 
    58 
    57 void pdfTestAppclassFilteredTermSearch(Itk::TestMgr * testMgr, const wchar_t* appclassPrefix)
    59 void pdfTestAppclassFilteredTermSearch(Itk::TestMgr * testMgr, const wchar_t* appclassPrefix)
    58 {
    60 {
    65                cpix_Succeeded(&result),
    67                cpix_Succeeded(&result),
    66                "Could not get rid of all test qbac-idx pairs");
    68                "Could not get rid of all test qbac-idx pairs");
    67 
    69 
    68     std::auto_ptr<FileIdxUtil> util( new FileIdxUtil ); 
    70     std::auto_ptr<FileIdxUtil> util( new FileIdxUtil ); 
    69     
    71     
    70     util->init(); 
    72     util->init(TRUE); 
    71     
    73     
    72     cpix_Analyzer* analyzer = cpix_CreateSimpleAnalyzer(&result); 
    74     cpix_Analyzer* analyzer = cpix_CreateSimpleAnalyzer(&result); 
    73     
    75     
    74     if ( cpix_Failed( &result) ) ITK_PANIC("Analyzer could not be created");
    76     if ( cpix_Failed( &result) ) ITK_PANIC("Analyzer could not be created");
    75     
    77     
    76     for (int i = 0; PdfDocsToIndex[i]; i++) 
    78     for (int i = 0; PdfDocsToIndex[i]; i++) 
    77     {
    79     {
    78         util->indexFile( PdfDocsToIndex[i], analyzer, testMgr ); 
    80         util->indexFile( PdfDocsToIndex[i], analyzer, testMgr ); 
    79     }
    81     }
    80 
    82 
    81     for (int i = 0; Mp3TestCorpus[i]; i++) 
    83     util->flush();
    82     {
       
    83         util->indexFile( Mp3TestCorpus[i], analyzer, testMgr ); 
       
    84     }
       
    85 
       
    86         util->flush();
       
    87     
    84     
    88     for (int i = 0; PdfSearchParameters[i]; i++) 
    85     for (int i = 0; PdfSearchParameters[i]; i++) 
    89     {
    86     {
    90         cpix_QueryParser
    87         cpix_QueryParser
    91             * queryParser = cpix_QueryParser_create(&result,
    88             * queryParser = cpix_QueryParser_create(&result,
    96                 cpix_Analyzer_destroy( analyzer );
    93                 cpix_Analyzer_destroy( analyzer );
    97                 ITK_PANIC("Could not create query parser");
    94                 ITK_PANIC("Could not create query parser");
    98             }
    95             }
    99         
    96         
   100         std::wostringstream queryString;
    97         std::wostringstream queryString;
   101         if ( appclassPrefix ) {
    98         queryString<<L"adobe";
   102             queryString<<L"$terms<5,'"<<appclassPrefix<<L"'>("<<PdfSearchParameters[i]<<L")";
       
   103         } else {
       
   104             queryString<<L"$terms<5>("<<PdfSearchParameters[i]<<L")";
       
   105         }
       
   106     
       
   107         cpix_Query* query = cpix_QueryParser_parse(queryParser,
    99         cpix_Query* query = cpix_QueryParser_parse(queryParser,
   108                                                    queryString.str().c_str());
   100                                                    queryString.str().c_str());
   109         if (cpix_Failed(queryParser)
   101         if (cpix_Failed(queryParser)
   110             || query == NULL)
   102             || query == NULL)
   111             {
   103             {
   112                 cpix_Analyzer_destroy(analyzer);
   104                 cpix_Analyzer_destroy(analyzer);
   113                 cpix_ClearError(queryParser);
   105                 cpix_ClearError(queryParser);
   114                 cpix_QueryParser_destroy(queryParser);
   106                 cpix_QueryParser_destroy(queryParser);
   115                 ITK_PANIC("Could not parse query string");
   107                 ITK_PANIC("Could not parse query string");
   116             }
   108             }
   117         cpix_QueryParser_destroy(queryParser);
   109        
   118 
       
   119         cpix_Hits
   110         cpix_Hits
   120             * hits = cpix_IdxDb_search(util->idxDb(),
   111             * hits = cpix_IdxDb_search(util->idxDb(),
   121                                        query );
   112                                        query );
   122         
   113         
   123         int32_t hitsLength = cpix_Hits_length(hits);
   114         int32_t hitsLength = cpix_Hits_length(hits);
   124         cpix_Query_destroy( query ); 
   115                  
   125         
       
   126         wprintf(L"Results for %S:\n", PdfSearchParameters[i]);
   116         wprintf(L"Results for %S:\n", PdfSearchParameters[i]);
   127         
   117         
   128         Suggestion::printSuggestions(hits,
   118         Suggestion::printSuggestions(hits,
   129                                      testMgr);
   119                                      testMgr);        
       
   120         printf("\n"); 
   130         
   121         
   131         printf("\n"); 
   122         cpix_Analyzer_destroy(analyzer);
   132                 
   123         cpix_Hits_destroy( hits );
   133         cpix_Hits_destroy( hits ); 
   124         cpix_Query_destroy( query );
       
   125         cpix_QueryParser_destroy(queryParser);
   134     }
   126     }
   135 
   127 
   136 }
   128 }
   137 // int32_t hitsLength = cpix_Hits_length(hits);
   129 // int32_t hitsLength = cpix_Hits_length(hits);
   138 void CreateSimplePdfSearch(Itk::TestMgr * testMgr) 
   130 void CreateSimplePdfSearch(Itk::TestMgr * testMgr) 
   149 
   141 
   150     pdfTests->add("pdfterms", &CreateSimplePdfSearch);
   142     pdfTests->add("pdfterms", &CreateSimplePdfSearch);
   151     
   143     
   152     return pdfTests;
   144     return pdfTests;
   153 }
   145 }
   154 
       
   155