问题描述
packagechapter5;importjava.io.IOException;importjava.io.File;importjava.io.FileReader;importorg.apache.lucene.store.Directory;importorg.apache.lucene.store.FSDirectory;importorg.apache.lucene.store.RAMDirectory;importorg.apache.lucene.util.Version;importorg.apache.lucene.document.Field;importorg.apache.lucene.document.Document;importorg.apache.lucene.index.IndexWriter;importorg.apache.lucene.index.IndexWriter.MaxFieldLength;importorg.apache.lucene.analysis.standard.StandardAnalyzer;/********************************************************************本代码完成本地指定目录的遍历和文件查找。对指定后缀的文件进行分析,利用Lucene建立*索引,为后续检索使用做好准备。*******************************************************************/publicclassLuceneIndexLocalDisk{privatestaticStringDest_Index_Path="E:\workplace2\Lucenechapter2\workshop\TextIndex";privatestaticStringText_File_Path="E:\workplace2\Lucenechapter2\workshop\ch2\002\";/*========================================================*主函数,指定索引目录和待分析的目录,生成Lucene索引*========================================================*/publicstaticvoidmain(String[]args){Fileindexpath=newFile(Dest_Index_Path);FilelocalPath=newFile(Text_File_Path);try{intnums=indexBuilder(indexpath,localPath);System.out.println("IndexFinished"+nums+"docs");}catch(IOExceptione){e.printStackTrace();}}/*========================================================*索引创建函数,生成IndexWriter创建索引,调用子目录索引函数,并优化*存储本地磁盘索引*========================================================*/publicstaticintindexBuilder(FileindexPath,FilelocalPath)throwsIOException{if(!localPath.exists()||!localPath.isDirectory()||!localPath.canRead()){thrownewIOException(localPath+"不存在或者不允许访问");}System.out.println("目标路径完好");MaxFieldLengthmaxFieldLength=newMaxFieldLength(10000);IndexWriterFSWriter=newIndexWriter(FSDirectory.open(indexPath),newStandardAnalyzer(Version.LUCENE_CURRENT),true,maxFieldLength);FSWriter.setUseCompoundFile(true);SubindexBuilder(FSWriter,localPath);intnum=((Object)FSWriter).docCount();//这行在lucene3.0.3中有错请问怎么改呢FSWriter.optimize();FSWriter.close();returnnum;}/*========================================================*递归函数,递归分析目录,如果找到子目录,继续递归;如果找到文件分析索引*========================================================*/privatestaticvoidSubindexBuilder(IndexWriterfswriter,FilesubPath)throwsIOException{File[]filelist=subPath.listFiles();System.out.println(subPath.getAbsolutePath()+"路径个数"+filelist.length);for(inti=0;i<filelist.length;i++){Filefile=filelist[i];if(file.isDirectory()){SubindexBuilder(fswriter,file);}elseif(IsValidType(file.getName())){fileindexBuilder(fswriter,file);}}}/*========================================================*创建RAM内存索引,生成并添新文档。合并到本地磁盘索引当中*========================================================*/privatestaticvoidfileindexBuilder(IndexWriterfswriter,Filesubfile)throwsIOException{if(subfile.isHidden()||!subfile.exists()||!subfile.canRead()){return;}Directoryramdirectory=newRAMDirectory();MaxFieldLengthmaxFieldLength=newMaxFieldLength(10000);IndexWriterRAMWriter=newIndexWriter(ramdirectory,newStandardAnalyzer(Version.LUCENE_CURRENT),true,maxFieldLength);//Filefile=newFile(subfile);FileReaderfpReader=newFileReader(subfile);System.out.println("创建索引"+subfile.getCanonicalPath());Documentdocument=newDocument();Fieldfield_name=newField("name",subfile.getName(),Field.Store.YES,Field.Index.NOT_ANALYZED);document.add(field_name);Fieldfield_path=newField("path",subfile.getAbsolutePath(),Field.Store.YES,Field.Index.NOT_ANALYZED);document.add(field_path);Fieldfield_content=newField("content",fpReader);document.add(field_content);RAMWriter.addDocument(document);RAMWriter.close();fswriter.addIndexes(newDirectory[]{ramdirectory});}/*========================================================*判断当前文件名是否符合文件后缀要求*========================================================*/privatestaticbooleanIsValidType(Stringname){if(name.endsWith(".txt")||name.endsWith(".html")||name.endsWith(".ini")||name.endsWith(".conf")){returntrue;}else{returnfalse;}}}