luceneRAM索引

问题描述

packagechapter5;importjava.io.IOException;importjava.io.File;importjava.io.FileReader;importorg.apache.lucene.store.Directory;importorg.apache.lucene.store.FSDirectory;importorg.apache.lucene.store.RAMDirectory;importorg.apache.lucene.util.Version;importorg.apache.lucene.document.Field;importorg.apache.lucene.document.Document;importorg.apache.lucene.index.IndexWriter;importorg.apache.lucene.index.IndexWriter.MaxFieldLength;importorg.apache.lucene.analysis.standard.StandardAnalyzer;/********************************************************************本代码完成本地指定目录的遍历和文件查找。对指定后缀的文件进行分析，利用Lucene建立*索引，为后续检索使用做好准备。*******************************************************************/publicclassLuceneIndexLocalDisk{privatestaticStringDest_Index_Path="E:\workplace2\Lucenechapter2\workshop\TextIndex";privatestaticStringText_File_Path="E:\workplace2\Lucenechapter2\workshop\ch2\002\";/*========================================================*主函数，指定索引目录和待分析的目录，生成Lucene索引*========================================================*/publicstaticvoidmain(String[]args){Fileindexpath=newFile(Dest_Index_Path);FilelocalPath=newFile(Text_File_Path);try{intnums=indexBuilder(indexpath,localPath);System.out.println("IndexFinished"+nums+"docs");}catch(IOExceptione){e.printStackTrace();}}/*========================================================*索引创建函数，生成IndexWriter创建索引，调用子目录索引函数，并优化*存储本地磁盘索引*========================================================*/publicstaticintindexBuilder(FileindexPath,FilelocalPath)throwsIOException{if(!localPath.exists()||!localPath.isDirectory()||!localPath.canRead()){thrownewIOException(localPath+"不存在或者不允许访问");}System.out.println("目标路径完好");MaxFieldLengthmaxFieldLength=newMaxFieldLength(10000);IndexWriterFSWriter=newIndexWriter(FSDirectory.open(indexPath),newStandardAnalyzer(Version.LUCENE_CURRENT),true,maxFieldLength);FSWriter.setUseCompoundFile(true);SubindexBuilder(FSWriter,localPath);intnum=((Object)FSWriter).docCount();//这行在lucene3.0.3中有错请问怎么改呢FSWriter.optimize();FSWriter.close();returnnum;}/*========================================================*递归函数，递归分析目录，如果找到子目录，继续递归；如果找到文件分析索引*========================================================*/privatestaticvoidSubindexBuilder(IndexWriterfswriter,FilesubPath)throwsIOException{File[]filelist=subPath.listFiles();System.out.println(subPath.getAbsolutePath()+"路径个数"+filelist.length);for(inti=0;i<filelist.length;i++){Filefile=filelist[i];if(file.isDirectory()){SubindexBuilder(fswriter,file);}elseif(IsValidType(file.getName())){fileindexBuilder(fswriter,file);}}}/*========================================================*创建RAM内存索引，生成并添新文档。合并到本地磁盘索引当中*========================================================*/privatestaticvoidfileindexBuilder(IndexWriterfswriter,Filesubfile)throwsIOException{if(subfile.isHidden()||!subfile.exists()||!subfile.canRead()){return;}Directoryramdirectory=newRAMDirectory();MaxFieldLengthmaxFieldLength=newMaxFieldLength(10000);IndexWriterRAMWriter=newIndexWriter(ramdirectory,newStandardAnalyzer(Version.LUCENE_CURRENT),true,maxFieldLength);//Filefile=newFile(subfile);FileReaderfpReader=newFileReader(subfile);System.out.println("创建索引"+subfile.getCanonicalPath());Documentdocument=newDocument();Fieldfield_name=newField("name",subfile.getName(),Field.Store.YES,Field.Index.NOT_ANALYZED);document.add(field_name);Fieldfield_path=newField("path",subfile.getAbsolutePath(),Field.Store.YES,Field.Index.NOT_ANALYZED);document.add(field_path);Fieldfield_content=newField("content",fpReader);document.add(field_content);RAMWriter.addDocument(document);RAMWriter.close();fswriter.addIndexes(newDirectory[]{ramdirectory});}/*========================================================*判断当前文件名是否符合文件后缀要求*========================================================*/privatestaticbooleanIsValidType(Stringname){if(name.endsWith(".txt")||name.endsWith(".html")||name.endsWith(".ini")||name.endsWith(".conf")){returntrue;}else{returnfalse;}}}

时间： 2024-12-29 07:28:28

luceneRAM索引

问题描述

luceneRAM索引的相关文章

lucene 搜索-lucene对文件名、文件路径进行索引，搜索的时候不能检索出来

Apache索引目录浏览的学习笔记

懒人促进社会进步 - 5种索引的原理和优化Case (btree,hash,gin,gist,brin)

PostgreSQL 索引虚拟列 - 表达式索引 - JOIN提速

乱序写入导致的索引膨胀(B-tree, GIN, GiST皆如此)

java-用过LIRE的朋友，请问在建索引的时候能额外添加文本信息并在检索时可同时加入文本条件吗？

mysql索引与视图

如何监控ORACLE索引使用与否

一个例子与InnoDB索引的几个概念