Creating, updating, deleting and querying a Lucene index

// Create the index
public static String index(HttpServletRequest request, FileService fileListService ,String fromPath, String toIndexPath) throws Exception {
if (StringUtils.isEmpty(toIndexPath)) {
toIndexPath = Constants.TOINDEXPATH;
}
if (fromPath.equals(Constants.FROMPATH)) {
// Note: File.delete() only removes an empty directory; the index is rebuilt anyway because OpenMode.CREATE is set below
new File(Constants.TOINDEXPATH).delete();
}
File docFile = new File(fromPath);
File indexFile = new File(toIndexPath);
if (!docFile.exists()) {
docFile.mkdirs();
}
indexFile.delete();
if (!indexFile.exists()) {
indexFile.mkdirs();
}
if (!docFile.canRead()) {
return "您所选择的文件夹不存在或者没有访问权限!文件路径:" + docFile.getAbsolutePath();
}
Date start = new Date();
IndexWriter writer = null;
try {
Directory directory = FSDirectory.open(new File(toIndexPath)); // use RAMDirectory instead for an in-memory index
Analyzer analyzer = new IKAnalyzer(true); // IKAnalyzer is a Chinese tokenizer; recommended when the content contains Chinese
// Index writer configuration
IndexWriterConfig cfg = new IndexWriterConfig(Version.LUCENE_40, analyzer);
cfg.setRAMBufferSizeMB(200).setMaxBufferedDocs(20000);
cfg.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
// Create the index writer
writer = new IndexWriter(directory, cfg);
MemoryMXBean memorymbean = ManagementFactory.getMemoryMXBean();
MemoryUsage usage = memorymbean.getHeapMemoryUsage();
logger.info("INIT HEAP: " + usage.getInit());
logger.info("MAX HEAP: " + usage.getMax());
logger.info("USE HEAP: " + usage.getUsed());

// Start building the index
indexDoc(request,fileListService,writer, docFile, "create");
} catch (Exception e) {
logger.info(e.getMessage());
e.printStackTrace();
} finally {
if (writer != null) writer.close();
}
Date end = new Date();
seeVMStatus();
double useTime = (double) (end.getTime() - start.getTime()) / (1000);
logger.info("所有文件建立索引完毕,耗时:" + useTime + "秒");
return "所有文件建立索引完毕,耗时:" + useTime + "秒";
}
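For reference, a hypothetical call site could look like the sketch below; the fileService wiring and the two paths are illustrative assumptions, not part of the original code.

// Hypothetical call site (service wiring and paths are illustrative assumptions)
String message = index(request, fileService, "D:/docs", "D:/lucene-index");
logger.info(message);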

static void indexDoc(HttpServletRequest request, FileService fileListService,IndexWriter writer, File file, String type) throws Exception {
if (file.canRead()) {
if (file.isDirectory()) {
// Collect every file under the directory and index each one
List<File> fileList = FileUtils.getFileList(file, true);
logger.info("Found " + fileList.size() + " files in total");

for (File file1 : fileList) {
indexDocOne(request,fileListService,writer, file1, type);
}
} else if (file.isFile()) {
// Index a single file
indexDocOne(request,fileListService,writer, file, type);
}
}else{
logger.info("File is not readable; removing it from the index");
deleteDoc(null,fileListService,file.getCanonicalPath());
}
}

static void indexDocOne(HttpServletRequest request, FileService fileListService,IndexWriter writer, File file, String type) {
logger.info("**********文件:" + file.getAbsolutePath() + "正在建立索引********************");

try {
// Get the current user from the session
User user = (User) request.getSession().getAttribute("user");
// Files modified within the last 3 days are flagged as new
Calendar calendar = Calendar.getInstance();
calendar.add(Calendar.DATE, -3);
Long threeDaysAgo = calendar.getTime().getTime();

// Open a database connection
Files files;
Connection conn = SQLiteUtils.openConn();
fileListService.init(conn);
try {
Document doc = new Document();
doc.add(new TextField("filename", file.getName(), Field.Store.YES));
doc.add(new StringField("id", String.valueOf(file.hashCode()), Field.Store.YES));
doc.add(new StringField("createtime", String.valueOf(file.lastModified()), Field.Store.YES));
doc.add(new StringField("fullpath", file.getCanonicalPath(), Field.Store.YES));

if (type.equals("create")) {
writer.addDocument(doc);
} else if (type.equals("update")) {
writer.updateDocument(new Term("id", String.valueOf(file.hashCode())), doc);
}
// Insert or update the file record in the database
files = fileListService.getFileByPath(conn, file.getCanonicalPath().replaceAll("\\\\", "/"));
if (null == files.getFileName() || "null".equals(files.getFileName())) {
files = new Files();
files.setDoc_id(Long.parseLong(String.valueOf(file.hashCode())));
files.setFileName(file.getName());
files.setFullPath(file.getCanonicalPath().replaceAll("\\\\", "/"));
files.setFileCreate(new Timestamp(file.lastModified() > 0 ? file.lastModified() : new Date().getTime()));
files.setFileCreateBy(user.getUserName());
files.setFileUpdate(new Timestamp(new Date().getTime()));
files.setFileUpdateBy(user.getUserName());
} else {
files.setFileUpdate(new Timestamp(new Date().getTime()));
files.setFileUpdateBy(user.getUserName());
}
if (file.lastModified() > threeDaysAgo && file.lastModified() <= new Date().getTime()) {
files.setNewFile(1);
}
fileListService.saveFile(conn, files);
} catch (Exception e) {
e.printStackTrace();
} finally {
SQLiteUtils.closeConn(conn);
}
} catch (Exception e) {
e.printStackTrace();
}
}
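Note that doSearch below queries a "content" field, but indexDocOne only adds filename, id, createtime and fullpath, so the full text is presumably extracted elsewhere in the project. A minimal sketch of adding a plain-text file's body next to the other doc.add(...) calls, assuming UTF-8 plain-text files (office formats would need a parser such as Apache Tika):

// Sketch only: store the file body so the "content" field can be searched and highlighted.
// Assumes UTF-8 plain text; fully qualified to avoid clashing with the Files entity class above.
String body = new String(java.nio.file.Files.readAllBytes(file.toPath()),
java.nio.charset.StandardCharsets.UTF_8);
doc.add(new TextField("content", body, Field.Store.YES));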
/**
* Log JVM memory usage
*/
private static void seeVMStatus() {
MemoryMXBean memorymbean = ManagementFactory.getMemoryMXBean();
logger.info("JVM Full Information:");
logger.info("Heap Memory Usage: "
+ memorymbean.getHeapMemoryUsage());
logger.info("Non-Heap Memory Usage: "
+ memorymbean.getNonHeapMemoryUsage());
}

// Search
public static List<JSONObject> doSearch(String indexPath, String queryString, String path) throws Exception {
Query query;
String content = "content";
if (StringUtils.isEmpty(indexPath)) {
indexPath = Constants.TOINDEXPATH;
}
if (StringUtils.isEmpty(path)) {
path = Constants.FROMPATH;
}
// Open the index directory, creating the folder if it does not exist
File file = new File(indexPath);
if (!file.exists()) {
boolean i = file.mkdirs();
}
// Directory to search
Directory directory = FSDirectory.open(file);
IndexReader reader = DirectoryReader.open(directory);
IndexSearcher searcher = new IndexSearcher(reader);

// Build the query
//Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_40);
Analyzer analyzer = new IKAnalyzer(true);
// When searching with QueryParser, the analyzer must be the same one that was used at index time
String[] fields = new String[]{"filename", "content"};
// Argument: the Lucene query string (boolean operators such as AND/OR must be upper case)
query = new MultiFieldQueryParser(Version.LUCENE_40, fields, analyzer).parse(queryString);
Date start = new Date();
//logger.info("查询关键字:" + query.toString());
// 通过searcher来搜索索引库
// 第二个参数:指定需要显示的顶部记录的N条
TopDocs results = searcher.search(query, 300);

// Total number of matching documents
int count = results.totalHits;
logger.info("Total hits: " + count);
// The matching documents themselves
ScoreDoc[] hits = results.scoreDocs;
List<JSONObject> list = new ArrayList<JSONObject>();
for (ScoreDoc sdoc : hits) {
// Internal Lucene document id
int docId = sdoc.doc;
JSONObject jsonObject = new JSONObject();
// Fetch the stored document by id
Document doc = searcher.doc(docId);
String filePath = doc.get("fullpath").replaceAll("\\\\", "/");
if (filePath.contains(path)) {
String filename = "";
try {
filename = displayHtmlHighlight(query, analyzer, "filename", doc.get("filename"), 60);
} catch (Exception e) {
e.printStackTrace();
}
if (StringUtils.isEmpty(filename)) {
if (StringUtils.isNotBlank(doc.get("filename"))) {
filename = doc.get("filename");
} else {
continue;
}
}
if (filename.startsWith("~")) {
continue;
}
jsonObject.put("id", doc.get("id"));
jsonObject.put("name", filename);
jsonObject.put("filename", doc.get("filename"));
jsonObject.put("path", doc.get("fullpath").replaceAll("\\\\", "/"));
jsonObject.put("createtime", doc.get("createtime"));
String text = "";
if (null != doc.get(content)) {
String con = doc.get(content);
if (StringUtils.isNotBlank(con)) {
if (con.contains("</div>")) {
continue;
}
}
text = displayHtmlHighlight(query, analyzer, content, doc.get(content), 200);
}
if (text != null) {
Pattern p = Pattern.compile("\t|\r|\n");
Matcher m = p.matcher(text);
text = m.replaceAll("");
}
jsonObject.put("text", text);
jsonObject.put("results", text);
list.add(jsonObject);
}
}
Date end = new Date();
// Release resources
reader.close();
logger.info("Search took " + (end.getTime() - start.getTime()) + " ms");
return list;
}
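A hypothetical call, with the index path, document root and keyword as illustrative values; boolean operators inside the query string must be upper case:

// Hypothetical call (paths and keyword are illustrative)
List<JSONObject> hits = doSearch("D:/lucene-index", "lucene AND 索引", "D:/docs");
for (JSONObject hit : hits) {
logger.info(hit.get("name") + " -> " + hit.get("path"));
}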


/**
* Delete the index entry (and the database record) for the given path
*/
public static String deleteDoc(Connection conn,FileService fileListService,String path) {
try {
File file = new File(path);
IndexWriterConfig cfg = new IndexWriterConfig(Version.LUCENE_40, null);
Directory directory = FSDirectory.open(new File(Constants.TOINDEXPATH)); // index storage directory
IndexWriter writer = new IndexWriter(directory, cfg);
if(conn==null){
conn = SQLiteUtils.openConn();
}
fileListService.delPath(conn,file.getCanonicalPath().replaceAll("\\\\", "/"));
writer.deleteDocuments(new Term("fullpath", file.getCanonicalPath()));
writer.commit();
writer.close();
logger.info("删除完成!");
} catch (Exception e) {
logger.info(e.getMessage());
e.getStackTrace();
}
return "删除索引成功";
}
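Because fullpath is indexed as a StringField (not analyzed), deleteDocuments only removes entries whose indexed path matches the given term exactly; a difference in case or slash direction would silently delete nothing.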


/**
* Update the index for the given path
*/
public static String updateIndex(HttpServletRequest request, FileService fileListService,String path) throws Exception {
// Create the IndexWriter
Analyzer analyzer = new IKAnalyzer(true);
// Analyzer analyzer = new ChineseWordAnalyzer();
Directory directory = FSDirectory.open(new File(Constants.TOINDEXPATH)); // index storage directory
IndexWriterConfig cfg = new IndexWriterConfig(Version.LUCENE_40, analyzer);
IndexWriter writer = new IndexWriter(directory, cfg);
// Re-index the file (or directory) at the given path; updateDocument replaces any existing entry with the same id
Date start = new Date();
File file = new File(path);
indexDoc(request,fileListService,writer, file, "update");
Date end = new Date();
seeVMStatus();
writer.commit();
writer.close();
logger.info("索引修改完毕,耗时:"
+ (double) (end.getTime() - start.getTime()) / (1000)
+ "秒");
return "修改索引完毕,耗时:"
+ (double) (end.getTime() - start.getTime()) / (1000)
+ "秒";
}

// HTML highlighting of matched terms
private static String displayHtmlHighlight(Query query, Analyzer analyzer, String fieldName, String fieldContent, int fragmentSize) throws IOException, InvalidTokenOffsetsException {
// Create a highlighter that wraps matched terms in a red <font> tag
Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter("<font color='red'>", "</font>"), new QueryScorer(query));
Fragmenter fragmenter = new SimpleFragmenter(fragmentSize);
highlighter.setTextFragmenter(fragmenter);
return highlighter.getBestFragment(analyzer, fieldName, fieldContent);
}
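Note that Highlighter.getBestFragment returns null when none of the query terms occur in the supplied field value, which is why doSearch above falls back to the raw filename and null-checks the highlighted content.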
