【第三节】Lucene5文档域加权

javazx · 发表于 2016-6-20 14:49:36

1、源码
import java.nio.file.Paths;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.junit.Test;

public class IndexingTest2 {

private String ids[]={"1","2","3","4"};
private String authors[]={"Jack","Marry","John","Json"};
private String positions[]={"accounting","technician","salesperson","boss"};
private String titles[]={"Java is a good language.","Java is a cross platform language","Java powerful","You should learn java"};
private String contents[]={
         "If possible, use the same JRE major version at both index and search time.",
         "When upgrading to a different JRE major version, consider re-indexing. ",
         "Different JRE major versions may implement different versions of Unicode,",
         "For example: with Java 1.4, `LetterTokenizer` will split around the character U+02C6,"
};

private Directory dir;

/**
   * 获取IndexWriter实例
   * @return
   * @throws Exception
   */
private IndexWriter getWriter()throws Exception{
      Analyzer analyzer=new StandardAnalyzer(); // 标准分词器
      IndexWriterConfig iwc=new IndexWriterConfig(analyzer);
      IndexWriter writer=new IndexWriter(dir, iwc);
      return writer;
}

/**
   * 生成索引
   * @throws Exception
   */
@Test
public void index()throws Exception{
      dir=FSDirectory.open(Paths.get("D:\\lucene3"));
      IndexWriter writer=getWriter();
      for(int i=0;i<ids.length;i++){
         Document doc=new Document();
         doc.add(new StringField("id", ids, Field.Store.YES));
! U) t6 W* B: X" Z( X. E6 ~4 i7 o! t          doc.add(new StringField("author",authors,Field.Store.YES));
6 B. g" H# p" p, O          doc.add(new StringField("position",positions,Field.Store.YES));
7 h, |) r; ^" t- g- T5 ?$ c4 @6 S          // 加权操作
2 P8 n3 b) }# Z6 G# H          TextField field=new TextField("title", titles, Field.Store.YES);
, }: z  t  ]. ^# K          if("boss".equals(positions)){
' g6 ^9 |& K4 H) c3 ?/ e/ Y             field.setBoost(1.5f);
# e  G) a" I9 B6 F          }% L% T% W1 F! D6 i
         doc.add(field);/ B& B# B9 h4 V, b: n( i% W+ X
         doc.add(new TextField("content", contents, Field.Store.NO));
1 H. c0 F/ ]) {: o  U          writer.addDocument(doc); // 添加文档
) P% H1 r) \6 E- m       }
3 u7 [! B% _4 S# B- o( O1 q" a       writer.close();' d! ~4 [6 e" g  V1 X) X
}
- D( [& ^! S! [! `! {3 s
7 c5 }1 D' f! o) _ /**
& K# c+ Z( v8 z/ {, C$ ~    * 查询
  M) d6 F3 q3 |- o9 A0 j    * @throws Exception% a( k" E5 L% F
   */
0 |9 D0 f( v; j* @; @# \ @Test5 S6 V4 L/ \4 }( r! i
public void search()throws Exception{4 @9 ?# B# W7 `1 P$ a
      dir=FSDirectory.open(Paths.get("D:\\lucene3"));
$ J$ ?. s' i& S8 ?       IndexReader reader=DirectoryReader.open(dir);: G" X$ k) N( t, r
      IndexSearcher is=new IndexSearcher(reader);
7 X2 |0 ?( |0 Q$ o. ~0 g# b       String searchField="title";
( b1 Q$ \# T% m" O- g       String q="java";
- J' A" @/ K2 \! B       Term t=new Term(searchField,q);1 G  m2 m; z2 z1 e
      Query query=new TermQuery(t);. j% Q0 N8 ^9 j
      TopDocs hits=is.search(query, 10);+ O4 l* F) c9 |  ?# `3 ^+ Z" k
      System.out.println("匹配 '"+q+"'，总共查询到"+hits.totalHits+"个文档");# a2 C7 c. U* f) p
      for(ScoreDoc scoreDoc:hits.scoreDocs){/ J+ ^3 S4 M- ~6 b
         Document doc=is.doc(scoreDoc.doc);
9 @7 U% H5 K! c( ~8 v& J0 r) S          System.out.println(doc.get("author"));
# \! R0 B8 _% x1 E. I       }
4 c1 Q8 Z8 z. e* D; u! j1 q7 K; U       reader.close();
1 }9 ]& T8 d2 D  \/ w# k# l' j6 p }* {6 s8 g' J: f/ I. \) f7 D' _

2 G( G- }# n$ B3 m% I$ E}
1 b$ z! L( c$ s9 l% f, C/ a) ^  \2 p9 Q( M- Q

3 @5 J" m! u& {% n; }5 w& X
# l* @9 {- F0 b! M: s: E2 z

学java来尚学堂 · 发表于 2016-10-29 14:14:51

看了那么多，还是觉得我参加的北京尚学堂教得好。包教包会，而且毕业就有1W的工资在手。一起来学习吧。

wwwfasss · 发表于 2016-10-30 12:24:00

java自学网给力，亲测资源可以。

		自动登录	找回密码
密码			立即注册

【第三节】Lucene5文档域加权

相关帖子

宣传达人

突出贡献

优秀版主

荣誉管理

论坛元老