【第三节】Lucene5文档域加权

javazx · 发表于 2016-6-20 14:49:36

1、源码
import java.nio.file.Paths;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.junit.Test;

public class IndexingTest2 {

private String ids[]={"1","2","3","4"};
private String authors[]={"Jack","Marry","John","Json"};
private String positions[]={"accounting","technician","salesperson","boss"};
private String titles[]={"Java is a good language.","Java is a cross platform language","Java powerful","You should learn java"};
private String contents[]={
         "If possible, use the same JRE major version at both index and search time.",
         "When upgrading to a different JRE major version, consider re-indexing. ",
         "Different JRE major versions may implement different versions of Unicode,",
         "For example: with Java 1.4, `LetterTokenizer` will split around the character U+02C6,"
};

private Directory dir;

/**
   * 获取IndexWriter实例
   * @return
   * @throws Exception
   */
private IndexWriter getWriter()throws Exception{
      Analyzer analyzer=new StandardAnalyzer(); // 标准分词器
      IndexWriterConfig iwc=new IndexWriterConfig(analyzer);
      IndexWriter writer=new IndexWriter(dir, iwc);
      return writer;
}

/**
   * 生成索引
   * @throws Exception
   */
@Test
public void index()throws Exception{
      dir=FSDirectory.open(Paths.get("D:\\lucene3"));
      IndexWriter writer=getWriter();
      for(int i=0;i<ids.length;i++){
         Document doc=new Document();
         doc.add(new StringField("id", ids, Field.Store.YES));7 ^6 r0 x/ k# D
         doc.add(new StringField("author",authors,Field.Store.YES));' }. A7 H6 T0 n5 @
         doc.add(new StringField("position",positions,Field.Store.YES));0 G8 G' J8 M8 r" y
         // 加权操作
" L' I1 R; }  L3 ^          TextField field=new TextField("title", titles, Field.Store.YES);
6 |) N1 U" F* Z, ?) B; ~          if("boss".equals(positions)){  `0 x& @' c1 I" u& e" m# j4 w$ J: e3 ~
            field.setBoost(1.5f);
; l9 [+ M# t& r; a' ^          }
$ F/ U( V* ]' z6 P0 j, d- c% _          doc.add(field);
+ y% k  Z0 r- U, o, w) p          doc.add(new TextField("content", contents, Field.Store.NO));
7 L# h! U% ^; G9 v, n: p          writer.addDocument(doc); // 添加文档4 q  Z8 {6 k" \- S+ J
      }
4 T' U/ v6 r2 j1 J+ h7 x       writer.close();
; |0 p( M' [3 N* ^ }
) N% |  K- @3 G* p3 j; l' A
5 W* w/ ~; }- ]8 D /**
3 \. Q  x% F- @1 W% ]1 G    * 查询! O) w4 D; g) p, x  J
   * @throws Exception" k% \/ n( C8 s6 I8 ~% r: m/ m
   */4 v4 a% z* v% o2 W5 j+ @
@Test
) w0 f+ K6 }5 M. R public void search()throws Exception{
2 v4 g$ |; Q( H0 |       dir=FSDirectory.open(Paths.get("D:\\lucene3"));7 ]& I; k; {; w, Z7 e6 ~. o+ J/ ?. J
      IndexReader reader=DirectoryReader.open(dir);6 r& m% B5 E) u: ~7 s
      IndexSearcher is=new IndexSearcher(reader);1 k8 V1 ?0 S' G! r  W' G, h
      String searchField="title";7 `" l+ Z/ m+ T; N
      String q="java";& K' h5 v! E! r
      Term t=new Term(searchField,q);
- c5 t7 Y7 d; ~/ t. p1 y; r       Query query=new TermQuery(t);" A. a+ E1 g' |. z! K
      TopDocs hits=is.search(query, 10);
) q. @+ z/ V  v9 m8 Y' l# q       System.out.println("匹配 '"+q+"'，总共查询到"+hits.totalHits+"个文档");; B) I; @6 R% N5 W& l7 K
      for(ScoreDoc scoreDoc:hits.scoreDocs){( v+ v- H% m. J& `) M( M
         Document doc=is.doc(scoreDoc.doc);
. X6 ]4 O2 ^1 @0 X          System.out.println(doc.get("author"));. f& u* _1 E4 o) w( {
      }9 k7 O; ~# u" b' Z2 c1 u
      reader.close();
* [/ m9 P- X" o: m9 [! Z0 O }8 }& i( r) c9 ]4 R* L
; L# b) w) N+ e% e7 j
}
# W2 U# |! z1 l- ?- u1 w  W# Q0 X- e. S! g

1 I/ m) J5 b& u6 m/ c$ Y6 C/ B
; f1 h0 J4 Z' G  |7 p4 i* J

学java来尚学堂 · 发表于 2016-10-29 14:14:51

看了那么多，还是觉得我参加的北京尚学堂教得好。包教包会，而且毕业就有1W的工资在手。一起来学习吧。

wwwfasss · 发表于 2016-10-30 12:24:00

Java自学网给力，亲测资源可用。

		自动登录	找回密码
密码			立即注册

【第三节】Lucene5文档域加权

相关帖子

宣传达人

突出贡献

优秀版主

荣誉管理

论坛元老