|
1、中文分词(smartcn)
<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-analyzers-smartcn</artifactId>
    <version>5.3.1</version>
</dependency>
4 | G- O; `0 w% I. D2 z
/ D. ^1 v& K" B5 e$ v' H( {2 O: N7 ^
2、高亮显示
<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-highlighter</artifactId>
    <version>5.3.1</version>
</dependency>
! v2 D0 z1 @' ?
源码:
(1)
import java.nio.file.Paths;
' P/ @) |3 D2 O; A) e& k
% Y2 d- s8 m; A; ^4 aimport org.apache.lucene.analysis.Analyzer;! B: _/ a% V% ^; o: T
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
3 Z4 F2 W8 e, \" Bimport org.apache.lucene.analysis.standard.StandardAnalyzer;( F, E6 i; z# B; ?* Q& a* X) X
import org.apache.lucene.document.Document;+ S, j U6 d+ Z" y1 ]
import org.apache.lucene.document.Field;' K* {* b1 r- J
import org.apache.lucene.document.IntField;
8 k0 e# {5 Q* K+ kimport org.apache.lucene.document.StringField;
; P0 d+ {! C6 A6 T# rimport org.apache.lucene.document.TextField;
7 v6 P9 }/ M, n3 W: m2 bimport org.apache.lucene.index.IndexWriter;* L# _+ d: s! p6 b# A" m
import org.apache.lucene.index.IndexWriterConfig;# r0 a* O5 f+ x( V! F0 A( Z9 A
import org.apache.lucene.store.Directory;
( c! D4 K% R+ @# X$ V6 wimport org.apache.lucene.store.FSDirectory;. q* @. e, d6 { w& G/ l
- n6 R( k$ Q [% X3 H: o/ h4 `; _* Npublic class Indexer {
% {7 L( t5 |4 u6 I( e+ t% |' t
) R# D$ K3 Q( w' y$ n+ \; V* F private Integer ids[]={1,2,3};$ C( n3 [ i) F' [ @' Y5 \
private String citys[]={"青岛","南京","上海"};
/ K N0 Y/ [4 g6 X' U' {+ A private String descs[]={
0 @. _7 m) ?: n! R. ^ "青岛是一个美丽的城市。",
9 V& E8 @' [9 R3 j/ E: y" [2 @ "南京是一个有文化的城市。南京是一个文化的城市南京,简称宁,是江苏省会,地处中国东部地区,长江下游,濒江近海。全市下辖11个区,总面积6597平方公里,2013年建成区面积752.83平方公里,常住人口818.78万,其中城镇人口659.1万人。[1-4] “江南佳丽地,金陵帝王州”,南京拥有着6000多年文明史、近2600年建城史和近500年的建都史,是中国四大古都之一,有“六朝古都”、“十朝都会”之称,是中华文明的重要发祥地,历史上曾数次庇佑华夏之正朔,长期是中国南方的政治、经济、文化中心,拥有厚重的文化底蕴和丰富的历史遗存。[5-7] 南京是国家重要的科教中心,自古以来就是一座崇文重教的城市,有“天下文枢”、“东南第一学”的美誉。截至2013年,南京有高等院校75所,其中211高校8所,仅次于北京上海;国家重点实验室25所、国家重点学科169个、两院院士83人,均居中国第三。[8-10] 。",$ ^! A+ {8 E, P0 [+ Q1 x
"上海是一个繁华的城市。"
- w, K& A! q3 K5 d( U+ P };5 k' Z- ~! K) S* {# m; L2 r
: A, @, T8 o0 O, F' P6 k' C
private Directory dir;
( e6 a" h# s( B4 R" T
& ^6 K- X6 f# s, u z/ q# s. v8 } /**
, V. z! w2 D" X$ x * 获取IndexWriter实例% R! D( {- C# V: a; s) ]
* @return
0 s# u5 n" J& F" N * @throws Exception
% P, y+ G5 Q& N9 }* q i; Q */) j) Z% o) t1 @9 q3 a
private IndexWriter getWriter()throws Exception{, i/ C9 `4 `+ H
//Analyzer analyzer=new StandardAnalyzer(); // 标准分词器% v3 K8 {- G$ A$ c7 J
SmartChineseAnalyzer analyzer=new SmartChineseAnalyzer();
3 I4 u3 T0 u: v, W6 x1 D IndexWriterConfig iwc=new IndexWriterConfig(analyzer);' O8 K( _7 A2 C' x
IndexWriter writer=new IndexWriter(dir, iwc);
7 R% m6 v. ?# _ q7 A: Y return writer;
5 A0 B$ }5 X; E" q! B4 ]# y. W& J; k8 K5 F }
0 P5 F6 t1 t/ [1 W3 L r" }6 V, ]. F) N1 v& C j
/** m) G. M; F5 i- h
* 生成索引5 x8 c* Z4 {/ z2 r) E+ u5 B) ]6 _+ V
* @param indexDir
7 r8 ^4 Y* J- f: i2 P * @throws Exception
( F$ Y( ^$ L. r3 \) { */* O3 h, A0 U- R& ]# x$ P
private void index(String indexDir)throws Exception{1 _$ Q# o% i/ \8 f8 P6 s
dir=FSDirectory.open(Paths.get(indexDir));4 `( X c) C) J6 i: t' u5 j1 P5 y
IndexWriter writer=getWriter();0 E) Y. h6 \1 M% a/ R8 s$ A, E
for(int i=0;i<ids.length;i++){
( t5 C/ Q8 V4 w8 Y% F0 ^ Document doc=new Document();
' y1 g" R J- v, L doc.add(new IntField("id", ids, Field.Store.YES));
1 @5 ^0 s* ]$ B doc.add(new StringField("city",citys,Field.Store.YES));+ }$ F! u- V& A. H& [8 ^
doc.add(new TextField("desc", descs, Field.Store.YES));
8 h3 h* j) F1 ?" E2 h, E& n& d writer.addDocument(doc); // 添加文档/ G, V& N4 B9 [& j. v
}# d, B4 n5 \6 f6 [- \+ z2 ?
writer.close();
8 w8 j' D. O% k" R9 Y9 U( d, I }
9 O3 k% d! o3 {6 _) t5 t0 m9 S s. X$ g+ P
# P% F0 x/ d) w2 t$ y public static void main(String[] args) throws Exception {. S9 I3 q. J% u
new Indexer().index("D:\\lucene6");
4 N f# i5 M6 \0 s3 O( w: m7 k& u4 L }& H6 h& X3 f! H5 j, @$ z+ [6 n
( o @) S, }! Q% G3 ^ @4 z) g6 |}' q5 Q& m8 B% r7 Q; J U
% ^4 M, `2 y* w# X. b: l8 c
2 [& D& r* Y; h. }, y4 \: D% Q1 l& Q0 X. _* h. l3 X d8 h
(2)
import java.io.StringReader;& g U( M$ h$ a! L
import java.nio.file.Paths;4 h2 l `( L; g% u. G: k
! I c$ D/ Z6 Z/ l/ V8 o
import org.apache.lucene.analysis.Analyzer;
- y" S" A& K1 C, m+ oimport org.apache.lucene.analysis.TokenStream;
: ]: _0 K$ y1 P( b3 H" N2 Dimport org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
: ~' }7 E6 e4 D/ ?2 rimport org.apache.lucene.analysis.standard.StandardAnalyzer;; ~+ y1 M7 B1 e' H. \5 ]* q
import org.apache.lucene.document.Document;
# G& j, t, Q) c4 g6 ^, ximport org.apache.lucene.index.DirectoryReader;/ X0 m3 |9 ?5 K4 {
import org.apache.lucene.index.IndexReader;1 @/ l E- Z7 p# A# i
import org.apache.lucene.queryparser.classic.QueryParser;
& e& Z L4 a5 Yimport org.apache.lucene.search.IndexSearcher;4 @- h# u% w2 ~) c; U% d+ @
import org.apache.lucene.search.Query;! w5 l0 r- H6 E" F" ?9 z8 ~7 D
import org.apache.lucene.search.ScoreDoc;& T8 ~9 r8 e9 c' w- P1 i9 f5 M
import org.apache.lucene.search.TopDocs;
+ Y+ e* ?3 {* J: f2 Q" ]- X8 [import org.apache.lucene.search.highlight.Fragmenter;7 B" Q1 x, `. @9 A) T( r
import org.apache.lucene.search.highlight.Highlighter;4 p. e' [: J, [, ~# p& g& Q
import org.apache.lucene.search.highlight.QueryScorer; \8 s3 P/ i+ w# _. J. n/ h) m( X
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;1 U6 l4 @/ p* ~6 R
import org.apache.lucene.search.highlight.SimpleSpanFragmenter;' n# ~/ g7 ]4 t; f
import org.apache.lucene.store.Directory;& c9 R" g! K% e+ r( ?7 _8 g# ^
import org.apache.lucene.store.FSDirectory;+ [. S2 W- }4 ?! Y
' U3 @( n( l& A5 M: z
public class Searcher {4 @) D/ h( Z0 W) e- F7 B+ ?
6 {* Q/ h9 K a: W5 J7 U. A- _2 ? public static void search(String indexDir,String q)throws Exception{
2 J. m! T' d0 f5 Y% P3 } Directory dir=FSDirectory.open(Paths.get(indexDir));
# I# |# q ]: H IndexReader reader=DirectoryReader.open(dir); A2 I# [7 O( E9 P% I
IndexSearcher is=new IndexSearcher(reader);
* p0 O( l1 \/ p3 W; G+ j // Analyzer analyzer=new StandardAnalyzer(); // 标准分词器
$ M' v/ G4 Y/ v$ K SmartChineseAnalyzer analyzer=new SmartChineseAnalyzer();
# g. m2 b; K" W. ~8 g8 G- N, s QueryParser parser=new QueryParser("desc", analyzer);% D5 a2 @7 \+ L( d; T
Query query=parser.parse(q);# c3 ^1 J h( i
long start=System.currentTimeMillis();
n4 M1 k% u% b! {8 d* I/ n+ A0 c* S+ B TopDocs hits=is.search(query, 10);# q9 ^3 w8 g/ Q
long end=System.currentTimeMillis();
/ d1 Y% s" N' M! C7 I/ O8 M System.out.println("匹配 "+q+" ,总共花费"+(end-start)+"毫秒"+"查询到"+hits.totalHits+"个记录");
# q# R$ M. t- P+ D, h( l: t9 b" c2 i4 N& Q2 X
QueryScorer scorer=new QueryScorer(query);( y2 x5 A0 q2 s6 u: K
Fragmenter fragmenter=new SimpleSpanFragmenter(scorer);# ]/ H! v# ^/ G) W) l
SimpleHTMLFormatter simpleHTMLFormatter=new SimpleHTMLFormatter("<b><font color='red'>","</font></b>");' w9 `6 y" P2 l4 r" f8 B
Highlighter highlighter=new Highlighter(simpleHTMLFormatter, scorer);
) v) m( V" k3 ~# g7 T8 Q highlighter.setTextFragmenter(fragmenter);
( C9 r+ F- ^/ Z# { _ for(ScoreDoc scoreDoc:hits.scoreDocs){& T- y1 Z7 d6 ]
Document doc=is.doc(scoreDoc.doc);9 e& t) r# ]# q0 }/ n5 x! S9 ]# ^, x
System.out.println(doc.get("city"));9 G3 Y& G# G7 v! ~
System.out.println(doc.get("desc"));
8 x9 q- w A# e) r( I$ g% u; D String desc=doc.get("desc");
) ^1 j4 M4 K7 \. a: i- S if(desc!=null){
2 c7 w( V/ C5 s) Z! q TokenStream tokenStream=analyzer.tokenStream("desc", new StringReader(desc));, u1 l) I8 i' _
System.out.println(highlighter.getBestFragment(tokenStream, desc));
0 `( A$ S7 v( F }. J1 V! b- F5 _8 e- o
}& _ E: k' b) K7 w1 _
reader.close();
; S# b3 ~) Y2 c% W }% |; w, h- T% |9 Q4 A7 M
9 k4 p5 Q% c- F8 ^, j9 z' T$ l* V+ Y* c public static void main(String[] args) {
. H0 y6 Y6 H; O/ p( Q7 k" j _5 L" s2 w String indexDir="D:\\lucene6";6 |1 m) T# p3 y# f1 m' n
String q="南京文明";
$ `1 k5 X6 f- i8 _* h5 W try {0 h+ I1 G9 ?3 |
search(indexDir,q);7 d9 P) |9 J2 D' |" ]
} catch (Exception e) {
# z: c7 \) V; [; ~& L // TODO Auto-generated catch block
7 |/ \! z% O: M) } e.printStackTrace();% V8 P* y+ w: _ V6 }; i
}# ]! w# r; {8 |* T
}
' F. J" |: J* m% T0 }}
" g# {# `2 c4 L- }0 K1 h
$ W! ^! n5 F, Q/ ^, L, }8 Y3 g& X% P- e( M4 {4 u9 c' ?3 |7 A! W
( T+ E( V7 ]' V) `) Z' E* f
3 M& u) l# t2 P U" C5 P7 Z1 `& m3 s6 ^
|
|