|
1、中文分词(smartcn)
<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-analyzers-smartcn</artifactId>
    <version>5.3.1</version>
</dependency>

2、高亮显示
<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-highlighter</artifactId>
    <version>5.3.1</version>
</dependency>

源码:
(1)
import java.nio.file.Paths;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.IntField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class Indexer {; W# Y6 V3 n8 u* b+ P% L# K1 o, ]
) r4 i( k- n" D" x private Integer ids[]={1,2,3};- Z( A, V3 M: `/ U G1 @
private String citys[]={"青岛","南京","上海"};
' y q6 @9 r+ n) g& L, l3 D private String descs[]={9 F! e2 K9 E7 I8 _$ s% X
"青岛是一个美丽的城市。",9 l, {$ f. y. r* p
"南京是一个有文化的城市。南京是一个文化的城市南京,简称宁,是江苏省会,地处中国东部地区,长江下游,濒江近海。全市下辖11个区,总面积6597平方公里,2013年建成区面积752.83平方公里,常住人口818.78万,其中城镇人口659.1万人。[1-4] “江南佳丽地,金陵帝王州”,南京拥有着6000多年文明史、近2600年建城史和近500年的建都史,是中国四大古都之一,有“六朝古都”、“十朝都会”之称,是中华文明的重要发祥地,历史上曾数次庇佑华夏之正朔,长期是中国南方的政治、经济、文化中心,拥有厚重的文化底蕴和丰富的历史遗存。[5-7] 南京是国家重要的科教中心,自古以来就是一座崇文重教的城市,有“天下文枢”、“东南第一学”的美誉。截至2013年,南京有高等院校75所,其中211高校8所,仅次于北京上海;国家重点实验室25所、国家重点学科169个、两院院士83人,均居中国第三。[8-10] 。",
7 f% I5 t$ Q( k% p "上海是一个繁华的城市。"/ h; Q7 Y! B3 s. P1 G& j
};& Y6 e6 w. p1 u3 U9 C3 Y
- i9 S$ {' {, P0 M4 s( W1 f private Directory dir;
: _- J Q4 r& q$ {7 n! C+ W0 T+ I
, n) g- D6 p1 J /**
$ k, t2 t" k1 U0 p2 E5 i * 获取IndexWriter实例2 ?/ J6 d( a' {9 u7 s0 l( s" c
* @return
# X+ k5 h' k/ c/ ~* q * @throws Exception
( ]) r! y% U, l. V! W; S" h: { */
z7 n+ C3 x5 X: d H private IndexWriter getWriter()throws Exception{/ V/ F8 F: H7 `8 l |
//Analyzer analyzer=new StandardAnalyzer(); // 标准分词器
7 ^/ i1 x% Q) R SmartChineseAnalyzer analyzer=new SmartChineseAnalyzer();2 o( m9 T: e1 M7 D h1 \
IndexWriterConfig iwc=new IndexWriterConfig(analyzer);1 x9 F2 i* S0 W. K+ q+ B o
IndexWriter writer=new IndexWriter(dir, iwc);
5 }& C: S5 G" C2 z return writer;* _; u; w- G5 S; }/ B3 i
}
) Q" h8 J ]" R6 c6 {' Q; S* s) t. z( y# j
/**
1 l& A @$ p* G * 生成索引
( e( \) H# ~6 C; | * @param indexDir: i! S0 R! r2 K, r
* @throws Exception
' H6 L. r( B9 g, W" }9 Z$ w */
" } u* {/ `1 X/ a0 z3 l% c" e private void index(String indexDir)throws Exception{
1 X% @& u* V3 W; _ dir=FSDirectory.open(Paths.get(indexDir));$ V3 d7 W0 q% j+ P% r6 X6 R
IndexWriter writer=getWriter();
/ f7 r! }# w0 o A9 L4 T# i for(int i=0;i<ids.length;i++){
5 M" X V. K! z* M) G# X4 z Document doc=new Document();
3 j4 y5 Z6 _' q1 h- M+ I% k. I* Q- P doc.add(new IntField("id", ids, Field.Store.YES));
: w' E* H5 n, c) M5 R; r$ Q9 b doc.add(new StringField("city",citys,Field.Store.YES));
* ^( Y j1 a( O* E+ h( h doc.add(new TextField("desc", descs, Field.Store.YES));( H$ Q- x- N/ V! {- ?5 g
writer.addDocument(doc); // 添加文档2 G* \; s# {( ^- H& `( m) }
}- y* w* E9 k* J [' D
writer.close();, S1 o. Y- p4 _
}. g4 g: j6 k% K1 i# i+ v
/ o" ^& y4 Y4 T& o# j% L* c$ x/ v# `2 ]. U0 q
public static void main(String[] args) throws Exception {3 R" D6 K/ Z* k0 Q9 n5 y8 l
new Indexer().index("D:\\lucene6");
2 b. i) l! i, d) S1 ? }
* I: Q7 m6 p: \; }& X+ S3 h0 c% G8 i- ]' k' B& B5 C
}

(2)
import java.io.StringReader;
import java.nio.file.Paths;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

* U7 q& H5 M0 c- Tpublic class Searcher {
/ K9 N' R0 g$ d, p* v& d
! a2 ~" n! p7 m5 _! d public static void search(String indexDir,String q)throws Exception{) K5 W! ?) A" p
Directory dir=FSDirectory.open(Paths.get(indexDir));$ f2 H* s' o$ j' Q$ k$ {
IndexReader reader=DirectoryReader.open(dir);
$ \! r, W( |- E6 Q. n3 x( E. V IndexSearcher is=new IndexSearcher(reader);' K1 N4 @0 w* `% v! B" K! r
// Analyzer analyzer=new StandardAnalyzer(); // 标准分词器
1 s- S, P: ~% F0 v, ?' _0 b SmartChineseAnalyzer analyzer=new SmartChineseAnalyzer();
1 y y0 I* R: ^! l, ^7 \ QueryParser parser=new QueryParser("desc", analyzer);; p" W* L! z: O4 {% u) Y- o) k
Query query=parser.parse(q);
. F+ g7 C0 t: P# O long start=System.currentTimeMillis();
4 g% j7 e7 u& e2 w TopDocs hits=is.search(query, 10);
6 k) r7 W Y8 L: C. f9 l2 }: |4 c long end=System.currentTimeMillis();
$ a" ?0 R) ?" o' k5 J System.out.println("匹配 "+q+" ,总共花费"+(end-start)+"毫秒"+"查询到"+hits.totalHits+"个记录");1 I; w2 A4 Y3 @* J. @0 \* ?: Z
/ F6 v9 ?" W# x$ h
QueryScorer scorer=new QueryScorer(query);
$ n/ J z1 e0 ]+ @ Fragmenter fragmenter=new SimpleSpanFragmenter(scorer);
7 c& A9 y4 m. R) v/ Y SimpleHTMLFormatter simpleHTMLFormatter=new SimpleHTMLFormatter("<b><font color='red'>","</font></b>");
+ [/ ] Y: [4 ^/ ^ Highlighter highlighter=new Highlighter(simpleHTMLFormatter, scorer);/ c( L( l9 O3 g) y0 ]/ R6 h
highlighter.setTextFragmenter(fragmenter);* q8 \0 `6 b4 d% z/ n
for(ScoreDoc scoreDoc:hits.scoreDocs){
1 y5 v$ t% m8 y, s* O: p Document doc=is.doc(scoreDoc.doc);! A' s/ z v2 Y
System.out.println(doc.get("city"));
6 K7 }6 a4 O$ C& } j& s System.out.println(doc.get("desc"));) E* ?+ p P0 G2 t& i
String desc=doc.get("desc");, ~/ t1 j' [) }4 q
if(desc!=null){5 H3 i- Z1 O: {) D
TokenStream tokenStream=analyzer.tokenStream("desc", new StringReader(desc));
7 i2 _5 D& v; o System.out.println(highlighter.getBestFragment(tokenStream, desc));0 u9 `% v) P1 ~( {4 [
}* g# W9 ~% q. [$ i' f9 N
}, g, m0 o1 d$ f
reader.close();
/ R5 Q' r e' ~7 G5 C% y6 t/ E }
# ~3 m2 h8 l6 C" s. Z
" J8 ]6 W( s1 R5 c public static void main(String[] args) {
6 s" U) |, \% _+ ]/ v: H1 D* c String indexDir="D:\\lucene6";" Y9 \, l( \0 s6 G7 B
String q="南京文明";
2 a* ^0 |6 }* V2 [6 q try {
$ e# H2 P, o/ E' l search(indexDir,q);
. I' f! I/ m8 b' s0 L4 w, s3 g( q } catch (Exception e) {
7 v. f" I; B/ q2 m% g // TODO Auto-generated catch block+ Q$ |$ g: O5 Y
e.printStackTrace();; V8 m% ]5 q- M9 k9 l; }; l2 W: w
}
5 w. W+ y9 t5 R. B2 v# { }
& _, @7 a4 ^$ k}5 _+ H5 p) I) P" [3 x; ?
O. u! |% _2 _- u7 P; s1 S9 f8 x
( Z5 C/ m5 z! x8 g5 q2 {3 i
2 n1 \+ p% L/ G$ ?% {4 ]# M; n$ J/ J' k7 f0 a
( {) @3 U% Z4 O# a: c |
|