|
1、中文分词(smartcn)
<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-analyzers-smartcn</artifactId>
    <version>5.3.1</version>
</dependency>

2、高亮显示
<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-highlighter</artifactId>
    <version>5.3.1</version>
</dependency>

源码:
(1)
import java.nio.file.Paths;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.IntField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class Indexer {
+ l# h7 }4 H" U' e# d& A6 J K% u# c6 R
private Integer ids[]={1,2,3};
8 F& X% p4 P& M/ I- F. u; I private String citys[]={"青岛","南京","上海"};
3 x+ q& `& F0 J) `5 }8 P9 z2 x private String descs[]={1 C9 }5 C( `& N1 e& q
"青岛是一个美丽的城市。",
2 d6 h8 C- Q3 p* U1 L5 @+ B "南京是一个有文化的城市。南京是一个文化的城市南京,简称宁,是江苏省会,地处中国东部地区,长江下游,濒江近海。全市下辖11个区,总面积6597平方公里,2013年建成区面积752.83平方公里,常住人口818.78万,其中城镇人口659.1万人。[1-4] “江南佳丽地,金陵帝王州”,南京拥有着6000多年文明史、近2600年建城史和近500年的建都史,是中国四大古都之一,有“六朝古都”、“十朝都会”之称,是中华文明的重要发祥地,历史上曾数次庇佑华夏之正朔,长期是中国南方的政治、经济、文化中心,拥有厚重的文化底蕴和丰富的历史遗存。[5-7] 南京是国家重要的科教中心,自古以来就是一座崇文重教的城市,有“天下文枢”、“东南第一学”的美誉。截至2013年,南京有高等院校75所,其中211高校8所,仅次于北京上海;国家重点实验室25所、国家重点学科169个、两院院士83人,均居中国第三。[8-10] 。",
( m9 L8 N+ X" p; K- R3 Y. e "上海是一个繁华的城市。"
; }! e$ a) U' @" @& S8 N9 s. o! r };' }4 |5 d, w7 H! Y1 o3 W9 `
+ t6 G& l; S) z- ?9 M; i private Directory dir;' m6 F4 D0 h. k! ^0 a) R6 n2 M7 a
3 }' ?" V4 T: q* T' M0 s
/**
7 E9 W3 q& v/ J5 o3 c" J * 获取IndexWriter实例. y( E( c$ t8 v9 d
* @return
9 I$ ^; [& f! r0 O * @throws Exception# ]+ i7 K3 C d* w
*/
! p* m2 y( @% ~4 } private IndexWriter getWriter()throws Exception{
( V$ M( C& V" e0 ^0 s0 z% t( y; D //Analyzer analyzer=new StandardAnalyzer(); // 标准分词器9 n6 u- x( f) b% j
SmartChineseAnalyzer analyzer=new SmartChineseAnalyzer();
) M' t2 ^6 i6 h( o5 ]3 V N IndexWriterConfig iwc=new IndexWriterConfig(analyzer);5 W. k, Y+ B4 a% |1 K- V/ K
IndexWriter writer=new IndexWriter(dir, iwc);
" I8 w2 R- K6 L {$ g- T' h return writer;; J! |: a! O/ |" Z: O
}* Q2 \/ p3 N1 x' z9 Y; o* F
+ g0 _( q$ ~) {3 F1 s
/**6 z9 y6 n z5 N; O$ C7 ^
* 生成索引
9 c! ?; [" y5 [: }" E* L" X * @param indexDir) {5 t/ F. X1 R
* @throws Exception
7 |5 m# Y. O% M' \2 X */4 x2 G, x U8 {& W1 [
private void index(String indexDir)throws Exception{! P0 X) p2 b" r4 z
dir=FSDirectory.open(Paths.get(indexDir));" }) v' e4 u0 m
IndexWriter writer=getWriter();. E/ a6 g, ~2 a$ [: K# L8 |- ]' H
for(int i=0;i<ids.length;i++){' [( v! O0 C) w3 C+ Y$ S, F
Document doc=new Document();& F' y! D: [5 v/ n+ K0 I: e
doc.add(new IntField("id", ids, Field.Store.YES));
$ y4 `8 p$ z+ \% m: O; s doc.add(new StringField("city",citys,Field.Store.YES));; Z0 M$ L& y% L' r6 N/ S5 U) C
doc.add(new TextField("desc", descs, Field.Store.YES)); T3 J U% [7 M% R) H8 Y$ |
writer.addDocument(doc); // 添加文档) R' ~5 ~( {- o/ ~+ V8 G+ d
}2 I- I c) [; h3 `* r5 e
writer.close();
. n5 C# G( U5 [- d$ ?9 @0 [9 c }
9 V+ ^1 F; b" w) {: f2 F7 j
9 F; Q3 f! u! C% j- ]5 H' K) L1 {/ S
public static void main(String[] args) throws Exception {! n5 T! {# x! |- W0 R
new Indexer().index("D:\\lucene6");1 g# P j2 y% w/ y, J! C Y
}
% Y, F. r" V2 b t2 _3 G3 @' L0 o: E! o! s: f9 _
}


(2)
import java.io.StringReader;
import java.nio.file.Paths;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

9 Y& Y+ P/ t( Y, _public class Searcher {, ~: m( H, A3 ]" t( z0 |& k7 D; m
& i" \ i2 F( g3 X! ?& Z" i
public static void search(String indexDir,String q)throws Exception{
. L. f4 n4 u, {. b: K7 j% y4 p$ ^ Directory dir=FSDirectory.open(Paths.get(indexDir));4 ~7 }: F# g0 U" |
IndexReader reader=DirectoryReader.open(dir);
6 I+ s( K4 M+ W' z' k/ G IndexSearcher is=new IndexSearcher(reader); ]' O* Z" a* o! U
// Analyzer analyzer=new StandardAnalyzer(); // 标准分词器# J' C$ H7 O4 M4 `7 ]8 R7 l
SmartChineseAnalyzer analyzer=new SmartChineseAnalyzer();
+ o! ]5 `1 R4 N" J& n/ Y QueryParser parser=new QueryParser("desc", analyzer);
' \$ m, c' U9 x& G Query query=parser.parse(q);! {/ x K' A& r9 G; r" E4 j; j
long start=System.currentTimeMillis();+ z4 U/ }% d$ M
TopDocs hits=is.search(query, 10);7 A0 p3 d5 \) V) H5 m7 h
long end=System.currentTimeMillis();
5 p% e- g% L+ m. a* R/ } System.out.println("匹配 "+q+" ,总共花费"+(end-start)+"毫秒"+"查询到"+hits.totalHits+"个记录");
7 N0 _# W$ S2 E+ ^8 K0 e8 o* j
% X. E# O1 a+ I( i# r" I QueryScorer scorer=new QueryScorer(query);
2 Q3 @' _7 \/ f Fragmenter fragmenter=new SimpleSpanFragmenter(scorer);/ V/ f% L+ N0 e: m
SimpleHTMLFormatter simpleHTMLFormatter=new SimpleHTMLFormatter("<b><font color='red'>","</font></b>");1 B0 Y2 W |1 r) w2 @- s
Highlighter highlighter=new Highlighter(simpleHTMLFormatter, scorer);. l( O" E/ e- x$ R; y* p; o
highlighter.setTextFragmenter(fragmenter);
& O# \+ C* o% v for(ScoreDoc scoreDoc:hits.scoreDocs){& g. c7 ^3 d; D
Document doc=is.doc(scoreDoc.doc);8 ]9 v; @3 H4 M' C# X6 ?" N
System.out.println(doc.get("city"));
3 k! _9 Y* j2 a# A System.out.println(doc.get("desc"));+ b6 D: O l4 C2 g2 _
String desc=doc.get("desc");
a& g$ }8 a; g0 T* L9 `+ J if(desc!=null){; q% o. \% c; n2 |$ q# l8 B
TokenStream tokenStream=analyzer.tokenStream("desc", new StringReader(desc));! b% |8 C6 u2 p/ F2 j
System.out.println(highlighter.getBestFragment(tokenStream, desc));
$ y8 j$ Q3 @. j3 B }
: X1 P: C: \# V6 m5 ]3 X2 y }
$ @2 @" u7 m; B" x( F9 W% b4 P" I reader.close();
0 V+ n& @) r. s }$ A) b) b. m7 t0 ]0 X% V
3 O3 }. J/ H" n- p' @2 B( I public static void main(String[] args) {. i! z8 f5 c" }4 H6 D, u
String indexDir="D:\\lucene6";; Z7 r; A& a$ A2 c- o+ B' u9 [. {
String q="南京文明";
, @4 A7 |6 t# r* s4 f5 F try {
. C9 u' q; E# ?$ I' W& d3 B search(indexDir,q);
' p: R1 ]( l6 u: [6 h8 _ } catch (Exception e) {( L! T" o4 f. a
// TODO Auto-generated catch block/ N! r3 y8 A+ G A# R
e.printStackTrace();) j% Y5 Q1 X G2 j
}- e! ?' X5 u5 f! o
}
" ?8 T3 e }' ` y}
|