javazx 发表于 2016-6-20 14:49:36

【第三节】Lucene5文档域加权

1、源码
import java.nio.file.Paths;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.junit.Test;

public class IndexingTest2 {

    private String ids[]={"1","2","3","4"};
    private String authors[]={"Jack","Marry","John","Json"};
    private String positions[]={"accounting","technician","salesperson","boss"};
    private String titles[]={"Java is a good language.","Java is a cross platform language","Java powerful","You should learn java"};
    private String contents[]={
            "If possible, use the same JRE major version at both index and search time.",
            "When upgrading to a different JRE major version, consider re-indexing. ",
            "Different JRE major versions may implement different versions of Unicode,",
            "For example: with Java 1.4, `LetterTokenizer` will split around the character U+02C6,"
    };

    private Directory dir;

    /**
   * 获取IndexWriter实例
   * @return
   * @throws Exception
   */
    private IndexWriter getWriter()throws Exception{
      Analyzer analyzer=new StandardAnalyzer(); // 标准分词器
      IndexWriterConfig iwc=new IndexWriterConfig(analyzer);
      IndexWriter writer=new IndexWriter(dir, iwc);
      return writer;
    }

    /**
   * 生成索引
   * @throws Exception
   */
    @Test
    public void index()throws Exception{
      dir=FSDirectory.open(Paths.get("D:\\lucene3"));
      IndexWriter writer=getWriter();
      for(int i=0;i<ids.length;i++){
            Document doc=new Document();
            doc.add(new StringField("id", ids, Field.Store.YES));
            doc.add(new StringField("author",authors,Field.Store.YES));
            doc.add(new StringField("position",positions,Field.Store.YES));
            // 加权操作
            TextField field=new TextField("title", titles, Field.Store.YES);
            if("boss".equals(positions)){
                field.setBoost(1.5f);
            }
            doc.add(field);
            doc.add(new TextField("content", contents, Field.Store.NO));
            writer.addDocument(doc); // 添加文档
      }
      writer.close();
    }

    /**
   * 查询
   * @throws Exception
   */
    @Test
    public void search()throws Exception{
      dir=FSDirectory.open(Paths.get("D:\\lucene3"));
      IndexReader reader=DirectoryReader.open(dir);
      IndexSearcher is=new IndexSearcher(reader);
      String searchField="title";
      String q="java";
      Term t=new Term(searchField,q);
      Query query=new TermQuery(t);
      TopDocs hits=is.search(query, 10);
      System.out.println("匹配 '"+q+"',总共查询到"+hits.totalHits+"个文档");
      for(ScoreDoc scoreDoc:hits.scoreDocs){
            Document doc=is.doc(scoreDoc.doc);
            System.out.println(doc.get("author"));
      }
      reader.close();
    }

}



学java来尚学堂 发表于 2016-10-29 14:14:51

看了那么多，还是觉得我参加的北京尚学堂教得好。包教包会，而且毕业就有1W的工资在手。一起来学习吧。

wwwfasss 发表于 2016-10-30 12:24:00

java自学网给力 亲测资源可以
页: [1]
查看完整版本: 【第三节】Lucene5文档域加权