Without further ado, here is the code; the key steps are explained in the comments.
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrRequest;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.request.FieldAnalysisRequest;
import org.apache.solr.client.solrj.response.AnalysisResponseBase.AnalysisPhase;
import org.apache.solr.client.solrj.response.AnalysisResponseBase.TokenInfo;
import org.apache.solr.client.solrj.response.FieldAnalysisResponse;

/**
 * Tokenize the given sentence.
 *
 * @param sentence the sentence to analyze
 * @return the resulting tokens
 */
public List<String> getAnalysis(String sentence) {
    String baseUrl = solrServerPath + "/new_core";
    List<String> results = new ArrayList<>();
    // SolrServer was removed in Solr 8.0 and replaced by SolrClient
    // (which is the more accurate name anyway). try-with-resources also
    // closes the client when we are done with it.
    try (SolrClient solrClient = new HttpSolrClient.Builder(baseUrl).build()) {
        FieldAnalysisRequest request = new FieldAnalysisRequest("/analysis/field");
        request.addFieldName("bk_name"); // field name: any field configured with a Chinese analyzer will do
        request.setFieldValue("");       // field value: may be an empty string, but must be set explicitly
        request.setQuery(sentence);
        request.setMethod(SolrRequest.METHOD.POST);
        FieldAnalysisResponse response = request.process(solrClient);
        for (AnalysisPhase phase : response.getFieldNameAnalysis("bk_name").getQueryPhases()) {
            for (TokenInfo info : phase.getTokens()) {
                // Skip duplicates and single characters
                // (a single Chinese character is 3 bytes in UTF-8, so "> 3" keeps only multi-character tokens).
                if (!results.contains(info.getText()) && info.getText().getBytes().length > 3) {
                    results.add(info.getText());
                }
            }
        }
    } catch (SolrServerException | IOException e) {
        // On failure, log the error and return whatever has been collected (normally an empty list).
        e.printStackTrace();
    }
    return results;
}
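
One note on the filter at the end: info.getText().getBytes() uses the platform default charset, so the "> 3" check only behaves as intended when that default is UTF-8, where a single CJK character occupies 3 bytes. A small self-contained sketch of that assumption:

import java.nio.charset.StandardCharsets;

// Illustrates why "getBytes().length > 3" drops single characters: in UTF-8
// a CJK character is 3 bytes, so only tokens of two or more characters pass.
public class ByteLengthDemo {
    public static void main(String[] args) {
        System.out.println("搜".getBytes(StandardCharsets.UTF_8).length);   // 3 -> filtered out
        System.out.println("搜索".getBytes(StandardCharsets.UTF_8).length); // 6 -> kept
    }
}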
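
For completeness, a minimal sketch of how the method might be called. The class name TokenService, its constructor, and the address http://localhost:8983/solr are assumptions for illustration only; the requirement is simply that getAnalysis(...) above lives in a class whose solrServerPath points at a running Solr instance with a new_core core and a bk_name field backed by a Chinese analyzer.

import java.util.List;

public class AnalysisDemo {
    public static void main(String[] args) {
        // TokenService is a hypothetical wrapper holding solrServerPath and the
        // getAnalysis(...) method shown above; adjust to your own class.
        TokenService service = new TokenService("http://localhost:8983/solr");
        List<String> tokens = service.getAnalysis("全文检索是常用的搜索技术");
        // Prints the multi-character tokens produced by the analyzer,
        // e.g. [全文检索, 常用, 搜索, 技术]; exact output depends on the analyzer.
        System.out.println(tokens);
    }
}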
