package com.bxm.localnews.news.utils;

import com.bxm.newidea.component.tools.StringUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.wltea.analyzer.lucene.IKAnalyzer;

import java.io.IOException;
import java.io.StringReader;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Word-segmentation helpers that run text through {@link IKAnalyzer} and
 * return the resulting terms joined into a single delimited string.
 *
 * <p>Created by hsq 17:40 2018/3/2
 */
public class LuceneWord {

    /**
     * Characters removed from the input before segmentation in {@link #parseStr(String)}.
     *
     * <p>NOTE(review): the {@code //[//]} fragment looks like a mistyped {@code \\[\\]};
     * as written it is parsed as a nested character class containing only {@code /},
     * so literal {@code [} and {@code ]} are NOT stripped. The literal is kept as-is to
     * avoid changing existing behavior - confirm the intent before altering it.
     */
    private static final Pattern PUNCTUATION = Pattern.compile(
            "[`~!@#$%^&*()+=|{}':;',//[//].<>/?~！@#￥%……&*（）——+|{}【】‘；：”“’。，、？]");

    /**
     * Strips punctuation from {@code str}, trims it and segments what is left.
     *
     * @param str the raw text; may be {@code null}
     * @return {@code null} when {@code str} is null or empty; otherwise the terms joined
     *         with {@code "|"}, or an empty string when no term remains after the
     *         punctuation has been removed
     */
    public static String parseStr(String str) {
        if (StringUtils.isEmpty(str)) {
            return null;
        }
        String cleaned = PUNCTUATION.matcher(str).replaceAll("").trim();
        return participle(cleaned);
    }

    /**
     * Segments {@code content} and joins every term with {@code "|"}.
     *
     * @param content the text to segment
     * @return the joined terms, or an empty string when there are none
     */
    private static String participle(String content) {
        return joinTerms(content, "|", 0);
    }

    /**
     * Segments {@code content} and joins the terms longer than one character with
     * {@code ","}.
     *
     * @param content the text to segment; may be {@code null}
     * @return the joined terms, or an empty string when the input is null/empty or no
     *         term longer than one character was produced
     */
    public static String participleTitle(String content) {
        return joinTerms(content, ",", 2);
    }

    /**
     * Runs {@code content} through a smart-mode {@link IKAnalyzer} and joins the emitted
     * terms whose length is at least {@code minLength}.
     *
     * <p>An {@link IOException} raised while reading the token stream is reported and the
     * terms collected up to that point are returned (best effort, as before).
     *
     * @param content   the text to segment; null or empty yields an empty string
     * @param separator the delimiter placed between consecutive terms
     * @param minLength the minimum term length to keep; shorter terms are skipped
     * @return the kept terms joined by {@code separator}, never {@code null}
     */
    private static String joinTerms(String content, String separator, int minLength) {
        if (content == null || content.isEmpty()) {
            return "";
        }
        StringBuilder joined = new StringBuilder();
        // Both the analyzer and the stream hold resources and must be closed after use.
        try (Analyzer analyzer = new IKAnalyzer(true);
             TokenStream stream = analyzer.tokenStream("content", new StringReader(content))) {
            // Attribute through which the current term text is exposed.
            CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
            stream.reset();
            while (stream.incrementToken()) {
                String word = term.toString();
                if (word.length() < minLength) {
                    continue;
                }
                // Separator goes before every term except the first, so there is no
                // trailing delimiter to cut off (and nothing to fail on when empty).
                if (joined.length() > 0) {
                    joined.append(separator);
                }
                joined.append(word);
            }
            // Part of the TokenStream consumer workflow: signal end-of-stream before close.
            stream.end();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return joined.toString();
    }

}
