package com.bxm.localnews.news.service.impl;

import com.bxm.localnews.news.domain.SensitiveWordMapper;
import com.bxm.localnews.news.service.SensitiveWordService;
import com.bxm.localnews.news.vo.SensitiveWordBean;
import com.bxm.localnews.news.vo.WordNode;
import com.bxm.newidea.component.tools.BCConvert;
import com.bxm.newidea.component.tools.StringUtils;
import com.google.common.collect.Sets;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.annotation.Lazy;
import org.springframework.stereotype.Service;

import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.*;
import java.util.concurrent.atomic.AtomicBoolean;

@Service
@Slf4j
@Lazy
public class SensitiveWordServiceImpl implements SensitiveWordService {

    /**
     * 停用词数组，用于排除不需要检查的字符
     */
    private static final Set<Integer> STOP_WORD_SET = new HashSet<>();

    /**
     * 存储首字的ASCII码
     */
    private static Set<Integer> FIRST_WORD_SET = new HashSet<>();

    private AtomicBoolean initd = new AtomicBoolean(false);

    /**
     * 敏感词字典树
     */
    private static Map<Integer, WordNode> TRIE_TREE = new HashMap<>(2048, 1);

    private final SensitiveWordMapper sensitiveWordMapper;

    @Autowired
    public SensitiveWordServiceImpl(SensitiveWordMapper sensitiveWordMapper) {
        this.sensitiveWordMapper = sensitiveWordMapper;
    }

    /**
     * 大写转化为小写 全角转化为半角
     * @param src 原始字符串
     * @return 转换后的字符
     */
    private static int charConvert(char src) {
        //全角转半角
        char r = BCConvert.qj2bj(src);
        //如果是字母，进行特殊处理
        return (r >= 'A' && r <= 'Z') ? r + 32 : r;
    }

    @Override
    public boolean contains(String source) {
        if (initd.compareAndSet(false,true)) {
            init();
        }
        if (StringUtils.isBlank(source)) {
            return false;
        }

        char[] chs = source.toCharArray();
        int length = chs.length;
        int firstWord;
        int k;
        WordNode node;
        for (int i = 0; i < length; i++) {
            firstWord = charConvert(chs[i]);
            if (!FIRST_WORD_SET.contains(firstWord)) {
                continue;
            }
            node = TRIE_TREE.get(firstWord);
            if (node == null) {
                continue;
            }
            boolean couldMark = false;
            k = i;

            if (node.isLast()) {
                // 单字匹配
                couldMark = true;
            } else {
                // 继续匹配，以长的优先
                for (; ++k < length; ) {
                    int temp = charConvert(chs[k]);
                    if (STOP_WORD_SET.contains(temp)) {
                        break;
                        //continue;  如果是continue则会忽略停止词
                    }
                    node = node.querySub(temp);
                    if (node == null) {
                        // 没有了
                        break;
                    }
                    if (node.isLast()) {
                        couldMark = true;
                    }
                }
            }
            if (couldMark) {
                String sensitiveWord = source.substring(i, k);
                log.warn("[{}]中存在敏感词，敏感词为：[{}]", source, sensitiveWord);
                return true;
            }
        }

        return false;
    }

    @Override
    public void reload() {
        //从数据库加载敏感词库
        List<SensitiveWordBean> words = sensitiveWordMapper.getEnable();
        if (words != null && words.size() > 0) {

            Set<Integer> firstWordSet = Sets.newHashSet();
            Map<Integer, WordNode> nodes = new HashMap<>(2048, 1);

            char[] chs;
            int firstWordChar;
            int lastIndex;
            // 首字母节点
            WordNode firstNode;
            for (SensitiveWordBean word : words) {
                chs = word.getWord().toCharArray();
                firstWordChar = charConvert(chs[0]);
                // 没有首字定义
                if (!firstWordSet.contains(firstWordChar)) {
                    // 首字标志位
                    firstWordSet.add(firstWordChar);
                    firstNode = new WordNode(firstWordChar, chs.length == 1);
                    nodes.put(firstWordChar, firstNode);
                } else {
                    firstNode = nodes.get(firstWordChar);
                    if (!firstNode.isLast() && chs.length == 1) {
                        firstNode.setLast(true);
                    }
                }
                lastIndex = chs.length - 1;
                for (int i = 1; i < chs.length; i++) {
                    firstNode = firstNode.addIfNoExist(charConvert(chs[i]), i == lastIndex);
                }
            }

            FIRST_WORD_SET = firstWordSet;
            TRIE_TREE = nodes;
        }
    }

    public void init() {
        //加载停用词
        try (BufferedReader br = new BufferedReader(new InputStreamReader(
                getClass().getClassLoader().getResourceAsStream("stop_words.txt")))) {
            List<String> words = new ArrayList<>(1200);
            for (String buf; (buf = br.readLine()) != null; ) {
                if ("".equals(buf)) {
                    continue;
                }
                words.add(buf);
            }

            words.add(" ");

            if (words.size() > 0) {
                char[] chs;
                for (String curr : words) {
                    chs = curr.toCharArray();
                    for (char c : chs) {
                        STOP_WORD_SET.add(charConvert(c));
                    }
                }
            }
        } catch (Exception e) {
            log.error("加载停用词出错", e);
        }

        reload();
    }
}
