package com.bxm.localnews.processer.impl;

import com.alibaba.fastjson.JSONObject;
import com.bxm.localnews.analysis.TFIDFAnalyzesService;
import com.bxm.localnews.config.SyncProperties;
import com.bxm.localnews.processer.AbstractProcesser;
import com.bxm.localnews.processer.ProcesserContext;
import com.bxm.localnews.sync.vo.Keyword;
import com.bxm.localnews.sync.vo.local.AdminNews;
import com.bxm.localnews.sync.vo.local.News;
import com.bxm.localnews.sync.vo.spider.SpiderNews;
import com.bxm.localnews.sync.vo.spider.SpiderWechatNews;
import com.bxm.newidea.component.tools.StringUtils;
import com.google.common.collect.Lists;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;

import java.util.List;

/**
 * Extracts feature keywords for the news objects that pass through the sync pipeline.
 *
 * <p>Handles {@link SpiderNews}, {@link SpiderWechatNews}, {@link News} and {@link AdminNews}.
 * For {@link SpiderNews} the keywords supplied with the item are preferred; every other type
 * (and a {@link SpiderNews} without usable keywords) is run through the TF-IDF analyzer.
 */
@Component
public class NewsKeywordProcesser extends AbstractProcesser {

    private final TFIDFAnalyzesService tfidfAnalyzesService;

    private final SyncProperties syncProperties;

    @Autowired
    public NewsKeywordProcesser(TFIDFAnalyzesService tfidfAnalyzesService, SyncProperties syncProperties) {
        this.tfidfAnalyzesService = tfidfAnalyzesService;
        this.syncProperties = syncProperties;
    }

    /**
     * Computes the keyword list for the data held by {@code context} and stores it on that object.
     *
     * @param context processing context whose data is inspected
     * @return {@code false} only when a {@link SpiderNews} has no usable keywords and its title
     *         or content is blank; {@code true} otherwise (including unsupported data types)
     */
    @Override
    protected boolean execute(ProcesserContext context) {
        Object data = context.getData();
        // The order of the type checks is kept as-is in case the types are related by inheritance.
        if (data instanceof SpiderNews) {
            return processSpiderNews((SpiderNews) data);
        } else if (data instanceof SpiderWechatNews) {
            SpiderWechatNews wechatNews = (SpiderWechatNews) data;
            wechatNews.setKeywordList(analyze(wechatNews.getTitle(), wechatNews.getContent()));
        } else if (data instanceof News) {
            News localNews = (News) data;
            localNews.setKeywordList(analyze(localNews.getTitle(), localNews.getContent()));
        } else if (data instanceof AdminNews) {
            AdminNews adminNews = (AdminNews) data;
            adminNews.setKeywordList(analyze(adminNews.getTitle(), adminNews.getContent()));
        }
        return true;
    }

    /**
     * Resolves keywords for a crawled news item: supplied keywords first, TF-IDF as fallback.
     *
     * @param news crawled news item to update
     * @return {@code false} when TF-IDF is required but the title or content is blank
     */
    private boolean processSpiderNews(SpiderNews news) {
        List<Keyword> keywordList = null;

        if (StringUtils.isNotBlank(news.getKeyword())) {
            keywordList = convertKeywords(news.getKeyword());
        }

        // A keyword string made up only of separators yields no keywords; treat it the same
        // as "no keywords supplied" instead of storing an empty list.
        if (keywordList == null || keywordList.isEmpty()) {
            if (StringUtils.isBlank(news.getTitle()) || StringUtils.isBlank(news.getContent())) {
                // Per the log message, blank input previously surfaced as a NullPointerException.
                logger.error("同步出现标题和内容为空，导致空指针报错:[{}]", JSONObject.toJSON(news));
                return false;
            }
            keywordList = analyze(news.getTitle(), news.getContent());
        }

        news.setKeywordList(keywordList);
        return true;
    }

    /**
     * Runs the TF-IDF analyzer with the configured number of tags to pick.
     *
     * @param title   news title
     * @param content news content
     * @return keywords returned by the analyzer
     */
    private List<Keyword> analyze(String title, String content) {
        return tfidfAnalyzesService.analyze(title, content, syncProperties.getTagPickCount());
    }

    /**
     * Converts a keyword string supplied with the news into {@link Keyword} objects.
     *
     * <p>Full-width commas and spaces are normalized to ASCII commas before splitting. Each
     * resulting keyword gets a TF-IDF value of 1; blank tokens are skipped.
     *
     * @param keywords delimiter-separated keyword string
     * @return converted keywords, possibly empty, never {@code null}
     */
    private List<Keyword> convertKeywords(String keywords) {
        keywords = StringUtils.replaceAll(keywords, "，", ",");
        keywords = StringUtils.replaceAll(keywords, " ", ",");

        String[] keywordArray = StringUtils.split(keywords, ",");

        List<Keyword> keywordList = Lists.newArrayList();
        if (keywordArray == null) {
            return keywordList;
        }

        for (String keyword : keywordArray) {
            // Guard against empty tokens produced by consecutive separators.
            if (StringUtils.isBlank(keyword)) {
                continue;
            }
            Keyword keywordObj = new Keyword();
            keywordObj.setName(keyword);
            keywordObj.setTfidfvalue(1D);
            keywordList.add(keywordObj);
        }

        return keywordList;
    }

    /**
     * Tells whether this processer handles the given data.
     *
     * @param data candidate object
     * @return {@code true} for {@link SpiderNews}, {@link SpiderWechatNews}, {@link News}
     *         and {@link AdminNews} instances
     */
    @Override
    public boolean match(Object data) {
        return data instanceof SpiderNews
                || data instanceof SpiderWechatNews
                || data instanceof News
                || data instanceof AdminNews;
    }
}
