package com.bxm.spider.deal.service.impl;

import com.bxm.spider.cache.RedisClient;
import com.bxm.spider.cache.constant.SimHashConstant;
import com.bxm.spider.deal.service.RepeatService;
import com.bxm.spider.deal.utils.SimHashHelper;
import com.bxm.spider.utils.DateUtils;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import java.math.BigInteger;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.CopyOnWriteArraySet;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;

/**
 * Redis-backed duplicate detection for crawled articles.
 *
 * <p>Titles are fingerprinted with {@link String#hashCode()} and kept in one "total" set plus
 * one set per day. Content is fingerprinted by {@link SimHashHelper}; its 64-character string
 * form is cut into four 16-character segments, and the numeric fingerprint is stored in one
 * Redis set per segment so that near-duplicates can be looked up by any matching segment.
 *
 * <p>NOTE(review): {@link SimHashHelper} is used as if it keeps the result of the last
 * {@code simHash(...)} call (see the argument-less {@code getIntSimHash()} /
 * {@code getStrSimHash()} calls). If the injected helper is shared between threads, concurrent
 * requests could read each other's fingerprints — confirm the helper's scope.
 */
@Service
/* loaded from: input_file:BOOT-INF/classes/com/bxm/spider/deal/service/impl/RepeatServiceImpl.class */
public class RepeatServiceImpl implements RepeatService {
    private static final Logger LOGGER = LoggerFactory.getLogger(RepeatServiceImpl.class);

    /** Length of the string form of a content simHash accepted by this service. */
    private static final int SIMHASH_LENGTH = 64;

    /** Length of each segment of the string simHash that is used as a Redis key suffix. */
    private static final int SEGMENT_LENGTH = 16;

    /** Two content fingerprints closer than this Hamming distance are treated as duplicates. */
    private static final int DUPLICATE_DISTANCE = 4;

    /** Number of daily sets (today plus the 14 previous days) merged by the reload methods. */
    private static final int RELOAD_DAYS = 15;

    @Autowired
    private RedisClient redisClient;

    @Autowired
    SimHashHelper simHashHelper;

    /**
     * Checks whether an article is new and, if so, returns the fingerprints to store for it.
     *
     * @param str  URL of the article (used for logging only)
     * @param str2 article title
     * @param str3 article content
     * @return {@code null} when the title or content is already known (the daily repeat counter
     *         is incremented in that case) or when the content is blank; otherwise a map holding
     *         the title hash under {@code SIMHASH_TITLE} (if the pre-processed title is not
     *         blank) and the content simHash string under {@code SIMHASH_CONTENT} (if the helper
     *         produced segments for the content)
     */
    @Override // com.bxm.spider.deal.service.RepeatService
    public Map<String, String> getUnRepeatSimHash(String str, String str2, String str3) {
        Map<String, String> fingerprints = Maps.newHashMap();
        String cleanTitle = this.simHashHelper.preProcess(str2);
        if (StringUtils.isNotBlank(cleanTitle)) {
            String titleHash = String.valueOf(cleanTitle.hashCode());
            if (this.redisClient.sismember(SimHashConstant.simHashTitleTotalKey(), titleHash)) {
                LOGGER.info("[simHash repeat service]{} 的 title 已经存在", str);
                recordRepeat();
                return null;
            }
            fingerprints.put(SimHashConstant.SIMHASH_TITLE, titleHash);
        }
        if (StringUtils.isBlank(str3)) {
            LOGGER.info("[simHash repeat service]{} 的content 为空,不进行处理", str);
            return null;
        }
        // simHash(...) must run before getIntSimHash(): the getter reads the helper's last result.
        List<String> segments = this.simHashHelper.simHash(str3);
        BigInteger intSimHash = this.simHashHelper.getIntSimHash();
        if (null == segments || segments.isEmpty()) {
            LOGGER.info("[simHash repeat service]{} 的content simHash值为{},不进行重复判断", str, intSimHash);
            return fingerprints;
        }
        if (isContentDuplicate(str, segments, intSimHash)) {
            recordRepeat();
            return null;
        }
        fingerprints.put(SimHashConstant.SIMHASH_CONTENT, this.simHashHelper.getStrSimHash());
        return fingerprints;
    }

    /**
     * Looks for an exact or near match of {@code intSimHash} in the sets of the given segments.
     *
     * @param url        article URL, used for logging only
     * @param segments   segment strings returned by the helper for the content
     * @param intSimHash numeric fingerprint of the content
     * @return {@code true} if a stored fingerprint is equal to, or within
     *         {@link #DUPLICATE_DISTANCE} of, {@code intSimHash}
     */
    private boolean isContentDuplicate(String url, List<String> segments, BigInteger intSimHash) {
        String intValue = String.valueOf(intSimHash);
        for (String segment : segments) {
            String segmentKey = SimHashConstant.simHashContentTotalKey(segment);
            // Cheap exact-match check first, before pulling the whole set for distance checks.
            if (this.redisClient.sismember(segmentKey, intValue)) {
                LOGGER.info("[simHash repeat service]{} 的 content 已经存在", url);
                return true;
            }
            Set<String> candidates = this.redisClient.smembers(segmentKey);
            if (null == candidates || candidates.isEmpty()) {
                continue;
            }
            for (String candidate : candidates) {
                BigInteger stored = new BigInteger(candidate);
                if (this.simHashHelper.hammingDistance(intSimHash, stored) < DUPLICATE_DISTANCE) {
                    LOGGER.info("[simHash repeat service]{} 的 content 和{}重复.", url, stored);
                    return true;
                }
            }
        }
        return false;
    }

    /** Increments today's entry in the repeat-times hash. */
    private void recordRepeat() {
        this.redisClient.hincrByOne(SimHashConstant.simHashRepeatTimesKey(), DateUtils.format(DateUtils.PATTERN_STR8));
    }

    /**
     * Rebuilds the "total" title set from the daily title sets of today and the previous 14 days.
     *
     * <p>The total key is deleted and then refilled; the two steps are not atomic. Errors are
     * logged and not rethrown.
     *
     * @return the merged title hashes (empty if nothing was loaded or an error occurred before
     *         the union completed)
     */
    @Override // com.bxm.spider.deal.service.RepeatService
    public Set<String> reloadTitle() {
        String[] dailyKeys = new String[RELOAD_DAYS];
        Date today = new Date();
        // Fixed: today's key used the content key builder, so today's titles were never reloaded.
        dailyKeys[0] = SimHashConstant.simHashTitleKey(DateUtils.formatDate(today));
        for (int i = 1; i < RELOAD_DAYS; i++) {
            // 6 is presumably java.util.Calendar.DAY_OF_YEAR, i.e. "i days ago" — confirm in DateUtils.
            dailyKeys[i] = SimHashConstant.simHashTitleKey(DateUtils.formatDate(DateUtils.addField(today, 6, -i)));
        }
        Set<String> titleHashes = new CopyOnWriteArraySet<>();
        try {
            titleHashes.addAll(this.redisClient.sunion(dailyKeys));
            this.redisClient.del(SimHashConstant.simHashTitleTotalKey());
            // Redis SADD needs at least one member, so skip the call when nothing was loaded.
            if (!titleHashes.isEmpty()) {
                this.redisClient.sadd(SimHashConstant.simHashTitleTotalKey(), titleHashes.toArray(new String[0]));
            }
        } catch (Exception e) {
            LOGGER.error("[simHash repeat service]每日 title simHash 加载出错", e);
        }
        return titleHashes;
    }

    /**
     * Rebuilds the per-segment content sets from the daily content sets of today and the
     * previous 14 days.
     *
     * <p>If nothing can be loaded the existing segment sets are left untouched. Otherwise all
     * keys matching the content "total" prefix are deleted and every loaded 64-character
     * simHash is re-indexed under its four segments.
     *
     * @return the numeric fingerprints (as strings) that were re-indexed; empty on failure
     */
    @Override // com.bxm.spider.deal.service.RepeatService
    public Set<String> reloadContent() {
        String[] dailyKeys = new String[RELOAD_DAYS];
        Date today = new Date();
        dailyKeys[0] = SimHashConstant.simHashContentKey(DateUtils.formatDate(today));
        for (int i = 1; i < RELOAD_DAYS; i++) {
            // 6 is presumably java.util.Calendar.DAY_OF_YEAR, i.e. "i days ago" — confirm in DateUtils.
            dailyKeys[i] = SimHashConstant.simHashContentKey(DateUtils.formatDate(DateUtils.addField(today, 6, -i)));
        }
        Set<String> reloaded = Sets.newHashSet();
        // Initialised up front so it is definitely assigned even when the union call throws.
        Set<String> recentHashes = new HashSet<>();
        try {
            recentHashes.addAll(this.redisClient.sunion(dailyKeys));
        } catch (Exception e) {
            LOGGER.error("[simHash repeat service]content simHash 加载出错", e);
        }
        if (recentHashes.isEmpty()) {
            LOGGER.error("[simHash repeat service]每日 content simHash 加载出错");
            return reloaded;
        }
        // Routine progress message: logged at INFO rather than ERROR.
        LOGGER.info("[simHash repeat service]删除历史 content simHash 共计 {} 个", this.redisClient.dimDel(SimHashConstant.simHashContentTotalKey("")));
        for (String strSimHash : recentHashes) {
            if (null == strSimHash || strSimHash.length() != SIMHASH_LENGTH) {
                continue;
            }
            String intValue = String.valueOf(this.simHashHelper.getIntSimHash(strSimHash));
            addContentSegments(strSimHash, intValue);
            reloaded.add(intValue);
        }
        return reloaded;
    }

    /**
     * Stores previously computed fingerprints.
     *
     * @param str  URL of the article (not used by this overload)
     * @param map  fingerprints keyed by {@code SIMHASH_TITLE} / {@code SIMHASH_CONTENT};
     *             nothing happens when {@code null} or empty
     * @param date day whose daily sets receive the fingerprints; defaults to now when {@code null}
     */
    @Override // com.bxm.spider.deal.service.RepeatService
    public void addSimHash(String str, Map<String, String> map, Date date) {
        if (null == map || map.isEmpty()) {
            return;
        }
        Date day = (null == date) ? new Date() : date;
        String formatDate = DateUtils.formatDate(day);
        String titleHash = map.get(SimHashConstant.SIMHASH_TITLE);
        if (StringUtils.isNotBlank(titleHash)) {
            try {
                this.redisClient.sadd(SimHashConstant.simHashTitleKey(formatDate), SimHashConstant.simHashTitleTotalKey(), titleHash, null, Integer.valueOf(SimHashConstant.EXPIRE_SIMHASH));
            } catch (Exception e) {
                LOGGER.error("[simHash repeat service]新增title Hash出错", e);
            }
        }
        String contentHash = map.get(SimHashConstant.SIMHASH_CONTENT);
        if (StringUtils.isNotBlank(contentHash) && contentHash.length() == SIMHASH_LENGTH) {
            try {
                this.redisClient.sadd(SimHashConstant.simHashContentKey(formatDate), null, Integer.valueOf(SimHashConstant.EXPIRE_SIMHASH), contentHash);
                BigInteger intSimHash = this.simHashHelper.getIntSimHash(contentHash);
                addContentSegments(contentHash, String.valueOf(intSimHash));
            } catch (Exception e) {
                LOGGER.error("[simHash repeat service]新增content simHash出错", e);
            }
        }
    }

    /**
     * Computes the fingerprints of a title and content and stores them.
     *
     * <p>NOTE(review): the title is hashed as given, whereas
     * {@link #getUnRepeatSimHash(String, String, String)} hashes the pre-processed title —
     * confirm callers pass an already pre-processed title.
     *
     * @param str  URL of the article (used for logging only)
     * @param str2 article title; skipped when blank
     * @param str3 article content; skipped when blank
     * @param date day whose daily sets receive the fingerprints; defaults to now when {@code null}
     */
    @Override // com.bxm.spider.deal.service.RepeatService
    public void addSimHash(String str, String str2, String str3, Date date) {
        Map<String, String> fingerprints = Maps.newHashMap();
        if (StringUtils.isNotBlank(str2)) {
            fingerprints.put(SimHashConstant.SIMHASH_TITLE, String.valueOf(str2.hashCode()));
        }
        if (StringUtils.isNotBlank(str3)) {
            this.simHashHelper.simHash(str3);
            String strSimHash = this.simHashHelper.getStrSimHash();
            if (StringUtils.isNotBlank(strSimHash)) {
                fingerprints.put(SimHashConstant.SIMHASH_CONTENT, strSimHash);
            } else {
                // Fixed: the message had no placeholder, so the URL was never written to the log.
                LOGGER.error("[simHash repeat service]simHash 新增 content的simHash为空,url:{}", str);
            }
        }
        addSimHash(str, fingerprints, date);
    }

    /**
     * Removes the fingerprints of a title and/or content from the daily and total sets.
     *
     * <p>NOTE(review): the title is hashed as given, whereas
     * {@link #getUnRepeatSimHash(String, String, String)} hashes the pre-processed title —
     * confirm callers pass an already pre-processed title.
     *
     * @param str  URL of the article (used for logging only)
     * @param str2 article title; skipped when blank
     * @param str3 article content; skipped when blank
     * @param str4 date string identifying the daily sets to remove from
     * @return {@code null} when {@code str4} is blank; otherwise the result of the last
     *         successful "total" set removal (content takes precedence over title), or
     *         {@code 0} if none succeeded
     */
    @Override // com.bxm.spider.deal.service.RepeatService
    public Long removeSimHash(String str, String str2, String str3, String str4) {
        if (StringUtils.isBlank(str4)) {
            LOGGER.warn("the DateTime is null,url:{}", str);
            return null;
        }
        Long removed = 0L;
        if (StringUtils.isNotBlank(str2)) {
            String dailyTitleKey = SimHashConstant.simHashTitleKey(str4);
            String titleHash = String.valueOf(str2.hashCode());
            try {
                this.redisClient.srem(dailyTitleKey, titleHash);
                removed = this.redisClient.srem(SimHashConstant.simHashTitleTotalKey(), titleHash);
            } catch (Exception e) {
                // Keep the cause: previously the exception was dropped.
                LOGGER.error("remove title simHash is error,the key:{}", dailyTitleKey, e);
            }
        }
        if (StringUtils.isNotBlank(str3)) {
            String dailyContentKey = SimHashConstant.simHashContentKey(str4);
            this.simHashHelper.simHash(str3);
            String strSimHash = this.simHashHelper.getStrSimHash();
            BigInteger intSimHash = this.simHashHelper.getIntSimHash();
            try {
                this.redisClient.srem(dailyContentKey, strSimHash);
                removed = removeContentSegments(strSimHash, String.valueOf(intSimHash));
            } catch (Exception e) {
                // Keep the cause: previously the exception was dropped.
                LOGGER.error("remove content simHash is error,the key:{}", dailyContentKey, e);
            }
        }
        return removed;
    }

    /**
     * Returns the size of the title or content cache.
     *
     * @param str {@code SIMHASH_TITLE} or {@code SIMHASH_CONTENT}
     * @return the cardinality of the title total set, the {@code dimCount} result for content,
     *         or {@code 0} for any other value
     */
    @Override // com.bxm.spider.deal.service.RepeatService
    public Long getCacheSize(String str) {
        if (SimHashConstant.SIMHASH_TITLE.equals(str)) {
            return this.redisClient.scard(SimHashConstant.simHashTitleTotalKey());
        }
        if (SimHashConstant.SIMHASH_CONTENT.equals(str)) {
            // NOTE(review): reloadContent() passes "" to simHashContentTotalKey for its wildcard
            // delete, while this passes the type name itself — confirm this pattern matches the
            // per-segment keys.
            return this.redisClient.dimCount(SimHashConstant.simHashContentTotalKey(str));
        }
        return 0L;
    }

    /**
     * Adds {@code intValue} to the set of each 16-character segment of {@code strSimHash}.
     *
     * @param strSimHash 64-character string simHash (callers check the length)
     * @param intValue   numeric fingerprint as a string
     */
    private void addContentSegments(String strSimHash, String intValue) {
        for (int start = 0; start < SIMHASH_LENGTH; start += SEGMENT_LENGTH) {
            String segment = strSimHash.substring(start, start + SEGMENT_LENGTH);
            this.redisClient.sadd(SimHashConstant.simHashContentTotalKey(segment), intValue);
        }
    }

    /**
     * Removes {@code intValue} from the set of each 16-character segment of {@code strSimHash}.
     *
     * @param strSimHash string simHash; a {@code null} or short value makes this throw
     * @param intValue   numeric fingerprint as a string
     * @return the result of the removal from the last segment's set
     */
    private Long removeContentSegments(String strSimHash, String intValue) {
        Long removed = null;
        for (int start = 0; start < SIMHASH_LENGTH; start += SEGMENT_LENGTH) {
            String segment = strSimHash.substring(start, start + SEGMENT_LENGTH);
            removed = this.redisClient.srem(SimHashConstant.simHashContentTotalKey(segment), intValue);
        }
        return removed;
    }
}
