package com.bxm.spider.deal.utils;

import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import com.hankcs.hanlp.seg.common.Term;
import com.hankcs.hanlp.tokenizer.IndexTokenizer;
import com.lowagie.text.html.HtmlWriter;
import java.math.BigInteger;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.safety.Whitelist;
import org.springframework.stereotype.Component;

/**
 * Computes SimHash fingerprints of (HTML) text and compares fingerprints by Hamming distance.
 *
 * <p>The text is stripped of markup and punctuation by {@link #preProcess(String)}, segmented with
 * HanLP's {@code IndexTokenizer}, and every retained term votes on each bit of the fingerprint with
 * a weight that depends on the term's nature (part-of-speech tag).
 *
 * <p><b>Thread-safety:</b> {@link #simHash(String)} stores its latest result in instance fields
 * without synchronization. When an instance is shared between threads (it is declared as a Spring
 * {@code @Component}), use the value returned by {@code simHash} rather than the getters.
 */
@Component
/* loaded from: input_file:BOOT-INF/classes/com/bxm/spider/deal/utils/SimHashHelper.class */
public class SimHashHelper {
    /** Fingerprint width used by the no-argument constructor. */
    private static final int DEFAULT_BIT_LENGTH = 64;

    /** Multiplier of the per-term rolling hash (NOTE(review): looks like the classic CPython string-hash constant). */
    private static final BigInteger HASH_MULTIPLIER = BigInteger.valueOf(1000003L);

    /** Literal sequences removed from the text after the HTML tags have been stripped. */
    private static final String[] STRIP_TOKENS = {"\n", "\r", "\\r", "\\n", "\\t", HtmlWriter.NBSP, " "};

    /** Term natures that are skipped entirely ("w" is presumably HanLP's punctuation tag). */
    private static final Set<String> stopNatures = Sets.newHashSet();

    /** Vote weight per term nature; natures that are not listed weigh 1. */
    private static final Map<String, Integer> weightOfNature = Maps.newHashMap();

    static {
        stopNatures.add("w");
        weightOfNature.put("n", 2);
    }

    /** Number of bits in a fingerprint. */
    private final int bitLength;

    /**
     * Cap for per-word occurrence counting. The counting it used to bound never influenced the
     * fingerprint and has been removed, so this value is currently unused.
     */
    private final int overCount;

    /** Fingerprint of the last non-blank text passed to {@link #simHash(String)}, as a number. */
    private BigInteger intSimHash;

    /** Fingerprint of the last non-blank text as a '0'/'1' string; index {@code i} holds bit {@code i}. */
    private String strSimHash;

    /** The four consecutive segments of {@link #strSimHash}. */
    private List<String> shortSimHashList;

    /** Creates a helper producing 64-bit fingerprints. */
    public SimHashHelper() {
        this(DEFAULT_BIT_LENGTH);
    }

    /**
     * Creates a helper producing fingerprints of the given width.
     *
     * @param i number of bits in each fingerprint
     */
    public SimHashHelper(int i) {
        this.bitLength = i;
        this.overCount = 5;
    }

    /** @return the numeric fingerprint stored by the last {@link #simHash(String)} call, or {@code null} */
    public BigInteger getIntSimHash() {
        return this.intSimHash;
    }

    /** @return the bit-string fingerprint stored by the last {@link #simHash(String)} call, or {@code null} */
    public String getStrSimHash() {
        return this.strSimHash;
    }

    /** @return the four fingerprint segments stored by the last {@link #simHash(String)} call, or {@code null} */
    public List<String> getShortSimHashList() {
        return this.shortSimHashList;
    }

    /** @return the number of bits in each fingerprint */
    public int getBitLength() {
        return this.bitLength;
    }

    /** Manual smoke test: prints the pre-processed form of a sample article. */
    public static void main(String[] strArr) {
        System.out.println(new SimHashHelper().preProcess("<img src=\\\"https:\\/\\/p3.pstatp.com\\/large\\/pgc-image\\/RJ3JVrx57FKyAu\\\" img_width=\\\"750\\\" img_height=\\\"422\\\" inline=\\\"0\\\" alt=\\\"从哪儿来的？亚马逊丛林惊现10吨重11米长座头鲸尸体\\\" onerror=\\\"javascript:errorimg.call(this);\\\"><p>近日，巴西的亚马逊丛林惊现一条近11米长的座头鲸。这条座头鲸被发现时已经死亡，周围已开始出现秃鹫。发现地距离海洋大约15米，生物学家也很困惑为何大约10吨重的座头鲸会出现在丛林，猜测可能它在海洋中就已经死亡，被涨潮的海水冲上了岸。<\\/p><img src=\\\"https:\\/\\/p3.pstatp.com\\/large\\/pgc-image\\/RJ3KpJsFBeRj7H\\\" img_width=\\\"600\\\" img_height=\\\"450\\\" inline=\\\"0\\\" alt=\\\"从哪儿来的？亚马逊丛林惊现10吨重11米长座头鲸尸体\\\" onerror=\\\"javascript:errorimg.call(this);\\\"><p>座头鲸通常在8月至11月出现在巴西东北海岸。经过调查研究，这条座头鲸12个月大，可能是跟随母亲迁徙时和母亲失散。目前，生物学家正在调查它的死因，调查计划耗时十天。也有报纸称座头鲸死于吞食塑料，但最终死因等待尸检结果来确定。<\\/p><img src=\\\"https:\\/\\/p3.pstatp.com\\/large\\/pgc-image\\/RJ3KpK73Qu16HO\\\" img_width=\\\"600\\\" img_height=\\\"337\\\" inline=\\\"0\\\" alt=\\\"从哪儿来的？亚马逊丛林惊现10吨重11米长座头鲸尸体\\\" onerror=\\\"javascript:errorimg.call(this);\\\"><p>由于座头鲸的体积和重量过大，并且受制于发现地位置，研究人员不打算移动它，计划通过采取肌肉和体内寄生虫样本进行研究。该座头鲸的大部分尸体将会被掩埋，它的骨架则会被移至自然历史博物馆供未来进行研究。<\\/p><p>（编辑：刘清扬） <\\/p>"));
    }

    /**
     * Reduces raw (HTML) content to bare text: strips all tags, removes line-break/tab/space
     * sequences (both real characters and their escaped spellings) and removes punctuation and a
     * fixed set of symbols.
     *
     * @param str raw content, may be {@code null} or blank
     * @return the cleaned text, or an empty string when the input is blank
     */
    public String preProcess(String str) {
        if (StringUtils.isBlank(str)) {
            return "";
        }
        String text = Jsoup.clean(str, Whitelist.none());
        for (String token : STRIP_TOKENS) {
            text = text.replace(token, "");
        }
        return text.replaceAll("[\\p{P}+~$`^=|<>～｀＄＾＋＝｜＜＞￥×]", "");
    }

    /**
     * Computes the SimHash fingerprint of the given content and stores it in this instance.
     *
     * <p>When the pre-processed text is blank an empty list is returned and the previously stored
     * fingerprint (if any) is left untouched.
     *
     * @param str raw content to fingerprint
     * @return the bit-string fingerprint split into four consecutive segments of
     *         {@code bitLength / 4} characters (the last segment takes any remainder), or an empty
     *         list for blank content
     */
    public List<String> simHash(String str) {
        String text = preProcess(str);
        if (StringUtils.isBlank(text)) {
            return new ArrayList<>();
        }

        // One signed vote counter per fingerprint bit.
        int[] votes = new int[this.bitLength];
        for (Term term : IndexTokenizer.segment(text)) {
            String nature = term.nature.toString();
            if (stopNatures.contains(nature)) {
                continue;
            }
            // The weight depends only on the term, so look it up once rather than once per bit.
            Integer configuredWeight = weightOfNature.get(nature);
            int weight = configuredWeight != null ? configuredWeight.intValue() : 1;
            BigInteger termHash = hash(term.word);
            for (int bit = 0; bit < this.bitLength; bit++) {
                if (termHash.testBit(bit)) {
                    votes[bit] += weight;
                } else {
                    votes[bit] -= weight;
                }
            }
        }

        // A bit is set when its votes are non-negative (ties resolve to 1).
        BigInteger fingerprint = BigInteger.ZERO;
        StringBuilder bits = new StringBuilder(this.bitLength);
        for (int bit = 0; bit < this.bitLength; bit++) {
            if (votes[bit] >= 0) {
                fingerprint = fingerprint.setBit(bit);
                bits.append('1');
            } else {
                bits.append('0');
            }
        }
        this.strSimHash = bits.toString();
        this.intSimHash = fingerprint;

        int quarter = this.bitLength / 4;
        List<String> segments = new ArrayList<>(4);
        segments.add(bits.substring(0, quarter));
        segments.add(bits.substring(quarter, quarter * 2));
        segments.add(bits.substring(quarter * 2, quarter * 3));
        segments.add(bits.substring(quarter * 3, this.bitLength));
        this.shortSimHashList = segments;
        return segments;
    }

    /**
     * Hashes a single term to a non-negative value using a multiply/xor rolling hash truncated to
     * {@code bitLength} bits after every step and finally xor-ed with the term length.
     *
     * @param str the term; {@code null} or empty yields zero
     * @return the term hash (never negative, because it is masked before the final xor with a
     *         positive length)
     */
    private BigInteger hash(String str) {
        if (str == null || str.isEmpty()) {
            return BigInteger.ZERO;
        }
        BigInteger mask = BigInteger.ONE.shiftLeft(this.bitLength).subtract(BigInteger.ONE);
        BigInteger value = BigInteger.valueOf(str.charAt(0) << 7);
        for (int k = 0; k < str.length(); k++) {
            value = value.multiply(HASH_MULTIPLIER).xor(BigInteger.valueOf(str.charAt(k))).and(mask);
        }
        return value.xor(BigInteger.valueOf(str.length()));
    }

    /**
     * Counts the bit positions in which two numeric fingerprints differ.
     *
     * @param bigInteger first fingerprint
     * @param bigInteger2 second fingerprint
     * @return the number of differing bits
     * @throws IllegalArgumentException if exactly one operand is negative; in two's complement the
     *         operands then differ in infinitely many bits (the previous bit-clearing loop never
     *         terminated for such input)
     */
    public int hammingDistance(BigInteger bigInteger, BigInteger bigInteger2) {
        BigInteger difference = bigInteger.xor(bigInteger2);
        if (difference.signum() < 0) {
            throw new IllegalArgumentException(
                    "Hamming distance is undefined for operands of different sign: "
                            + bigInteger + ", " + bigInteger2);
        }
        // For a non-negative value bitCount() is exactly the number of set bits.
        return difference.bitCount();
    }

    /**
     * Counts the character positions in which two bit strings differ.
     *
     * @param str first bit string
     * @param str2 second bit string
     * @return the number of differing positions, or {@code -1} when the lengths differ
     */
    public int hammingDistance(String str, String str2) {
        int length = str.length();
        if (length != str2.length()) {
            return -1;
        }
        int distance = 0;
        for (int pos = 0; pos < length; pos++) {
            if (str.charAt(pos) != str2.charAt(pos)) {
                distance++;
            }
        }
        return distance;
    }

    /**
     * Converts a bit-string fingerprint (as produced in {@link #getStrSimHash()}) back to its
     * numeric form. Character {@code i} of the string maps to bit {@code i} of the result; only the
     * first {@code bitLength} characters are read and any character other than {@code '1'} counts
     * as 0.
     *
     * @param str bit string with at least {@code bitLength} characters
     * @return the numeric fingerprint
     * @throws IllegalArgumentException if {@code str} is {@code null} or shorter than
     *         {@code bitLength}
     */
    public BigInteger getIntSimHash(String str) {
        if (str == null || str.length() < this.bitLength) {
            throw new IllegalArgumentException(
                    "Expected a bit string of at least " + this.bitLength + " characters but got "
                            + (str == null ? "null" : String.valueOf(str.length())));
        }
        BigInteger fingerprint = BigInteger.ZERO;
        for (int bit = 0; bit < this.bitLength; bit++) {
            if (str.charAt(bit) == '1') {
                fingerprint = fingerprint.setBit(bit);
            }
        }
        return fingerprint;
    }
}
