package com.bxm.spider.deal.utils;

import com.bxm.spider.deal.model.dao.UrlRuler;
import com.lowagie.text.html.HtmlTags;
import com.lowagie.text.html.HtmlWriter;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.stereotype.Component;

/**
 * HTML clean-up helpers applied to crawled article markup.
 *
 * <p>The class holds no instance state; every method works only on its arguments.
 */
@Component
public class ContentUtil {

    /** Regex handed to {@code RegexUtils} to capture the {@code src} value of {@code <img>} tags. */
    private static final String IMG_SRC_REGEX = "<img.*?src=[\"|'](.*?)[\"|'].*?>";

    /** Placeholder left behind for every image once tag attributes have been stripped. */
    private static final String BARE_IMG_TAG = "<img>";

    /** Double-quoted inline {@code style} attribute. */
    private static final Pattern DOUBLE_QUOTED_STYLE = Pattern.compile("style=\".*?\"");

    /** Any opening (non-closing) tag together with its attributes; group 1 is the tag name. */
    private static final Pattern OPENING_TAG = Pattern.compile("<(?!/)(\\w*)\\s*.*?>");

    private static final Pattern SECTION_TAG = Pattern.compile("</?section>");
    private static final Pattern SVG_ELEMENT = Pattern.compile("<svg>.*?</svg>");
    private static final Pattern SPAN_TAG = Pattern.compile("</?span>");
    private static final Pattern LINE_CONTROL_CHARS = Pattern.compile("\r|\n|\t");

    private static final Pattern HEIGHT_ATTRIBUTE = Pattern.compile("height=[\"|'].*?[\"|']");
    private static final Pattern WIDTH_ATTRIBUTE = Pattern.compile("width=[\"|'].*?[\"|']");
    private static final Pattern STYLE_ATTRIBUTE = Pattern.compile("style=[\"|'].*?[\"|']");

    /**
     * Extracts the article fragment selected by {@code urlRuler} and strips double-quoted
     * inline {@code style} attributes from it.
     *
     * @param str      raw page HTML
     * @param urlRuler extraction rule; its {@code getRuler()} value decides whether extraction runs
     * @return {@code null} when {@code str} is blank or {@code urlRuler} is {@code null};
     *         {@code str} unchanged when the rule is blank or extraction yields a blank result;
     *         otherwise the extracted fragment without {@code style="..."} attributes
     */
    public String wechatDetailPreproccess(String str, UrlRuler urlRuler) {
        if (StringUtils.isBlank(str) || urlRuler == null) {
            return null;
        }
        if (StringUtils.isBlank(urlRuler.getRuler())) {
            return str;
        }
        String extracted = AnalyzeUtils.parseHtmlStr(str, urlRuler);
        if (StringUtils.isBlank(extracted)) {
            return str;
        }
        return DOUBLE_QUOTED_STYLE.matcher(extracted).replaceAll("");
    }

    /**
     * Normalises article HTML: removes all tag attributes except image sources, drops
     * {@code section}, {@code span}, {@code svg} and {@code br} markup as well as
     * {@code &nbsp;} entities, and keeps only the range from the first to the last
     * {@code <p>} element that contains text. Line breaks and tabs in the result are
     * replaced by spaces.
     *
     * @param str article HTML; may be {@code null}
     * @return the normalised fragment, or an empty string when the input is blank, when no
     *         paragraph contains text, or when images and extracted sources cannot be paired
     */
    public String weChatFormatContent(String str) {
        if (StringUtils.isBlank(str)) {
            // Previously a null input raised a NullPointerException; blank input already produced "".
            return "";
        }

        // Presumably one captured src per <img> tag, in document order - TODO confirm the
        // meaning of the "1"/true arguments against RegexUtils.
        List<String> imageSources = RegexUtils.getMatchingList(str, IMG_SRC_REGEX, "1", true);

        // Reduce every opening tag to its bare name; this also wipes the image sources,
        // which are re-attached right after.
        String bareMarkup = OPENING_TAG.matcher(str).replaceAll("<$1>");
        String withImages = restoreImageSources(bareMarkup, imageSources);

        String cleaned = SECTION_TAG.matcher(withImages).replaceAll("");
        cleaned = cleaned.replace(HtmlWriter.NBSP, "");
        // NOTE(review): '.' does not match line terminators here, so an <svg> element that
        // spans several lines is left in place - confirm whether that is intended.
        cleaned = SVG_ELEMENT.matcher(cleaned).replaceAll("");
        cleaned = SPAN_TAG.matcher(cleaned).replaceAll("");
        cleaned = cleaned.replace("<br>", "").trim();

        String paragraphRange = cutToTextParagraphs(Jsoup.parse(cleaned));
        return LINE_CONTROL_CHARS.matcher(paragraphRange).replaceAll(" ");
    }

    /**
     * Removes {@code height}, {@code width} and {@code style} attributes and turns
     * {@code &nbsp;} entities into plain spaces.
     *
     * @param str HTML fragment; may be {@code null}
     * @return the cleaned fragment, or an empty string when {@code str} is blank
     */
    public String AbstractFormatContent(String str) {
        if (StringUtils.isBlank(str)) {
            return "";
        }
        String cleaned = HEIGHT_ATTRIBUTE.matcher(str).replaceAll("");
        cleaned = WIDTH_ATTRIBUTE.matcher(cleaned).replaceAll("");
        cleaned = STYLE_ATTRIBUTE.matcher(cleaned).replaceAll("");
        // The entity used to be misspelled as "&nbps;", so real non-breaking spaces were never replaced.
        return cleaned.replace("&nbsp;", " ");
    }

    /** Re-attaches the captured image sources to the bare {@code <img>} placeholders, in order. */
    private static String restoreImageSources(String bareMarkup, List<String> imageSources) {
        // Limit -1 keeps trailing empty pieces; without it, markup that ends with an image
        // yields one piece too few and the whole content was discarded.
        String[] pieces = bareMarkup.split(BARE_IMG_TAG, -1);
        int imageCount = imageSources == null ? 0 : imageSources.size();
        if (pieces.length - 1 != imageCount) {
            // NOTE(review): unchanged from the previous behaviour - when placeholders and
            // sources cannot be paired one-to-one the content is dropped entirely.
            return "";
        }
        StringBuilder rebuilt = new StringBuilder(bareMarkup.length());
        for (int i = 0; i < imageCount; i++) {
            rebuilt.append(pieces[i])
                    .append("<img src=\"")
                    .append(imageSources.get(i))
                    .append("\" />");
        }
        return rebuilt.append(pieces[imageCount]).toString();
    }

    /** Returns the serialised document cut down to the first..last {@code <p>} that has text. */
    private static String cutToTextParagraphs(Document parsed) {
        String html = parsed.toString();
        Elements paragraphs = parsed.getElementsByTag(HtmlTags.PARAGRAPH);

        Element firstWithText = null;
        Element lastWithText = null;
        for (Element paragraph : paragraphs) {
            if (paragraph.hasText()) {
                if (firstWithText == null) {
                    firstWithText = paragraph;
                }
                lastWithText = paragraph;
            }
        }
        if (firstWithText == null) {
            return "";
        }

        int start = html.indexOf(firstWithText.toString());
        if (start < 0) {
            // The paragraph's own serialisation was not found verbatim; keep the leading part
            // rather than failing with an index error.
            start = 0;
        }

        String lastHtml = lastWithText.toString();
        // Search from the end: a closing paragraph whose markup duplicates an earlier one
        // must not truncate the result at the earlier occurrence.
        int lastAt = html.lastIndexOf(lastHtml);
        int end = lastAt < 0 ? html.length() : lastAt + lastHtml.length();

        return html.substring(start, Math.max(start, end));
    }
}
