package com.bxm.spider.deal.utils;

import com.bxm.spider.deal.common.constant.NewsConstant;
import com.bxm.spider.deal.model.dao.UrlRuler;
import com.lowagie.text.html.HtmlTags;
import java.util.Iterator;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.safety.Whitelist;
import org.jsoup.select.Elements;
import org.springframework.cloud.commons.httpclient.ApacheHttpClientConnectionManagerFactory;
import org.springframework.stereotype.Component;

/**
 * HTML clean-up helpers for scraped article content.
 *
 * <p>All methods sanitise markup through jsoup's {@link Whitelist} and return plain
 * {@code String}s; the class keeps no state.
 */
@Component
/* loaded from: input_file:BOOT-INF/classes/com/bxm/spider/deal/utils/ContentUtil.class */
public class ContentUtil {
    private static final String IMG = "img";
    private static final String VIDEO = "video";
    private static final String SRC = "src";
    private static final String DATA_SRC = "data-src";
    private static final String HTTP = "http";
    // NOTE(review): scheme value is taken from Spring Cloud's constant, exactly as before.
    private static final String HTTPS = ApacheHttpClientConnectionManagerFactory.HTTPS_SCHEME;
    // NOTE(review): iText constant used as the tag name to search for; presumably "p" - confirm.
    private static final String PARAGRAPH_TAG = HtmlTags.PARAGRAPH;

    /**
     * Extracts the article body from a raw page using the given rule and strips inline
     * {@code style="..."} attributes from the extracted fragment.
     *
     * @param str      raw HTML of the page
     * @param urlRuler extraction rule; its {@code getRuler()} value decides whether extraction runs
     * @return {@code null} when {@code str} is blank or {@code urlRuler} is {@code null};
     *         {@code str} unchanged when the rule is blank or extraction yields nothing;
     *         otherwise the extracted fragment without {@code style} attributes
     */
    public String wechatDetailPreproccess(String str, UrlRuler urlRuler) {
        if (StringUtils.isBlank(str) || urlRuler == null) {
            return null;
        }
        if (StringUtils.isBlank(urlRuler.getRuler())) {
            return str;
        }
        String extracted = AnalyzeUtils.parseHtmlStr(str, urlRuler);
        if (StringUtils.isBlank(extracted)) {
            return str;
        }
        return extracted.replaceAll("style=\".*?\"", "");
    }

    /**
     * Sanitises article HTML and trims it to the span running from the first paragraph
     * that has text to the last paragraph that has text.
     *
     * <p>Content containing a {@code <video} tag after sanitising is returned as-is
     * (no trimming, no whitespace normalisation).
     *
     * @param str raw article HTML
     * @return the sanitised, trimmed HTML with CR/LF/TAB replaced by spaces; an empty string
     *         when {@code str} is blank or no paragraph contains text
     */
    public String weChatFormatContent(String str) {
        if (StringUtils.isBlank(str)) {
            return "";
        }
        Whitelist whitelist = Whitelist.basic()
                .addTags(IMG, VIDEO, NewsConstant.SOURCE)
                .addAttributes(IMG, DATA_SRC, SRC)
                .addAttributes(VIDEO, "controls")
                .addAttributes(NewsConstant.SOURCE, SRC)
                .addProtocols(IMG, DATA_SRC, HTTP, HTTPS)
                .addProtocols(IMG, SRC, HTTP, HTTPS)
                .addProtocols(NewsConstant.SOURCE, SRC, HTTP, HTTPS)
                .removeTags("a", "span");
        String clean = Jsoup.clean(str, whitelist);
        if (clean.contains("<video")) {
            return clean;
        }

        Document parsed = Jsoup.parse(clean);
        String html = parsed.toString();
        Elements paragraphs = parsed.getElementsByTag(PARAGRAPH_TAG);

        String firstMarkup = null;
        for (Element paragraph : paragraphs) {
            if (paragraph.hasText()) {
                firstMarkup = paragraph.toString();
                break;
            }
        }
        if (firstMarkup == null) {
            // No paragraph carries text: nothing to keep.
            return "";
        }

        String lastMarkup = firstMarkup;
        for (int idx = paragraphs.size() - 1; idx >= 0; idx--) {
            Element paragraph = paragraphs.get(idx);
            if (paragraph.hasText()) {
                lastMarkup = paragraph.toString();
                break;
            }
        }

        int start = html.indexOf(firstMarkup);
        // Search from the end: identical markup may also occur earlier in the document, and
        // matching the first occurrence would cut away everything after it.
        int lastStart = html.lastIndexOf(lastMarkup);
        if (start < 0 || lastStart < start) {
            // The serialised paragraph could not be located in the serialised document;
            // keep the untrimmed sanitised content rather than failing or cutting at a wrong offset.
            return replaceLineWhitespace(clean);
        }
        return replaceLineWhitespace(html.substring(start, lastStart + lastMarkup.length()));
    }

    /**
     * Sanitises HTML for use as an abstract: basic tags plus {@code img[src]} with
     * http/https sources, with {@code a} and {@code span} removed.
     *
     * @param str raw HTML
     * @return the sanitised HTML with CR/LF/TAB replaced by spaces and attribute-less
     *         {@code <img>} tags removed; an empty string when {@code str} is blank
     */
    public String AbstractFormatContent(String str) {
        if (StringUtils.isBlank(str)) {
            return "";
        }
        Whitelist whitelist = Whitelist.basic()
                .addTags(IMG)
                .addAttributes(IMG, SRC)
                .addProtocols(IMG, SRC, HTTP, HTTPS)
                .removeTags("a", "span");
        // An <img> left without any attribute has lost its source, so it is dropped.
        return replaceLineWhitespace(Jsoup.clean(str, whitelist)).replace("<img>", "");
    }

    /** Replaces every carriage return, line feed and tab with a single space. */
    private static String replaceLineWhitespace(String html) {
        return html.replaceAll("[\r\n\t]", " ");
    }
}
