package com.bxm.localnews.sync.service.impl;

import com.bxm.localnews.sync.dto.ExtractTagsRequestDto;
import com.bxm.localnews.sync.dto.ExtractTagsResponseDto;
import com.bxm.localnews.sync.facade.PythonFeignService;
import com.bxm.localnews.sync.service.SegmentIntegrationService;
import com.bxm.localnews.sync.vo.Keyword;
import com.google.common.collect.Lists;
import java.util.List;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import org.springframework.util.CollectionUtils;

/**
 * {@link SegmentIntegrationService} implementation that cleans the incoming text and then
 * delegates keyword extraction ({@code extractTags}) and word cutting ({@code cut}) to
 * {@link PythonFeignService}.
 *
 * <p>Both entry points return an empty, mutable list when the cleaned text is blank or the
 * remote call yields nothing, and both drop results whose text is 12 characters or longer.
 */
@Service
public class WordSegmentIntegrationServiceImpl implements SegmentIntegrationService {
    private static final Logger log = LogManager.getLogger(WordSegmentIntegrationServiceImpl.class);

    /** A word is kept only when its {@link String#length()} is strictly below this bound. */
    private static final int MAX_WORD_LENGTH_EXCLUSIVE = 12;

    /** Matches anything that looks like an HTML/XML tag: {@code <} ... {@code >}. */
    private static final Pattern HTML_TAG = Pattern.compile("<[^>]*>");

    /** Literal HTML entity for a non-breaking space. */
    private static final String NBSP_ENTITY = "&nbsp;";

    /** Unicode punctuation plus a fixed set of ASCII and full-width symbols to strip. */
    private static final Pattern PUNCTUATION_AND_SYMBOLS =
            Pattern.compile("[\\p{P}+~$`^=|<>～｀＄＾＋＝｜＜＞￥×]");

    /** U+2003 EM SPACE (decimal 8195); normalised to a plain ASCII space. */
    private static final char EM_SPACE = '\u2003';

    private final PythonFeignService pythonFeignService;

    @Autowired
    public WordSegmentIntegrationServiceImpl(PythonFeignService pythonFeignService) {
        this.pythonFeignService = pythonFeignService;
    }

    /**
     * Cleans {@code str} and asks the remote service for its tags.
     *
     * @param str raw text, possibly containing HTML; {@code null} is treated like blank text
     * @param num forwarded unchanged as {@code topN} of the {@link ExtractTagsRequestDto}
     * @return keywords whose name is shorter than 12 characters; never {@code null}
     */
    @Override // com.bxm.localnews.sync.service.SegmentIntegrationService
    public List<Keyword> segTags(String str, Integer num) {
        String cleaned = replaceHTML(str);
        if (StringUtils.isBlank(cleaned)) {
            return Lists.newArrayList();
        }
        ExtractTagsRequestDto request = ExtractTagsRequestDto.builder().content(cleaned).topN(num).build();
        List<ExtractTagsResponseDto> extracted = this.pythonFeignService.extractTags(request);
        if (CollectionUtils.isEmpty(extracted)) {
            return Lists.newArrayList();
        }
        return extracted.stream()
                .map(this::toKeyword)
                .filter(keyword -> satisfyLength(keyword.getName()))
                .collect(Collectors.toList());
    }

    /** Copies the word and its weight (as a {@code double}) from the response DTO into a {@link Keyword}. */
    private Keyword toKeyword(ExtractTagsResponseDto extractTagsResponseDto) {
        Keyword keyword = new Keyword();
        keyword.setName(extractTagsResponseDto.getWord());
        keyword.setTfidfvalue(extractTagsResponseDto.getWeight().doubleValue());
        return keyword;
    }

    /**
     * Cleans {@code str} and asks the remote service to cut it into words.
     *
     * @param str raw text, possibly containing HTML; {@code null} is treated like blank text
     * @return the returned words that are shorter than 12 characters; never {@code null}
     */
    @Override // com.bxm.localnews.sync.service.SegmentIntegrationService
    public List<String> seg(String str) {
        String cleaned = replaceHTML(str);
        if (StringUtils.isBlank(cleaned)) {
            return Lists.newArrayList();
        }
        List<String> words = this.pythonFeignService.cut(cleaned);
        if (CollectionUtils.isEmpty(words)) {
            return Lists.newArrayList();
        }
        return words.stream().filter(this::satisfyLength).collect(Collectors.toList());
    }

    /** Returns {@code true} for a non-null word shorter than 12 characters. */
    private boolean satisfyLength(String str) {
        // A null word coming back from the remote service is simply dropped.
        return str != null && str.length() < MAX_WORD_LENGTH_EXCLUSIVE;
    }

    /**
     * Strips HTML tags, {@code &nbsp;} entities, punctuation and selected symbols, and turns
     * EM SPACE characters into plain spaces.
     *
     * @param str text to clean; may be {@code null}
     * @return the cleaned text, or {@code null} when {@code str} is {@code null} (callers follow
     *     up with a blank check, which covers that case)
     */
    private String replaceHTML(String str) {
        if (str == null) {
            return null;
        }
        String withoutTags = HTML_TAG.matcher(str).replaceAll("");
        // The entity must go BEFORE punctuation stripping: '&' and ';' are both in \p{P}, so
        // stripping first would leave a stray "nbsp" that no longer matches the entity.
        String withoutEntities = withoutTags.replace(NBSP_ENTITY, "");
        String withoutPunctuation = PUNCTUATION_AND_SYMBOLS.matcher(withoutEntities).replaceAll("");
        return withoutPunctuation.replace(EM_SPACE, ' ');
    }
}
