package com.bxm.localnews.analysis.impl;

import com.bxm.localnews.analysis.SegmenterService;
import com.google.common.collect.Lists;
import java.util.List;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.apdplat.word.WordSegmenter;
import org.apdplat.word.segmentation.SegmentationAlgorithm;
import org.springframework.stereotype.Service;

@Service
/* loaded from: input_file:com/bxm/localnews/analysis/impl/WordSegmenterServiceImpl.class */
public class WordSegmenterServiceImpl implements SegmenterService {
    @Override // com.bxm.localnews.analysis.SegmenterService
    public List<String> seg(String str) {
        List seg = WordSegmenter.seg(replaceHTML(str), SegmentationAlgorithm.MinimalWordCount);
        return seg.size() > 0 ? (List) seg.stream().map((v0) -> {
            return v0.getText();
        }).collect(Collectors.toList()) : Lists.newArrayList();
    }

    private String replaceHTML(String str) {
        return str.replaceAll("<[^>]*>", "").replaceAll("[\\p{P}+~$`^=|<>～｀＄＾＋＝｜＜＞￥×]", "").replaceAll("&nbsp", "").replace(" ", "");
    }
}
