/*
 * Decompiled with CFR 0.152.
 */
package com.bxm.localnews.analysis.impl;

import com.bxm.localnews.analysis.SegmenterService;
import com.google.common.collect.Lists;
import java.util.List;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.apdplat.word.WordSegmenter;
import org.apdplat.word.segmentation.SegmentationAlgorithm;
import org.apdplat.word.segmentation.Word;
import org.springframework.stereotype.Service;

/**
 * {@link SegmenterService} implementation that delegates to {@link WordSegmenter}
 * using the {@link SegmentationAlgorithm#MinimalWordCount} algorithm.
 *
 * <p>Before segmentation the input is reduced to bare text: HTML-like tags, the
 * {@code &nbsp} entity, punctuation, a fixed set of ASCII/full-width symbols and
 * plain space characters are all removed.
 */
@Service
public class WordSegmenterServiceImpl
implements SegmenterService {
    /** Matches anything shaped like a markup tag: {@code <} ... {@code >}. */
    private static final Pattern HTML_TAG = Pattern.compile("<[^>]*>");

    /** Matches the {@code &nbsp} entity, with or without its trailing semicolon. */
    private static final Pattern NBSP_ENTITY = Pattern.compile("&nbsp;?");

    /** Matches any Unicode punctuation character plus a fixed list of ASCII and full-width symbols. */
    private static final Pattern PUNCTUATION_AND_SYMBOLS =
            Pattern.compile("[\\p{P}+~$`^=|<>\uff5e\uff40\uff04\uff3e\uff0b\uff1d\uff5c\uff1c\uff1e\uffe5\u00d7]");

    /**
     * Splits {@code content} into words.
     *
     * @param content raw text, possibly containing HTML markup; may be {@code null}
     * @return the text of each word produced by the segmenter, in segmenter order;
     *         an empty mutable list when {@code content} is {@code null}, empty, or
     *         contains nothing but markup/punctuation/spaces
     */
    @Override
    public List<String> seg(String content) {
        if (content == null || content.isEmpty()) {
            return Lists.newArrayList();
        }
        String plainText = this.replaceHTML(content);
        if (plainText.isEmpty()) {
            // Nothing left to segment once markup and punctuation are gone.
            return Lists.newArrayList();
        }
        List<Word> words = WordSegmenter.seg(plainText, SegmentationAlgorithm.MinimalWordCount);
        if (words == null || words.isEmpty()) {
            return Lists.newArrayList();
        }
        return words.stream().map(Word::getText).collect(Collectors.toList());
    }

    /**
     * Strips markup and noise characters so that only segmentable text remains.
     *
     * @param content non-null raw text
     * @return {@code content} without tags, {@code &nbsp} entities, punctuation,
     *         the listed symbols, and plain space characters
     */
    private String replaceHTML(String content) {
        String text = HTML_TAG.matcher(content).replaceAll("");
        // The entity must be removed BEFORE the punctuation pass: '&' and ';' are
        // themselves punctuation, so stripping them first would leave a stray
        // "nbsp" token in the text handed to the segmenter.
        text = NBSP_ENTITY.matcher(text).replaceAll("");
        text = PUNCTUATION_AND_SYMBOLS.matcher(text).replaceAll("");
        return text.replace(" ", "");
    }
}

