/*
 * Decompiled with CFR 0.152.
 */
package org.thunlp.thulac.postprocess;

import java.io.IOException;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.thunlp.thulac.data.Dat;
import org.thunlp.thulac.data.TaggedWord;
import org.thunlp.thulac.postprocess.IPostprocessPass;
import org.thunlp.thulac.util.CodePointUtils;
import org.thunlp.thulac.util.StringUtils;

/**
 * Post-processing pass that prunes a tagged sentence in place.
 *
 * <p>A word is kept only when all of the following hold:
 * <ul>
 *   <li>its tag is one of the entries in {@code ALLOWED_TAGS};</li>
 *   <li>the word is not contained in the dictionary loaded from {@code xuDatFile};</li>
 *   <li>it is not a {@code "t"}-tagged word for which {@code hasNumber} returns true.</li>
 * </ul>
 * Every other word is removed from the list.
 */
public class FilterPass
implements IPostprocessPass {
    /** Tags a word must carry in order to survive the filter. */
    private static final Set<String> ALLOWED_TAGS = new HashSet<String>(Arrays.asList("n", "np", "ns", "ni", "nz", "v", "a", "id", "t", "uw"));

    /** Dictionary built from {@code xuDatFile}; any word it contains is removed. */
    private Dat xuDat;

    /** Dictionary built from {@code timeDatFile}; consulted as the last step of {@code hasNumber}. */
    private Dat timeDat;

    /**
     * Creates the pass and loads both dictionaries.
     *
     * @param xuDatFile   location handed to the {@link Dat} constructor for the removal dictionary
     * @param timeDatFile location handed to the {@link Dat} constructor for the time dictionary
     * @throws IOException propagated from the {@link Dat} constructor
     */
    public FilterPass(String xuDatFile, String timeDatFile) throws IOException {
        this.xuDat = new Dat(xuDatFile);
        this.timeDat = new Dat(timeDatFile);
    }

    /**
     * Removes every word that fails the filter from {@code sentence}.
     *
     * <p>Does nothing when either dictionary is {@code null} or the sentence is empty.
     *
     * @param sentence the tagged words to filter; modified in place
     */
    @Override
    public void process(List<TaggedWord> sentence) {
        boolean dictionaryMissing = this.xuDat == null || this.timeDat == null;
        if (dictionaryMissing || sentence.isEmpty()) {
            return;
        }
        // Walk from the tail towards the head so that removing an element
        // never shifts the positions that are still to be visited.
        int index = sentence.size();
        while (--index >= 0) {
            if (this.shouldDiscard(sentence.get(index))) {
                sentence.remove(index);
            }
        }
    }

    /**
     * Decides whether a single tagged word has to be dropped.
     *
     * <p>The checks run in a fixed order and stop at the first one that applies:
     * tag not allowed, word present in {@code xuDat}, then "t"-tagged word with a number.
     */
    private boolean shouldDiscard(TaggedWord entry) {
        String text = entry.word;
        String label = entry.tag;
        if (!ALLOWED_TAGS.contains(label)) {
            return true;
        }
        if (this.xuDat.contains(text)) {
            return true;
        }
        return "t".equals(label) && this.hasNumber(text);
    }

    /**
     * Reports whether {@code word} counts as containing a number.
     *
     * <p>This is the case when the word has at least one code point found in
     * {@code CodePointUtils.DIGITS}, or at least two code points found in
     * {@code CodePointUtils.CHINESE_DIGITS}, or when {@code timeDat} contains the word.
     *
     * @param word the word to inspect
     * @return {@code true} if any of the conditions above is met
     */
    private boolean hasNumber(String word) {
        boolean chineseDigitSeen = false;
        for (int codePoint : StringUtils.toCodePoints(word)) {
            if (CodePointUtils.DIGITS.indexOf(codePoint) != -1) {
                return true;
            }
            if (CodePointUtils.CHINESE_DIGITS.indexOf(codePoint) != -1) {
                // A single Chinese digit is not enough; the second one settles it.
                if (chineseDigitSeen) {
                    return true;
                }
                chineseDigitSeen = true;
            }
        }
        return this.timeDat.contains(word);
    }
}

