package io.github.yizhiru.thulac4j.process;

import io.github.yizhiru.thulac4j.term.TokenItem;
import io.github.yizhiru.thulac4j.util.CharUtils;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

/* loaded from: input_file:io/github/yizhiru/thulac4j/process/SpecifiedWordCementer.class */
/**
 * Post-processing step that glues together adjacent tokens which the segmenter
 * emitted separately but which belong to one word:
 * <ul>
 *   <li>numeral tokens followed by a time unit (for example a number followed by
 *       {@code "月"}) become a single token tagged {@code "t"};</li>
 *   <li>runs of the same repeatable punctuation token ({@code "—"} or {@code "…"})
 *       become a single token that keeps the part of speech of the last one.</li>
 * </ul>
 * All methods mutate the supplied list in place.
 */
public class SpecifiedWordCementer {
    /** The year unit; it is only merged with a numeral of at least four characters. */
    private static final String YEAR_TIME_UNIT_WORD = "年";

    /** Words that, when preceded by numeral tokens, form a time expression. */
    private static final Set<String> TIME_UNIT_WORDS = new HashSet<String>(
            Arrays.asList(YEAR_TIME_UNIT_WORD, "月", "日", "号", "时", "点", "分", "秒"));

    /** Punctuation tokens whose consecutive occurrences are merged into one token. */
    private static final Set<String> CAN_FORM_REPEATED_WORDS = new HashSet<String>(
            Arrays.asList("—", "…"));

    /**
     * Scans the token list from the end towards the beginning and merges time
     * expressions and repeated punctuation in place.
     *
     * <p>Index 0 is never used as a starting point because a merge always needs a
     * preceding token.
     *
     * @param list the tokens to process; modified in place (tokens may be removed
     *             and replaced)
     */
    public static void cementWord(List<TokenItem> list) {
        int index = list.size() - 1;
        while (index > 0) {
            TokenItem current = list.get(index);
            String word = current.word;
            if (TIME_UNIT_WORDS.contains(word)) {
                // The helper returns the index of the merged token (or the same index
                // when nothing was merged); scanning resumes just before it.
                index = cementTimeWord(list, current, index);
            } else if (CAN_FORM_REPEATED_WORDS.contains(word)) {
                index = cementRepeatedWord(list, current, index);
            }
            index--;
        }
    }

    /**
     * Merges the time-unit token at {@code unitIndex} with the run of numeral tokens
     * immediately before it.
     *
     * @param tokens    the token list, modified in place
     * @param unitItem  the time-unit token located at {@code unitIndex}
     * @param unitIndex the position of {@code unitItem} in {@code tokens}
     * @return the index of the merged token, or {@code unitIndex} unchanged when no
     *         merge took place
     */
    private static int cementTimeWord(List<TokenItem> tokens, TokenItem unitItem, int unitIndex) {
        if (unitIndex < 1) {
            return unitIndex;
        }
        String unit = unitItem.word;
        String number = tokens.get(unitIndex - 1).word;
        if (!isNumeralWord(number)) {
            return unitIndex;
        }
        // A year is only accepted when the numeral directly before it has at least
        // four characters; shorter numerals are left untouched.
        if (unit.equals(YEAR_TIME_UNIT_WORD) && number.length() < 4) {
            return unitIndex;
        }

        tokens.remove(unitIndex);
        StringBuilder merged = new StringBuilder(number).append(unit);

        // Absorb any further numeral tokens to the left. After each removal the
        // partially merged token sits at cursor + 1.
        int cursor = unitIndex - 2;
        while (cursor >= 0) {
            String candidate = tokens.get(cursor).word;
            if (!isNumeralWord(candidate)) {
                break;
            }
            tokens.remove(cursor + 1);
            merged.insert(0, candidate);
            cursor--;
        }

        int mergedIndex = cursor + 1;
        tokens.set(mergedIndex, new TokenItem(merged.toString(), "t"));
        return mergedIndex;
    }

    /**
     * Merges the token at {@code index} with all directly preceding tokens that have
     * exactly the same word.
     *
     * @param tokens the token list, modified in place
     * @param item   the repeatable token located at {@code index}
     * @param index  the position of {@code item} in {@code tokens}
     * @return the index of the merged token, or {@code index} unchanged when the
     *         preceding token is different
     */
    private static int cementRepeatedWord(List<TokenItem> tokens, TokenItem item, int index) {
        String symbol = item.word;
        int cursor = index - 1;
        // symbol is known to be non-null (it was found in a set), so it is used as
        // the receiver of equals to stay safe if a neighbouring word is null.
        if (cursor < 0 || !symbol.equals(tokens.get(cursor).word)) {
            return index;
        }

        // All merged pieces are identical, so appending is equivalent to prepending.
        StringBuilder merged = new StringBuilder(symbol).append(symbol);
        tokens.remove(index);

        cursor--;
        while (cursor >= 0 && symbol.equals(tokens.get(cursor).word)) {
            merged.append(symbol);
            tokens.remove(cursor + 1);
            cursor--;
        }

        int mergedIndex = cursor + 1;
        tokens.set(mergedIndex, new TokenItem(merged.toString(), item.pos));
        return mergedIndex;
    }

    /**
     * Tells whether every character of {@code word} is accepted by
     * {@link CharUtils#isNumeral(char)}.
     *
     * @param word the word to inspect
     * @return {@code true} when {@code word} is non-empty and consists solely of
     *         numeral characters; {@code false} otherwise
     */
    private static boolean isNumeralWord(String word) {
        // An empty word holds no digits and must not count as the numeric part of a
        // time expression.
        if (word.isEmpty()) {
            return false;
        }
        for (int k = 0; k < word.length(); k++) {
            if (!CharUtils.isNumeral(word.charAt(k))) {
                return false;
            }
        }
        return true;
    }
}
