package io.github.yizhiru.thulac4j.process;

import io.github.yizhiru.thulac4j.term.AnnotatedTerms;
import io.github.yizhiru.thulac4j.term.CharType;
import io.github.yizhiru.thulac4j.term.POC;
import java.util.function.Predicate;

/* loaded from: input_file:io/github/yizhiru/thulac4j/process/RuleAnnotator.class */
/**
 * Rule-based pre-annotation of raw text.
 *
 * <p>{@link #annotate(String, boolean)} walks the raw characters once, looks at the
 * {@link CharType} reported by {@link AnnotatedTerms} for each of them and records a
 * {@link POC} tag for every character that is not typed
 * {@code SPACE_OR_CONTROL_CHAR}. Blank characters are never passed to
 * {@code append}/{@code appendAhead}; they only cause the tag before and after them
 * to be constrained.
 *
 * <p>The exact semantics of {@code append}, {@code appendAhead} and the
 * {@code intersect*}/{@code set*} calls live in {@link AnnotatedTerms} and are not
 * restated here.
 */
public final class RuleAnnotator {
    /** Opening title bracket, code point U+300A (decimal 12298). */
    private static final char TITLE_OPEN = '\u300A';

    /** Closing title bracket, code point U+300B (decimal 12299). */
    private static final char TITLE_CLOSE = '\u300B';

    /** Largest accepted difference between the last and first raw index of a title. */
    private static final int MAX_TITLE_SPAN = 8;

    /**
     * Builds the annotated view of {@code str}.
     *
     * @param str raw text to annotate
     * @param z   when {@code true}, text enclosed between a U+300A and a U+300B bracket
     *            is checked with {@code isPossibleTitle} and, if accepted, re-tagged by
     *            {@code setTitleWordPoc}
     * @return the populated {@link AnnotatedTerms}
     */
    public static AnnotatedTerms annotate(String str, boolean z) {
        final int rawLength = str.length();
        final AnnotatedTerms terms = new AnnotatedTerms(str.toCharArray());
        final boolean titleDetection = z;

        boolean titleOpened = false;
        int titleOpenIndex = 0;

        int pos = 0;
        while (pos < rawLength) {
            final CharType type = terms.getCharTypeByIndex(pos);

            if (type == CharType.SPACE_OR_CONTROL_CHAR) {
                // A blank closes whatever precedes it, then the whole blank run is skipped.
                terms.intersectLastPoc(POC.END_OR_SINGLE_POC);
                pos++;
                while (pos < rawLength
                        && terms.getCharTypeByIndex(pos) == CharType.SPACE_OR_CONTROL_CHAR) {
                    pos++;
                }
                // pos now sits on a non-blank char (or past the end), so only the bound matters.
                if (pos < rawLength) {
                    terms.appendAhead(pos, POC.BEGIN_OR_SINGLE_POC);
                }
            } else if (type == CharType.SINGLE_PUNCTUATION_CHAR) {
                terms.intersectLastPoc(POC.END_OR_SINGLE_POC);
                terms.append(pos, POC.PUNCTUATION_POC);

                if (titleDetection) {
                    final char raw = terms.getRawCharByIndex(pos);
                    if (raw == TITLE_OPEN) {
                        // A later opening bracket simply replaces an earlier unmatched one.
                        titleOpened = true;
                        titleOpenIndex = pos;
                    } else if (titleOpened && raw == TITLE_CLOSE) {
                        final int firstInside = titleOpenIndex + 1;
                        final int lastInside = pos - 1;
                        if (isPossibleTitle(terms, firstInside, lastInside)) {
                            // The closing bracket was appended just above, hence "- 2" to
                            // address the entry before it (presumably the last title char).
                            setTitleWordPoc(terms, firstInside, lastInside,
                                    terms.getAnnotatedLength() - 2);
                        }
                        titleOpened = false;
                    }
                }

                pos++;
                announceNextIfNotBlank(terms, pos, rawLength);
            } else if (type == CharType.ENGLISH_LETTER_CHAR) {
                pos = processWord(terms, pos, RuleAnnotator::isPartOfLetterWord, false);
            } else if (type == CharType.ARABIC_NUMERAL_CHAR) {
                pos = processWord(terms, pos, RuleAnnotator::isPartOfNumeral, true);
            } else if (type == CharType.HAN_ZI_CHAR || type == CharType.CHINESE_NUMERAL_CHAR) {
                // No rule applies here: the character just gets the default tag.
                terms.append(pos, POC.DEFAULT_POC);
                pos++;
            } else {
                // Everything else stands alone; only the tag differs for the two
                // punctuation-like types.
                final boolean punctuationLike = type == CharType.EX_SINGLE_PUNCTUATION_CHAR
                        || type == CharType.NUMERAL_PUNCTUATION_CHAR;
                setCurrentAsSingle(pos, terms,
                        punctuationLike ? POC.PUNCTUATION_POC : POC.SINGLE_POC);
                pos++;
            }
        }

        // Constrain both ends of the annotated sequence.
        terms.intersectPocByIndex(0, POC.BEGIN_OR_SINGLE_POC);
        terms.intersectLastPoc(POC.END_OR_SINGLE_POC);
        return terms;
    }

    /**
     * Calls {@code appendAhead(next, BEGIN_OR_SINGLE_POC)} when {@code next} is a valid raw
     * index whose character is not typed {@code SPACE_OR_CONTROL_CHAR}; otherwise does nothing.
     */
    private static void announceNextIfNotBlank(AnnotatedTerms terms, int next, int rawLength) {
        if (next < rawLength
                && terms.getCharTypeByIndex(next) != CharType.SPACE_OR_CONTROL_CHAR) {
            terms.appendAhead(next, POC.BEGIN_OR_SINGLE_POC);
        }
    }

    /**
     * Records the character at {@code index} with {@code tag} as a stand-alone unit: the
     * previous tag is intersected with {@code END_OR_SINGLE_POC} and the following non-blank
     * character, if any, is announced with {@code BEGIN_OR_SINGLE_POC}.
     */
    private static void setCurrentAsSingle(int index, AnnotatedTerms terms, POC tag) {
        terms.intersectLastPoc(POC.END_OR_SINGLE_POC);
        terms.append(index, tag);
        announceNextIfNotBlank(terms, index + 1, terms.getRawCharsLength());
    }

    /**
     * Tells whether the raw range {@code [first, last]} may be treated as a title.
     *
     * <p>The range is rejected when {@code last - first} is not in {@code 1..8}, or when any
     * character in it is typed {@code SINGLE_PUNCTUATION_CHAR} or
     * {@code SPACE_OR_CONTROL_CHAR}.
     */
    private static boolean isPossibleTitle(AnnotatedTerms terms, int first, int last) {
        final int span = last - first;
        if (span <= 0 || span > MAX_TITLE_SPAN) {
            return false;
        }
        for (int k = first; k <= last; k++) {
            final CharType type = terms.getCharTypeByIndex(k);
            final boolean breaksTitle = type == CharType.SINGLE_PUNCTUATION_CHAR
                    || type == CharType.SPACE_OR_CONTROL_CHAR;
            if (breaksTitle) {
                return false;
            }
        }
        return true;
    }

    /**
     * Re-tags a title span as BEGIN, MIDDLE..., END.
     *
     * @param rawFirst      raw index of the first title character
     * @param rawLast       raw index of the last title character
     * @param annotatedLast annotated index of the last title character
     */
    private static void setTitleWordPoc(AnnotatedTerms terms, int rawFirst, int rawLast,
                                        int annotatedLast) {
        if (rawFirst == rawLast) {
            // NOTE(review): the only caller guards this with isPossibleTitle, which rejects
            // a span of 0, so this branch is currently not reached from annotate.
            terms.intersectPocByIndex(annotatedLast, POC.SINGLE_POC);
            return;
        }
        // Map the raw start to annotated coordinates by applying the same width backwards
        // from the annotated end.
        final int annotatedFirst = (annotatedLast - rawLast) + rawFirst;
        terms.setPocByIndex(annotatedFirst, POC.BEGIN_POC);
        for (int k = annotatedFirst + 1; k < annotatedLast; k++) {
            terms.setPocByIndex(k, POC.MIDDLE_POC);
        }
        terms.setPocByIndex(annotatedLast, POC.END_POC);
    }

    /**
     * Returns {@code true} for the character types that {@code annotate} lets extend a run
     * started by an English letter: English letters, Arabic numerals and
     * {@code EX_SINGLE_PUNCTUATION_CHAR}.
     */
    public static boolean isPartOfLetterWord(CharType charType) {
        return charType == CharType.ENGLISH_LETTER_CHAR
                || charType == CharType.ARABIC_NUMERAL_CHAR
                || charType == CharType.EX_SINGLE_PUNCTUATION_CHAR;
    }

    /**
     * Returns {@code true} for the character types that {@code annotate} lets extend a run
     * started by an Arabic numeral: Chinese numerals, Arabic numerals and
     * {@code NUMERAL_PUNCTUATION_CHAR}.
     */
    public static boolean isPartOfNumeral(CharType charType) {
        return charType == CharType.CHINESE_NUMERAL_CHAR
                || charType == CharType.ARABIC_NUMERAL_CHAR
                || charType == CharType.NUMERAL_PUNCTUATION_CHAR;
    }

    /**
     * Tags a maximal run of characters accepted by {@code belongsToRun}, starting at
     * {@code start}, as SINGLE or as BEGIN, MIDDLE..., END.
     *
     * @param terms        annotation target
     * @param start        raw index of the first character of the run
     * @param belongsToRun decides whether a following character type continues the run
     * @param numeral      {@code true} selects the {@code *_NUMERAL_POC} tags, {@code false}
     *                     the plain ones
     * @return raw index of the first character after the run
     */
    private static int processWord(AnnotatedTerms terms, int start,
                                   Predicate<CharType> belongsToRun, boolean numeral) {
        final POC beginTag = numeral ? POC.BEGIN_NUMERAL_POC : POC.BEGIN_POC;
        final POC middleTag = numeral ? POC.MIDDLE_NUMERAL_POC : POC.MIDDLE_POC;
        final POC endTag = numeral ? POC.END_NUMERAL_POC : POC.END_POC;
        final POC singleTag = numeral ? POC.SINGLE_NUMERAL_POC : POC.SINGLE_POC;

        terms.intersectLastPoc(POC.END_OR_SINGLE_POC);
        final int rawLength = terms.getRawCharsLength();

        int cursor = start + 1;
        final boolean standsAlone = cursor == rawLength
                || (cursor < rawLength
                        && !belongsToRun.test(terms.getCharTypeByIndex(cursor)));

        if (standsAlone) {
            terms.append(start, singleTag);
        } else {
            terms.append(start, beginTag);
            // cursor is on a character of the run; it is MIDDLE as long as the one after
            // it also belongs to the run, otherwise it is the END.
            while (cursor + 1 < rawLength
                    && belongsToRun.test(terms.getCharTypeByIndex(cursor + 1))) {
                terms.append(cursor, middleTag);
                cursor++;
            }
            terms.append(cursor, endTag);
            cursor++;
        }

        announceNextIfNotBlank(terms, cursor, rawLength);
        return cursor;
    }
}
