package com.hankcs.hanlp.mining.word2vec;

import java.io.File;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.Arrays;
import java.util.Map;

/* loaded from: input_file:BOOT-INF/lib/hanlp-portable-1.7.1.jar:com/hankcs/hanlp/mining/word2vec/Corpus.class */
/**
 * Base class for a word2vec training corpus.
 *
 * <p>It owns the vocabulary array, the word-to-index map, the index remapping table produced by
 * {@link #sortVocab()} and the per-word binary codes produced by {@link #createBinaryTree()}.
 * Subclasses provide the actual word stream through {@link #nextWord()}.
 *
 * <p>Nothing in this class is synchronized.
 */
public abstract class Corpus {
    /** Returned by {@link #searchVocab(String)} for a null word or a word absent from the map. */
    private static final int WORD_NOT_FOUND = -1;

    /** Returned by {@link #readWordIndex()} when no word was read and {@link #eoc} is set. */
    private static final int NO_WORD_AT_END_OF_CORPUS = -2;

    /** Returned by {@link #readWordIndex()} when no word was read but {@link #eoc} is not set. */
    private static final int NO_WORD = -3;

    /** Stored in {@link #table} by {@link #sortVocab()} for words below the minimum count. */
    private static final int DISCARDED_WORD = -4;

    /** Initial value of {@link #vocabMaxSize}. */
    private static final int INITIAL_VOCAB_MAX_SIZE = 1000;

    /** Amount by which {@link #vocabMaxSize} grows whenever the vocabulary array is nearly full. */
    private static final int VOCAB_GROWTH_STEP = 1000;

    /** Size of the scratch buffers used by {@link #createBinaryTree()} for a single word's path. */
    private static final int MAX_CODE_LENGTH = 40;

    /** File removed by {@link #close()}; never assigned by this base class. */
    protected File cacheFile;

    /** Training configuration; only set by {@link #Corpus(Config)}. */
    protected Config config;

    /** Running word count; {@link #sortVocab()} recomputes it from the retained words. */
    protected int trainWords = 0;

    /** Number of live entries at the front of {@link #vocab}. */
    protected int vocabSize;

    /** Capacity target used by {@link #addWordToVocab(String)} when growing {@link #vocab}. */
    protected int vocabMaxSize = INITIAL_VOCAB_MAX_SIZE;

    /** Vocabulary entries; not allocated by this base class. */
    protected VocabWord[] vocab;

    /** Maps a word to its index in {@link #vocab}; set to null by {@link #sortVocab()}. */
    protected Map<String, Integer> vocabIndexMap;

    /** End-of-corpus flag; starts as true and is cleared by {@link #rewind(int, int)}. */
    protected boolean eoc = true;

    /** Character encoding made available to subclasses; defaults to UTF-8. */
    protected Charset encoding = Charset.forName("UTF-8");

    /**
     * Built by {@link #sortVocab()}: indexed by the position a word had in the index map before
     * sorting, holding either its position after sorting or {@code -4} if it was discarded.
     */
    protected int[] table;

    /**
     * Creates an empty corpus bound to the given configuration.
     *
     * @param config training configuration, stored as-is
     * @throws IOException never thrown here; declared so subclasses constructors may do I/O
     */
    public Corpus(Config config) throws IOException {
        this.config = config;
    }

    /**
     * Creates a corpus that shares vocabulary state with another one.
     *
     * <p>The vocabulary array, index map and table are shared by reference, not copied.
     * {@link #config} and {@link #cacheFile} are intentionally left null, so calling
     * {@link #close()} on the copy does not delete the source's cache file.
     *
     * @param corpus corpus whose vocabulary state is shared
     * @throws IOException never thrown here; declared so subclasses constructors may do I/O
     */
    public Corpus(Corpus corpus) throws IOException {
        this.trainWords = corpus.trainWords;
        this.vocabSize = corpus.vocabSize;
        this.vocab = corpus.vocab;
        this.vocabIndexMap = corpus.vocabIndexMap;
        this.table = corpus.table;
    }

    /**
     * Reports the end-of-corpus flag.
     *
     * @return the current value of {@link #eoc}
     */
    public boolean endOfCorpus() {
        return this.eoc;
    }

    /**
     * Appends a new word to the vocabulary and registers it in the index map.
     *
     * <p>No check is made for the word already being present. Decompiler note: the access
     * modifier was originally {@code protected}.
     *
     * @param str the word to add
     * @return the index assigned to the word
     */
    public int addWordToVocab(String str) {
        final int index = this.vocabSize;
        this.vocab[index] = new VocabWord(str);
        this.vocabSize++;
        // Grow ahead of need so the next insertion always has a free slot.
        if (this.vocabSize + 2 >= this.vocabMaxSize) {
            this.vocabMaxSize += VOCAB_GROWTH_STEP;
            VocabWord[] grown = new VocabWord[this.vocabMaxSize];
            System.arraycopy(this.vocab, 0, grown, 0, this.vocabSize);
            this.vocab = grown;
        }
        this.vocabIndexMap.put(str, index);
        return index;
    }

    /**
     * @return the current value of {@link #trainWords}
     */
    public int getTrainWords() {
        return this.trainWords;
    }

    /**
     * @return the number of live vocabulary entries
     */
    public int getVocabSize() {
        return this.vocabSize;
    }

    /**
     * @return the internal vocabulary array itself (not a copy); only the first
     *     {@link #getVocabSize()} entries are live
     */
    public VocabWord[] getVocab() {
        return this.vocab;
    }

    /**
     * @return the internal word-to-index map itself (not a copy); null after {@link #sortVocab()}
     */
    public Map<String, Integer> getVocabIndexMap() {
        return this.vocabIndexMap;
    }

    /**
     * Clears the end-of-corpus flag. The base implementation ignores both arguments; subclasses
     * are expected to give them meaning.
     *
     * @param i unused by the base implementation
     * @param i2 unused by the base implementation
     * @throws IOException never thrown by the base implementation
     */
    public void rewind(int i, int i2) throws IOException {
        this.eoc = false;
    }

    /**
     * Reads the next word and translates it to a vocabulary index.
     *
     * @return the result of {@link #searchVocab(String)} for the word read; {@code -2} if
     *     {@link #nextWord()} returned null while {@link #eoc} is set; {@code -3} if it returned
     *     null while {@link #eoc} is not set
     * @throws IOException if {@link #nextWord()} fails
     */
    public int readWordIndex() throws IOException {
        String word = nextWord();
        if (word != null) {
            return searchVocab(word);
        }
        return this.eoc ? NO_WORD_AT_END_OF_CORPUS : NO_WORD;
    }

    /**
     * Supplies the next word of the corpus.
     *
     * @return the next word, or null when no word is available
     * @throws IOException if reading the underlying source fails
     */
    public abstract String nextWord() throws IOException;

    /**
     * Calls {@link #shutdown()} and then deletes the cache file, if one was set.
     *
     * <p>Deletion is best-effort: the result of {@link File#delete()} is not inspected.
     *
     * @throws IOException if {@link #shutdown()} fails
     */
    public void close() throws IOException {
        shutdown();
        // The base class never assigns cacheFile, so it may legitimately be null here.
        if (this.cacheFile != null) {
            this.cacheFile.delete();
        }
    }

    /**
     * Releases the remapping table without touching the cache file.
     *
     * @throws IOException never thrown by the base implementation
     */
    public void shutdown() throws IOException {
        this.table = null;
    }

    /**
     * Looks a word up in the index map.
     *
     * <p>Decompiler note: the access modifier was originally package-private.
     *
     * @param str the word to look up; may be null
     * @return the word's index, or {@code -1} if the word is null or not in the map
     */
    public int searchVocab(String str) {
        if (str == null) {
            return WORD_NOT_FOUND;
        }
        Integer index = this.vocabIndexMap.get(str);
        return index == null ? WORD_NOT_FOUND : index.intValue();
    }

    /**
     * Sorts the live vocabulary, drops words below the configured minimum count, builds
     * {@link #table}, recomputes {@link #trainWords} and trims {@link #vocab} to the retained size.
     *
     * <p>After this call {@link #vocabIndexMap} is null. The retained words are taken to be the
     * first {@link #vocabSize} entries of the sorted array; this presumes the natural ordering of
     * {@code VocabWord} puts higher counts first (defined outside this file - verify).
     *
     * <p>Decompiler note: the access modifier was originally package-private.
     */
    public void sortVocab() {
        Arrays.sort(this.vocab, 0, this.vocabSize);
        final int sortedCount = this.vocabSize;
        this.trainWords = 0;
        this.table = new int[sortedCount];
        for (int newIndex = 0; newIndex < sortedCount; newIndex++) {
            VocabWord word = this.vocab[newIndex];
            boolean discarded = word.cn < this.config.getMinCount();
            // The map still holds the index the word had before sorting.
            int oldIndex = this.vocabIndexMap.get(word.word).intValue();
            if (discarded) {
                this.table[oldIndex] = DISCARDED_WORD;
                this.vocabSize--;
            } else {
                this.table[oldIndex] = newIndex;
                setVocabIndexMap(word, newIndex);
            }
        }
        // The map now holds stale pre-sort indexes; drop it.
        this.vocabIndexMap = null;
        // Keep only the retained prefix. Previously the trimmed copy was built and thrown away.
        this.vocab = Arrays.copyOf(this.vocab, this.vocabSize);
    }

    /**
     * Hook invoked by {@link #sortVocab()} for every retained word.
     *
     * <p>Despite its name, the base implementation only adds the word's count to
     * {@link #trainWords}; the index is unused here.
     *
     * @param vocabWord the retained word
     * @param i the word's index after sorting
     */
    void setVocabIndexMap(VocabWord vocabWord, int i) {
        this.trainWords += vocabWord.cn;
    }

    /**
     * Builds a binary tree over the vocabulary by repeatedly merging the two lowest-count nodes,
     * then stores in every word its code length, its bit code (root first) and the inner nodes on
     * its path (root first, as indexes relative to the first inner node).
     *
     * <p>The merge step walks the leaves from the last index downwards, so it presumes the
     * vocabulary is ordered by non-increasing count (see {@link #sortVocab()}). A path longer than
     * 40 nodes overflows the scratch buffers and throws {@link ArrayIndexOutOfBoundsException}.
     *
     * <p>Decompiler note: the access modifier was originally package-private.
     */
    public void createBinaryTree() {
        final int size = this.vocabSize;
        int[] pathNodes = new int[MAX_CODE_LENGTH];
        char[] pathBits = new char[MAX_CODE_LENGTH];
        // Slots [0, size) are leaves (words); slots [size, 2*size - 1) are inner nodes.
        int[] count = new int[(size * 2) + 1];
        char[] binary = new char[(size * 2) + 1];
        int[] parentNode = new int[(size * 2) + 1];

        for (int a = 0; a < size; a++) {
            count[a] = this.vocab[a].cn;
        }
        // Inner nodes not yet created must never win a "smallest count" comparison.
        for (int a = size; a < size * 2; a++) {
            count[a] = Integer.MAX_VALUE;
        }

        int pos1 = size - 1; // next unmerged leaf, walking down
        int pos2 = size;     // next unmerged inner node, walking up
        for (int a = 0; a < size - 1; a++) {
            int min1;
            if (pos1 >= 0 && count[pos1] < count[pos2]) {
                min1 = pos1;
                pos1--;
            } else {
                min1 = pos2;
                pos2++;
            }
            int min2;
            if (pos1 >= 0 && count[pos1] < count[pos2]) {
                min2 = pos1;
                pos1--;
            } else {
                min2 = pos2;
                pos2++;
            }
            int merged = size + a;
            count[merged] = count[min1] + count[min2];
            parentNode[min1] = merged;
            parentNode[min2] = merged;
            // The second child of each merge gets bit 1; the first keeps the default 0.
            binary[min2] = 1;
        }

        // The root is the last inner node created, at index 2*size - 2.
        final int root = (size * 2) - 2;
        for (int a = 0; a < size; a++) {
            int node = a;
            int length = 0;
            // Climb from the leaf to the root, recording bits and nodes leaf-first.
            do {
                pathBits[length] = binary[node];
                pathNodes[length] = node;
                length++;
                node = parentNode[node];
            } while (node != root);

            VocabWord word = this.vocab[a];
            word.codelen = length;
            word.point[0] = size - 2; // the root, relative to the first inner node
            // Reverse the recorded path so code and point run root-first.
            for (int b = 0; b < length; b++) {
                word.code[(length - b) - 1] = pathBits[b];
                word.point[length - b] = pathNodes[b] - size;
            }
        }
    }
}
