package org.deeplearning4j.text.tokenization.tokenizerfactory;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.util.Collections;
import java.util.Map;
import java.util.NavigableMap;
import java.util.TreeMap;
import lombok.NonNull;
import org.deeplearning4j.text.tokenization.tokenizer.BertWordPieceStreamTokenizer;
import org.deeplearning4j.text.tokenization.tokenizer.BertWordPieceTokenizer;
import org.deeplearning4j.text.tokenization.tokenizer.TokenPreProcess;
import org.deeplearning4j.text.tokenization.tokenizer.Tokenizer;
import org.deeplearning4j.text.tokenization.tokenizer.preprocessor.BertWordPiecePreProcessor;

/* loaded from: input_file:org/deeplearning4j/text/tokenization/tokenizerfactory/BertWordPieceTokenizerFactory.class */
/**
 * {@link TokenizerFactory} that produces BERT WordPiece tokenizers backed by a shared vocabulary.
 *
 * <p>The vocabulary maps each token string to an integer index. It can be supplied directly as a
 * {@link NavigableMap} or loaded from a file / stream containing one token per line (see
 * {@link #loadVocab(InputStream, Charset)}).
 *
 * <p>NOTE(review): the constructors that take a ready-made vocabulary map never assign
 * {@code charset}, so {@link #create(InputStream)} forwards {@code null} as the charset for
 * factories built that way - confirm that {@code BertWordPieceStreamTokenizer} tolerates this.
 */
public class BertWordPieceTokenizerFactory implements TokenizerFactory {
    /** Token to index mapping shared by every tokenizer this factory creates. */
    private final NavigableMap<String, Integer> vocab;
    /** Pre-processor handed to created tokenizers as their pre-tokenization pre-processor. */
    private TokenPreProcess preTokenizePreProcessor;
    /** Pre-processor handed to created tokenizers as their token pre-processor; may be null. */
    private TokenPreProcess tokenPreProcessor;
    /** Charset used by {@link #create(InputStream)}; only set by the File/InputStream constructors. */
    private Charset charset;

    /**
     * Creates a factory from an in-memory vocabulary, using a {@link BertWordPiecePreProcessor}
     * as the pre-tokenization pre-processor.
     *
     * @param navigableMap vocabulary (token to index)
     * @param z            forwarded as the first argument of {@code BertWordPiecePreProcessor}
     * @param z2           forwarded as the second argument of {@code BertWordPiecePreProcessor}
     */
    public BertWordPieceTokenizerFactory(NavigableMap<String, Integer> navigableMap, boolean z, boolean z2) {
        this(navigableMap, new BertWordPiecePreProcessor(z, z2, navigableMap));
    }

    /**
     * Creates a factory from an in-memory vocabulary and an explicit pre-tokenization pre-processor.
     *
     * @param navigableMap    vocabulary (token to index)
     * @param tokenPreProcess pre-processor stored as the pre-tokenization pre-processor
     */
    public BertWordPieceTokenizerFactory(NavigableMap<String, Integer> navigableMap, TokenPreProcess tokenPreProcess) {
        this.vocab = navigableMap;
        this.preTokenizePreProcessor = tokenPreProcess;
    }

    /**
     * Creates a factory whose vocabulary is loaded from a file with one token per line.
     *
     * @param file    vocabulary file
     * @param z       forwarded as the first argument of {@code BertWordPiecePreProcessor}
     * @param z2      forwarded as the second argument of {@code BertWordPiecePreProcessor}
     * @param charset charset used to decode the file and, later, streams passed to
     *                {@link #create(InputStream)}
     * @throws IOException          if the file cannot be opened or read
     * @throws NullPointerException if {@code charset} is null
     */
    public BertWordPieceTokenizerFactory(File file, boolean z, boolean z2, @NonNull Charset charset) throws IOException {
        // Validate inside the this(...) argument so the check runs before the file is opened.
        this(loadVocab(file, requireCharset(charset)), z, z2);
        this.charset = charset;
    }

    /**
     * Creates a factory whose vocabulary is loaded from a stream with one token per line.
     * The stream is closed once the vocabulary has been read.
     *
     * @param inputStream vocabulary source
     * @param z           forwarded as the first argument of {@code BertWordPiecePreProcessor}
     * @param z2          forwarded as the second argument of {@code BertWordPiecePreProcessor}
     * @param charset     charset used to decode the stream and, later, streams passed to
     *                    {@link #create(InputStream)}
     * @throws IOException          if the stream cannot be read
     * @throws NullPointerException if {@code charset} is null
     */
    public BertWordPieceTokenizerFactory(InputStream inputStream, boolean z, boolean z2, @NonNull Charset charset) throws IOException {
        // Validate inside the this(...) argument so the check runs before the stream is consumed.
        this(loadVocab(inputStream, requireCharset(charset)), z, z2);
        this.charset = charset;
    }

    /**
     * Rejects a null charset with the same exception type and message as the original null check.
     *
     * @param charset value to validate
     * @return {@code charset}, unchanged
     * @throws NullPointerException if {@code charset} is null
     */
    private static Charset requireCharset(Charset charset) {
        if (charset == null) {
            throw new NullPointerException("charset is marked non-null but is null");
        }
        return charset;
    }

    /**
     * Creates a tokenizer over the given text using this factory's vocabulary and pre-processors.
     *
     * @param str text to tokenize
     * @return a new {@link BertWordPieceTokenizer}
     */
    @Override // org.deeplearning4j.text.tokenization.tokenizerfactory.TokenizerFactory
    public Tokenizer create(String str) {
        return new BertWordPieceTokenizer(str, this.vocab, this.preTokenizePreProcessor, this.tokenPreProcessor);
    }

    /**
     * Creates a tokenizer over the given stream using this factory's charset, vocabulary and
     * pre-processors.
     *
     * @param inputStream source of the text to tokenize
     * @return a new {@link BertWordPieceStreamTokenizer}
     */
    @Override // org.deeplearning4j.text.tokenization.tokenizerfactory.TokenizerFactory
    public Tokenizer create(InputStream inputStream) {
        return new BertWordPieceStreamTokenizer(inputStream, this.charset, this.vocab, this.preTokenizePreProcessor, this.tokenPreProcessor);
    }

    /**
     * Returns a read-only view of the vocabulary.
     *
     * @return unmodifiable token to index map
     */
    public Map<String, Integer> getVocab() {
        return Collections.unmodifiableMap(this.vocab);
    }

    /**
     * Reads a vocabulary with one token per line. Each line becomes a key whose value is its
     * zero-based line number; if a line occurs more than once, the last occurrence wins.
     *
     * <p>The returned map is ordered by reverse natural {@code String} order, so a token sorts
     * before any of its own prefixes (presumably to support longest-match lookups - confirm
     * against the tokenizer).
     *
     * <p>The supplied stream is closed when reading finishes, whether normally or exceptionally.
     *
     * @param inputStream vocabulary source
     * @param charset     charset used to decode the stream
     * @return token to index map in reverse natural key order
     * @throws IOException if reading or closing the stream fails
     */
    public static NavigableMap<String, Integer> loadVocab(InputStream inputStream, Charset charset) throws IOException {
        NavigableMap<String, Integer> result = new TreeMap<String, Integer>(Collections.<String>reverseOrder());
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream, charset))) {
            int index = 0;
            String line;
            while ((line = reader.readLine()) != null) {
                result.put(line, Integer.valueOf(index));
                index++;
            }
        }
        return result;
    }

    /**
     * Reads a vocabulary file with one token per line; see
     * {@link #loadVocab(InputStream, Charset)} for the resulting mapping.
     *
     * @param file    vocabulary file
     * @param charset charset used to decode the file
     * @return token to index map in reverse natural key order
     * @throws IOException if the file cannot be opened, read or closed
     */
    public static NavigableMap<String, Integer> loadVocab(File file, Charset charset) throws IOException {
        // Own the file stream here so it is released even if building the reader fails.
        try (InputStream fileStream = new FileInputStream(file)) {
            return loadVocab(fileStream, charset);
        }
    }

    /**
     * @return the pre-processor passed to created tokenizers as the pre-tokenization pre-processor
     */
    public TokenPreProcess getPreTokenizePreProcessor() {
        return this.preTokenizePreProcessor;
    }

    /**
     * @param tokenPreProcess pre-processor to pass to subsequently created tokenizers as the
     *                        pre-tokenization pre-processor
     */
    public void setPreTokenizePreProcessor(TokenPreProcess tokenPreProcess) {
        this.preTokenizePreProcessor = tokenPreProcess;
    }

    /**
     * @return the token pre-processor passed to created tokenizers
     */
    @Override // org.deeplearning4j.text.tokenization.tokenizerfactory.TokenizerFactory
    public TokenPreProcess getTokenPreProcessor() {
        return this.tokenPreProcessor;
    }

    /**
     * @param tokenPreProcess token pre-processor to pass to subsequently created tokenizers
     */
    @Override // org.deeplearning4j.text.tokenization.tokenizerfactory.TokenizerFactory
    public void setTokenPreProcessor(TokenPreProcess tokenPreProcess) {
        this.tokenPreProcessor = tokenPreProcess;
    }
}
