package edu.stanford.nlp.parser.lexparser;

import edu.stanford.nlp.ling.Label;
import edu.stanford.nlp.ling.Tag;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.stats.ClassicCounter;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.Index;
import edu.stanford.nlp.util.logging.Redwood;
import java.util.Map;
import java.util.Set;

/* loaded from: input_file:edu/stanford/nlp/parser/lexparser/BaseUnknownWordModelTrainer.class */
/**
 * Collects the statistics for an unknown-word model while training data is streamed in.
 *
 * <p>Usage follows three steps: {@link #initializeTraining} resets all counters and builds the
 * model object, {@link #train} is called once per tagged word, and {@link #finishTraining}
 * converts the per-tag signature counts into log relative frequencies and returns the model.
 *
 * <p>The members {@code op}, {@code lex}, {@code wordIndex}, {@code tagIndex}, {@code treesRead}
 * and {@code NULL_ITW} are not declared in this class; presumably they are inherited from
 * {@link AbstractUnknownWordModelTrainer} (confirm there).
 */
public class BaseUnknownWordModelTrainer extends AbstractUnknownWordModelTrainer {
    /** Logging channel for this trainer. */
    private static Redwood.RedwoodChannels log = Redwood.channels(BaseUnknownWordModelTrainer.class);

    // ---- raw statistics gathered by train() ----

    /** Accumulated weight per word, keyed with the tag slot set to {@code IntTaggedWord.ANY}. */
    ClassicCounter<IntTaggedWord> seenCounter;

    /**
     * Accumulated weight for words that were still rare (seen weight below 2) when encountered
     * after the start threshold: one entry per tag (word slot set to {@code IntTaggedWord.ANY})
     * plus a running total under {@code NULL_ITW}.
     */
    ClassicCounter<IntTaggedWord> unSeenCounter;

    /** Accumulated weight per tag; each tag also gets one extra count in {@link #finishTraining}. */
    ClassicCounter<Label> tc;

    /** tag -> word signature -> accumulated weight. */
    Map<Label, ClassicCounter<String>> c;

    /** Every signature produced during training. */
    Set<String> seenEnd;

    // ---- derived output ----

    /** tag -> word signature -> log(signature count / tag count); filled by {@link #finishTraining}. */
    Map<Label, ClassicCounter<String>> tagHash;

    /** The model created by {@link #buildUWM()}; it is handed the counters and maps above. */
    UnknownWordModel model;

    /** Good-Turing helper; non-null only when {@link #useGT} is set. */
    UnknownGTTrainer unknownGTTrainer;

    // ---- configuration ----

    /** {@code treesRead} must exceed this value before {@link #unSeenCounter} is updated. */
    double indexToStartUnkCounting = 0.0d;

    /** True when both the unknown suffix size and the signature level are positive. */
    boolean useEnd;

    /** Always set to false by {@link #initializeTraining}. */
    boolean useFirst;

    /** True when the signature level is positive. */
    boolean useFirstCap;

    /** True when the signature level is exactly zero. */
    boolean useGT;

    /**
     * Resets every counter, derives the feature switches from {@code options.lexOptions}, logs the
     * active switches, and builds the model via {@link #buildUWM()}.
     *
     * @param options parser options; {@code lexOptions} and {@code trainOptions} are read here
     * @param lexicon forwarded unchanged to the superclass
     * @param index forwarded unchanged to the superclass (presumably the word index; confirm there)
     * @param index2 forwarded unchanged to the superclass (presumably the tag index; confirm there)
     * @param d multiplied by {@code trainOptions.fractionBeforeUnseenCounting} to obtain
     *     {@link #indexToStartUnkCounting} (presumably the total number of training trees)
     */
    @Override // edu.stanford.nlp.parser.lexparser.AbstractUnknownWordModelTrainer, edu.stanford.nlp.parser.lexparser.UnknownWordModelTrainer
    public void initializeTraining(Options options, Lexicon lexicon, Index<String> index, Index<String> index2, double d) {
        super.initializeTraining(options, lexicon, index, index2, d);

        // Fresh containers; these must exist before buildUWM() runs because it passes them on.
        this.seenCounter = new ClassicCounter<>();
        this.unSeenCounter = new ClassicCounter<>();
        this.tagHash = Generics.newHashMap();
        this.tc = new ClassicCounter<>();
        this.c = Generics.newHashMap();
        this.seenEnd = Generics.newHashSet();

        // Feature switches all hinge on the configured signature level.
        final boolean signaturesEnabled = options.lexOptions.useUnknownWordSignatures > 0;
        this.useEnd = options.lexOptions.unknownSuffixSize > 0 && signaturesEnabled;
        this.useFirstCap = signaturesEnabled;
        this.useGT = options.lexOptions.useUnknownWordSignatures == 0;
        this.useFirst = false;

        if (this.useFirst) {
            log.info("Including first letter for unknown words.");
        }
        if (this.useFirstCap) {
            log.info("Including whether first letter is capitalized for unknown words");
        }
        if (this.useEnd) {
            log.info("Classing unknown word as the average of their equivalents by identity of last " + options.lexOptions.unknownSuffixSize + " letters.");
        }
        if (this.useGT) {
            log.info("Using Good-Turing smoothing for unknown words.");
        }

        this.indexToStartUnkCounting = d * options.trainOptions.fractionBeforeUnseenCounting;
        if (this.useGT) {
            this.unknownGTTrainer = new UnknownGTTrainer();
        } else {
            this.unknownGTTrainer = null;
        }
        this.model = buildUWM();
    }

    /**
     * Folds one tagged word into the running statistics.
     *
     * @param taggedWord the observed word/tag pair
     * @param i passed to {@code model.getSignature} together with the word (presumably the
     *     word's position in its sentence; confirm against the model)
     * @param d weight added to every counter touched by this observation
     */
    @Override // edu.stanford.nlp.parser.lexparser.UnknownWordModelTrainer
    public void train(TaggedWord taggedWord, int i, double d) {
        if (this.useGT) {
            this.unknownGTTrainer.train(taggedWord, d);
        }

        final String word = taggedWord.word();
        final String signature = this.model.getSignature(word, i);
        final Tag tagLabel = new Tag(taggedWord.tag());

        // Fetch or create the signature counter for this tag, then record the observation.
        ClassicCounter<String> signatureCounts = this.c.get(tagLabel);
        if (signatureCounts == null) {
            signatureCounts = new ClassicCounter<>();
            this.c.put(tagLabel, signatureCounts);
        }
        signatureCounts.incrementCount(signature, d);
        this.tc.incrementCount(tagLabel, d);
        this.seenEnd.add(signature);

        final String tagName = taggedWord.tag();
        final IntTaggedWord wordWithAnyTag = new IntTaggedWord(word, IntTaggedWord.ANY, this.wordIndex, this.tagIndex);
        this.seenCounter.incrementCount(wordWithAnyTag, d);

        // Unseen-word statistics only start once treesRead has passed the configured threshold ...
        if (this.treesRead <= this.indexToStartUnkCounting) {
            return;
        }
        // ... and only cover words whose accumulated weight is still below 2.
        if (this.seenCounter.getCount(wordWithAnyTag) >= 2.0d) {
            return;
        }

        final IntTaggedWord tagWithAnyWord = new IntTaggedWord(IntTaggedWord.ANY, tagName, this.wordIndex, this.tagIndex);
        this.unSeenCounter.incrementCount(tagWithAnyWord, d);
        this.unSeenCounter.incrementCount(NULL_ITW, d);
    }

    /**
     * Turns the collected per-tag signature counts into log relative frequencies stored in
     * {@link #tagHash}, after finishing the Good-Turing helper when it is in use.
     *
     * @return the model created during {@link #initializeTraining}
     */
    @Override // edu.stanford.nlp.parser.lexparser.UnknownWordModelTrainer
    public UnknownWordModel finishTraining() {
        if (this.useGT) {
            this.unknownGTTrainer.finishTraining();
        }

        for (Map.Entry<Label, ClassicCounter<String>> tagEntry : this.c.entrySet()) {
            final Label tagLabel = tagEntry.getKey();
            final ClassicCounter<String> signatureCounts = tagEntry.getValue();

            ClassicCounter<String> logProbabilities = this.tagHash.get(tagLabel);
            if (logProbabilities == null) {
                logProbabilities = new ClassicCounter<>();
                this.tagHash.put(tagLabel, logProbabilities);
            }

            // Each tag is credited with one extra observation, and its "UNK" signature count is
            // set to 1.0, before the counts are normalised.
            this.tc.incrementCount(tagLabel);
            signatureCounts.setCount("UNK", 1.0d);

            // The tag total does not change while this tag's signatures are normalised.
            final double tagTotal = this.tc.getCount(tagLabel);
            for (Map.Entry<String, Double> signatureEntry : signatureCounts.entrySet()) {
                final double ratio = signatureEntry.getValue().doubleValue() / tagTotal;
                logProbabilities.setCount(signatureEntry.getKey(), Math.log(ratio));
            }
        }
        return this.model;
    }

    /**
     * Creates the model instance, handing it the unseen counter, the tag/signature table, the set
     * of seen signatures and, when Good-Turing smoothing is active, the helper's
     * {@code unknownGT} map (otherwise {@code null}).
     *
     * @return a new {@link BaseUnknownWordModel} that receives this trainer's containers
     */
    protected UnknownWordModel buildUWM() {
        final Map<String, Float> goodTuringTable = this.useGT ? this.unknownGTTrainer.unknownGT : null;
        return new BaseUnknownWordModel(this.op, this.lex, this.wordIndex, this.tagIndex, this.unSeenCounter, this.tagHash, goodTuringTable, this.seenEnd);
    }
}
