package com.clearnlp.component.morph;

import com.clearnlp.constant.universal.UNPunct;
import com.clearnlp.constituent.CTLibEn;
import com.clearnlp.dependency.DEPNode;
import com.clearnlp.morphology.AbstractAffixMatcher;
import com.clearnlp.morphology.MPLib;
import com.clearnlp.morphology.MPLibEn;
import com.clearnlp.morphology.MPTag;
import com.clearnlp.morphology.english.EnglishAffixMatcherFactory;
import com.clearnlp.morphology.english.EnglishInflection;
import com.clearnlp.morphology.english.EnglishSuffixMatcher;
import com.clearnlp.pattern.PTLib;
import com.clearnlp.propbank.verbnet.PVMap;
import com.clearnlp.util.UTInput;
import com.clearnlp.util.UTOutput;
import com.clearnlp.util.UTXml;
import com.clearnlp.util.map.Prob2DMap;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import java.util.zip.ZipInputStream;
import org.w3c.dom.Element;

/* loaded from: input_file:com/clearnlp/component/morph/EnglishMPAnalyzer.class */
/**
 * Morphological analyzer for English that assigns a lemma to a {@link DEPNode}.
 *
 * <p>The analyzer is backed by dictionary resources found under {@code dictionary/english/}:
 * per-category inflection data ({@code .base} word lists, {@code .exc} exception maps and the
 * suffix rules in {@code inflection_suffix.xml}), an abbreviation rule file, and cardinal and
 * ordinal word lists. The resources can be loaded from the classpath, from a {@link ZipFile},
 * or from a zipped {@link InputStream}.
 */
public class EnglishMPAnalyzer extends AbstractMPAnalyzer {
    // Resource locations and naming conventions. These are kept as package-visible instance
    // fields with their original names (including the ABBREVIATOIN_RULE spelling) so that any
    // existing references keep compiling.
    final String PATH = "dictionary/english/";
    final String VERB = PVMap.E_VERB;
    final String NOUN = "noun";
    final String ADJECTIVE = "adjective";
    final String ADVERB = "adverb";
    final String EXT_BASE = ".base";
    final String EXT_EXCEPTION = ".exc";
    final String INFLECTION_SUFFIX = "dictionary/english/inflection_suffix.xml";
    final String DERIVATION_SUFFIX = "dictionary/english/derivation_suffix.xml";
    final String ABBREVIATOIN_RULE = "dictionary/english/abbreviation.rule";
    final String CARDINAL_BASE = "dictionary/english/cardinal.base";
    final String ORDINAL_BASE = "dictionary/english/ordinal.base";
    /** Separator used to build the {@code form_pos} keys of the abbreviation map. */
    final String FIELD_DELIM = "_";

    private EnglishInflection inf_verb;
    private EnglishInflection inf_noun;
    private EnglishInflection inf_adjective;
    private EnglishInflection inf_adverb;
    /** Maps {@code form + "_" + pos} to the lemma given by the abbreviation rule file. */
    private Map<String, String> rule_abbreviation;
    private Set<String> base_cardinal;
    private Set<String> base_ordinal;

    /**
     * Creates an analyzer whose dictionaries are read from the classpath.
     *
     * @throws IllegalStateException if a dictionary resource cannot be read
     */
    public EnglishMPAnalyzer() {
        Element inflectionRoot = UTXml.getDocumentElement(UTInput.getInputStreamsFromClasspath(INFLECTION_SUFFIX));
        try {
            this.inf_verb = getInflectionRules(inflectionRoot, VERB, CTLibEn.POS_VB);
            this.inf_noun = getInflectionRules(inflectionRoot, NOUN, CTLibEn.POS_NN);
            this.inf_adjective = getInflectionRules(inflectionRoot, ADJECTIVE, CTLibEn.POS_JJ);
            this.inf_adverb = getInflectionRules(inflectionRoot, ADVERB, CTLibEn.POS_RB);
            this.base_cardinal = UTInput.getStringSet(UTInput.getInputStreamsFromClasspath(CARDINAL_BASE));
            this.base_ordinal = UTInput.getStringSet(UTInput.getInputStreamsFromClasspath(ORDINAL_BASE));
            this.rule_abbreviation = getAbbreviationMap(UTInput.getInputStreamsFromClasspath(ABBREVIATOIN_RULE));
        } catch (IOException e) {
            // A partially initialised analyzer would only fail later with a NullPointerException
            // in analyze(), so report the real cause at construction time instead.
            throw new IllegalStateException("Failed to load English morphology dictionaries from the classpath", e);
        }
    }

    /**
     * Creates an analyzer whose dictionaries are read from entries of the given zip file.
     *
     * @param zipFile the archive containing the {@code dictionary/english/} entries
     * @throws IllegalStateException if a dictionary entry cannot be read
     */
    public EnglishMPAnalyzer(ZipFile zipFile) {
        try {
            Element inflectionRoot = UTXml.getDocumentElement(zipFile.getInputStream(new ZipEntry(INFLECTION_SUFFIX)));
            this.inf_verb = getInflectionRules(zipFile, inflectionRoot, VERB, CTLibEn.POS_VB);
            this.inf_noun = getInflectionRules(zipFile, inflectionRoot, NOUN, CTLibEn.POS_NN);
            this.inf_adjective = getInflectionRules(zipFile, inflectionRoot, ADJECTIVE, CTLibEn.POS_JJ);
            this.inf_adverb = getInflectionRules(zipFile, inflectionRoot, ADVERB, CTLibEn.POS_RB);
            this.base_cardinal = UTInput.getStringSet(zipFile.getInputStream(new ZipEntry(CARDINAL_BASE)));
            this.base_ordinal = UTInput.getStringSet(zipFile.getInputStream(new ZipEntry(ORDINAL_BASE)));
            this.rule_abbreviation = getAbbreviationMap(zipFile.getInputStream(new ZipEntry(ABBREVIATOIN_RULE)));
        } catch (IOException e) {
            throw new IllegalStateException("Failed to load English morphology dictionaries from the zip file", e);
        }
    }

    /**
     * Creates an analyzer whose dictionaries are read from a zipped stream. The stream is
     * wrapped in a {@link ZipInputStream} and converted to a name-to-bytes map first.
     *
     * @param inputStream zipped stream containing the {@code dictionary/english/} entries
     * @throws IllegalStateException if the stream or one of its entries cannot be read
     */
    public EnglishMPAnalyzer(InputStream inputStream) {
        try {
            Map<String, byte[]> byteMap = UTInput.toByteMap(new ZipInputStream(inputStream));
            Element inflectionRoot = UTXml.getDocumentElement(new ByteArrayInputStream(byteMap.get(INFLECTION_SUFFIX)));
            this.inf_verb = getInflectionRules(byteMap, inflectionRoot, VERB, CTLibEn.POS_VB);
            this.inf_noun = getInflectionRules(byteMap, inflectionRoot, NOUN, CTLibEn.POS_NN);
            this.inf_adjective = getInflectionRules(byteMap, inflectionRoot, ADJECTIVE, CTLibEn.POS_JJ);
            this.inf_adverb = getInflectionRules(byteMap, inflectionRoot, ADVERB, CTLibEn.POS_RB);
            this.base_cardinal = UTInput.getStringSet(new ByteArrayInputStream(byteMap.get(CARDINAL_BASE)));
            this.base_ordinal = UTInput.getStringSet(new ByteArrayInputStream(byteMap.get(ORDINAL_BASE)));
            this.rule_abbreviation = getAbbreviationMap(new ByteArrayInputStream(byteMap.get(ABBREVIATOIN_RULE)));
        } catch (IOException e) {
            throw new IllegalStateException("Failed to load English morphology dictionaries from the zipped stream", e);
        }
    }

    /** Loads the inflection data of one category ({@code type}) from the classpath. */
    private EnglishInflection getInflectionRules(Element root, String type, String basePOS) throws IOException {
        InputStream baseStream = UTInput.getInputStreamsFromClasspath(PATH + type + EXT_BASE);
        InputStream exceptionStream = UTInput.getInputStreamsFromClasspath(PATH + type + EXT_EXCEPTION);
        return getInflection(baseStream, exceptionStream, UTXml.getFirstElementByTagName(root, type), basePOS);
    }

    /** Loads the inflection data of one category ({@code type}) from entries of a zip file. */
    private EnglishInflection getInflectionRules(ZipFile zipFile, Element root, String type, String basePOS) throws IOException {
        InputStream baseStream = zipFile.getInputStream(new ZipEntry(PATH + type + EXT_BASE));
        InputStream exceptionStream = zipFile.getInputStream(new ZipEntry(PATH + type + EXT_EXCEPTION));
        return getInflection(baseStream, exceptionStream, UTXml.getFirstElementByTagName(root, type), basePOS);
    }

    /** Loads the inflection data of one category ({@code type}) from a name-to-bytes map. */
    private EnglishInflection getInflectionRules(Map<String, byte[]> map, Element root, String type, String basePOS) throws IOException {
        InputStream baseStream = new ByteArrayInputStream(map.get(PATH + type + EXT_BASE));
        // getInflection() accepts a null exception stream, so a missing ".exc" entry must be
        // forwarded as null instead of failing inside the ByteArrayInputStream constructor.
        byte[] exceptionBytes = map.get(PATH + type + EXT_EXCEPTION);
        InputStream exceptionStream = (exceptionBytes != null) ? new ByteArrayInputStream(exceptionBytes) : null;
        return getInflection(baseStream, exceptionStream, UTXml.getFirstElementByTagName(root, type), basePOS);
    }

    /**
     * Builds an {@link EnglishInflection} from its three ingredients.
     *
     * @param baseStream      stream of base forms, read with {@code UTInput.getStringSet}
     * @param exceptionStream stream of space-delimited exception entries; may be {@code null},
     *                        in which case a {@code null} exception map is passed on
     * @param element         XML element holding the affix rules of this category
     * @param basePOS         part-of-speech tag handed to the {@link EnglishInflection}
     */
    private EnglishInflection getInflection(InputStream baseStream, InputStream exceptionStream, Element element, String basePOS) throws IOException {
        return new EnglishInflection(
                basePOS,
                UTInput.getStringSet(baseStream),
                exceptionStream != null ? UTInput.getStringMap(exceptionStream, PTLib.SPACE) : null,
                new EnglishAffixMatcherFactory().createAffixMatchers(element));
    }

    /**
     * Reads the abbreviation rules: every non-blank line holds three space-separated fields,
     * stored as {@code field0 + "_" + field1 -> field2}. The stream is closed when done.
     *
     * @throws IllegalArgumentException if a non-blank line has fewer than three fields
     */
    private Map<String, String> getAbbreviationMap(InputStream inputStream) throws IOException {
        BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream));
        Map<String, String> rules = Maps.newHashMap();
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                line = line.trim();
                if (line.length() == 0) {
                    continue; // tolerate blank lines, e.g. a trailing newline at end of file
                }
                String[] fields = PTLib.splitSpace(line);
                if (fields.length < 3) {
                    throw new IllegalArgumentException("Malformed abbreviation rule (expected 3 fields): " + line);
                }
                rules.put(fields[0] + FIELD_DELIM + fields[1], fields[2]);
            }
        } finally {
            reader.close();
        }
        return rules;
    }

    /**
     * Sets the lemma of the given node (and its lower-cased simplified form, if still unset).
     *
     * <p>Order of precedence: {@code NNP} nodes get their lower-cased form; otherwise an
     * abbreviation rule wins; otherwise the inflection-based base form is used, falling back
     * to the lower-cased simplified form. Unless the node is {@code NNPS}, a lemma found in
     * the cardinal or ordinal lists is finally replaced by the corresponding generic tag.
     */
    @Override // com.clearnlp.component.morph.AbstractMPAnalyzer
    public void analyze(DEPNode dEPNode) {
        if (dEPNode.lowerSimplifiedForm == null) {
            dEPNode.lowerSimplifiedForm = MPLib.getSimplifiedLowercaseWordForm(dEPNode.form);
        }
        if (dEPNode.pos.equals(CTLibEn.POS_NNP)) {
            dEPNode.lemma = dEPNode.form.toLowerCase();
            return;
        }
        String abbreviation = getAbbreviation(dEPNode.lowerSimplifiedForm, dEPNode.pos);
        dEPNode.lemma = abbreviation;
        if (abbreviation != null) {
            return;
        }
        String baseForm = getBaseFormFromInflection(dEPNode.lowerSimplifiedForm, dEPNode.pos);
        dEPNode.lemma = (baseForm != null) ? baseForm : dEPNode.lowerSimplifiedForm;
        if (dEPNode.isPos(CTLibEn.POS_NNPS)) {
            return;
        }
        if (isCardinal(dEPNode.lemma)) {
            dEPNode.setLemma(MPTag.LEMMA_CARDINAL);
        } else if (isOrdinal(dEPNode.lemma)) {
            dEPNode.setLemma(MPTag.LEMMA_ORDINAL);
        }
    }

    /** Returns the abbreviation lemma registered for {@code form_pos}, or {@code null}. */
    private String getAbbreviation(String form, String pos) {
        return this.rule_abbreviation.get(form + FIELD_DELIM + pos);
    }

    /**
     * Returns the inflection rules matching the given POS tag (verb, noun, adjective or
     * adverb, tested in that order), or {@code null} if the tag belongs to none of them.
     */
    private EnglishInflection selectInflection(String pos) {
        if (MPLibEn.isVerb(pos)) {
            return this.inf_verb;
        }
        if (MPLibEn.isNoun(pos)) {
            return this.inf_noun;
        }
        if (MPLibEn.isAdjective(pos)) {
            return this.inf_adjective;
        }
        if (MPLibEn.isAdverb(pos)) {
            return this.inf_adverb;
        }
        return null;
    }

    /** Returns the base form from the matching inflection rules, or {@code null} if none apply. */
    private String getBaseFormFromInflection(String form, String pos) {
        EnglishInflection inflection = selectInflection(pos);
        return (inflection != null) ? inflection.getBaseForm(form, pos) : null;
    }

    private boolean isCardinal(String lemma) {
        return this.base_cardinal.contains(lemma);
    }

    private boolean isOrdinal(String lemma) {
        return lemma.equals("0st") || lemma.equals("0nd") || lemma.equals("0rd") || lemma.equals("0th") || this.base_ordinal.contains(lemma);
    }

    /** Runs the per-category check for all four categories; currently the check does nothing. */
    public void check(String str) {
        try {
            check(str, VERB);
            check(str, NOUN);
            check(str, ADJECTIVE);
            check(str, ADVERB);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Placeholder: the body is empty in this version of the class. */
    private void check(String str, String str2) throws IOException {
    }

    /**
     * Dictionary-maintenance utility: writes trimmed {@code .base}/{@code .exc} files (plus
     * {@code .removed} logs) for every category into the given output directory. Failures are
     * printed to standard error and abort the remaining categories.
     *
     * @param str path of the output directory
     */
    public void trim(String str) {
        try {
            trim(str, VERB, this.inf_verb);
            trim(str, NOUN, this.inf_noun);
            trim(str, ADJECTIVE, this.inf_adjective);
            trim(str, ADVERB, this.inf_adverb);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Trims the dictionaries of one category and writes the results to
     * {@code outputDir/type.base}, {@code .exc}, {@code .base.removed} and {@code .exc.removed}.
     *
     * <p>Note that this modifies the base set of {@code inflection} in place: all exception
     * values are added to it, and entries that are not in the {@code .accept} list but map to
     * a different word in the exception map are removed again.
     */
    private void trim(String outputDir, String type, EnglishInflection inflection) throws Exception {
        String prefix = outputDir + UNPunct.FORWARD_SLASH + type;
        PrintStream baseRemovedOut = null;
        PrintStream exceptionRemovedOut = null;
        PrintStream baseOut = null;
        PrintStream exceptionOut = null;
        try {
            baseRemovedOut = UTOutput.createPrintBufferedFileStream(prefix + ".base.removed");
            // No exception entries are removed by the current code, so this file stays empty;
            // it is still created to keep the set of output files unchanged.
            exceptionRemovedOut = UTOutput.createPrintBufferedFileStream(prefix + ".exc.removed");
            baseOut = UTOutput.createPrintBufferedFileStream(prefix + EXT_BASE);
            exceptionOut = UTOutput.createPrintBufferedFileStream(prefix + EXT_EXCEPTION);

            Set<String> accepted = UTInput.getStringSet(UTInput.getInputStreamsFromClasspath(PATH + type + ".accept"));
            Set<String> baseSet = inflection.getBaseSet();
            Map<String, String> exceptionMap = inflection.getExceptionMap();

            System.out.println(type + ":");
            System.out.println("  original      : " + baseSet.size() + " " + exceptionMap.size());
            baseSet.addAll(exceptionMap.values());
            System.out.println("+ from exception: " + baseSet.size() + " " + exceptionMap.size());

            // Iterate over a snapshot because baseSet is modified inside the loop.
            for (String base : Sets.newHashSet(baseSet)) {
                if (accepted.contains(base)) {
                    continue;
                }
                String mapped = exceptionMap.get(base);
                if (mapped != null && !mapped.equals(base)) {
                    baseSet.remove(base);
                    baseRemovedOut.println(base);
                }
            }
            System.out.println("- from exception: " + baseSet.size() + " " + exceptionMap.size());
            System.out.println("- inflected excs: " + baseSet.size() + " " + exceptionMap.size());

            ArrayList<String> sortedBases = Lists.newArrayList(baseSet);
            Collections.sort(sortedBases);
            for (String base : sortedBases) {
                baseOut.println(base);
            }

            ArrayList<String> sortedKeys = Lists.newArrayList(exceptionMap.keySet());
            Collections.sort(sortedKeys);
            for (String key : sortedKeys) {
                exceptionOut.println(key + " " + exceptionMap.get(key));
            }
        } finally {
            // Close every stream that was opened, even if a later step failed.
            if (baseRemovedOut != null) {
                baseRemovedOut.close();
            }
            if (exceptionRemovedOut != null) {
                exceptionRemovedOut.close();
            }
            if (baseOut != null) {
                baseOut.close();
            }
            if (exceptionOut != null) {
                exceptionOut.close();
            }
        }
    }

    /**
     * Evaluates the suffix matchers against a tab-separated file and prints the statistics.
     *
     * <p>Each line must provide at least three fields; the third one is the POS tag that
     * selects the inflection rules. The first two fields are handed to
     * {@code EnglishSuffixMatcher.evaluateInflection} (second field first); presumably they are
     * the word form and its lemma - confirm against that method. Lines with fewer than three
     * fields, or with a POS tag that is not a verb, noun, adjective or adverb, are skipped.
     * The stream is not closed by this method.
     *
     * @param inputStream the evaluation data
     */
    public void evaluateInflection(InputStream inputStream) throws Exception {
        BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream));
        Map<String, Map<String, Prob2DMap>> results = Maps.newHashMap();
        String line;
        while ((line = reader.readLine()) != null) {
            String[] fields = PTLib.splitTabs(line);
            if (fields.length < 3) {
                continue;
            }
            String pos = fields[2];
            EnglishInflection inflection = selectInflection(pos);
            if (inflection == null) {
                continue; // no inflection rules exist for this POS tag
            }
            for (AbstractAffixMatcher matcher : inflection.getSuffixMatchers()) {
                ((EnglishSuffixMatcher) matcher).evaluateInflection(results, inflection.getBaseSet(), fields[1], fields[0], pos);
            }
        }
        printEvaluation(results);
    }

    /** Prints the collected statistics, with outer and inner keys in sorted order. */
    private void printEvaluation(Map<String, Map<String, Prob2DMap>> map) {
        ArrayList<String> outerKeys = Lists.newArrayList(map.keySet());
        Collections.sort(outerKeys);
        for (String outerKey : outerKeys) {
            System.out.println(outerKey);
            Map<String, Prob2DMap> inner = map.get(outerKey);
            ArrayList<String> innerKeys = Lists.newArrayList(inner.keySet());
            Collections.sort(innerKeys);
            for (String innerKey : innerKeys) {
                Prob2DMap probabilities = inner.get(innerKey);
                for (String key : probabilities.keySet()) {
                    System.out.printf("%s\t%s\t%s\t%d\n", innerKey, key, Arrays.toString(probabilities.getProb1D(key)), Integer.valueOf(probabilities.getTotal1D(key)));
                }
            }
        }
    }
}
