package edu.stanford.nlp.pipeline;

import edu.stanford.nlp.classify.LinearClassifier;
import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.ling.tokensregex.types.Expressions;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations;
import edu.stanford.nlp.semgraph.SemanticGraphEdge;
import edu.stanford.nlp.semgraph.SemanticGraphFactory;
import edu.stanford.nlp.trees.GrammaticalRelation;
import edu.stanford.nlp.util.CoreMap;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Properties;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

/* loaded from: input_file:edu/stanford/nlp/pipeline/CoNLLUReader.class */
public class CoNLLUReader {
    /**
     * Column positions within a split token line. The conversion code in this class reads
     * the word, lemma, tag, governor, relation and misc values from these same positions.
     */
    public static final int CoNLLU_IndexField = 0;
    public static final int CoNLLU_WordField = 1;
    public static final int CoNLLU_LemmaField = 2;
    public static final int CoNLLU_UPOSField = 3;
    public static final int CoNLLU_XPOSField = 4;
    public static final int CoNLLU_GovField = 6;
    public static final int CoNLLU_RelnField = 7;
    public static final int CoNLLU_MiscField = 9;
    /** Number of columns read per token line: 10 plus the number of configured extra columns (set by the constructor). */
    public int columnCount;
    /** Matches any line starting with {@code #}. */
    public static Pattern COMMENT_LINE = Pattern.compile("^#.*");
    /**
     * Document boundary marker. It is applied with {@code matches()}, so only a line that is
     * exactly {@code # newdoc} qualifies.
     * NOTE(review): lines such as {@code # newdoc id = ...} therefore do not start a new
     * document -- confirm whether that is intended.
     */
    public static Pattern DOCUMENT_LINE = Pattern.compile("^# newdoc");
    /** Matches a multi-word-token range line, i.e. one starting with {@code <digits>-<digits>}. */
    public static Pattern MWT_LINE = Pattern.compile("^[0-9]+-[0-9]+.*");
    /** Matches a regular token line: an integer index immediately followed by a tab. */
    public static Pattern TOKEN_LINE = Pattern.compile("^[0-9]+\t.*");
    /** Maps a shorthand class name to the package prefix to prepend to it; filled in by the static initializer. */
    public static HashMap<String, String> classShorthandToFull = new HashMap<>();
    /** Maps an extra column index (10 and up) to the annotation class under which that column's value is stored. */
    private HashMap<Integer, Class> extraColumns;

    /* loaded from: input_file:edu/stanford/nlp/pipeline/CoNLLUReader$CoNLLUDocument.class */
    /**
     * Mutable holder for one document read from a CoNLL-U file: its sentences, document-level
     * key/value data, and the text accumulated while sentences are converted.
     */
    public class CoNLLUDocument {
        /** Sentences in reading order; never empty right after construction. */
        public List<CoNLLUSentence> sentences = new ArrayList<>();
        /** Document-level key/value data. */
        public HashMap<String, String> docData = new HashMap<>();
        /** Document text; starts empty and is appended to during sentence conversion. */
        public String docText = "";

        /** Creates a document that already contains one empty sentence ready to receive lines. */
        public CoNLLUDocument() {
            CoNLLUSentence firstSentence = new CoNLLUSentence();
            sentences.add(firstSentence);
        }

        /**
         * Returns the most recently added sentence.
         *
         * @return the last element of {@link #sentences}
         */
        public CoNLLUSentence lastSentence() {
            int lastIndex = sentences.size() - 1;
            return sentences.get(lastIndex);
        }
    }

    /* loaded from: input_file:edu/stanford/nlp/pipeline/CoNLLUReader$CoNLLUSentence.class */
    /**
     * Raw contents of one sentence: its token lines, {@code key = value} comment data, and
     * bookkeeping for multi-word tokens (MWT).
     */
    public class CoNLLUSentence {
        /** Token lines of the sentence, unsplit, in file order. */
        public List<String> tokenLines = new ArrayList<>();
        /** Key/value pairs taken from comment lines that contain {@code =}. */
        public HashMap<String, String> sentenceData = new HashMap<>();
        /** Maps a zero-based token position to the index of the MWT (in {@link #mwtTokens}) covering it. */
        HashMap<Integer, Integer> mwtData = new HashMap<>();
        /** Surface text of each MWT, in the order the range lines were read. */
        List<String> mwtTokens = new ArrayList<>();
        /** Misc column of each MWT range line, parallel to {@link #mwtTokens}. */
        List<String> mwtMiscs = new ArrayList<>();
        /** Zero-based position of the last token covered by each MWT, parallel to {@link #mwtTokens}. */
        List<Integer> mwtLastCoreLabels = new ArrayList<>();

        public CoNLLUSentence() {
        }

        /**
         * Consumes one line of input for this sentence.
         *
         * @param str the line to process
         * @return {@code true} if the line is neither a comment, an MWT range line nor a token
         *         line (for example a blank line), which the reader treats as the end of the
         *         sentence; {@code false} if the line was absorbed into this sentence
         */
        public boolean processLine(String str) {
            if (CoNLLUReader.COMMENT_LINE.matcher(str).matches()) {
                addSentenceData(str);
                return false;
            }
            if (CoNLLUReader.MWT_LINE.matcher(str).matches()) {
                addMWTData(str);
                return false;
            }
            if (CoNLLUReader.TOKEN_LINE.matcher(str).matches()) {
                this.tokenLines.add(str);
                return false;
            }
            return true;
        }

        /**
         * Records a {@code # key = value} comment in {@link #sentenceData}. Lines that are not
         * comments or contain no {@code =} are ignored.
         *
         * <p>The line is split at the first {@code =}; the key is the text between the leading
         * {@code #} and that {@code =}, the value is everything after it, and both are trimmed.
         *
         * @param str the comment line
         */
        public void addSentenceData(String str) {
            if (!CoNLLUReader.COMMENT_LINE.matcher(str).matches()) {
                return;
            }
            int equalsAt = str.indexOf('=');
            if (equalsAt < 0) {
                return;
            }
            // Skip the '=' itself so it does not end up as the first character of the value.
            String key = str.substring(1, equalsAt).trim();
            String value = str.substring(equalsAt + 1).trim();
            this.sentenceData.put(key, value);
        }

        /**
         * Records a multi-word-token range line such as {@code 3-4<delimiter>text...}.
         *
         * @param str the MWT range line
         * @throws NumberFormatException if either end of the range is not an integer
         * @throws ArrayIndexOutOfBoundsException if the line has no second column
         */
        void addMWTData(String str) {
            String[] columns = str.split(LinearClassifier.TEXT_SERIALIZATION_DELIMITER);
            String[] range = columns[0].split("-");
            String surfaceText = columns[1];
            int firstToken = Integer.parseInt(range[0]);
            int lastToken = Integer.parseInt(range[1]);
            Integer mwtIndex = Integer.valueOf(this.mwtTokens.size());
            // Range bounds are 1-based and inclusive; positions stored in mwtData are 0-based.
            for (int position = firstToken - 1; position < lastToken; position++) {
                this.mwtData.put(Integer.valueOf(position), mwtIndex);
            }
            this.mwtTokens.add(surfaceText);
            // Tolerate range lines without a misc column; sentence conversion only reacts to
            // misc entries starting with "SpaceAfter", so "_" behaves as "nothing specified".
            boolean hasMisc = columns.length > CoNLLUReader.CoNLLU_MiscField;
            this.mwtMiscs.add(hasMisc ? columns[CoNLLUReader.CoNLLU_MiscField] : "_");
            this.mwtLastCoreLabels.add(Integer.valueOf(lastToken - 1));
        }
    }

    public CoNLLUReader() throws ClassNotFoundException {
        this(new Properties());
    }

    public CoNLLUReader(Properties properties) throws ClassNotFoundException {
        this.columnCount = 10;
        this.extraColumns = new HashMap<>();
        if (properties.getProperty("conllu.extraColumns", "").equals("")) {
            this.extraColumns.put(10, CoreAnnotations.NamedEntityTagAnnotation.class);
        } else {
            for (String str : properties.getProperty("conllu.extraColumns").split(",")) {
                if (classShorthandToFull.containsKey(str)) {
                    str = classShorthandToFull.get(str) + str;
                }
                this.extraColumns.put(10, Class.forName(str));
            }
        }
        this.columnCount += this.extraColumns.size();
    }

    /**
     * Reads a CoNLL-U source and converts every document found in it to an {@code Annotation}.
     *
     * @param str passed unchanged to {@link #readCoNLLUFileCreateCoNLLUDocuments(String)}
     * @return one annotation per document, in reading order
     * @throws IOException if reading the source fails
     */
    public List<Annotation> readCoNLLUFile(String str) throws IOException {
        List<Annotation> annotations = new ArrayList<>();
        for (CoNLLUDocument document : readCoNLLUFileCreateCoNLLUDocuments(str)) {
            annotations.add(convertCoNLLUDocumentToAnnotation(document));
        }
        return annotations;
    }

    /**
     * Reads a CoNLL-U source and returns only its token lines, with an empty string appended
     * after the lines of every sentence.
     *
     * @param str passed unchanged to {@link #readCoNLLUFileCreateCoNLLUDocuments(String)}
     * @return token lines of all sentences of all documents, each sentence followed by {@code ""}
     * @throws IOException if reading the source fails
     */
    public List<String> readCoNLLUFileCreateCoNLLXLines(String str) throws IOException {
        List<String> conllxLines = new ArrayList<>();
        for (CoNLLUDocument document : readCoNLLUFileCreateCoNLLUDocuments(str)) {
            for (CoNLLUSentence sentence : document.sentences) {
                conllxLines.addAll(sentence.tokenLines);
                conllxLines.add("");
            }
        }
        return conllxLines;
    }

    /**
     * Reads a CoNLL-U source line by line and groups the lines into documents and sentences.
     *
     * <p>A line matching {@link #DOCUMENT_LINE} starts a new document, unless the current
     * document has not received any token line yet, in which case the current document is
     * reused. Any line that a sentence does not absorb (see
     * {@code CoNLLUSentence.processLine}) closes the current sentence. Sentences that end up
     * without token lines -- from consecutive blank lines, a trailing blank line, or a
     * document boundary -- are dropped, so every returned sentence has at least one token.
     * A final sentence that is not followed by a blank line is kept.
     *
     * @param str handed to {@code IOUtils.readerFromString}; presumably a file path or URL -- verify against that helper
     * @return the documents in reading order; always contains at least one document, which
     *         has no sentences if the source contains no token lines
     * @throws IOException if reading the source fails
     */
    public List<CoNLLUDocument> readCoNLLUFileCreateCoNLLUDocuments(String str) throws IOException {
        List<CoNLLUDocument> documents = new ArrayList<>();
        CoNLLUDocument current = new CoNLLUDocument();
        documents.add(current);
        for (String line : IOUtils.getLineIterable(IOUtils.readerFromString(str), false)) {
            if (DOCUMENT_LINE.matcher(line).matches()
                    && current.sentences.stream().anyMatch(sentence -> !sentence.tokenLines.isEmpty())) {
                // The constructor already supplies the first, empty sentence of the new document.
                current = new CoNLLUDocument();
                documents.add(current);
            }
            if (current.lastSentence().processLine(line)) {
                current.sentences.add(new CoNLLUSentence());
            }
        }
        // Token-less sentences cannot be converted (conversion indexes the first and last
        // token), so prune them everywhere rather than blindly removing the final sentence.
        for (CoNLLUDocument document : documents) {
            document.sentences.removeIf(sentence -> sentence.tokenLines.isEmpty());
        }
        return documents;
    }

    /**
     * Builds a document-level {@code Annotation} from a parsed CoNLL-U document.
     *
     * <p>Every sentence is converted with
     * {@link #convertCoNLLUSentenceToCoreMap(CoNLLUDocument, CoNLLUSentence)}; the resulting
     * tokens are collected into one document-wide token list and given document-level token
     * offsets and their sentence index. The document text is whatever sentence conversion
     * accumulated in {@code coNLLUDocument.docText}.
     *
     * @param coNLLUDocument the document to convert; its {@code docText} is extended as a side effect of sentence conversion
     * @return the annotation holding sentences, tokens and text
     */
    public Annotation convertCoNLLUDocumentToAnnotation(CoNLLUDocument coNLLUDocument) {
        Annotation document = new Annotation("");

        List<CoreMap> sentences = new ArrayList<>();
        for (CoNLLUSentence rawSentence : coNLLUDocument.sentences) {
            sentences.add(convertCoNLLUSentenceToCoreMap(coNLLUDocument, rawSentence));
        }
        document.set(CoreAnnotations.SentencesAnnotation.class, sentences);

        List<CoreLabel> documentTokens = new ArrayList<>();
        document.set(CoreAnnotations.TokensAnnotation.class, documentTokens);

        int tokenOffset = 0;
        for (int sentenceIndex = 0; sentenceIndex < sentences.size(); sentenceIndex++) {
            CoreMap sentence = sentences.get(sentenceIndex);
            sentence.set(CoreAnnotations.SentenceIndexAnnotation.class, Integer.valueOf(sentenceIndex));
            List<CoreLabel> sentenceTokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
            // Every sentence after the first is preceded by the newline that ended the previous one.
            if (sentenceIndex > 0) {
                sentenceTokens.get(0).setBefore("\n");
            }
            for (CoreLabel token : sentenceTokens) {
                token.set(CoreAnnotations.TokenBeginAnnotation.class, Integer.valueOf(tokenOffset));
                token.set(CoreAnnotations.TokenEndAnnotation.class, Integer.valueOf(tokenOffset + 1));
                token.set(CoreAnnotations.SentenceIndexAnnotation.class, Integer.valueOf(sentenceIndex));
                documentTokens.add(token);
                tokenOffset++;
            }
        }

        document.set(CoreAnnotations.TextAnnotation.class, coNLLUDocument.docText);
        return document;
    }

    /**
     * Converts one parsed CoNLL-U sentence into a sentence-level {@code CoreMap} carrying its
     * tokens and a basic dependency graph.
     *
     * <p>Steps, in order:
     * <ol>
     *   <li>create one {@code CoreLabel} per token line (word, lemma, tag, extra columns,
     *       1-based index, trailing whitespace, multi-word-token flags);</li>
     *   <li>derive each token's leading whitespace from its predecessor;</li>
     *   <li>append the sentence to {@code coNLLUDocument.docText} and assign character
     *       offsets; all tokens of a multi-word token share the offsets of its surface text;</li>
     *   <li>build one dependency edge per token whose governor column is not {@code "0"}.</li>
     * </ol>
     *
     * <p>Misc entries without a {@code key=value} shape are ignored when looking for
     * {@code SpaceAfter}, and a token line without a misc column is treated as having an
     * empty misc value.
     *
     * @param coNLLUDocument the document the sentence belongs to; its {@code docText} is extended
     * @param coNLLUSentence the sentence to convert
     * @return an annotation over the trimmed text of this sentence, holding its tokens and basic dependencies
     * @throws IndexOutOfBoundsException if the sentence has no token lines, or a token line has too few columns
     * @throws NumberFormatException if a governor column is not an integer
     */
    public CoreMap convertCoNLLUSentenceToCoreMap(CoNLLUDocument coNLLUDocument, CoNLLUSentence coNLLUSentence) {
        List<String> lines = coNLLUSentence.tokenLines;

        // Step 1: one CoreLabel per token line.
        List<CoreLabel> coreLabels = new ArrayList<>();
        int sentenceTokenIndex = 1;
        for (String line : lines) {
            List<String> fields = Arrays.asList(line.split(LinearClassifier.TEXT_SERIALIZATION_DELIMITER));
            CoreLabel coreLabel = new CoreLabel();
            String word = fields.get(CoNLLU_WordField);
            coreLabel.setWord(word);
            coreLabel.setValue(word);
            coreLabel.setOriginalText(word);
            coreLabel.setIsNewline(false);
            if (!fields.get(CoNLLU_LemmaField).equals(Expressions.VAR_SELF)) {
                coreLabel.setLemma(fields.get(CoNLLU_LemmaField));
            }
            if (!fields.get(CoNLLU_UPOSField).equals(Expressions.VAR_SELF)) {
                coreLabel.setTag(fields.get(CoNLLU_UPOSField));
            }
            for (int column = 10; column < this.columnCount && column < fields.size(); column++) {
                coreLabel.set(this.extraColumns.get(Integer.valueOf(column)), (String) fields.get(column));
            }
            coreLabel.setIndex(sentenceTokenIndex);

            String misc = fields.size() > CoNLLU_MiscField ? fields.get(CoNLLU_MiscField) : Expressions.VAR_SELF;
            coreLabel.setAfter(miscDisablesSpaceAfter(misc) ? "" : " ");

            int position = sentenceTokenIndex - 1;
            if (coNLLUSentence.mwtData.containsKey(Integer.valueOf(position))) {
                int mwtIndex = coNLLUSentence.mwtData.get(Integer.valueOf(position)).intValue();
                coreLabel.set(CoreAnnotations.MWTTokenTextAnnotation.class, coNLLUSentence.mwtTokens.get(mwtIndex));
                coreLabel.setIsMWT(true);
                // A token is the first of its MWT unless the preceding token belongs to the same MWT.
                boolean continuesPreviousMwt = coNLLUSentence.mwtData.containsKey(Integer.valueOf(position - 1))
                        && coNLLUSentence.mwtData.get(Integer.valueOf(position - 1))
                                .equals(coNLLUSentence.mwtData.get(Integer.valueOf(position)));
                coreLabel.setIsMWTFirst(!continuesPreviousMwt);
                // A SpaceAfter entry on the MWT range line overrides the token's own setting.
                for (String entry : coNLLUSentence.mwtMiscs.get(mwtIndex).split("\\|")) {
                    if (entry.startsWith("SpaceAfter")) {
                        String[] keyValue = entry.split("=");
                        if (keyValue.length >= 2) {
                            coreLabel.setAfter(keyValue[1].equals("No") ? "" : " ");
                        }
                    }
                }
            } else {
                coreLabel.setIsMWT(false);
                coreLabel.setIsMWTFirst(false);
            }
            sentenceTokenIndex++;
            coreLabels.add(coreLabel);
        }

        // Step 2: whitespace around tokens. The sentence ends with a newline and starts with nothing.
        coreLabels.get(coreLabels.size() - 1).setAfter("\n");
        coreLabels.get(0).setBefore("");
        for (int i = 1; i < coreLabels.size(); i++) {
            CoreLabel label = coreLabels.get(i);
            CoreLabel previous = coreLabels.get(i - 1);
            if (!label.isMWT() || label.isMWTFirst()) {
                label.setBefore(previous.after());
            } else {
                // Non-initial MWT parts share the leading whitespace of the MWT's first part.
                label.setBefore(previous.before());
            }
        }

        // Step 3: extend the document text and assign character offsets.
        int sentenceStart = coNLLUDocument.docText.length();
        int nextMwt = 0;
        int mwtBegin = -1;
        int mwtEnd = -1;
        for (CoreLabel label : coreLabels) {
            Integer position = Integer.valueOf(label.index() - 1);
            if (coNLLUSentence.mwtData.containsKey(position)) {
                if (coNLLUSentence.mwtData.get(position).intValue() == nextMwt) {
                    // First token of the next MWT: emit the MWT surface text once.
                    label.setBeginPosition(coNLLUDocument.docText.length());
                    coNLLUDocument.docText += coNLLUSentence.mwtTokens.get(nextMwt);
                    label.setEndPosition(coNLLUDocument.docText.length());
                    mwtBegin = label.beginPosition();
                    mwtEnd = label.endPosition();
                    CoreLabel lastOfMwt = coreLabels.get(coNLLUSentence.mwtLastCoreLabels.get(nextMwt).intValue());
                    coNLLUDocument.docText += lastOfMwt.after();
                    nextMwt++;
                } else {
                    // Remaining tokens of the same MWT reuse its offsets.
                    label.setBeginPosition(mwtBegin);
                    label.setEndPosition(mwtEnd);
                }
                label.setIsMWT(true);
            } else {
                label.setBeginPosition(coNLLUDocument.docText.length());
                coNLLUDocument.docText += label.word();
                label.setEndPosition(coNLLUDocument.docText.length());
                coNLLUDocument.docText += label.after();
            }
        }

        // Step 4: dependency edges; a governor of "0" produces no edge.
        List<SemanticGraphEdge> edges = new ArrayList<>();
        for (int i = 0; i < lines.size(); i++) {
            List<String> fields = Arrays.asList(lines.get(i).split(LinearClassifier.TEXT_SERIALIZATION_DELIMITER));
            String governorIndex = fields.get(CoNLLU_GovField);
            if (!governorIndex.equals("0")) {
                IndexedWord governor = new IndexedWord(coreLabels.get(Integer.parseInt(governorIndex) - 1));
                IndexedWord dependent = new IndexedWord(coreLabels.get(i));
                edges.add(new SemanticGraphEdge(governor, dependent,
                        GrammaticalRelation.valueOf(fields.get(CoNLLU_RelnField)), 1.0d, false));
            }
        }
        SemanticGraph dependencies = SemanticGraphFactory.makeFromEdges(edges);

        Annotation sentence = new Annotation(coNLLUDocument.docText.substring(sentenceStart).trim());
        sentence.set(CoreAnnotations.TokensAnnotation.class, coreLabels);
        sentence.set(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class, dependencies);
        return sentence;
    }

    /**
     * Tells whether a token's misc column contains {@code SpaceAfter=No}.
     *
     * <p>The column is split on {@code |}; entries that do not split into a key and a value
     * on {@code =} are skipped. If {@code SpaceAfter} occurs more than once, the last
     * occurrence wins.
     *
     * @param misc the misc column value
     * @return {@code true} only if the effective {@code SpaceAfter} value is {@code No}
     */
    private static boolean miscDisablesSpaceAfter(String misc) {
        if (misc.equals(Expressions.VAR_SELF)) {
            return false;
        }
        String spaceAfter = "Yes";
        for (String entry : misc.split("\\|")) {
            String[] keyValue = entry.split("=");
            if (keyValue.length >= 2 && keyValue[0].equals("SpaceAfter")) {
                spaceAfter = keyValue[1];
            }
        }
        return spaceAfter.equals("No");
    }

    // Registers the shorthand class names together with the package prefix prepended to them.
    static {
        String[][] shorthandAndPrefix = {
            {"CoreAnnotations", "edu.stanford.nlp.ling."},
            {"SemanticGraphCoreAnnotations", "edu.stanford.nlp.semgraph."},
            {"SentimentCoreAnnotations", "edu.stanford.nlp.sentiment."},
        };
        for (String[] pair : shorthandAndPrefix) {
            classShorthandToFull.put(pair[0], pair[1]);
        }
    }
}
