package opennlp.tools.formats.ad;

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.List;
import opennlp.tools.chunker.ChunkSample;
import opennlp.tools.formats.ad.ADSentenceStream;
import opennlp.tools.util.InputStreamFactory;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
import opennlp.tools.util.StringUtil;

/* loaded from: input_file:lib/opennlp-tools-1.9.1.jar:opennlp/tools/formats/ad/ADChunkSampleStream.class */
/**
 * An {@link ObjectStream} that turns the sentences produced by an
 * {@link ADSentenceStream} into {@link ChunkSample}s.
 *
 * <p>Every leaf of a sentence tree contributes one token, one tag and one chunk
 * label. Chunk labels are either {@link #OTHER} or a phrase tag prefixed with
 * {@code "B-"} (first token of a chunk) or {@code "I-"} (continuation).
 *
 * <p>An optional window can be configured with {@link #setStart(int)} and
 * {@link #setEnd(int)}. The running index advances once for every skipped
 * sentence and once for every returned sample; sentences inside the window that
 * yield no tokens do not advance it.
 */
public class ADChunkSampleStream implements ObjectStream<ChunkSample> {

    /** Chunk label for tokens that do not belong to any chunk. */
    public static final String OTHER = "O";

    /** Source of parsed sentences; all stream operations are delegated to it. */
    protected final ObjectStream<ADSentenceStream.Sentence> adSentenceStream;

    /** Index of the first sentence to emit, or a value below zero for "no lower bound". */
    private int start = -1;

    /** Index at which reading stops, or a value below zero for "no upper bound". */
    private int end = -1;

    /** Running index compared against {@link #start} and {@link #end}. */
    private int index = 0;

    /**
     * Creates a stream on top of a line stream.
     *
     * @param objectStream the lines handed to the underlying {@link ADSentenceStream}
     */
    public ADChunkSampleStream(ObjectStream<String> objectStream) {
        this.adSentenceStream = new ADSentenceStream(objectStream);
    }

    /**
     * Creates a stream that reads lines from the given input using the given charset.
     *
     * @param inputStreamFactory factory for the raw input
     * @param str name of the charset used to decode the input
     * @throws IOException if the line stream cannot be created
     * @throws IllegalStateException if the charset name is reported as unsupported
     */
    public ADChunkSampleStream(InputStreamFactory inputStreamFactory, String str) throws IOException {
        try {
            this.adSentenceStream = new ADSentenceStream(new PlainTextByLineStream(inputStreamFactory, str));
        } catch (UnsupportedEncodingException e) {
            throw new IllegalStateException(e);
        }
    }

    /**
     * Returns the next chunk sample inside the configured window.
     *
     * @return the next sample, or {@code null} when the underlying stream is
     *         exhausted or the running index has reached the configured end
     * @throws IOException if the underlying stream fails
     */
    @Override
    public ChunkSample read() throws IOException {
        ADSentenceStream.Sentence sentence;
        while ((sentence = this.adSentenceStream.read()) != null) {
            if (this.end > -1 && this.index >= this.end) {
                // Upper bound reached: behave as if the stream were exhausted.
                return null;
            }
            if (this.start > -1 && this.index < this.start) {
                // Still before the window: count the sentence and move on.
                this.index++;
                continue;
            }

            List<String> tokens = new ArrayList<>();
            List<String> tags = new ArrayList<>();
            List<String> chunkTags = new ArrayList<>();
            processRoot(sentence.getRoot(), tokens, tags, chunkTags);

            // Sentences that produce no tokens are dropped without advancing the index.
            if (!tokens.isEmpty()) {
                this.index++;
                return new ChunkSample(tokens, tags, chunkTags);
            }
        }
        return null;
    }

    /**
     * Walks the direct children of the root node. Leaves directly under the root
     * start outside any chunk; child nodes are processed recursively without an
     * inherited phrase tag.
     *
     * @param node the root node; {@code null} is ignored
     * @param list receives the tokens
     * @param list2 receives the tags
     * @param list3 receives the chunk labels
     */
    protected void processRoot(ADSentenceStream.SentenceParser.Node node, List<String> list, List<String> list2, List<String> list3) {
        if (node == null) {
            return;
        }
        for (ADSentenceStream.SentenceParser.TreeElement element : node.getElements()) {
            if (element.isLeaf()) {
                processLeaf((ADSentenceStream.SentenceParser.Leaf) element, false, OTHER, list, list2, list3);
            } else {
                processNode((ADSentenceStream.SentenceParser.Node) element, list, list2, list3, null);
            }
        }
    }

    /**
     * Recursively processes a non-leaf node.
     *
     * @param node the node to process
     * @param tokens receives the tokens
     * @param tags receives the tags
     * @param chunkTags receives the chunk labels
     * @param inheritedTag phrase tag of the enclosing node, or {@code null} if there is none;
     *        used when this node has no phrase tag of its own
     */
    private void processNode(ADSentenceStream.SentenceParser.Node node, List<String> tokens, List<String> tags, List<String> chunkTags, String inheritedTag) {
        String phraseTag = getChunkTag(node);
        boolean inherited = false;
        if (phraseTag.equals(OTHER) && inheritedTag != null) {
            phraseTag = inheritedTag;
            inherited = true;
        }

        ADSentenceStream.SentenceParser.TreeElement[] elements = node.getElements();
        for (int i = 0; i < elements.length; i++) {
            if (elements[i].isLeaf()) {
                ADSentenceStream.SentenceParser.Leaf leaf = (ADSentenceStream.SentenceParser.Leaf) elements[i];

                // A leaf may carry its own phrase tag, which takes precedence over the node's.
                String leafTag = phraseTag;
                String ownTag = getChunkTag(leaf);
                if (ownTag != null && !leafTag.equals(ownTag)) {
                    leafTag = ownTag;
                }

                // A leaf continues the previous chunk only if the last label has the same
                // phrase tag and the leaf is not the first element of a node with its own tag.
                boolean intermediate = isIntermediate(tags, chunkTags, leafTag) && (inherited || i > 0);

                // Leaves without a functional tag (presumably punctuation, going by
                // isIncludePunctuations) stay inside a chunk only when they sit between two leaves.
                if (!isIncludePunctuations() && leaf.getFunctionalTag() == null && !hasLeafOnBothSides(elements, i)) {
                    intermediate = false;
                    leafTag = OTHER;
                }
                processLeaf(leaf, intermediate, leafTag, tokens, tags, chunkTags);
            } else {
                int firstNew = chunkTags.size();
                processNode((ADSentenceStream.SentenceParser.Node) elements[i], tokens, tags, chunkTags, phraseTag);

                // If the child produced any label with a different phrase tag, the current
                // chunk is considered broken: remaining elements fall back to OTHER.
                String suffix = "-" + phraseTag;
                for (int j = chunkTags.size() - 1; j >= firstNew; j--) {
                    if (!chunkTags.get(j).endsWith(suffix)) {
                        phraseTag = OTHER;
                        break;
                    }
                }
            }
        }
    }

    /** Tells whether the element at {@code i} has a leaf immediately before and after it. */
    private static boolean hasLeafOnBothSides(ADSentenceStream.SentenceParser.TreeElement[] elements, int i) {
        return i + 1 < elements.length && elements[i + 1].isLeaf() && i > 0 && elements[i - 1].isLeaf();
    }

    /**
     * Appends one token, one tag and one chunk label for the given leaf.
     *
     * <p>If the phrase tag is {@link #OTHER} and the leaf has a functional tag, the
     * phrase tag is derived from it via {@link #getPhraseTagFromPosTag(String)}.
     * The tag list receives the lexeme when the leaf has no syntactic tag and the
     * functional tag otherwise.
     *
     * @param leaf the leaf to process
     * @param z {@code true} to emit an {@code "I-"} label, {@code false} for {@code "B-"}
     * @param str phrase tag of the chunk the leaf belongs to, or {@link #OTHER}
     * @param list receives the token
     * @param list2 receives the tag
     * @param list3 receives the chunk label
     */
    protected void processLeaf(ADSentenceStream.SentenceParser.Leaf leaf, boolean z, String str, List<String> list, List<String> list2, List<String> list3) {
        String phraseTag = str;
        if (leaf.getFunctionalTag() != null && phraseTag.equals(OTHER)) {
            phraseTag = getPhraseTagFromPosTag(leaf.getFunctionalTag());
        }

        final String chunkTag;
        if (phraseTag.equals(OTHER)) {
            chunkTag = phraseTag;
        } else if (z) {
            chunkTag = "I-" + phraseTag;
        } else {
            chunkTag = "B-" + phraseTag;
        }

        list.add(leaf.getLexeme());
        if (leaf.getSyntacticTag() == null) {
            list2.add(leaf.getLexeme());
        } else {
            list2.add(convertFuncTag(leaf.getFunctionalTag(), false));
        }
        list3.add(chunkTag);
    }

    /**
     * Maps a functional tag to a phrase tag for leaves that are outside any chunk.
     *
     * @param str the functional tag; must not be {@code null}
     * @return {@code "VP"} for {@code "v-fin"}, {@code "NP"} for {@code "n"}, otherwise {@link #OTHER}
     */
    protected String getPhraseTagFromPosTag(String str) {
        if (str.equals("v-fin")) {
            return "VP";
        }
        if (str.equals("n")) {
            return "NP";
        }
        return OTHER;
    }

    /**
     * Optionally collapses determiner-like functional tags.
     *
     * @param str the functional tag, may be {@code null}
     * @param z when {@code true}, {@code "art"}, {@code "pron-det"} and
     *        {@code "pron-indef"} are replaced by {@code "det"}
     * @return the converted tag, or the input unchanged
     */
    public static String convertFuncTag(String str, boolean z) {
        if (z && ("art".equals(str) || "pron-det".equals(str) || "pron-indef".equals(str))) {
            return "det";
        }
        return str;
    }

    /**
     * Returns the phrase tag implied by a leaf itself.
     *
     * @param leaf the leaf to inspect
     * @return {@code "VP"} when the leaf's syntactic tag is {@code "P"}, otherwise {@code null}
     */
    protected String getChunkTag(ADSentenceStream.SentenceParser.Leaf leaf) {
        return "P".equals(leaf.getSyntacticTag()) ? "VP" : null;
    }

    /**
     * Derives the phrase tag of a node from its syntactic tag. The part after the
     * last {@code ':'} is taken, trailing {@code '-'} characters are removed, and
     * the result is upper-cased if it is one of the recognised phrase types.
     *
     * @param node the node to inspect
     * @return the upper-cased phrase tag for {@code np}, {@code vp}, {@code pp},
     *         {@code ap}, {@code advp} or {@code adjp}; otherwise {@link #OTHER}
     *         (also when the node has no syntactic tag)
     */
    protected String getChunkTag(ADSentenceStream.SentenceParser.Node node) {
        String syntacticTag = node.getSyntacticTag();
        if (syntacticTag == null) {
            // Defensive: treat an untagged node as being outside any chunk.
            return OTHER;
        }

        String phrase = syntacticTag.substring(syntacticTag.lastIndexOf(":") + 1);
        while (phrase.endsWith("-")) {
            phrase = phrase.substring(0, phrase.length() - 1);
        }

        switch (phrase) {
            case "np":
            case "vp":
            case "pp":
            case "ap":
            case "advp":
            case "adjp":
                return StringUtil.toUpperCase(phrase);
            default:
                return OTHER;
        }
    }

    /**
     * Sets the index of the first sentence to emit; sentences with a lower index
     * are skipped. A value below zero disables skipping.
     *
     * @param i the start index
     */
    public void setStart(int i) {
        this.start = i;
    }

    /**
     * Sets the index at which reading stops: once the running index reaches this
     * value, {@link #read()} returns {@code null}. A value below zero disables the limit.
     *
     * @param i the end index
     */
    public void setEnd(int i) {
        this.end = i;
    }

    /**
     * Resets the underlying stream and the running index, so that the
     * start/end window applies again from the first sentence.
     *
     * @throws IOException if the underlying stream fails to reset
     * @throws UnsupportedOperationException if the underlying stream does not support reset
     */
    @Override
    public void reset() throws IOException, UnsupportedOperationException {
        this.adSentenceStream.reset();
        // Only rewind the counter once the delegate has actually been reset.
        this.index = 0;
    }

    /**
     * Closes the underlying stream.
     *
     * @throws IOException if closing fails
     */
    @Override
    public void close() throws IOException {
        this.adSentenceStream.close();
    }

    /**
     * Controls whether leaves without a functional tag keep the phrase tag of the
     * surrounding node even when they are not enclosed by leaves on both sides.
     *
     * @return {@code false} in this implementation; subclasses may override
     */
    protected boolean isIncludePunctuations() {
        return false;
    }

    /**
     * Tells whether the most recently emitted chunk label belongs to the given phrase tag.
     *
     * @param list the tags collected so far (not inspected by this implementation)
     * @param list2 the chunk labels collected so far
     * @param str the phrase tag to compare against
     * @return {@code true} if {@code list2} is non-empty and its last label ends with {@code "-" + str}
     */
    protected boolean isIntermediate(List<String> list, List<String> list2, String str) {
        if (list2.isEmpty()) {
            return false;
        }
        String lastLabel = list2.get(list2.size() - 1);
        return lastLabel.endsWith("-" + str);
    }
}
