/*
 * Decompiled with CFR 0.152.
 */
package ai.djl.basicdataset.nlp;

import ai.djl.Application;
import ai.djl.basicdataset.nlp.TextDataset;
import ai.djl.modality.nlp.embedding.EmbeddingException;
import ai.djl.ndarray.NDArray;
import ai.djl.ndarray.NDList;
import ai.djl.ndarray.NDManager;
import ai.djl.ndarray.types.DataType;
import ai.djl.repository.Artifact;
import ai.djl.repository.MRL;
import ai.djl.training.dataset.Record;
import ai.djl.util.Progress;
import java.io.BufferedReader;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;

/**
 * Text dataset for part-of-speech tagging, backed by the
 * {@code universal-dependencies-en-ewt} artifact.
 *
 * <p>Each record pairs the embedding of one sentence (data) with an INT32 array holding one
 * {@link UniversalPosTag} ordinal per token of that sentence (label).
 *
 * <p>The input files are read as tab-separated rows: the first column is the token and the second
 * column is the name of a {@link UniversalPosTag} constant. Sentences are separated by empty
 * lines.
 */
public class UniversalDependenciesEnglishEWT extends TextDataset {
    private static final String VERSION = "2.0";
    private static final String ARTIFACT_ID = "universal-dependencies-en-ewt";

    /** Per-sentence tag ordinals; entry {@code i} belongs to sentence {@code i}. Set by prepare. */
    private List<List<Integer>> universalPosTags;

    /**
     * Creates the dataset from the given builder.
     *
     * @param builder the builder carrying the usage and repository configuration
     */
    protected UniversalDependenciesEnglishEWT(Builder builder) {
        super(builder);
        this.usage = builder.usage;
        this.mrl = builder.getMrl();
    }

    /**
     * Creates a new builder for this dataset.
     *
     * @return a new {@link Builder}
     */
    public static Builder builder() {
        return new Builder();
    }

    /**
     * Downloads (if needed) and parses the file matching the configured usage, then hands the
     * sentences to {@code preprocess}. Does nothing if the dataset is already prepared.
     *
     * <p>A sentence is completed by an empty line or by the end of the file; empty lines that do
     * not follow any token are ignored, so no empty sentences are produced.
     *
     * @param progress the progress tracker passed on to the artifact preparation
     * @throws IOException if the file cannot be read or a non-empty row has fewer than two
     *     tab-separated columns
     * @throws EmbeddingException declared for failures raised while preprocessing the text
     * @throws UnsupportedOperationException if the configured usage has no associated file
     * @throws IllegalArgumentException if a tag column does not name a {@link UniversalPosTag}
     */
    public void prepare(Progress progress) throws IOException, EmbeddingException {
        if (this.prepared) {
            return;
        }
        Artifact artifact = this.mrl.getDefaultArtifact();
        this.mrl.prepare(artifact, progress);
        Path root = this.mrl.getRepository().getResourceDirectory(artifact);
        Path usagePath = root.resolve(this.usageFileName());

        List<String> sentences = new ArrayList<>();
        List<List<Integer>> tagsPerSentence = new ArrayList<>();
        StringBuilder sentence = new StringBuilder();
        List<Integer> sentenceTags = new ArrayList<>();
        try (BufferedReader reader = Files.newBufferedReader(usagePath)) {
            String row;
            int lineNumber = 0;
            while ((row = reader.readLine()) != null) {
                ++lineNumber;
                if (row.isEmpty()) {
                    // Sentence boundary. Skip it when nothing was collected so that leading or
                    // repeated empty lines do not create empty sentences.
                    if (!sentenceTags.isEmpty()) {
                        sentences.add(sentence.toString());
                        tagsPerSentence.add(sentenceTags);
                        sentence.setLength(0);
                        sentenceTags = new ArrayList<>();
                    }
                    continue;
                }
                String[] splits = row.split("\t");
                if (splits.length < 2) {
                    throw new IOException(
                            "Malformed row at " + usagePath + ":" + lineNumber
                                    + ", expected <token>\\t<tag> but got: " + row);
                }
                if (sentence.length() != 0) {
                    sentence.append(' ');
                }
                sentence.append(splits[0]);
                // The label is the ordinal of the tag constant.
                sentenceTags.add(UniversalPosTag.valueOf(splits[1]).ordinal());
            }
        }
        // The file may end without a trailing empty line; keep the last sentence in that case.
        if (!sentenceTags.isEmpty()) {
            sentences.add(sentence.toString());
            tagsPerSentence.add(sentenceTags);
        }

        // Publish the tags only after the whole file was parsed successfully.
        this.universalPosTags = tagsPerSentence;
        this.preprocess(sentences, true);
        this.prepared = true;
    }

    /**
     * Returns the record at the given index.
     *
     * <p>{@link #prepare(Progress)} must have completed before this is called, because it is the
     * only place where the tag lists are populated.
     *
     * @param manager the manager used to create the returned arrays
     * @param index the index of the sentence
     * @return a record whose data is the sentence embedding and whose label is the INT32 array
     *     of tag ordinals for that sentence
     * @throws ArithmeticException if {@code index} does not fit into an {@code int}
     */
    public Record get(NDManager manager, long index) {
        NDList data = new NDList(this.sourceTextData.getEmbedding(manager, index));
        int[] tagIds =
                this.universalPosTags.get(Math.toIntExact(index)).stream()
                        .mapToInt(Integer::intValue)
                        .toArray();
        NDList labels = new NDList(manager.create(tagIds).toType(DataType.INT32, false));
        return new Record(data, labels);
    }

    /**
     * Returns the number of records, as reported by the source text data.
     *
     * @return the size of the source text data
     */
    protected long availableSize() {
        return this.sourceTextData.getSize();
    }

    /**
     * Returns the artifact-relative file name for the configured usage.
     *
     * @return the relative path of the file to parse
     * @throws UnsupportedOperationException if the usage is none of TRAIN, TEST or VALIDATION
     */
    private String usageFileName() {
        switch (this.usage) {
            case TRAIN:
                return "en-ud-v2/en-ud-v2/en-ud-tag.v2.train.txt";
            case TEST:
                return "en-ud-v2/en-ud-v2/en-ud-tag.v2.test.txt";
            case VALIDATION:
                return "en-ud-v2/en-ud-v2/en-ud-tag.v2.dev.txt";
            default:
                // Fail with a clear message instead of a NullPointerException further down.
                throw new UnsupportedOperationException("Unsupported usage: " + this.usage);
        }
    }

    /** A builder for {@link UniversalDependenciesEnglishEWT}. */
    public static class Builder extends TextDataset.Builder<Builder> {
        /** Creates the builder with the group id and artifact id of this dataset. */
        public Builder() {
            this.groupId = "ai.djl.basicdataset.universal-dependencies";
            this.artifactId = UniversalDependenciesEnglishEWT.ARTIFACT_ID;
        }

        /**
         * Returns this builder.
         *
         * @return this builder
         */
        public Builder self() {
            return this;
        }

        /**
         * Builds the dataset from the current configuration.
         *
         * @return a new {@link UniversalDependenciesEnglishEWT}
         */
        public UniversalDependenciesEnglishEWT build() {
            return new UniversalDependenciesEnglishEWT(this);
        }

        /**
         * Looks up the dataset resource in the configured repository.
         *
         * @return the {@link MRL} for this dataset's group id, artifact id and version
         */
        MRL getMrl() {
            return this.repository.dataset(
                    Application.NLP.ANY,
                    this.groupId,
                    this.artifactId,
                    UniversalDependenciesEnglishEWT.VERSION);
        }
    }

    /**
     * Part-of-speech tags accepted in the tag column.
     *
     * <p>The ordinal of each constant is used as the label value, so the declaration order must
     * not be changed.
     */
    enum UniversalPosTag {
        ADJ,
        ADV,
        INTJ,
        NOUN,
        PROPN,
        VERB,
        ADP,
        AUX,
        CCONJ,
        DET,
        NUM,
        PART,
        PRON,
        SCONJ,
        PUNCT,
        SYM,
        X;
    }
}

