/*
 * Decompiled with CFR 0.152.
 */
package org.deeplearning4j.bagofwords.vectorizer;

import java.io.File;
import java.io.InputStream;
import java.util.List;
import org.apache.commons.io.IOUtils;
import org.apache.uima.util.FileUtils;
import org.deeplearning4j.bagofwords.vectorizer.BaseTextVectorizer;
import org.deeplearning4j.bagofwords.vectorizer.TextVectorizer;
import org.deeplearning4j.models.word2vec.wordstore.VocabCache;
import org.deeplearning4j.text.documentiterator.DocumentIterator;
import org.deeplearning4j.text.invertedindex.InvertedIndex;
import org.deeplearning4j.text.sentenceiterator.SentenceIterator;
import org.deeplearning4j.text.tokenization.tokenizer.Tokenizer;
import org.deeplearning4j.text.tokenization.tokenizerfactory.TokenizerFactory;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.util.FeatureUtil;

/**
 * Text vectorizer that turns a piece of text into a single-row vector whose
 * width is the vocabulary size reported by the {@code cache}.
 *
 * <p>For every token of the text that the vocabulary cache knows (its
 * {@code indexOf} is non-negative), the vector slot at that index is set to
 * the value of {@code cache.wordFrequency(token)}. Tokens unknown to the cache
 * are ignored.
 *
 * <p>NOTE(review): the value written is whatever the cache reports as the
 * word's frequency, not a count of occurrences within the given text -
 * presumably a corpus-level frequency; confirm against the
 * {@code VocabCache} implementation.
 */
public class BagOfWordsVectorizer extends BaseTextVectorizer {

    /** Creates an unconfigured vectorizer. */
    public BagOfWordsVectorizer() {
    }

    /**
     * Creates a vectorizer by forwarding every argument unchanged to the
     * {@link BaseTextVectorizer} constructor. Intended to be reached through
     * {@link Builder#build()}.
     */
    protected BagOfWordsVectorizer(VocabCache cache, TokenizerFactory tokenizerFactory, List<String> stopWords, int layerSize, int minWordFrequency, DocumentIterator docIter, SentenceIterator sentenceIterator, List<String> labels, InvertedIndex index) {
        super(cache, tokenizerFactory, stopWords, layerSize, minWordFrequency, docIter, sentenceIterator, labels, index);
    }

    /**
     * Reads the whole stream as text and vectorizes it.
     *
     * <p>The stream is not closed by this method.
     *
     * @param is    stream holding the text to vectorize
     * @param label label to encode as the outcome vector
     * @return data set pairing the text vector with the label's outcome vector
     * @throws RuntimeException wrapping any exception raised while reading or
     *                          vectorizing the stream content
     */
    @Override
    public DataSet vectorize(InputStream is, String label) {
        try {
            String inputString = IOUtils.toString(is);
            return this.vectorize(inputString, label);
        }
        catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Vectorizes the given text and pairs it with an outcome vector for
     * {@code label}.
     *
     * <p>The outcome vector is built from the position of {@code label} in
     * {@code labels} and the total number of labels.
     *
     * @param text  text to tokenize and vectorize
     * @param label label to encode; looked up in {@code labels}
     * @return data set holding the text vector and the outcome vector
     */
    @Override
    public DataSet vectorize(String text, String label) {
        INDArray input = this.toFrequencyVector(text);
        INDArray labelMatrix = FeatureUtil.toOutcomeVector(this.labels.indexOf(label), this.labels.size());
        return new DataSet(input, labelMatrix);
    }

    /**
     * Reads the whole file as text and vectorizes it.
     *
     * @param input file holding the text to vectorize
     * @param label label to encode as the outcome vector
     * @return data set pairing the text vector with the label's outcome vector
     * @throws RuntimeException wrapping any exception raised while reading or
     *                          vectorizing the file content
     */
    @Override
    public DataSet vectorize(File input, String label) {
        try {
            String text = FileUtils.file2String(input);
            return this.vectorize(text, label);
        }
        catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Converts the given text into its vector form without any label.
     *
     * @param text text to tokenize and vectorize
     * @return single-row vector with one column per vocabulary word
     */
    @Override
    public INDArray transform(String text) {
        return this.toFrequencyVector(text);
    }

    /**
     * Not implemented.
     *
     * @return always {@code null}
     */
    public DataSet vectorize() {
        return null;
    }

    /**
     * Builds the feature row shared by {@link #vectorize(String, String)} and
     * {@link #transform(String)}.
     *
     * @param text text to tokenize with the configured tokenizer factory
     * @return 1 x {@code cache.numWords()} array; slots of known tokens hold
     *         the cache's frequency for that token, all other slots are left
     *         as created by {@code Nd4j.create}
     */
    private INDArray toFrequencyVector(String text) {
        Tokenizer tokenizer = this.tokenizerFactory.create(text);
        List<String> tokens = tokenizer.getTokens();
        INDArray vector = Nd4j.create(1, (int)this.cache.numWords());
        for (String token : tokens) {
            // Look the token up once and reuse the index for both the
            // membership check and the write.
            int idx = this.cache.indexOf(token);
            if (idx < 0) {
                // Token is not part of the vocabulary.
                continue;
            }
            vector.putScalar(idx, this.cache.wordFrequency(token));
        }
        return vector;
    }

    /**
     * Builder producing {@link BagOfWordsVectorizer} instances from the
     * fields configured on the parent builder.
     */
    public static class Builder
    extends org.deeplearning4j.bagofwords.vectorizer.Builder {
        /**
         * Creates a new vectorizer from the builder's current field values.
         *
         * @return a new {@link BagOfWordsVectorizer}
         */
        @Override
        public TextVectorizer build() {
            return new BagOfWordsVectorizer(this.cache, this.tokenizerFactory, this.stopWords, this.layerSize, this.minWordFrequency, this.docIter, this.sentenceIterator, this.labels, this.index);
        }
    }
}

