/*
 * Decompiled with CFR 0.152.
 */
package org.deeplearning4j.models.word2vec.actor;

import akka.actor.UntypedActor;
import akka.dispatch.Futures;
import akka.dispatch.OnFailure;
import akka.dispatch.OnSuccess;
import java.io.Closeable;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.commons.compress.utils.IOUtils;
import org.deeplearning4j.models.word2vec.StreamWork;
import org.deeplearning4j.models.word2vec.VocabWord;
import org.deeplearning4j.models.word2vec.VocabWork;
import org.deeplearning4j.models.word2vec.wordstore.VocabCache;
import org.deeplearning4j.text.invertedindex.InvertedIndex;
import org.deeplearning4j.text.movingwindow.Util;
import org.deeplearning4j.text.tokenization.tokenizer.Tokenizer;
import org.deeplearning4j.text.tokenization.tokenizerfactory.TokenizerFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.tartarus.snowball.ext.PorterStemmer;
import scala.PartialFunction;
import scala.concurrent.ExecutionContext;
import scala.concurrent.Future;

public class VocabActor
extends UntypedActor {
    private transient TokenizerFactory tokenizer;
    private List<String> stopWords;
    private AtomicLong lastUpdate;
    private VocabCache cache;
    private int minWordFrequency;
    private AtomicLong numWordsEncountered;
    private InvertedIndex index;
    private static final Logger log = LoggerFactory.getLogger(VocabActor.class);
    private PorterStemmer stemmer = new PorterStemmer();

    public VocabActor(TokenizerFactory tokenizer, VocabCache cache, List<String> stopWords, AtomicLong lastUpdate, int minWordFrequency, AtomicLong numWordsEncountered, InvertedIndex index) {
        this.tokenizer = tokenizer;
        this.stopWords = stopWords;
        this.lastUpdate = lastUpdate;
        this.cache = cache;
        this.minWordFrequency = minWordFrequency;
        this.numWordsEncountered = numWordsEncountered;
        this.index = index;
    }

    public void onReceive(Object message) throws Exception {
        final HashSet<String> encountered = new HashSet<String>();
        if (message instanceof VocabWork) {
            final ArrayList document = new ArrayList();
            final VocabWork work = (VocabWork)message;
            if (work.getWork() == null || work.getWork().isEmpty()) {
                return;
            }
            final String sentence = work.getWork();
            if (sentence.isEmpty() || sentence.length() <= 2) {
                work.increment();
                this.lastUpdate.getAndSet(System.currentTimeMillis());
                return;
            }
            Future f = Futures.future((Callable)new Callable<Object>(){

                @Override
                public Object call() throws Exception {
                    String token;
                    VocabActor.this.numWordsEncountered.set(VocabActor.this.numWordsEncountered.get() + (long)document.size());
                    Tokenizer t = VocabActor.this.tokenizer.create(sentence);
                    while (t.hasMoreTokens() && !(token = t.nextToken()).isEmpty()) {
                        VocabActor.this.processToken(token, encountered, document, work.isStem());
                    }
                    if (work.getLabel() != null) {
                        VocabActor.this.index.addWordsToDoc(VocabActor.this.index.numDocuments(), (List<VocabWord>)document, work.getLabel());
                    } else {
                        VocabActor.this.index.addWordsToDoc(VocabActor.this.index.numDocuments(), document);
                    }
                    return null;
                }
            }, (ExecutionContext)this.context().dispatcher());
            f.onFailure((PartialFunction)new OnFailure(){

                public void onFailure(Throwable failure) throws Throwable {
                    log.error("Failure on vocab actor ", failure);
                }
            }, (ExecutionContext)this.context().dispatcher());
            f.onSuccess((PartialFunction)new OnSuccess<Object>(){

                public void onSuccess(Object result) throws Throwable {
                    work.increment();
                    VocabActor.this.lastUpdate.getAndSet(System.currentTimeMillis());
                }
            }, (ExecutionContext)this.context().dispatcher());
        } else if (message instanceof StreamWork) {
            String token;
            StreamWork work = (StreamWork)message;
            ArrayList<VocabWord> document = new ArrayList<VocabWord>();
            InputStream is = work.getIs();
            if (is == null) {
                return;
            }
            boolean tryRead = false;
            try {
                if (is.available() > 0) {
                    tryRead = true;
                }
            }
            catch (Exception e) {
                tryRead = false;
            }
            if (!tryRead) {
                return;
            }
            Tokenizer t = this.tokenizer.create(is);
            while (t.hasMoreTokens() && (token = t.nextToken()) != null && !token.isEmpty()) {
                this.processToken(token, encountered, document, false);
            }
            this.index.addWordsToDoc(this.index.numDocuments(), document);
            this.numWordsEncountered.set(this.numWordsEncountered.get() + (long)document.size());
            IOUtils.closeQuietly((Closeable)is);
            work.countDown();
            this.lastUpdate.getAndSet(System.currentTimeMillis());
        } else {
            this.unhandled(message);
        }
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    protected synchronized void processToken(String token, Set<String> encountered, List<VocabWord> words, boolean stem) {
        VocabWord token2;
        if (this.stopWords.contains(token)) {
            token = "STOP";
        }
        if (token.isEmpty()) {
            return;
        }
        String oldToken = token;
        if (stem) {
            PorterStemmer porterStemmer = this.stemmer;
            synchronized (porterStemmer) {
                this.stemmer.setCurrent(token);
                if (this.stemmer.stem() && this.stemmer.getCurrent() != null && !this.stemmer.getCurrent().isEmpty()) {
                    token = this.stemmer.getCurrent();
                }
            }
        }
        if (token.isEmpty()) {
            token = oldToken;
        }
        this.cache.incrementWordCount(token);
        if (!encountered.contains(token)) {
            this.cache.incrementDocCount(token, 1);
            encountered.add(token);
        }
        if (this.cache.hasToken(token)) {
            token2 = this.cache.tokenFor(token);
        } else {
            token2 = new VocabWord(1.0, token);
            this.cache.addToken(token2);
        }
        words.add(token2);
        if (!Util.matchesAnyStopWord(this.stopWords, token) && !token.isEmpty()) {
            if (!this.cache.containsWord(token) && this.cache.wordFrequency(token) >= this.minWordFrequency) {
                int idx = this.cache.numWords();
                token2.setIndex(idx);
                this.cache.putVocabWord(token);
            } else if (Util.matchesAnyStopWord(this.stopWords, token) && !token.isEmpty() && !this.cache.containsWord(token = "STOP") && this.cache.wordFrequency(token) >= this.minWordFrequency) {
                int idx = this.cache.numWords();
                token2.setIndex(idx);
                this.cache.putVocabWord(token);
            }
        }
    }
}

