/*
 * Decompiled with CFR 0.152.
 */
package ch.epfl.bbp.uima.filter;

import ch.epfl.bbp.io.LineReader;
import ch.epfl.bbp.uima.types.Keep;
import com.google.common.base.Preconditions;
import com.google.common.collect.Sets;
import java.io.FileNotFoundException;
import java.util.Collection;
import java.util.Set;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Removes {@link Keep} annotations whose (optionally lower-cased) normalized
 * text is not in a whitelist built from a token-frequency file.
 *
 * <p>The frequency file is read once in {@link #initialize(UimaContext)}. Each
 * line is expected to hold a token and its integer frequency separated by a
 * tab character. A token is whitelisted only when its frequency lies strictly
 * between {@code minimumFrequency} and {@code maximumFrequency}.
 *
 * <p>When {@code minimumFrequency} is 0, {@link #process(JCas)} returns
 * immediately and no annotation is removed, regardless of
 * {@code maximumFrequency}.
 */
public class FrequencyFilterAnnotator extends JCasAnnotator_ImplBase {
    private static final Logger LOG = LoggerFactory.getLogger(FrequencyFilterAnnotator.class);

    public static final String MINIMUM_FREQUENCY = "minimumFrequency";
    public static final String MAXIMUM_FREQUENCY = "maximumFrequency";

    @ConfigurationParameter(name=MINIMUM_FREQUENCY, description="minimum frequency of token to be retained")
    private int minimumFrequency;

    @ConfigurationParameter(name=MAXIMUM_FREQUENCY, defaultValue={"2147483647"}, description="maximum frequency of token to be retained")
    private int maximumFrequency;

    @ConfigurationParameter(name="caseSensitive", defaultValue={"false"}, description="If true, tokens are not normalized to lowercase before string comparisions")
    private boolean caseSensitive;

    @ConfigurationParameter(name="inputFile", description="Path to file containing tokens and their frequency in the corpus")
    private String tokenFrequencyFile;

    /** Whitelisted tokens, stored in the same normalized form used for lookups. */
    private final Set<String> normalText = Sets.newHashSet();

    /**
     * Validates the frequency bounds and loads the whitelist from the
     * configured token-frequency file.
     *
     * @param context the UIMA context, passed on to the superclass
     * @throws ResourceInitializationException if the frequency file cannot be
     *         found, or if one of its lines is not of the form
     *         {@code token<TAB>integer}
     * @throws IllegalArgumentException if {@code minimumFrequency} is negative
     *         or not smaller than {@code maximumFrequency}
     */
    public void initialize(UimaContext context) throws ResourceInitializationException {
        super.initialize(context);

        // Validate the configuration first so that an invalid setup fails
        // before any time is spent reading the frequency file.
        Preconditions.checkArgument(this.maximumFrequency > this.minimumFrequency, "expected: maximumFrequency>minimumFrequency");
        Preconditions.checkArgument(this.minimumFrequency >= 0, "expected: minimumFrequency >= 0");
        if (this.minimumFrequency == 0) {
            LOG.info("FrequencyFilterAnnotator's minimumFrequency set to 0 --> Filter inactive");
        }

        try {
            for (String line : LineReader.linesFrom(this.tokenFrequencyFile)) {
                String[] fields = line.split("\t");
                if (fields.length < 2) {
                    throw new ResourceInitializationException(new IllegalArgumentException(
                            "expected '<token>\\t<frequency>' in " + this.tokenFrequencyFile + " but got: '" + line + "'"));
                }
                int frequency;
                try {
                    // trim() tolerates stray whitespace such as a trailing '\r'
                    frequency = Integer.parseInt(fields[1].trim());
                }
                catch (NumberFormatException e) {
                    throw new ResourceInitializationException(new IllegalArgumentException(
                            "frequency is not an integer in " + this.tokenFrequencyFile + ": '" + line + "'", e));
                }
                // Both bounds are exclusive: only tokens strictly inside the range are kept.
                if (frequency <= this.minimumFrequency || frequency >= this.maximumFrequency) {
                    continue;
                }
                this.normalText.add(this.normalize(fields[0]));
            }
        }
        catch (FileNotFoundException e) {
            throw new ResourceInitializationException(e);
        }
    }

    /**
     * Removes from the CAS indexes every {@link Keep} annotation whose
     * normalized text is not whitelisted. Does nothing when
     * {@code minimumFrequency} is 0.
     *
     * @param jCas the CAS whose {@link Keep} annotations are filtered
     * @throws AnalysisEngineProcessException declared by the overridden method
     */
    public void process(JCas jCas) throws AnalysisEngineProcessException {
        if (this.minimumFrequency == 0) {
            return;
        }
        Collection<Keep> keeps = JCasUtil.select(jCas, Keep.class);
        // Work on a snapshot: annotations are removed from the indexes inside
        // the loop, presumably unsafe while iterating the selection itself.
        Keep[] snapshot = keeps.toArray(new Keep[keeps.size()]);
        for (Keep keep : snapshot) {
            if (!this.normalText.contains(this.normalize(keep.getNormalizedText()))) {
                keep.removeFromIndexes();
            }
        }
    }

    /** Returns {@code text} unchanged when case sensitive, lower-cased otherwise. */
    private String normalize(String text) {
        return this.caseSensitive ? text : text.toLowerCase();
    }
}

