/*
 * Decompiled with CFR 0.152.
 */
package com.google.refine.clustering.knn;

import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonValue;
import com.google.refine.browsing.Engine;
import com.google.refine.browsing.FilteredRows;
import com.google.refine.browsing.RowVisitor;
import com.google.refine.clustering.ClusteredEntry;
import com.google.refine.clustering.Clusterer;
import com.google.refine.clustering.ClustererConfig;
import com.google.refine.clustering.knn.DistanceFactory;
import com.google.refine.clustering.knn.SimilarityDistance;
import com.google.refine.model.Cell;
import com.google.refine.model.Project;
import com.google.refine.model.Row;
import edu.mit.simile.vicino.clustering.NGramClusterer;
import edu.mit.simile.vicino.clustering.VPTreeClusterer;
import edu.mit.simile.vicino.distances.Distance;
import java.io.Serializable;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class kNNClusterer
extends Clusterer {
    // Distance function used to compare values; copied from the config in initializeFromConfig.
    private SimilarityDistance _distance;
    // Clustering parameters (radius, blocking n-gram size); copied from the config in initializeFromConfig.
    private kNNClustererConfigParameters _params;
    // Result of the most recent computeClusters call. It has no initializer and is not
    // assigned anywhere else in this class, so it stays null until that method has run.
    List<Set<Serializable>> _clusters;
    // Occurrence tally per value, incremented by count() once for every visited row
    // whose cell in the clustered column holds a non-null value.
    Map<Serializable, Integer> _counts = new HashMap<Serializable, Integer>();
    // Logger registered under the name "kNN_clusterer"; not referenced by the code visible in this class.
    static final Logger logger = LoggerFactory.getLogger((String)"kNN_clusterer");

    /**
     * Binds this clusterer to a project and copies the kNN-specific settings out of the
     * given configuration.
     *
     * @param project the project, handed on unchanged to the superclass initializer
     * @param config  the kNN configuration supplying the distance function and the parameters
     */
    public void initializeFromConfig(Project project, kNNClustererConfig config) {
        // Let the superclass do its part of the setup first.
        super.initializeFromConfig(project, config);

        SimilarityDistance configuredDistance = config.getDistance();
        kNNClustererConfigParameters configuredParameters = config.getParameters();
        _distance = configuredDistance;
        _params = configuredParameters;
    }

    /**
     * Clusters the values of the configured column over all rows selected by the engine.
     *
     * <p>A {@link BlockingClusteringRowVisitor} is run over the engine's filtered rows; the
     * clusters it produces replace any previously computed ones in {@code _clusters}, and
     * the per-value occurrence tally in {@code _counts} is rebuilt from scratch.
     *
     * @param engine the engine whose filtered rows are visited
     */
    @Override
    public void computeClusters(Engine engine) {
        // The clusters are replaced on every call, so the tally has to be reset as well;
        // otherwise a second call would add its occurrences on top of the previous run's.
        this._counts.clear();

        BlockingClusteringRowVisitor visitor = new BlockingClusteringRowVisitor(this._distance, this._params);
        FilteredRows filteredRows = engine.getAllFilteredRows();
        filteredRows.accept(this._project, visitor);
        this._clusters = visitor.getClusters();
    }

    /**
     * Converts one cluster of values into a list of {@link ClusteredEntry} objects, each
     * pairing a value with the tally recorded for it in {@code _counts}, sorted with
     * {@code ClusteredEntry.comparator}.
     *
     * @param s the values that make up a single cluster
     * @return the cluster's entries in comparator order
     */
    protected List<ClusteredEntry> getClusteredEntries(Set<Serializable> s) {
        return s.stream()
                .map(value -> new ClusteredEntry(value, _counts.get(value)))
                .sorted(ClusteredEntry.comparator)
                .collect(Collectors.toList());
    }

    /**
     * Produces the value this clusterer is serialized as: one list of entries per cluster,
     * skipping clusters that contain fewer than two values.
     *
     * @return the clusters with at least two members, each converted by
     *         {@link #getClusteredEntries(Set)}
     */
    @JsonValue
    public List<List<ClusteredEntry>> getJsonRepresentation() {
        return _clusters.stream()
                .filter(cluster -> cluster.size() >= 2)
                .map(this::getClusteredEntries)
                .collect(Collectors.toList());
    }

    /**
     * Records one more occurrence of the given value in {@code _counts}.
     *
     * @param s the value that was seen
     */
    private void count(Serializable s) {
        // First sighting stores 1; every later one adds 1 to the stored tally.
        _counts.merge(s, 1, Integer::sum);
    }

    /**
     * Configuration for a {@link kNNClusterer}: the name of the distance function (JSON
     * property {@code "function"}) and the clustering parameters (JSON property
     * {@code "params"}).
     */
    public static class kNNClustererConfig
    extends ClustererConfig {
        // Distance function name exactly as supplied to setDistance.
        @JsonIgnore
        private String _distanceStr;
        // Distance implementation resolved from the name by DistanceFactory.
        @JsonIgnore
        private SimilarityDistance _distance;
        // Stays null unless setParameters is called.
        @JsonIgnore
        private kNNClustererConfigParameters _parameters = null;

        /**
         * @return the distance implementation resolved by the last call to
         *         {@link #setDistance(String)}, or {@code null} if it was never called
         */
        @JsonIgnore
        public SimilarityDistance getDistance() {
            return this._distance;
        }

        /**
         * Stores the distance function name and looks up the matching implementation via
         * {@code DistanceFactory.get}, using the lower-cased name as the key.
         *
         * @param distanceStr the distance function name; must not be {@code null}
         * @throws NullPointerException if {@code distanceStr} is {@code null}
         */
        @JsonProperty(value="function")
        public void setDistance(String distanceStr) {
            this._distanceStr = distanceStr;
            // Lower-case with a fixed locale: the default-locale variant maps "I" to a
            // dotless "ı" under e.g. a Turkish locale, which would corrupt the lookup key.
            this._distance = DistanceFactory.get(this._distanceStr.toLowerCase(Locale.ROOT));
        }

        /**
         * @return the distance function name exactly as it was passed to
         *         {@link #setDistance(String)}
         */
        @JsonProperty(value="function")
        public String getDistanceStr() {
            return this._distanceStr;
        }

        /**
         * @return the clustering parameters, or {@code null} if none were set
         */
        @JsonProperty(value="params")
        public kNNClustererConfigParameters getParameters() {
            return this._parameters;
        }

        /**
         * @param params the clustering parameters to use
         */
        @JsonProperty(value="params")
        public void setParameters(kNNClustererConfigParameters params) {
            this._parameters = params;
        }

        /**
         * Creates a new {@link kNNClusterer} and initializes it from the given project and
         * this configuration.
         *
         * @param project the project the clusterer is initialized with
         * @return the freshly initialized clusterer
         */
        @Override
        public kNNClusterer apply(Project project) {
            kNNClusterer clusterer = new kNNClusterer();
            clusterer.initializeFromConfig(project, this);
            return clusterer;
        }

        /**
         * @return the type identifier of this configuration, {@code "knn"}
         */
        @Override
        public String getType() {
            return "knn";
        }
    }

    /**
     * Tunable parameters of the kNN clusterer, mapped to the JSON properties
     * {@code "radius"} and {@code "blocking-ngram-size"}. Both fields start out at the
     * default values declared alongside them.
     */
    public static class kNNClustererConfigParameters {
        public static final double defaultRadius = 1.0;
        public static final int defaultBlockingNgramSize = 6;

        // Radius passed to the clusterer when the clusters are requested.
        @JsonProperty("radius")
        public double radius = defaultRadius;

        // N-gram size handed to the blocking clusterer on construction.
        @JsonProperty("blocking-ngram-size")
        public int blockingNgramSize = defaultBlockingNgramSize;
    }

    /**
     * Row visitor that feeds the values of the clustered column into an
     * {@link NGramClusterer} and tallies each value on the enclosing clusterer.
     */
    class BlockingClusteringRowVisitor
    implements RowVisitor {
        SimilarityDistance _distance;
        double _radius = kNNClustererConfigParameters.defaultRadius;
        int _blockingNgramSize = kNNClustererConfigParameters.defaultBlockingNgramSize;
        // Allocated by the constructor but neither read nor filled by the code in this class.
        HashSet<String> _data;
        NGramClusterer _clusterer;

        /**
         * Builds the visitor and its underlying n-gram clusterer.
         *
         * @param _distance2 the similarity distance, wrapped for use by the clusterer
         * @param params     the radius and blocking n-gram size to use; when {@code null}
         *                   the defaults declared on {@code kNNClustererConfigParameters}
         *                   are kept
         */
        public BlockingClusteringRowVisitor(SimilarityDistance _distance2, kNNClustererConfigParameters params) {
            this._distance = _distance2;
            this._data = new HashSet<String>();
            // kNNClustererConfig starts with null parameters and only replaces them when
            // setParameters is called, so a missing parameter object must not blow up here.
            if (params != null) {
                this._blockingNgramSize = params.blockingNgramSize;
                this._radius = params.radius;
            }
            this._clusterer = new NGramClusterer(new DistanceWrapper(this._distance), this._blockingNgramSize);
        }

        /** No set-up is needed before the rows are visited. */
        @Override
        public void start(Project project) {
        }

        /** No clean-up is needed after the rows have been visited. */
        @Override
        public void end(Project project) {
        }

        /**
         * Hands the row's value in the clustered column to the clusterer and counts it.
         * Rows whose cell or cell value is {@code null} are skipped.
         *
         * @return always {@code false}
         */
        @Override
        public boolean visit(Project project, int rowIndex, Row row) {
            Cell cell = row.getCell(kNNClusterer.this._colindex);
            if (cell != null && cell.value != null) {
                Serializable v = cell.value;
                // Strings are used as they are; any other value is clustered by its
                // interned string form.
                String s = v instanceof String ? (String) v : v.toString().intern();
                this._clusterer.populate(s);
                kNNClusterer.this.count(s);
            }
            // NOTE(review): presumably false means "keep visiting" in the RowVisitor
            // contract - confirm against the interface.
            return false;
        }

        /**
         * @return the clusters reported by the n-gram clusterer for the configured radius
         */
        public List<Set<Serializable>> getClusters() {
            return this._clusterer.getClusters(this._radius);
        }

        /**
         * Adapts a {@link SimilarityDistance} to the {@link Distance} type expected by the
         * clusterer by delegating {@code d} to {@code compute}.
         */
        private class DistanceWrapper
        extends Distance {
            private final SimilarityDistance _d;

            protected DistanceWrapper(SimilarityDistance d) {
                this._d = d;
            }

            public double d(String arg0, String arg1) {
                return this._d.compute(arg0, arg1);
            }
        }
    }

    /**
     * Orders map entries by their integer value, largest value first. Entries with equal
     * values compare as equal; the keys are not consulted.
     */
    public static class ValuesComparator
    implements Comparator<Map.Entry<Serializable, Integer>>,
    Serializable {
        private static final long serialVersionUID = 204469656070583155L;

        /**
         * @param o1 the first entry; its value must not be {@code null}
         * @param o2 the second entry; its value must not be {@code null}
         * @return a negative number if {@code o1} has the larger value, a positive number
         *         if {@code o2} has the larger value, and zero if the values are equal
         */
        @Override
        public int compare(Map.Entry<Serializable, Integer> o1, Map.Entry<Serializable, Integer> o2) {
            // Arguments are swapped to get descending order. Integer.compare is used
            // instead of subtraction, which overflows (and flips the sign) for operands
            // that are far apart.
            return Integer.compare(o2.getValue(), o1.getValue());
        }
    }

    /**
     * Row visitor that feeds the values of the clustered column into a
     * {@link VPTreeClusterer} and tallies each value on the enclosing clusterer.
     */
    class VPTreeClusteringRowVisitor
    implements RowVisitor {
        Distance _distance;
        // May be null; getClusters then falls back to the default radius.
        kNNClustererConfigParameters _params;
        VPTreeClusterer _clusterer;

        /**
         * @param d      the distance the VP-tree clusterer is constructed with
         * @param params the parameters supplying the radius; may be {@code null}
         */
        public VPTreeClusteringRowVisitor(Distance d, kNNClustererConfigParameters params) {
            this._distance = d;
            this._clusterer = new VPTreeClusterer(this._distance);
            this._params = params;
        }

        /** No set-up is needed before the rows are visited. */
        @Override
        public void start(Project project) {
        }

        /** No clean-up is needed after the rows have been visited. */
        @Override
        public void end(Project project) {
        }

        /**
         * Hands the row's value in the clustered column to the clusterer and counts it.
         * Rows whose cell or cell value is {@code null} are skipped.
         *
         * @return always {@code false}
         */
        @Override
        public boolean visit(Project project, int rowIndex, Row row) {
            Cell cell = row.getCell(kNNClusterer.this._colindex);
            if (cell != null && cell.value != null) {
                Serializable v = cell.value;
                // Strings are used as they are; any other value is clustered by its string form.
                String s = v instanceof String ? (String) v : v.toString();
                this._clusterer.populate(s);
                kNNClusterer.this.count(s);
            }
            // NOTE(review): presumably false means "keep visiting" in the RowVisitor
            // contract - confirm against the interface.
            return false;
        }

        /**
         * @return the clusters reported by the VP-tree clusterer for the configured radius,
         *         or for {@code kNNClustererConfigParameters.defaultRadius} when no
         *         parameters were supplied
         */
        public List<Set<Serializable>> getClusters() {
            // The parameters are optional on the config, so guard against null here
            // instead of failing with a NullPointerException.
            double radius = this._params != null
                    ? this._params.radius
                    : kNNClustererConfigParameters.defaultRadius;
            return this._clusterer.getClusters(radius);
        }
    }
}

