/*
 * Decompiled with CFR 0.152.
 */
package com.hazelcast.jet.examples.tfidf;

import com.hazelcast.jet.Util;
import com.hazelcast.jet.examples.tfidf.SearchGui;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.function.Function;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;

/**
 * Builds a TF-IDF inverted index over the classpath book resources using plain
 * JDK streams, then passes the index and the stopword set to {@link SearchGui}.
 *
 * <p>The index maps each word to a list of {@code (document name, TF-IDF score)}
 * entries. TF is the number of occurrences of the word in the document; IDF is
 * {@code ln(documentCount) - ln(number of documents containing the word)}.
 */
public class TfIdfJdkStreams {
    /** Splits a line into tokens on runs of non-word characters. */
    static final Pattern DELIMITER = Pattern.compile("\\W+");

    /** Words excluded from indexing; loaded from {@code stopwords.txt}. */
    private Set<String> stopwords;
    /** Word -> list of (document name, TF-IDF score). */
    private Map<String, List<Map.Entry<String, Double>>> invertedIndex;
    /** Names of the documents to index, as listed by the {@code books} resource. */
    private Set<String> docIds;

    /**
     * Program entry point.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        new TfIdfJdkStreams().go();
    }

    /**
     * Loads stopwords and the document inventory, builds the inverted index
     * (printing the elapsed time) and constructs the {@link SearchGui}.
     */
    private void go() {
        stopwords = readStopwords();
        docIds = buildDocumentInventory();
        long start = System.nanoTime();
        buildInvertedIndex();
        System.out.println("Done in " + TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - start) + " milliseconds.");
        // Constructed only for its side effects; the reference is not needed here.
        new SearchGui(invertedIndex, stopwords);
    }

    /**
     * Populates {@link #invertedIndex} from the documents named in {@link #docIds}.
     */
    private void buildInvertedIndex() {
        double logDocCount = Math.log(docIds.size());

        // Stream of (document name, word) pairs. Lazy: nothing is read until collect().
        Stream<Map.Entry<String, String>> docWords = docIds.parallelStream()
                .flatMap(TfIdfJdkStreams::docLines)
                .flatMap(this::tokenize);

        System.out.println("Building TF");
        // (document name, word) -> number of occurrences of the word in that document.
        Map<Map.Entry<String, String>, Long> tfMap =
                docWords.collect(Collectors.groupingBy(Function.identity(), Collectors.counting()));

        System.out.println("Building inverted index");
        invertedIndex = tfMap.entrySet().parallelStream()
                .collect(Collectors.groupingBy(
                        // Group the TF entries by word.
                        tfEntry -> tfEntry.getKey().getValue(),
                        Collectors.collectingAndThen(Collectors.toList(), entries -> {
                            // Each entry is a distinct document containing the word,
                            // so entries.size() is the word's document frequency.
                            double idf = logDocCount - Math.log(entries.size());
                            return entries.stream()
                                    .map(tfEntry -> tfidfEntry(tfEntry, idf))
                                    .collect(Collectors.toList());
                        })));
    }

    /**
     * Reads the {@code stopwords.txt} classpath resource, one stopword per line.
     *
     * @return the set of lines read
     * @throws RuntimeException wrapping any {@link IOException} raised while closing the reader
     */
    private static Set<String> readStopwords() {
        try (BufferedReader reader = resourceReader("stopwords.txt")) {
            return reader.lines().collect(Collectors.toSet());
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Reads the {@code books} classpath resource line by line, printing each line
     * to standard output and collecting the lines as document names.
     *
     * @return the set of document names
     * @throws RuntimeException wrapping any {@link IOException} raised while closing the reader
     */
    static Set<String> buildDocumentInventory() {
        try (BufferedReader reader = resourceReader("books")) {
            System.out.println("These books will be indexed:");
            return reader.lines().peek(System.out::println).collect(Collectors.toSet());
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Opens the resource {@code /books/<name>} and streams its lines, lower-cased,
     * each paired with the document name.
     *
     * <p>The returned stream holds an open file; it must be closed by the consumer.
     * {@code Stream.flatMap} closes each mapped stream, which is how this class uses it.
     *
     * @param name the document name, relative to {@code /books/}
     * @return a stream of {@code (name, lower-cased line)} entries
     * @throws NullPointerException if the resource does not exist
     * @throws RuntimeException wrapping an {@link IOException} or {@link URISyntaxException}
     */
    static Stream<Map.Entry<String, String>> docLines(String name) {
        String resourcePath = "/books/" + name;
        try {
            return Files.lines(Paths.get(
                            Objects.requireNonNull(
                                    TfIdfJdkStreams.class.getResource(resourcePath),
                                    "Book resource not found: " + resourcePath)
                                    .toURI()))
                    .map(String::toLowerCase)
                    .map(line -> Util.entry(name, line));
        } catch (IOException | URISyntaxException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Opens a UTF-8 reader over a classpath resource.
     *
     * @param resourceName the resource name, resolved through this class's class loader
     * @return a buffered reader the caller must close
     * @throws NullPointerException if the resource does not exist
     */
    private static BufferedReader resourceReader(String resourceName) {
        ClassLoader cl = TfIdfJdkStreams.class.getClassLoader();
        return new BufferedReader(new InputStreamReader(
                Objects.requireNonNull(
                        cl.getResourceAsStream(resourceName),
                        "Classpath resource not found: " + resourceName),
                StandardCharsets.UTF_8));
    }

    /**
     * Splits a document line into words, dropping empty tokens and stopwords.
     *
     * @param docLine a {@code (document name, line)} entry
     * @return a stream of {@code (document name, word)} entries
     */
    private Stream<Map.Entry<String, String>> tokenize(Map.Entry<String, String> docLine) {
        return Arrays.stream(DELIMITER.split(docLine.getValue()))
                .filter(token -> !token.isEmpty())
                .filter(token -> !stopwords.contains(token))
                .map(word -> Util.entry(docLine.getKey(), word));
    }

    /**
     * Converts a term-frequency entry to a {@code (document name, TF-IDF)} entry.
     *
     * @param tfEntry {@code ((document name, word), occurrence count)}
     * @param idf the inverse document frequency of the entry's word
     * @return the document name paired with {@code tf * idf}
     */
    private static Map.Entry<String, Double> tfidfEntry(Map.Entry<Map.Entry<String, String>, Long> tfEntry, double idf) {
        long tf = tfEntry.getValue();
        return Util.entry(tfEntry.getKey().getKey(), tf * idf);
    }
}

