/*
 * Decompiled with CFR 0.152.
 */
package org.languagetool.dev.archive;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.List;
import morfologik.fsa.FSA;
import org.languagetool.AnalyzedTokenReadings;
import org.languagetool.JLanguageTool;
import org.languagetool.languagemodel.LuceneLanguageModel;
import org.languagetool.tagging.de.GermanTagger;

/**
 * Development tool that walks the German morphological dictionary and prints
 * noun genitive forms ending in "-s" whose "-es" variant is not known to the
 * tagger although that variant occurs at least {@link #THRESHOLD} times in
 * an n-gram index.
 *
 * <p>Output goes to standard out, one candidate per line, tab-separated:
 * the "-es" form, the word without its trailing "s", and the third
 * "_"-separated field of the dictionary entry.
 *
 * <p>Usage: {@code MissingGenitiveFinder [ngramIndexDir]}. Without an
 * argument the directory {@link #DEFAULT_NGRAM_INDEX_DIR} is used.
 */
public class MissingGenitiveFinder {
    private static final String DICT_FILENAME = "/de/german.dict";
    /** Minimum n-gram count the "-es" form needs in order to be reported. */
    private static final int THRESHOLD = 50;
    /** Index location used when no directory is given on the command line. */
    private static final String DEFAULT_NGRAM_INDEX_DIR = "/home/dnaber/data/google-ngram-index/de";
    /** Endings for which a word is never considered, even though it ends in "s". */
    private static final String[] IRRELEVANT_SUFFIXES = {
        "es", "ens", "ems", "els", "ers", "lings", "leins", "chens",
        "erns", "elns", "os", "us", "is", "as", "ols"
    };

    private final LuceneLanguageModel lm;

    private MissingGenitiveFinder() {
        this(new File(DEFAULT_NGRAM_INDEX_DIR));
    }

    /**
     * @param ngramIndexDir directory handed to {@link LuceneLanguageModel}
     */
    private MissingGenitiveFinder(File ngramIndexDir) {
        this.lm = new LuceneLanguageModel(ngramIndexDir);
    }

    /**
     * Iterates over every entry of the dictionary and prints the candidates.
     *
     * @throws IOException if reading the dictionary or tagging fails
     */
    private void run() throws IOException {
        GermanTagger tagger = new GermanTagger();
        FSA fsa;
        // Close the resource stream once the automaton has been read from it.
        try (InputStream dictStream = JLanguageTool.getDataBroker().getFromResourceDirAsStream(DICT_FILENAME)) {
            fsa = FSA.read(dictStream);
        }
        for (ByteBuffer buffer : fsa) {
            byte[] sequence = new byte[buffer.remaining()];
            buffer.get(sequence);
            String output = new String(sequence, StandardCharsets.UTF_8);
            // Explicit parentheses: "&&" binds tighter than "||".
            // NOTE(review): "COU" presumably marks country names - confirm against the tag set.
            boolean isNoun = output.contains("SUB:") || (output.contains("EIG:") && output.contains("COU"));
            if (!isNoun || !output.contains(":GEN:")) {
                continue;
            }
            String[] parts = output.split("_");
            String word = parts[0];
            if (!isRelevantWord(word)) {
                continue;
            }
            // NOTE(review): isRelevantWord() already rejects every word ending in "els",
            // so this condition can never be true here. Kept because the "iels" exception
            // suggests such words were meant to be examined - confirm the intent.
            boolean ignore1 = word.endsWith("els") && !word.endsWith("iels");
            if (ignore1 || hasEsGenitive(tagger, word)) {
                continue;
            }
            // A relevant word is guaranteed to end in "s", so the stem is everything before it.
            String stem = word.substring(0, word.length() - 1);
            String esWord = stem + "es";
            // Query the n-gram index last; it is only needed for words that got this far.
            long occurrences = lm.getCount(esWord);
            if (occurrences < THRESHOLD) {
                continue;
            }
            System.out.println(esWord + "\t" + stem + "\t" + parts[2]);
        }
    }

    /**
     * Tells whether a word should be examined at all.
     *
     * @param word the inflected form taken from the dictionary entry
     * @return {@code true} if the word ends in "s" but in none of the
     *         {@link #IRRELEVANT_SUFFIXES}
     */
    private boolean isRelevantWord(String word) {
        if (!word.endsWith("s")) {
            return false;
        }
        for (String suffix : IRRELEVANT_SUFFIXES) {
            if (word.endsWith(suffix)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Checks whether the tagger knows the "-es" variant of the given word.
     *
     * @param tagger the tagger used for the lookup
     * @param word   a word whose trailing "s" is replaced by "es" before tagging
     * @return {@code true} if any reading returned for the "-es" form is tagged
     * @throws IOException if tagging fails
     */
    private boolean hasEsGenitive(GermanTagger tagger, String word) throws IOException {
        String esForm = word.replaceFirst("s$", "es");
        List<AnalyzedTokenReadings> readings = tagger.tag(Collections.singletonList(esForm));
        for (AnalyzedTokenReadings reading : readings) {
            if (reading.isTagged()) {
                return true;
            }
        }
        return false;
    }

    /**
     * Entry point.
     *
     * @param args optional: {@code args[0]} is the n-gram index directory
     * @throws IOException if reading the dictionary or tagging fails
     */
    public static void main(String[] args) throws IOException {
        File ngramIndexDir = new File(args.length > 0 ? args[0] : DEFAULT_NGRAM_INDEX_DIR);
        MissingGenitiveFinder prg = new MissingGenitiveFinder(ngramIndexDir);
        prg.run();
    }
}

