package edu.stanford.nlp.patterns.surface;

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.patterns.CandidatePhrase;
import edu.stanford.nlp.patterns.ConstantsAndVariables;
import edu.stanford.nlp.patterns.DataInstance;
import edu.stanford.nlp.patterns.PatternFactory;
import edu.stanford.nlp.sequences.SeqClassifierFlags;
import edu.stanford.nlp.util.ArgumentParser;
import edu.stanford.nlp.util.CollectionUtils;
import edu.stanford.nlp.util.StringUtils;
import edu.stanford.nlp.util.Triple;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;

/* loaded from: input_file:edu/stanford/nlp/patterns/surface/SurfacePatternFactory.class */
public class SurfacePatternFactory extends PatternFactory {

    /**
     * When true, getContext reads the target token's POS tag and creates patterns whose target
     * PatternToken is constructed with its second argument set to true.
     */
    @ArgumentParser.Option(name = "usePOS4Pattern")
    public static boolean usePOS4Pattern = true;

    /**
     * When true (and usePOS4Pattern is true), the target's POS tag is truncated to at most its
     * first two characters before being handed to PatternToken.
     */
    @ArgumentParser.Option(name = "useCoarsePOS")
    public static boolean useCoarsePOS = true;

    /**
     * When true, getContext creates patterns whose target PatternToken is constructed with its
     * second argument set to false. setUp rejects a configuration in which both this flag and
     * usePOS4Pattern are false.
     */
    @ArgumentParser.Option(name = "addPatWithoutPOS")
    public static boolean addPatWithoutPOS = true;

    /**
     * Minimum number of collected context tokens on one side for a PREV or NEXT pattern to be
     * emitted; for PREVNEXT patterns the two sides combined must reach this number.
     */
    @ArgumentParser.Option(name = "minWindow4Pattern")
    public static int minWindow4Pattern = 2;

    /** Largest context window tried by getContext, which iterates window sizes 1..this value inclusive. */
    @ArgumentParser.Option(name = "maxWindow4Pattern")
    public static int maxWindow4Pattern = 4;

    /** When true, getContext collects context tokens that precede the target token. */
    @ArgumentParser.Option(name = "usePreviousContext")
    public static boolean usePreviousContext = true;

    /** When true, getContext collects context tokens that follow the target token. */
    @ArgumentParser.Option(name = "useNextContext")
    public static boolean useNextContext = false;

    /**
     * A context side in which every counted word is rejected by doNotUse is still accepted when
     * the number of such words is strictly greater than this value.
     */
    @ArgumentParser.Option(name = "numMinStopWordsToAdd")
    public static int numMinStopWordsToAdd = 3;

    /**
     * Passed straight through to the PatternToken constructor for the target token, together with
     * the token's GrandparentAnnotation value.
     */
    @ArgumentParser.Option(name = "useTargetParserParentRestriction")
    public static boolean useTargetParserParentRestriction = false;

    /**
     * When true, getContextTokenStr adds an OR restriction on a context token's named-entity tag,
     * provided the tag is non-null and differs from SeqClassifierFlags.DEFAULT_BACKGROUND_SYMBOL.
     */
    @ArgumentParser.Option(name = "useContextNERRestriction")
    public static boolean useContextNERRestriction = false;

    /**
     * When true, context words found in fillerWords are skipped while collecting a window, and
     * setUp binds fw to "$FILLER" with occurrence bounds (0, 2).
     */
    @ArgumentParser.Option(name = "useFillerWordsInPat")
    public static boolean useFillerWordsInPat = true;
    /**
     * Token interleaved with the context tokens by getContext whenever it is not empty.
     * Assigned only by setUp, so it is null until setUp has been called.
     */
    static Token fw;
    /**
     * Token placed on the target-facing end of a context by getContext whenever it is not empty.
     * Assigned only by setUp (bound to "$STOPWORD" when useStopWordsBeforeTerm is set), so it is
     * null until setUp has been called.
     */
    static Token sw;

    /* loaded from: input_file:edu/stanford/nlp/patterns/surface/SurfacePatternFactory$Genre.class */
    /**
     * Which side(s) of the target token a surface pattern's context comes from, as used by
     * getContext when constructing SurfacePattern instances.
     */
    public enum Genre {
        /** Pattern built from the tokens preceding the target only (next context is null). */
        PREV,
        /** Pattern built from the tokens following the target only (previous context is null). */
        NEXT,
        /** Pattern built from both the preceding and the following context. */
        PREVNEXT
    }

    /**
     * Initialises the static configuration of this factory from {@code properties}.
     *
     * <p>Options are filled, in order, on PatternFactory, SurfacePatternFactory and
     * SurfacePattern. Afterwards the shared filler ({@code fw}) and stop-word ({@code sw})
     * tokens are re-created; each one receives its environment binding and the occurrence
     * bounds (0, 2) only when the corresponding flag is switched on.
     *
     * @param properties option values handed to ArgumentParser.fillOptions
     * @throws RuntimeException if both addPatWithoutPOS and usePOS4Pattern end up false
     */
    public static void setUp(Properties properties) {
        Class<?>[] optionHolders = {PatternFactory.class, SurfacePatternFactory.class, SurfacePattern.class};
        for (Class<?> optionHolder : optionHolders) {
            ArgumentParser.fillOptions(optionHolder, properties);
        }

        // At least one of the two pattern flavours has to be enabled.
        if (!(addPatWithoutPOS || usePOS4Pattern)) {
            throw new RuntimeException("addPatWithoutPOS and usePOS4Pattern both cannot be false ");
        }

        Token fillerToken = new Token(PatternFactory.PatternType.SURFACE);
        fw = fillerToken;
        if (useFillerWordsInPat) {
            fillerToken.setEnvBindRestriction("$FILLER");
            fillerToken.setNumOcc(0, 2);
        }

        Token stopWordToken = new Token(PatternFactory.PatternType.SURFACE);
        sw = stopWordToken;
        if (useStopWordsBeforeTerm) {
            stopWordToken.setEnvBindRestriction("$STOPWORD");
            stopWordToken.setNumOcc(0, 2);
        }
    }

    /**
     * Builds the surface patterns around the token at index {@code i} of {@code list}.
     *
     * <p>For every window size from 1 to maxWindow4Pattern the method collects up to that many
     * context tokens before the target (if usePreviousContext) and after it (if useNextContext),
     * skipping filler words when useFillerWordsInPat is set. From those it emits PREV, NEXT and
     * PREVNEXT patterns, each in up to two variants: one per non-null target PatternToken
     * (controlled by addPatWithoutPOS and usePOS4Pattern).
     *
     * @param list tokens of the sentence being processed
     * @param i    index of the target token within {@code list}
     * @param set  phrases passed to doNotUse to classify context words (presumably stop words —
     *             confirm against PatternFactory.doNotUse)
     * @return the union of all PREV, NEXT and PREVNEXT patterns produced over all window sizes
     */
    public static Set<SurfacePattern> getContext(List<CoreLabel> list, int i, Set<CandidatePhrase> set) {
        // Accumulators for PREV, NEXT and PREVNEXT patterns respectively.
        HashSet hashSet = new HashSet();
        HashSet hashSet2 = new HashSet();
        HashSet hashSet3 = new HashSet();
        CoreLabel coreLabel = list.get(i);
        // POS tag of the target; stays null when POS is not used, otherwise optionally
        // truncated to its first (at most) two characters.
        String str = null;
        if (usePOS4Pattern) {
            String tag = coreLabel.tag();
            str = useCoarsePOS ? tag.substring(0, Math.min(tag.length(), 2)) : tag;
        }
        // Named-entity tag of the target token.
        String str2 = (String) coreLabel.get(CoreAnnotations.NamedEntityTagAnnotation.class);
        // i2 is the current window size. The loop always starts at 1; minWindow4Pattern is
        // enforced later, when patterns are emitted.
        for (int i2 = 1; i2 <= maxWindow4Pattern; i2++) {
            // arrayList / arrayList2: previous-context tokens and their display strings.
            ArrayList arrayList = new ArrayList();
            ArrayList arrayList2 = new ArrayList();
            // arrayList3 / arrayList4: next-context display strings and tokens.
            ArrayList<String> arrayList3 = new ArrayList();
            ArrayList<Token> arrayList4 = new ArrayList();
            // i3 / i4: previous / next context words for which doNotUse returned true.
            int i3 = 0;
            int i4 = 0;
            // i6 / i5: all other counted previous / next context tokens.
            int i5 = 0;
            int i6 = 0;
            // z / z2: whether the previous / next context is usable for PREVNEXT patterns.
            boolean z = false;
            boolean z2 = false;
            // Target-token descriptors; the two differ only in the second constructor argument
            // (false vs. true). Either may be null depending on configuration.
            PatternToken patternToken = addPatWithoutPOS ? new PatternToken(str, false, numWordsCompoundMax > 1, numWordsCompoundMax, str2, useTargetNERRestriction, useTargetParserParentRestriction, (String) coreLabel.get(CoreAnnotations.GrandparentAnnotation.class)) : null;
            PatternToken patternToken2 = usePOS4Pattern ? new PatternToken(str, true, numWordsCompoundMax > 1, numWordsCompoundMax, str2, useTargetNERRestriction, useTargetParserParentRestriction, (String) coreLabel.get(CoreAnnotations.GrandparentAnnotation.class)) : null;
            if (usePreviousContext) {
                // Walk leftwards from the target: i7 is the sentence index, i8 the number of
                // context tokens counted so far.
                int i7 = i - 1;
                int i8 = 0;
                while (true) {
                    // Stop once the window is full or the start of the sentence is passed.
                    if (i8 >= i2 || i7 < 0) {
                        break;
                    }
                    CoreLabel coreLabel2 = list.get(i7);
                    String lemma = useLemmaContextTokens ? coreLabel2.lemma() : coreLabel2.word();
                    if (useFillerWordsInPat && fillerWords.contains(coreLabel2.word().toLowerCase())) {
                        // Filler words are skipped and do not count towards the window.
                        i7--;
                    } else {
                        Triple<Boolean, Token, String> contextTokenStr = getContextTokenStr(coreLabel2);
                        // first == true: getContextTokenStr added no restriction for this token.
                        boolean booleanValue = contextTokenStr.first.booleanValue();
                        Token token = contextTokenStr.second;
                        String str3 = contextTokenStr.third;
                        if (!booleanValue) {
                            // Token carries a restriction: use the generalized token. Prepending
                            // keeps the list in sentence order.
                            arrayList.add(0, token);
                            arrayList2.add(0, str3);
                            i6++;
                        } else {
                            if (coreLabel2.word().startsWith("http")) {
                                // A word starting with "http" discards the whole previous
                                // context for this window size.
                                z = false;
                                arrayList.clear();
                                arrayList2.clear();
                                break;
                            }
                            arrayList.add(0, SurfacePattern.getContextToken(coreLabel2));
                            arrayList2.add(0, lemma);
                            if (doNotUse(lemma, set)) {
                                i3++;
                            } else {
                                i6++;
                            }
                        }
                        i8++;
                        i7--;
                    }
                }
            }
            if (useNextContext) {
                // Mirror image of the loop above, walking rightwards: i10 is the sentence index,
                // i9 the number of context tokens counted so far.
                int i9 = 0;
                int i10 = i + 1;
                while (true) {
                    // Stop once the window is full or the end of the sentence is reached.
                    if (i9 >= i2 || i10 >= list.size()) {
                        break;
                    }
                    CoreLabel coreLabel3 = list.get(i10);
                    String lemma2 = useLemmaContextTokens ? coreLabel3.lemma() : coreLabel3.word();
                    if (useFillerWordsInPat && fillerWords.contains(coreLabel3.word().toLowerCase())) {
                        // Filler words are skipped and do not count towards the window.
                        i10++;
                    } else {
                        Triple<Boolean, Token, String> contextTokenStr2 = getContextTokenStr(coreLabel3);
                        boolean booleanValue2 = contextTokenStr2.first.booleanValue();
                        Token token2 = contextTokenStr2.second;
                        String str4 = contextTokenStr2.third;
                        if (!booleanValue2) {
                            // Token carries a restriction: use the generalized token.
                            i5++;
                            arrayList4.add(token2);
                            arrayList3.add(str4);
                        } else {
                            if (coreLabel3.word().startsWith("http")) {
                                // A word starting with "http" discards the whole next context
                                // for this window size.
                                z2 = false;
                                arrayList4.clear();
                                arrayList3.clear();
                                break;
                            }
                            arrayList4.add(SurfacePattern.getContextToken(coreLabel3));
                            arrayList3.add(lemma2);
                            if (doNotUse(lemma2, set)) {
                                i4++;
                            } else {
                                i5++;
                            }
                        }
                        i10++;
                        i9++;
                    }
                }
            }
            // Previous context as an array; stays null unless the context is accepted below.
            Token[] tokenArr = null;
            // Accept the previous context only if it is long enough and either contains a token
            // not rejected by doNotUse or has more than numMinStopWordsToAdd rejected words.
            if (arrayList.size() >= minWindow4Pattern && (i6 > 0 || i3 > numMinStopWordsToAdd)) {
                ArrayList arrayList5 = new ArrayList();
                ArrayList arrayList6 = new ArrayList();
                // Each context token is followed by fw when fw is not empty.
                Iterator it = arrayList.iterator();
                while (it.hasNext()) {
                    arrayList5.add((Token) it.next());
                    if (!fw.isEmpty()) {
                        arrayList5.add(fw);
                    }
                }
                // Same interleaving for the display strings, used only for the ASCII check.
                Iterator it2 = arrayList2.iterator();
                while (it2.hasNext()) {
                    arrayList6.add((String) it2.next());
                    if (!fw.isEmpty()) {
                        arrayList6.add(" FW ");
                    }
                }
                // sw goes last, i.e. directly before the target token.
                if (!sw.isEmpty()) {
                    arrayList5.add(sw);
                    arrayList6.add(" SW ");
                }
                // Contexts whose joined text contains non-ASCII characters are dropped.
                if (isASCII(StringUtils.join(arrayList6))) {
                    tokenArr = (Token[]) arrayList5.toArray(new Token[0]);
                    // NOTE(review): this size check repeats the enclosing condition and is
                    // therefore always true here.
                    if (arrayList.size() >= minWindow4Pattern) {
                        if (patternToken != null) {
                            hashSet.add(new SurfacePattern(tokenArr, patternToken, null, Genre.PREV));
                        }
                        if (patternToken2 != null) {
                            hashSet.add(new SurfacePattern(tokenArr, patternToken2, null, Genre.PREV));
                        }
                    }
                    z = true;
                }
            }
            // Next context as an array; stays null unless long enough for a NEXT pattern.
            Token[] tokenArr2 = null;
            // NOTE(review): unlike the previous context, the next context is accepted with any
            // non-zero size and without an ASCII check — confirm this asymmetry is intended.
            if (arrayList4.size() > 0 && (i5 > 0 || i4 > numMinStopWordsToAdd)) {
                ArrayList arrayList7 = new ArrayList();
                ArrayList arrayList8 = new ArrayList();
                // sw goes first, i.e. directly after the target token.
                if (!sw.isEmpty()) {
                    arrayList7.add(sw);
                    arrayList8.add(" SW ");
                }
                // Each context token is preceded by fw when fw is not empty.
                for (Token token3 : arrayList4) {
                    if (!fw.isEmpty()) {
                        arrayList7.add(fw);
                    }
                    arrayList7.add(token3);
                }
                // arrayList8 is filled for symmetry but never read afterwards.
                for (String str5 : arrayList3) {
                    if (!fw.isEmpty()) {
                        arrayList8.add(" FW ");
                    }
                    arrayList8.add(str5);
                }
                if (arrayList4.size() >= minWindow4Pattern) {
                    tokenArr2 = (Token[]) arrayList7.toArray(new Token[0]);
                    if (patternToken != null) {
                        hashSet2.add(new SurfacePattern(null, patternToken, tokenArr2, Genre.NEXT));
                    }
                    if (patternToken2 != null) {
                        hashSet2.add(new SurfacePattern(null, patternToken2, tokenArr2, Genre.NEXT));
                    }
                }
                // Set even when the next context was too short for a NEXT pattern of its own.
                z2 = true;
            }
            // Combined pattern: both sides usable and together long enough.
            // NOTE(review): tokenArr2 is still null here when the next context is shorter than
            // minWindow4Pattern, so a PREVNEXT pattern can be created with a null next context.
            if (z && z2 && arrayList.size() + arrayList4.size() >= minWindow4Pattern) {
                if (patternToken != null) {
                    hashSet3.add(new SurfacePattern(tokenArr, patternToken, tokenArr2, Genre.PREVNEXT));
                }
                if (patternToken2 != null) {
                    hashSet3.add(new SurfacePattern(tokenArr, patternToken2, tokenArr2, Genre.PREVNEXT));
                }
            }
        }
        return CollectionUtils.unionAsSet(hashSet, hashSet2, hashSet3);
    }

    /**
     * Derives the generalized pattern token for a single context word.
     *
     * <p>Every generalize class registered in ConstantsAndVariables must be present and non-null
     * on {@code coreLabel}. Each one whose value differs from the background symbol contributes
     * an OR restriction to the resulting token and its key to a "|"-separated description.
     * When useContextNERRestriction is on, a non-null, non-background named-entity tag is added
     * in the same way.
     *
     * @param coreLabel the context word to inspect
     * @return a triple of: {@code true} if no restriction at all was added, the token carrying
     *         the collected restrictions, and the "|"-joined description (empty if none)
     * @throws RuntimeException if a generalize class is absent from, or null on, the label
     */
    static Triple<Boolean, Token, String> getContextTokenStr(CoreLabel coreLabel) {
        Token generalized = new Token(PatternFactory.PatternType.SURFACE);
        String description = "";
        boolean unrestricted = true;

        for (Map.Entry<String, Class> generalizeClass : ConstantsAndVariables.getGeneralizeClasses().entrySet()) {
            boolean present = coreLabel.containsKey(generalizeClass.getValue());
            if (!present || coreLabel.get(generalizeClass.getValue()) == null) {
                throw new RuntimeException(" Why does the token not have the class " + generalizeClass.getValue() + " set? Existing classes " + coreLabel.toString(CoreLabel.OutputFormat.ALL));
            }
            if (coreLabel.get(generalizeClass.getValue()).equals(ConstantsAndVariables.backgroundSymbol)) {
                // Background value: nothing to generalize for this class.
                continue;
            }
            unrestricted = false;
            if (description.isEmpty()) {
                description = generalizeClass.getKey();
            } else {
                description = description + "|" + generalizeClass.getKey();
            }
            generalized.addORRestriction(generalizeClass.getValue(), generalizeClass.getKey());
        }

        if (useContextNERRestriction) {
            String nerTag = (String) coreLabel.get(CoreAnnotations.NamedEntityTagAnnotation.class);
            if (nerTag != null && !nerTag.equals(SeqClassifierFlags.DEFAULT_BACKGROUND_SYMBOL)) {
                unrestricted = false;
                if (description.isEmpty()) {
                    description = nerTag;
                } else {
                    description = description + "|" + nerTag;
                }
                generalized.addORRestriction(CoreAnnotations.NamedEntityTagAnnotation.class, nerTag);
            }
        }

        return new Triple<>(Boolean.valueOf(unrestricted), generalized, description);
    }

    /**
     * Tells whether {@code str} consists solely of US-ASCII characters, i.e. every UTF-16 code
     * unit is in the range 0x00-0x7F.
     *
     * <p>The result is the same as encoding the string to US-ASCII, decoding it again and
     * comparing with the input, but without a charset lookup or temporary copies per call.
     * Surrogate code units are above 0x7F, so supplementary characters yield {@code false}.
     *
     * @param str the text to check; must not be null
     * @return {@code true} if the string is empty or contains only ASCII characters
     * @throws NullPointerException if {@code str} is null
     */
    public static boolean isASCII(String str) {
        for (int idx = 0, len = str.length(); idx < len; idx++) {
            if (str.charAt(idx) > 0x7F) {
                return false;
            }
        }
        return true;
    }

    /**
     * Computes, for every token position of {@code dataInstance}, the set of surface patterns
     * around that token.
     *
     * <p>Positions whose word is rejected by PatternFactory.doNotUse are mapped to an empty set;
     * all other positions are mapped to the result of getContext for that position.
     *
     * @param dataInstance the instance whose tokens are scanned
     * @param set          phrases passed on to doNotUse and getContext
     * @return a map with one entry per token index, from 0 to the number of tokens minus one
     */
    public static Map<Integer, Set> getPatternsAroundTokens(DataInstance dataInstance, Set<CandidatePhrase> set) {
        Map<Integer, Set> patternsByIndex = new HashMap<>();
        List<CoreLabel> sentence = dataInstance.getTokens();
        int position = 0;
        while (position < sentence.size()) {
            Integer key = Integer.valueOf(position);
            if (PatternFactory.doNotUse(sentence.get(position).word(), set)) {
                // No patterns are built around a rejected word.
                patternsByIndex.put(key, new HashSet());
            } else {
                patternsByIndex.put(key, getContext(dataInstance.getTokens(), position, set));
            }
            position++;
        }
        return patternsByIndex;
    }
}
