package org.languagetool.tokenizers.es;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.StringTokenizer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.languagetool.tagging.es.SpanishTagger;
import org.languagetool.tokenizers.WordTokenizer;

/* loaded from: input_file:org/languagetool/tokenizers/es/SpanishWordTokenizer.class */
/**
 * Word tokenizer for Spanish text.
 *
 * <p>On top of splitting the input into runs of word characters and single
 * non-word characters, this tokenizer:
 * <ul>
 *   <li>keeps a {@code .} or {@code ,} between two digits inside the number token;</li>
 *   <li>keeps the {@code .} of ordinals such as {@code 1.º} or {@code 3.er} inside the token;</li>
 *   <li>glues a lone variation selector (U+FE00..U+FE0F) onto the preceding token;</li>
 *   <li>splits hyphenated words at the hyphens unless the tagger knows the whole word
 *       or it is one of a few hard-coded exceptions.</li>
 * </ul>
 */
public class SpanishWordTokenizer extends WordTokenizer {
    /**
     * Body of the regex character class describing the characters that may form a word.
     * Invisible or ambiguous code points (combining marks U+0300..U+036F, U+FFFD,
     * the soft hyphen) are written as escapes so the source stays readable.
     */
    private static final String WORD_CHARACTERS =
            "§©@€£\\$_\\p{L}\\d·\\-\u0300-\u036F¨⁰-\u209f°%‰‱&\uFFFD\u00ad¬";

    /** Matches either a maximal run of word characters or one single non-word character. */
    private static final Pattern TOKENIZER_PATTERN =
            Pattern.compile("[" + WORD_CHARACTERS + "]+|[^" + WORD_CHARACTERS + "]");

    private static final int IGNORE_CASE_UNICODE = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;

    private static final Pattern DECIMAL_POINT = Pattern.compile("([\\d])\\.([\\d])", IGNORE_CASE_UNICODE);
    private static final Pattern DECIMAL_COMMA = Pattern.compile("([\\d]),([\\d])", IGNORE_CASE_UNICODE);
    private static final Pattern ORDINAL_POINT =
            Pattern.compile("\\b([\\d]+)\\.(º|ª|o|a|er|os|as)\\b", IGNORE_CASE_UNICODE);

    // Placeholders temporarily substituted for punctuation that must not split a token.
    // They consist only of letters and '_' so the tokenizer pattern keeps them inside a word.
    private static final String DECIMAL_POINT_MARKER = "xxES_DECIMAL_POINTxx";
    private static final String DECIMAL_COMMA_MARKER = "xxES_DECIMAL_COMMAxx";
    private static final String ORDINAL_POINT_MARKER = "xxES_ORDINAL_POINTxx";

    private static final Pattern DECIMAL_POINT_MARKER_PATTERN =
            Pattern.compile(DECIMAL_POINT_MARKER, Pattern.LITERAL);
    private static final Pattern DECIMAL_COMMA_MARKER_PATTERN =
            Pattern.compile(DECIMAL_COMMA_MARKER, Pattern.LITERAL);
    private static final Pattern ORDINAL_POINT_MARKER_PATTERN =
            Pattern.compile(ORDINAL_POINT_MARKER, Pattern.LITERAL);
    private static final Pattern SOFT_HYPHEN = Pattern.compile("\u00ad", Pattern.LITERAL);

    /** First and last code point of the variation selector block VS1..VS16. */
    private static final int VARIATION_SELECTOR_FIRST = 0xFE00;
    private static final int VARIATION_SELECTOR_LAST = 0xFE0F;

    /** Hyphenated words that are never split, compared case-insensitively. */
    private static final List<String> UNSPLIT_HYPHENATED_WORDS = Arrays.asList(
            "mers-cov", "mcgraw-hill", "sars-cov-2", "sars-cov", "ph-metre", "ph-metres");

    /**
     * Splits {@code str} into tokens.
     *
     * @param str the text to tokenize; must not be {@code null}
     * @return the tokens after the inherited {@code joinEMailsAndUrls} post-processing
     */
    public List<String> tokenize(String str) {
        List<String> tokens = new ArrayList<>();
        Matcher matcher = TOKENIZER_PATTERN.matcher(protectPunctuation(str));
        while (matcher.find()) {
            String token = matcher.group();
            if (!tokens.isEmpty() && isVariationSelector(token)) {
                // A variation selector modifies the preceding character: append it to the previous token.
                int last = tokens.size() - 1;
                tokens.set(last, tokens.get(last) + token);
            } else {
                tokens.addAll(wordsToAdd(restorePunctuation(token)));
            }
        }
        return joinEMailsAndUrls(tokens);
    }

    /**
     * Normalizes the Unicode hyphens U+2010 and U+2011 to {@code '-'} and replaces decimal
     * points, decimal commas and ordinal points with placeholders.
     */
    private static String protectPunctuation(String text) {
        String result = text.replace('\u2010', '-').replace('\u2011', '-');
        result = DECIMAL_POINT.matcher(result).replaceAll("$1" + DECIMAL_POINT_MARKER + "$2");
        result = DECIMAL_COMMA.matcher(result).replaceAll("$1" + DECIMAL_COMMA_MARKER + "$2");
        return ORDINAL_POINT.matcher(result).replaceAll("$1" + ORDINAL_POINT_MARKER + "$2");
    }

    /** Turns the placeholders inserted by {@link #protectPunctuation(String)} back into punctuation. */
    private static String restorePunctuation(String token) {
        String result = DECIMAL_POINT_MARKER_PATTERN.matcher(token).replaceAll(".");
        result = DECIMAL_COMMA_MARKER_PATTERN.matcher(result).replaceAll(",");
        return ORDINAL_POINT_MARKER_PATTERN.matcher(result).replaceAll(".");
    }

    /** Returns whether {@code token} is exactly one variation selector (U+FE00..U+FE0F). */
    private static boolean isVariationSelector(String token) {
        if (token.length() != 1) {
            return false;
        }
        int codePoint = token.codePointAt(0);
        return codePoint >= VARIATION_SELECTOR_FIRST && codePoint <= VARIATION_SELECTOR_LAST;
    }

    /**
     * Decides whether a token is kept whole or split at its hyphens.
     *
     * @param str a single token
     * @return an empty list for an empty token; the token itself if it has no hyphen, is
     *         tagged by the tagger, or is a listed exception; otherwise its parts with the
     *         hyphens as separate elements
     */
    private List<String> wordsToAdd(String str) {
        List<String> parts = new ArrayList<>();
        if (str.isEmpty()) {
            return parts;
        }
        if (!str.contains("-") || isKnownToTagger(str) || isUnsplitHyphenatedWord(str)) {
            parts.add(str);
            return parts;
        }
        // 'true' makes the tokenizer return the "-" delimiters as tokens too.
        StringTokenizer pieces = new StringTokenizer(str, "-", true);
        while (pieces.hasMoreTokens()) {
            parts.add(pieces.nextToken());
        }
        return parts;
    }

    /**
     * Looks the word up in the Spanish tagger after removing soft hyphens and replacing the
     * typographic apostrophe U+2019 with {@code '}.
     */
    private boolean isKnownToTagger(String word) {
        String normalized = SOFT_HYPHEN.matcher(word).replaceAll("").replace('\u2019', '\'');
        // NOTE(review): the lock previously wrapped the whole of wordsToAdd; everything else
        // there only touches local state, so it is narrowed to the shared tagger call.
        // Presumably the tagger lookup is not thread-safe - confirm. Locking on 'this' does
        // not exclude other tokenizer instances that use the same tagger singleton.
        synchronized (this) {
            return SpanishTagger.INSTANCE.tag(Arrays.asList(normalized)).get(0).isTagged();
        }
    }

    /** Returns whether {@code word} is one of the hard-coded hyphenated words kept whole. */
    private static boolean isUnsplitHyphenatedWord(String word) {
        for (String exception : UNSPLIT_HYPHENATED_WORDS) {
            if (word.equalsIgnoreCase(exception)) {
                return true;
            }
        }
        return false;
    }
}
