package org.languagetool.tokenizers.es;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.StringTokenizer;
import java.util.regex.Pattern;
import org.languagetool.JLanguageTool;
import org.languagetool.rules.spelling.morfologik.MorfologikSpeller;
import org.languagetool.tokenizers.WordTokenizer;

/* loaded from: input_file:org/languagetool/tokenizers/es/SpanishWordTokenizer.class */
/**
 * Word tokenizer for Spanish text.
 *
 * <p>Splits the input on a fixed set of whitespace, formatting and punctuation
 * characters (the delimiters themselves are returned as tokens), keeps a decimal
 * point or comma between two digits inside its number, and splits a hyphenated
 * token at each {@code -} unless the dictionary accepts it or it is one of a few
 * hard-coded exceptions.
 */
public class SpanishWordTokenizer extends WordTokenizer {
    private static final String DICT_FILENAME = "/es/es-ES.dict";

    /** Same value as the literal 66 used previously (2 | 64). */
    private static final int PATTERN_FLAGS = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
    private static final Pattern DECIMAL_POINT = Pattern.compile("([\\d])\\.([\\d])", PATTERN_FLAGS);
    private static final Pattern DECIMAL_COMMA = Pattern.compile("([\\d]),([\\d])", PATTERN_FLAGS);

    /** Placeholders that temporarily stand in for a decimal separator so it is not treated as a delimiter. */
    private static final String DECIMAL_POINT_MARK = "\u0001\u0001CA_DECIMALPOINT\u0001\u0001";
    private static final String DECIMAL_COMMA_MARK = "\u0001\u0001CA_DECIMALCOMMA\u0001\u0001";

    /**
     * Characters at which the text is split. Invisible characters are written as
     * Unicode escapes so the set can be audited and survives editors that
     * normalise whitespace; the runtime value of the string is unchanged.
     */
    private static final String DELIMITERS =
            "\u0020\u00a0\u115f\u1160\u1680"
            + "\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007"
            + "\u2008\u2009\u200a\u200b\u200c\u200d\u200e\u200f"
            + "–—―"
            + "\u2028\u2029\u202a\u202b\u202c\u202d\u202e\u202f"
            + "\u205f\u2060\u2061\u2062\u2063"
            + "\u206a\u206b\u206c\u206d\u206e\u206f"
            + "\u3000\u3164\ufeff\uffa0\ufff9\ufffa\ufffb"
            + ",.;()[]{}<>!?:=*#∗×+÷/\\\"'«»„”“‘`’…¿¡\t\n\r";

    /** Hyphenated tokens that are never split, compared case-insensitively. */
    private static final String[] HYPHEN_EXCEPTIONS = {
        "mers-cov", "mcgraw-hill", "sars-cov-2", "sars-cov", "ph-metre", "ph-metres"
    };

    /**
     * Speller used to check whether a hyphenated token is a known word.
     * Remains {@code null} when the dictionary resource is not available.
     */
    protected MorfologikSpeller speller;

    /**
     * Creates the tokenizer and loads the dictionary if the resource exists.
     *
     * @throws RuntimeException wrapping the {@link IOException} if the dictionary cannot be read
     */
    public SpanishWordTokenizer() {
        if (JLanguageTool.getDataBroker().resourceExists(DICT_FILENAME)) {
            try {
                this.speller = new MorfologikSpeller(DICT_FILENAME);
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }
    }

    /**
     * Splits {@code str} into tokens; delimiter characters are emitted as tokens too.
     *
     * @param str the text to tokenize
     * @return the tokens after the inherited e-mail/URL joining step
     */
    public List<String> tokenize(String str) {
        // Hide decimal separators between digits so "3.14" / "3,14" stay in one token.
        String protectedText = DECIMAL_POINT.matcher(str).replaceAll("$1" + DECIMAL_POINT_MARK + "$2");
        protectedText = DECIMAL_COMMA.matcher(protectedText).replaceAll("$1" + DECIMAL_COMMA_MARK + "$2");

        List<String> tokens = new ArrayList<>();
        StringTokenizer splitter = new StringTokenizer(protectedText, DELIMITERS, true);
        while (splitter.hasMoreTokens()) {
            String token = splitter.nextToken()
                    .replace(DECIMAL_POINT_MARK, ".")
                    .replace(DECIMAL_COMMA_MARK, ",");
            tokens.addAll(wordsToAdd(token));
        }
        return joinEMailsAndUrls(tokens);
    }

    /**
     * Returns the token unchanged, or split at every hyphen (hyphens kept as
     * separate tokens) when it contains one and is neither accepted by the
     * dictionary nor listed as an exception. An empty token yields an empty list.
     */
    private List<String> wordsToAdd(String str) {
        List<String> words = new ArrayList<>();
        if (str.isEmpty()) {
            return words;
        }
        if (!str.contains("-") || isInDictionary(str) || isHyphenException(str)) {
            words.add(str);
            return words;
        }
        StringTokenizer parts = new StringTokenizer(str, "-", true);
        while (parts.hasMoreTokens()) {
            words.add(parts.nextToken());
        }
        return words;
    }

    /**
     * Reports whether the speller accepts the word. Without a speller (dictionary
     * resource missing) nothing counts as known, so the caller falls back to the
     * exception list and hyphen splitting instead of failing on a null reference.
     */
    private boolean isInDictionary(String word) {
        // The speller call was already guarded by this lock; presumably the speller
        // is not safe for concurrent use -- confirm before removing.
        synchronized (this) {
            return this.speller != null && !this.speller.isMisspelled(word.replace("’", "'"));
        }
    }

    /** Reports whether the word is one of the hard-coded hyphenated exceptions. */
    private static boolean isHyphenException(String word) {
        for (String exception : HYPHEN_EXCEPTIONS) {
            if (word.equalsIgnoreCase(exception)) {
                return true;
            }
        }
        return false;
    }
}
