/*
 * Decompiled with CFR 0.152.
 */
package org.apache.lucene.wikipedia.analysis;

import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.wikipedia.analysis.WikipediaTokenizerImpl;

public class WikipediaTokenizer
extends Tokenizer {
    public static final String INTERNAL_LINK = "il";
    public static final String EXTERNAL_LINK = "el";
    public static final String EXTERNAL_LINK_URL = "elu";
    public static final String CITATION = "ci";
    public static final String CATEGORY = "c";
    public static final String BOLD = "b";
    public static final String ITALICS = "i";
    public static final String BOLD_ITALICS = "bi";
    public static final String HEADING = "h";
    public static final String SUB_HEADING = "sh";
    public static final int ALPHANUM_ID = 0;
    public static final int APOSTROPHE_ID = 1;
    public static final int ACRONYM_ID = 2;
    public static final int COMPANY_ID = 3;
    public static final int EMAIL_ID = 4;
    public static final int HOST_ID = 5;
    public static final int NUM_ID = 6;
    public static final int CJ_ID = 7;
    public static final int INTERNAL_LINK_ID = 8;
    public static final int EXTERNAL_LINK_ID = 9;
    public static final int CITATION_ID = 10;
    public static final int CATEGORY_ID = 11;
    public static final int BOLD_ID = 12;
    public static final int ITALICS_ID = 13;
    public static final int BOLD_ITALICS_ID = 14;
    public static final int HEADING_ID = 15;
    public static final int SUB_HEADING_ID = 16;
    public static final int EXTERNAL_LINK_URL_ID = 17;
    public static final String[] TOKEN_TYPES;
    public static final String[] tokenImage;
    public static final int TOKENS_ONLY = 0;
    public static final int UNTOKENIZED_ONLY = 1;
    public static final int BOTH = 2;
    public static final int UNTOKENIZED_TOKEN_FLAG = 1;
    private final WikipediaTokenizerImpl scanner;
    private int tokenOutput = 0;
    private Set untokenizedTypes = Collections.EMPTY_SET;
    private Iterator tokens = null;
    static final /* synthetic */ boolean $assertionsDisabled;

    void setInput(Reader reader) {
        this.input = reader;
    }

    public WikipediaTokenizer(Reader input) {
        this(input, 0, Collections.EMPTY_SET);
    }

    public WikipediaTokenizer(Reader input, int tokenOutput, Set untokenizedTypes) {
        super(input);
        this.tokenOutput = tokenOutput;
        this.scanner = new WikipediaTokenizerImpl(input);
        this.untokenizedTypes = untokenizedTypes;
    }

    public Token next(Token reusableToken) throws IOException {
        if (!$assertionsDisabled && reusableToken == null) {
            throw new AssertionError();
        }
        if (this.tokens != null && this.tokens.hasNext()) {
            return (Token)this.tokens.next();
        }
        int tokenType = this.scanner.getNextToken();
        if (tokenType == -1) {
            return null;
        }
        String type = WikipediaTokenizerImpl.TOKEN_TYPES[tokenType];
        if (this.tokenOutput == 0 || !this.untokenizedTypes.contains(type)) {
            this.setupToken(reusableToken);
        } else if (this.tokenOutput == 1 && this.untokenizedTypes.contains(type)) {
            this.collapseTokens(reusableToken, tokenType);
        } else if (this.tokenOutput == 2) {
            this.collapseAndSaveTokens(reusableToken, tokenType, type);
        }
        reusableToken.setPositionIncrement(this.scanner.getPositionIncrement());
        reusableToken.setType(type);
        return reusableToken;
    }

    private void collapseAndSaveTokens(Token reusableToken, int tokenType, String type) throws IOException {
        int tmpTokType;
        StringBuffer buffer = new StringBuffer(32);
        int numAdded = this.scanner.setText(buffer);
        int theStart = this.scanner.yychar();
        int lastPos = theStart + numAdded;
        ArrayList<Token> tmp = new ArrayList<Token>();
        Token saved = new Token();
        this.setupSavedToken(saved, 0, type);
        tmp.add(saved);
        for (int numSeen = 0; (tmpTokType = this.scanner.getNextToken()) != -1 && tmpTokType == tokenType && this.scanner.getNumWikiTokensSeen() > numSeen; ++numSeen) {
            int currPos = this.scanner.yychar();
            for (int i = 0; i < currPos - lastPos; ++i) {
                buffer.append(' ');
            }
            numAdded = this.scanner.setText(buffer);
            saved = new Token();
            this.setupSavedToken(saved, this.scanner.getPositionIncrement(), type);
            tmp.add(saved);
            lastPos = currPos + numAdded;
        }
        String s = buffer.toString().trim();
        reusableToken.setTermBuffer(s.toCharArray(), 0, s.length());
        reusableToken.setStartOffset(theStart);
        reusableToken.setEndOffset(theStart + s.length());
        reusableToken.setFlags(1);
        if (tmpTokType != -1) {
            this.scanner.yypushback(this.scanner.yylength());
        }
        this.tokens = tmp.iterator();
    }

    private void setupSavedToken(Token saved, int positionInc, String type) {
        this.setupToken(saved);
        saved.setPositionIncrement(positionInc);
        saved.setType(type);
    }

    private void collapseTokens(Token reusableToken, int tokenType) throws IOException {
        int tmpTokType;
        StringBuffer buffer = new StringBuffer(32);
        int numAdded = this.scanner.setText(buffer);
        int theStart = this.scanner.yychar();
        int lastPos = theStart + numAdded;
        for (int numSeen = 0; (tmpTokType = this.scanner.getNextToken()) != -1 && tmpTokType == tokenType && this.scanner.getNumWikiTokensSeen() > numSeen; ++numSeen) {
            int currPos = this.scanner.yychar();
            for (int i = 0; i < currPos - lastPos; ++i) {
                buffer.append(' ');
            }
            numAdded = this.scanner.setText(buffer);
            lastPos = currPos + numAdded;
        }
        String s = buffer.toString().trim();
        reusableToken.setTermBuffer(s.toCharArray(), 0, s.length());
        reusableToken.setStartOffset(theStart);
        reusableToken.setEndOffset(theStart + s.length());
        reusableToken.setFlags(1);
        if (tmpTokType != -1) {
            this.scanner.yypushback(this.scanner.yylength());
        } else {
            this.tokens = null;
        }
    }

    private void setupToken(Token reusableToken) {
        this.scanner.getText(reusableToken);
        int start = this.scanner.yychar();
        reusableToken.setStartOffset(start);
        reusableToken.setEndOffset(start + reusableToken.termLength());
    }

    public void reset() throws IOException {
        super.reset();
        this.scanner.yyreset(this.input);
    }

    public void reset(Reader reader) throws IOException {
        this.input = reader;
        this.reset();
    }

    static {
        $assertionsDisabled = !WikipediaTokenizer.class.desiredAssertionStatus();
        TOKEN_TYPES = new String[]{"<ALPHANUM>", "<APOSTROPHE>", "<ACRONYM>", "<COMPANY>", "<EMAIL>", "<HOST>", "<NUM>", "<CJ>", INTERNAL_LINK, EXTERNAL_LINK, CITATION, CATEGORY, BOLD, ITALICS, BOLD_ITALICS, HEADING, SUB_HEADING, EXTERNAL_LINK_URL};
        tokenImage = TOKEN_TYPES;
    }
}

