/*
 * Decompiled with CFR 0.152.
 */
package gate.creole.gazetteer;

import gate.AnnotationSet;
import gate.Factory;
import gate.FeatureMap;
import gate.Resource;
import gate.Utils;
import gate.creole.CustomDuplication;
import gate.creole.ExecutionException;
import gate.creole.ExecutionInterruptedException;
import gate.creole.ResourceInstantiationException;
import gate.creole.gazetteer.AbstractGazetteer;
import gate.creole.gazetteer.FSMState;
import gate.creole.gazetteer.GazetteerList;
import gate.creole.gazetteer.GazetteerNode;
import gate.creole.gazetteer.LinearDefinition;
import gate.creole.gazetteer.LinearNode;
import gate.creole.gazetteer.Lookup;
import gate.creole.gazetteer.MappingNode;
import gate.creole.gazetteer.SharedDefaultGazetteer;
import gate.creole.metadata.CreoleParameter;
import gate.creole.metadata.CreoleResource;
import gate.creole.metadata.Optional;
import gate.util.GateRuntimeException;
import gate.util.InvalidOffsetException;
import gate.util.Strings;
import java.io.IOException;
import java.io.Serializable;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;

@CreoleResource(name="ANNIE Gazetteer", comment="A list lookup component.", icon="gazetteer", helpURL="http://gate.ac.uk/userguide/sec:annie:gazetteer")
public class DefaultGazetteer
extends AbstractGazetteer
implements CustomDuplication {
    private static final long serialVersionUID = -8976141132455436099L;
    // String constants holding parameter names. Most of the values match fields
    // read by this class (document, annotationSetName, listsURL, caseSensitive,
    // longestMatchOnly, gazetteerFeatureSeparator).
    public static final String DEF_GAZ_DOCUMENT_PARAMETER_NAME = "document";
    public static final String DEF_GAZ_ANNOT_SET_PARAMETER_NAME = "annotationSetName";
    public static final String DEF_GAZ_LISTS_URL_PARAMETER_NAME = "listsURL";
    public static final String DEF_GAZ_ENCODING_PARAMETER_NAME = "encoding";
    public static final String DEF_GAZ_CASE_SENSITIVE_PARAMETER_NAME = "caseSensitive";
    public static final String DEF_GAZ_LONGEST_MATCH_ONLY_PARAMETER_NAME = "longestMatchOnly";
    public static final String DEF_GAZ_FEATURE_SEPARATOR_PARAMETER_NAME = "gazetteerFeatureSeparator";
    // Feature separator as configured (still escaped); init() unescapes it
    // before handing it to the list definition.
    protected String gazetteerFeatureSeparator;
    // Gazetteer lists keyed by their definition node; filled by init() from
    // definition.loadLists() and read by readList().
    protected Map<LinearNode, GazetteerList> listsByNode;
    // Start state of the lookup state machine; re-created by init().
    protected FSMState initialState;
    // Set of states rendered by getFSMgml(); re-created (empty) by init().
    // NOTE(review): nothing in this class adds to it directly - presumably the
    // FSMState constructor registers each new state; confirm in FSMState.
    protected Set<FSMState> fsmStates;

    /**
     * Builds the lookup state machine from the configured list definition.
     *
     * <p>Resets the state set and the initial state, loads the definition found
     * at {@code listsURL}, loads every list it references and feeds each list
     * into the state machine through {@link #readList(LinearNode, boolean)}.
     * Status and progress events are fired for every definition node and a
     * "process finished" event is fired at the end.
     *
     * @return this gazetteer
     * @throws ResourceInstantiationException if no lists URL was provided, if
     *         the URL cannot be resolved, or if reading a list fails
     */
    public Resource init() throws ResourceInstantiationException {
        // The state set is created before the initial state, as in the original.
        this.fsmStates = new HashSet<FSMState>();
        this.initialState = new FSMState(this);

        if (this.listsURL == null) {
            throw new ResourceInstantiationException("No URL provided for gazetteer creation!");
        }

        this.definition = new LinearDefinition();
        this.definition.setSeparator(Strings.unescape(this.gazetteerFeatureSeparator));
        try {
            this.definition.setURL(this.listsURL.toURL());
        } catch (IOException urlFailure) {
            throw new ResourceInstantiationException(urlFailure);
        }
        this.definition.load();

        int totalNodes = this.definition.size();
        this.listsByNode = this.definition.loadLists();

        int processedNodes = 0;
        for (Iterator<LinearNode> nodeIt = this.definition.iterator(); nodeIt.hasNext(); ) {
            LinearNode current = nodeIt.next();
            this.fireStatusChanged("Reading " + current.toString());
            processedNodes++;
            // Integer percentage of definition nodes handled so far.
            this.fireProgressChanged(processedNodes * 100 / totalNodes);
            this.readList(current, true);
        }

        this.fireProcessFinished();
        return this;
    }

    /**
     * Adds every entry of one gazetteer list to the state machine, or removes
     * every entry from it.
     *
     * <p>Entries without a feature map all share a single {@link Lookup}
     * instance; an entry that carries features gets its own {@link Lookup} with
     * those features attached. When a mapping definition is present and knows
     * the list, the lookup also receives the mapped class and ontology ids.
     *
     * @param node the definition node describing the list to process
     * @param add  {@code true} to add the entries, {@code false} to remove them
     * @throws ResourceInstantiationException if {@code node} is {@code null} or
     *         no loaded list is registered for it
     */
    protected void readList(LinearNode node, boolean add) throws ResourceInstantiationException {
        if (node == null) {
            throw new ResourceInstantiationException(" LinearNode node is null ");
        }

        String listName = node.getList();
        String major = node.getMajorType();
        String minor = node.getMinorType();
        String langs = node.getLanguage();
        String annType = node.getAnnotationType();

        GazetteerList entries = this.listsByNode.get(node);
        if (entries == null) {
            throw new ResourceInstantiationException("gazetteer list not found by node");
        }
        Iterator<GazetteerNode> entryIt = entries.iterator();

        // Lookup shared by all entries that have no features of their own.
        Lookup sharedLookup = new Lookup(listName, major, minor, langs, annType);
        sharedLookup.list = node.getList();
        if (this.mappingDefinition != null) {
            MappingNode sharedMapping = this.mappingDefinition.getNodeByList(sharedLookup.list);
            if (sharedMapping != null) {
                sharedLookup.oClass = sharedMapping.getClassID();
                sharedLookup.ontology = sharedMapping.getOntologyID();
            }
        }

        while (entryIt.hasNext()) {
            GazetteerNode entryNode = entryIt.next();
            String entryText = entryNode.getEntry();
            Map<String, Object> entryFeatures = entryNode.getFeatureMap();

            Lookup entryLookup;
            if (entryFeatures != null) {
                // Entry-specific features need an entry-specific lookup.
                entryLookup = new Lookup(listName, major, minor, langs, annType);
                entryLookup.list = node.getList();
                if (this.mappingDefinition != null) {
                    MappingNode entryMapping = this.mappingDefinition.getNodeByList(entryLookup.list);
                    if (entryMapping != null) {
                        entryLookup.oClass = entryMapping.getClassID();
                        entryLookup.ontology = entryMapping.getOntologyID();
                    }
                }
                entryLookup.features = entryFeatures;
            } else {
                entryLookup = sharedLookup;
            }

            if (add) {
                this.addLookup(entryText, entryLookup);
            } else {
                this.removeLookup(entryText, entryLookup);
            }
        }
    }

    /**
     * Registers {@code lookup} for the phrase {@code text} in the state machine.
     *
     * <p>Each character is normalised before use as a transition key: any
     * space or whitespace character becomes a plain {@code ' '}, and other
     * characters are upper-cased unless the gazetteer is case sensitive.
     * Missing states are created along the way; a state created for a space
     * also loops to itself on {@code ' '}, so a run of spaces in the input
     * follows the same path as a single one.
     *
     * @param text   the phrase to recognise
     * @param lookup the lookup to attach to the phrase's final state
     */
    public void addLookup(String text, Lookup lookup) {
        FSMState state = this.initialState;
        for (char raw : text.toCharArray()) {
            boolean whitespace = Character.isSpaceChar(raw) || Character.isWhitespace(raw);

            char key;
            if (whitespace) {
                key = ' ';
            } else if (this.caseSensitive.booleanValue()) {
                key = raw;
            } else {
                key = Character.toUpperCase(raw);
            }

            FSMState target = state.next(key);
            if (target == null) {
                target = new FSMState(this);
                state.put(key, target);
                if (whitespace) {
                    // Self-loop so repeated spaces stay in the same state.
                    target.put(' ', target);
                }
            }
            state = target;
        }
        state.addLookup(lookup);
    }

    /**
     * Detaches {@code lookup} from the state reached by the phrase
     * {@code text}.
     *
     * <p>Characters are normalised the same way as when adding: whitespace
     * becomes {@code ' '} and, for a case-insensitive gazetteer, characters are
     * upper-cased. If the phrase does not lead to an existing state, nothing
     * happens.
     *
     * @param text   the phrase whose lookup should be removed
     * @param lookup the lookup to remove from the phrase's final state
     */
    public void removeLookup(String text, Lookup lookup) {
        FSMState state = this.initialState;
        for (char key : text.toCharArray()) {
            if (Character.isSpaceChar(key) || Character.isWhitespace(key)) {
                key = ' ';
            }
            if (!this.caseSensitive.booleanValue()) {
                key = Character.toUpperCase(key);
            }

            FSMState target = state.next(key);
            if (target == null) {
                // The phrase was never added; nothing to remove.
                return;
            }
            state = target;
        }
        state.removeLookup(lookup);
    }

    /**
     * Renders the state machine as a GML-style directed graph description.
     *
     * <p>Every state in {@code fsmStates} contributes one node entry labelled
     * with its index (final states additionally show {@code ,F} and their
     * lookup set) followed, after all nodes, by the edge text each state
     * produces via {@code getEdgesGML()}.
     *
     * @return the textual graph description
     */
    public String getFSMgml() {
        StringBuilder nodeText = new StringBuilder(1024);
        StringBuilder edgeText = new StringBuilder(1024);

        for (FSMState state : this.fsmStates) {
            int id = state.getIndex();
            nodeText.append("node[ id ").append(id).append(" label \"").append(id);
            if (state.isFinal()) {
                nodeText.append(",F\\n").append(state.getLookupSet());
            }
            nodeText.append("\"  ]\n");
            edgeText.append(state.getEdgesGML());
        }

        return "graph[ \ndirected 1\n" + nodeText.toString() + edgeText.toString() + "]\n";
    }

    /**
     * Tells whether a character counts as part of a word for whole-word
     * matching.
     *
     * @param ch the character to classify
     * @return {@code true} for letters and for characters whose Unicode general
     *         category is combining spacing mark or non-spacing mark
     */
    public static boolean isWordInternal(char ch) {
        if (Character.isLetter(ch)) {
            return true;
        }
        int category = Character.getType(ch);
        // COMBINING_SPACING_MARK == 8 and NON_SPACING_MARK == 6.
        return category == Character.COMBINING_SPACING_MARK
                || category == Character.NON_SPACING_MARK;
    }

    /**
     * Scans the current document's text with the lookup state machine and
     * creates an annotation for every gazetteer entry found.
     *
     * <p>Characters are normalised exactly as when entries are added: any space
     * or whitespace character is treated as {@code ' '}, and other characters
     * are upper-cased unless the gazetteer is case sensitive. Starting at each
     * text position the machine is advanced as far as possible. When
     * {@code wholeWordsOnly} is set, a final state is only accepted if the
     * characters just before the match start and just after the current
     * position are not word-internal (see {@link #isWordInternal(char)}). When
     * {@code longestMatchOnly} is not set, every accepted match for a start
     * position is annotated; otherwise only the last (longest) one. After the
     * machine gets stuck or the text ends, scanning restarts one character
     * after the previous start position.
     *
     * <p>Annotations go into the set named by {@code annotationSetName}, or
     * into the document's default set when that name is {@code null} or empty.
     *
     * @throws ExecutionException if there is no document to process
     * @throws ExecutionInterruptedException if an interruption is detected
     *         during one of the periodic progress checks
     */
    public void execute() throws ExecutionException {
        this.interrupted = false;
        if (this.document == null) {
            throw new ExecutionException("No document to process!");
        }
        AnnotationSet annotationSet = this.annotationSetName == null || this.annotationSetName.equals("")
                ? this.document.getAnnotations()
                : this.document.getAnnotations(this.annotationSetName);
        this.fireStatusChanged("Performing look-up in " + this.document.getName() + "...");
        String content = this.document.getContent().toString();
        int length = content.length();
        FSMState currentState = this.initialState;
        FSMState lastMatchingState = null;
        // Index of the last character of the most recent accepted match (inclusive).
        int matchedRegionEnd = 0;
        // Index at which the current matching attempt started.
        int matchedRegionStart = 0;
        int charIdx = 0;
        // Position at which progress was last reported.
        int oldCharIdx = 0;
        while (charIdx < length) {
            char currentChar = content.charAt(charIdx);
            if (Character.isSpaceChar(currentChar) || Character.isWhitespace(currentChar)) {
                currentChar = ' ';
            } else if (!this.caseSensitive.booleanValue()) {
                currentChar = Character.toUpperCase(currentChar);
            }
            FSMState nextState = currentState.next(currentChar);
            if (nextState == null) {
                // Stuck: emit the pending match (if any) and restart one
                // character after the previous start position.
                if (lastMatchingState != null) {
                    this.createLookups(lastMatchingState, matchedRegionStart, matchedRegionEnd, annotationSet);
                    lastMatchingState = null;
                }
                matchedRegionStart = charIdx = matchedRegionStart + 1;
                currentState = this.initialState;
            } else {
                currentState = nextState;
                if (currentState.isFinal()
                        && (!this.wholeWordsOnly.booleanValue()
                            || ((matchedRegionStart == 0
                                    || !DefaultGazetteer.isWordInternal(content.charAt(matchedRegionStart - 1)))
                                && (charIdx + 1 >= content.length()
                                    || !DefaultGazetteer.isWordInternal(content.charAt(charIdx + 1)))))) {
                    // A shorter match is only annotated here when all matches
                    // are wanted, not just the longest one.
                    if (!this.longestMatchOnly.booleanValue() && lastMatchingState != null) {
                        this.createLookups(lastMatchingState, matchedRegionStart, matchedRegionEnd, annotationSet);
                    }
                    matchedRegionEnd = charIdx;
                    lastMatchingState = currentState;
                }
                if (++charIdx == content.length()) {
                    // End of text reached while still matching: flush and
                    // restart from the next start position.
                    if (lastMatchingState != null) {
                        this.createLookups(lastMatchingState, matchedRegionStart, matchedRegionEnd, annotationSet);
                        lastMatchingState = null;
                    }
                    matchedRegionStart = charIdx = matchedRegionStart + 1;
                    currentState = this.initialState;
                }
            }
            if (charIdx - oldCharIdx > 256) {
                // Computed in long arithmetic: 100 * charIdx overflows an int
                // once charIdx exceeds roughly 21.4 million characters.
                this.fireProgressChanged((int) (100L * charIdx / length));
                oldCharIdx = charIdx;
                if (this.isInterrupted()) {
                    throw new ExecutionInterruptedException("The execution of the " + this.getName() + " gazetteer has been abruptly interrupted!");
                }
            }
        }
        if (lastMatchingState != null) {
            this.createLookups(lastMatchingState, matchedRegionStart, matchedRegionEnd, annotationSet);
        }
        this.fireProcessFinished();
        this.fireStatusChanged("Look-up complete!");
    }

    /**
     * Creates one annotation per lookup attached to a matching state.
     *
     * <p>Each annotation spans {@code matchedRegionStart} to
     * {@code matchedRegionEnd + 1} (the end index passed in is inclusive) and
     * uses the lookup's annotation type. Its features always contain
     * {@code majorType}; {@code class} and {@code ontology} are added only when
     * both are present; {@code minorType} and {@code language} are added when
     * present; finally any entry-specific features are copied in and may
     * override the previous ones.
     *
     * @param matchingState      the final state whose lookups are annotated
     * @param matchedRegionStart offset of the first matched character
     * @param matchedRegionEnd   offset of the last matched character (inclusive)
     * @param annotationSet      the set receiving the new annotations
     * @throws GateRuntimeException if the annotation set rejects the offsets;
     *         the original {@link InvalidOffsetException} is kept as the cause
     */
    protected void createLookups(FSMState matchingState, long matchedRegionStart, long matchedRegionEnd, AnnotationSet annotationSet) {
        for (Lookup currentLookup : matchingState.getLookupSet()) {
            FeatureMap fm = Factory.newFeatureMap();
            fm.put("majorType", currentLookup.majorType);
            if (null != currentLookup.oClass && null != currentLookup.ontology) {
                fm.put("class", currentLookup.oClass);
                fm.put("ontology", currentLookup.ontology);
            }
            if (null != currentLookup.minorType) {
                fm.put("minorType", currentLookup.minorType);
            }
            if (null != currentLookup.languages) {
                fm.put("language", currentLookup.languages);
            }
            if (null != currentLookup.features) {
                fm.putAll(currentLookup.features);
            }
            try {
                annotationSet.add(Long.valueOf(matchedRegionStart), Long.valueOf(matchedRegionEnd + 1L), currentLookup.annotationType, fm);
            }
            catch (InvalidOffsetException ioe) {
                // Same exception type and message as before, but keep the
                // original exception (and its stack trace) as the cause.
                GateRuntimeException wrapped = new GateRuntimeException(ioe.toString());
                wrapped.initCause(ioe);
                throw wrapped;
            }
        }
    }

    /**
     * Returns the lookups registered for exactly the phrase {@code singleItem}.
     *
     * <p>The phrase is normalised the same way entries are when they are added
     * or removed: whitespace becomes {@code ' '} and, for a case-insensitive
     * gazetteer, characters are upper-cased. Without the case folding a
     * case-insensitive gazetteer could only be queried with upper-case text,
     * because its transitions are stored under upper-cased keys.
     *
     * @param singleItem the phrase to look up
     * @return the lookup set of the state reached by the phrase, or an empty
     *         set if the phrase is unknown
     */
    @Override
    public Set<Lookup> lookup(String singleItem) {
        Set<Lookup> noMatch = new HashSet<Lookup>();
        // A missing caseSensitive setting keeps the previous behaviour (no folding).
        boolean foldCase = this.caseSensitive != null && !this.caseSensitive.booleanValue();
        FSMState currentState = this.initialState;
        for (int i = 0; i < singleItem.length(); ++i) {
            char currentChar = singleItem.charAt(i);
            if (Character.isSpaceChar(currentChar) || Character.isWhitespace(currentChar)) {
                currentChar = ' ';
            }
            if (foldCase) {
                currentChar = Character.toUpperCase(currentChar);
            }
            FSMState nextState = currentState.next(currentChar);
            if (nextState == null) {
                return noMatch;
            }
            currentState = nextState;
        }
        // NOTE(review): a state that is only a prefix of longer entries may
        // have no lookup set at all - confirm in FSMState. Returning the empty
        // set in that case matches the "unknown phrase" result above.
        Set<Lookup> found = currentState.getLookupSet();
        return found != null ? found : noMatch;
    }

    /**
     * Clears all lookups attached to the phrase {@code singleItem}.
     *
     * <p>The phrase is normalised (whitespace to {@code ' '}, upper-casing for
     * a case-insensitive gazetteer) and followed through the state machine.
     * The state it reaches gets a fresh, empty lookup set.
     *
     * @param singleItem the phrase to remove
     * @return {@code false} if the phrase does not lead to an existing state;
     *         {@code true} otherwise, even if that state held no lookups
     */
    @Override
    public boolean remove(String singleItem) {
        FSMState state = this.initialState;
        for (char key : singleItem.toCharArray()) {
            if (Character.isSpaceChar(key) || Character.isWhitespace(key)) {
                key = ' ';
            }
            if (!this.caseSensitive.booleanValue()) {
                key = Character.toUpperCase(key);
            }

            FSMState target = state.next(key);
            if (target == null) {
                return false;
            }
            state = target;
        }
        state.lookupSet = new HashSet<Lookup>();
        return true;
    }

    /**
     * Adds a single phrase with its lookup by delegating to
     * {@link #addLookup(String, Lookup)}.
     *
     * @param singleItem the phrase to recognise
     * @param lookup     the lookup to attach to the phrase
     * @return always {@code true}
     */
    @Override
    public boolean add(String singleItem, Lookup lookup) {
        this.addLookup(singleItem, lookup);
        return true;
    }

    /**
     * Creates a duplicate of this gazetteer as a {@link SharedDefaultGazetteer}.
     *
     * <p>The new resource is created with this instance passed as its
     * {@code bootstrapGazetteer} parameter, a duplicated copy of this
     * resource's features, and the same name.
     *
     * @param ctx the duplication context used to duplicate the features
     * @return the newly created resource
     * @throws ResourceInstantiationException if the resource cannot be created
     */
    public Resource duplicate(Factory.DuplicationContext ctx) throws ResourceInstantiationException {
        String sharedClassName = SharedDefaultGazetteer.class.getName();
        FeatureMap initParams = Utils.featureMap(new Object[]{"bootstrapGazetteer", this});
        FeatureMap copiedFeatures = (FeatureMap) Factory.duplicate(this.getFeatures(), ctx);
        return Factory.createResource(sharedClassName, initParams, copiedFeatures, this.getName());
    }

    /**
     * Returns the feature separator exactly as it was set, i.e. before the
     * unescaping that {@code init()} applies.
     *
     * @return the configured separator string, possibly {@code null}
     */
    public String getGazetteerFeatureSeparator() {
        return this.gazetteerFeatureSeparator;
    }

    /**
     * Sets the separator between an entry and its features in gazetteer lists.
     * The value is stored as given; {@code init()} unescapes it when the list
     * definition is created, so it only affects later initialisations.
     *
     * @param gazetteerFeatureSeparator the separator string, possibly escaped
     */
    @Optional
    @CreoleParameter(comment="The character used to separate features for entries in gazetteer lists. Accepts strings like &quot;\t&quot; and will unescape it to the relevant character. If not specified, this gazetteer does not support extra features.", defaultValue=":")
    public void setGazetteerFeatureSeparator(String gazetteerFeatureSeparator) {
        this.gazetteerFeatureSeparator = gazetteerFeatureSeparator;
    }

    /**
     * Minimal map from {@code char} keys to objects, backed by two parallel
     * arrays kept sorted by key so that lookups can use binary search.
     * Both arrays stay {@code null} until the first entry is stored.
     */
    public static class CharMap
    implements Serializable {
        private static final long serialVersionUID = 4192829422957074447L;
        /** Keys in ascending order; {@code null} while the map is empty. */
        char[] itemsKeys = null;
        /** Values, stored at the same index as their key. */
        Object[] itemsObjs = null;

        /**
         * Grows both arrays by one element, leaving an unassigned slot at
         * {@code index} and shifting the entries from that position onwards
         * one place to the right.
         *
         * @param index position of the slot to open up
         */
        void resize(int index) {
            int oldLength = this.itemsKeys.length;
            int tailLength = oldLength - index;
            char[] grownKeys = new char[oldLength + 1];
            Object[] grownObjs = new Object[oldLength + 1];

            // Entries before the gap keep their position.
            System.arraycopy(this.itemsKeys, 0, grownKeys, 0, index);
            System.arraycopy(this.itemsObjs, 0, grownObjs, 0, index);
            // Entries from the gap onwards move one slot to the right.
            System.arraycopy(this.itemsKeys, index, grownKeys, index + 1, tailLength);
            System.arraycopy(this.itemsObjs, index, grownObjs, index + 1, tailLength);

            this.itemsKeys = grownKeys;
            this.itemsObjs = grownObjs;
        }

        /**
         * Looks up the value stored for a key.
         *
         * @param key the key to search for
         * @return the stored value, or {@code null} if the key is absent
         */
        Object get(char key) {
            if (this.itemsKeys == null) {
                return null;
            }
            int position = Arrays.binarySearch(this.itemsKeys, key);
            return position < 0 ? null : this.itemsObjs[position];
        }

        /**
         * Stores a value for a key that is not yet present. If the key already
         * exists, the stored value is left untouched.
         *
         * @param key   the key to store under
         * @param value the value to store
         * @return the value now associated with the key: {@code value} for a
         *         new key, the previously stored value for an existing key
         */
        Object put(char key, Object value) {
            if (this.itemsKeys == null) {
                this.itemsKeys = new char[]{key};
                this.itemsObjs = new Object[]{value};
                return value;
            }
            int position = Arrays.binarySearch(this.itemsKeys, key);
            if (position >= 0) {
                // Existing key: keep and return what is already stored.
                return this.itemsObjs[position];
            }
            // binarySearch returns -(insertionPoint) - 1 for a missing key.
            int insertAt = ~position;
            this.resize(insertAt);
            this.itemsKeys[insertAt] = key;
            this.itemsObjs[insertAt] = value;
            return this.itemsObjs[insertAt];
        }
    }

    public static interface Iter {
        public boolean hasNext();

        public char next();
    }
}

