/*
 * Decompiled with CFR 0.152.
 */
package gate.html;

import gate.AnnotationSet;
import gate.Document;
import gate.Factory;
import gate.FeatureMap;
import gate.Gate;
import gate.corpora.DocumentContentImpl;
import gate.corpora.RepositioningInfo;
import gate.event.StatusListener;
import gate.util.Err;
import gate.util.InvalidOffsetException;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import java.util.Stack;
import org.apache.xerces.xni.Augmentations;
import org.apache.xerces.xni.NamespaceContext;
import org.apache.xerces.xni.QName;
import org.apache.xerces.xni.XMLAttributes;
import org.apache.xerces.xni.XMLDocumentHandler;
import org.apache.xerces.xni.XMLLocator;
import org.apache.xerces.xni.XMLResourceIdentifier;
import org.apache.xerces.xni.XMLString;
import org.apache.xerces.xni.XNIException;
import org.apache.xerces.xni.parser.XMLDocumentSource;
import org.apache.xerces.xni.parser.XMLErrorHandler;
import org.apache.xerces.xni.parser.XMLParseException;
import org.cyberneko.html.HTMLEventInfo;

/**
 * XNI document handler and error handler that converts the event stream of an
 * HTML parse into the text content of a GATE {@link Document} plus one
 * annotation per HTML element.
 *
 * <p>While parsing, character data is accumulated (with whitespace runs
 * collapsed to a single space) into a temporary content buffer, and every open
 * element is tracked on a stack as a {@link CustomObject} holding its start and
 * end offsets in that buffer. When the document ends, the buffer becomes the
 * document content and every collected element is added as an annotation.
 *
 * <p>If a {@link RepositioningInfo} has been supplied via
 * {@link #setRepositioningInfo(RepositioningInfo)}, the handler also records
 * how positions in the original text map to positions in the extracted text.
 * In that case {@link #setLineOffsets(int[])} and
 * {@link #setAmpCodingInfo(RepositioningInfo)} must have been called as well,
 * because both values are dereferenced without a null check.
 */
public class NekoHtmlDocumentHandler
implements XMLDocumentHandler,
XMLErrorHandler {
    // Debug switches; none of them is read anywhere in this class.
    private static final boolean DEBUG = false;
    private static final boolean DEBUG_GENERAL = false;
    private static final boolean DEBUG_ELEMENTS = false;
    private static final boolean DEBUG_CHARACTERS = false;
    private static final boolean DEBUG_UNUSED = false;

    /** Key under which the {@link HTMLEventInfo} is looked up in the augmentations. */
    public static final String AUGMENTATIONS = "http://cyberneko.org/html/features/augmentations";

    /**
     * Orders {@link Long} offsets and {@link RepositioningInfo.PositionInfo}
     * objects by (original) position, so that a list of position infos can be
     * binary-searched using a plain {@code Long} key. Arguments of any other
     * type (or {@code null}) result in a {@link NullPointerException}.
     */
    private static final Comparator<Object> POSITION_INFO_COMPARATOR = new Comparator<Object>(){

        @Override
        public int compare(Object a, Object b) {
            Long offA = null;
            if (a instanceof Long) {
                offA = (Long)a;
            } else if (a instanceof RepositioningInfo.PositionInfo) {
                offA = ((RepositioningInfo.PositionInfo)a).getOriginalPosition();
            }
            Long offB = null;
            if (b instanceof Long) {
                offB = (Long)b;
            } else if (b instanceof RepositioningInfo.PositionInfo) {
                // Must read the position of b here; reading a (as the code
                // previously did) compared a with itself, or failed with a
                // ClassCastException when a was a Long.
                offB = ((RepositioningInfo.PositionInfo)b).getOriginalPosition();
            }
            return offA.compareTo(offB);
        }
    };

    /** Receives the original-to-extracted position mappings; {@code null} disables repositioning. */
    private RepositioningInfo reposInfo = null;
    /** Position infos consulted when computing repositioning entries for a text chunk. */
    private RepositioningInfo ampCodingInfo = null;
    /** Names of the elements whose character content is not copied into the document. */
    private Set<String> ignorableTags = null;
    /** Number of currently open elements whose name is in {@link #ignorableTags}. */
    int ignorableTagLevels = 0;
    /** A status event is fired each time the element count reaches a multiple of this value. */
    static final int ELEMENTS_RATE = 128;
    /** Offset of the start of each (0-based) line, used to turn line/column into an offset. */
    private int[] lineOffsets;
    /** Text content of the document as extracted so far. */
    private StringBuilder tmpDocContent = null;
    /** Whitespace-normalised text of the character chunk currently being read. */
    private StringBuilder contentBuffer = new StringBuilder("");
    /** {@code true} while a character chunk is being accumulated in {@link #contentBuffer}. */
    private boolean readCharacterStatus = false;
    /** Offset computed from the parser's line/column info for the start of the current chunk. */
    private int charactersStartOffset;
    /** Elements that have been started but not yet ended. */
    private Stack<CustomObject> stack = null;
    /** Document whose content is replaced when parsing ends. */
    private Document doc = null;
    /** Annotation set receiving the element annotations; resolved lazily in {@link #endDocument}. */
    private AnnotationSet basicAS;
    /** Listeners notified by {@link #fireStatusChangedEvent(String)}. */
    protected List<StatusListener> myStatusListeners = new LinkedList<StatusListener>();
    /** Number of start-element events seen so far. */
    private int elements = 0;
    /** Next id handed out to a newly created {@link CustomObject}. */
    protected int customObjectsId = 0;
    /** Elements that have been closed and are waiting to be turned into annotations. */
    private LinkedList<CustomObject> colector = null;
    /** When {@code true}, adjacent text chunks are always separated by a space. */
    protected boolean addSpaceOnUnpack = true;
    /** Whether the last emitted text chunk ended with (stripped) whitespace. */
    protected boolean previousChunkEndedWithWS = false;

    /**
     * Creates a handler that fills the given document.
     *
     * @param aDocument the document to populate; its current content size is
     *        used as the initial capacity of the content buffer
     * @param anAnnotationSet the set that receives the element annotations, or
     *        {@code null} to use the document's "Original markups" set
     * @param ignorableTags names of elements whose text is skipped; {@code null}
     *        is treated as an empty set
     */
    public NekoHtmlDocumentHandler(Document aDocument, AnnotationSet anAnnotationSet, Set<String> ignorableTags) {
        if (ignorableTags == null) {
            ignorableTags = new HashSet<String>();
        }
        this.stack = new Stack<CustomObject>();
        this.tmpDocContent = new StringBuilder(aDocument.getContent().size().intValue());
        this.colector = new LinkedList<CustomObject>();
        this.doc = aDocument;
        this.basicAS = anAnnotationSet;
        this.customObjectsId = 0;
        this.ignorableTags = ignorableTags;
        // The user configuration, when it has the option, overrides the default of true.
        if (Gate.getUserConfig().get("Document_add_space_on_unpack") != null) {
            this.addSpaceOnUnpack = Gate.getUserConfig().getBoolean("Document_add_space_on_unpack");
        }
    }

    /**
     * Sets the table used to convert the parser's line/column positions into
     * character offsets.
     *
     * @param lineOffsets offset of the first character of each line, indexed
     *        by 0-based line number
     */
    public void setLineOffsets(int[] lineOffsets) {
        this.lineOffsets = lineOffsets;
    }

    /**
     * Handles the start of an element: flushes pending text, copies the
     * attributes into a feature map, applies the start-tag layout rules and
     * pushes a new {@link CustomObject} for the element onto the stack.
     *
     * @param element the element name; only its local part is used
     * @param attributes the attributes of the element
     * @param augs ignored
     */
    @Override
    public void startElement(QName element, XMLAttributes attributes, Augmentations augs) throws XNIException {
        this.charactersAction();
        if (0 == ++this.elements % ELEMENTS_RATE) {
            this.fireStatusChangedEvent("Processed elements : " + this.elements);
        }
        if (this.ignorableTags.contains(element.localpart)) {
            ++this.ignorableTagLevels;
        }
        FeatureMap fm = Factory.newFeatureMap();
        for (int i = 0; i < attributes.getLength(); ++i) {
            fm.put(attributes.getLocalName(i), attributes.getValue(i));
        }
        this.customizeAppearanceOfDocumentWithStartTag(element.localpart);
        // The element starts (and, until text arrives, also ends) at the
        // current end of the extracted content.
        Long startIndex = Long.valueOf(this.tmpDocContent.length());
        CustomObject obj = new CustomObject(element.localpart, fm, startIndex, startIndex);
        this.stack.push(obj);
    }

    /**
     * Accumulates character data into the current chunk, replacing every run
     * of whitespace with a single space. The chunk is only emitted later by
     * {@link #charactersAction()}.
     *
     * <p>On the first call of a chunk, when repositioning is enabled, the
     * start offset of the chunk is computed from the line and column numbers
     * found in the augmentations; a warning is printed if they are missing.
     *
     * @param text the characters reported by the parser
     * @param augs augmentations expected to carry an {@link HTMLEventInfo}
     *        under {@link #AUGMENTATIONS}; may be {@code null}
     */
    @Override
    public void characters(XMLString text, Augmentations augs) throws XNIException {
        if (!this.readCharacterStatus) {
            if (this.reposInfo != null) {
                HTMLEventInfo evInfo = augs == null ? null : (HTMLEventInfo)augs.getItem(AUGMENTATIONS);
                if (evInfo == null) {
                    Err.println("Warning: could not determine proper repositioning info for character chunk \"" + new String(text.ch, text.offset, text.length) + "\" near offset " + this.charactersStartOffset + ".  Save preserving format may give incorret results.");
                } else {
                    // Line and column numbers are reported 1-based.
                    int line = evInfo.getBeginLineNumber() - 1;
                    int col = evInfo.getBeginColumnNumber() - 1;
                    this.charactersStartOffset = this.lineOffsets[line] + col;
                }
            }
            this.contentBuffer = new StringBuilder();
        }
        this.readCharacterStatus = true;
        // A space may only be appended if the buffer does not already end with one.
        boolean canAppendWS = this.contentBuffer.length() == 0 || !Character.isWhitespace(this.contentBuffer.charAt(this.contentBuffer.length() - 1));
        for (int i = text.offset; i < text.offset + text.length; ++i) {
            if (!Character.isWhitespace(text.ch[i])) {
                this.contentBuffer.append(text.ch[i]);
                canAppendWS = true;
                continue;
            }
            if (!canAppendWS) continue;
            this.contentBuffer.append(' ');
            canAppendWS = false;
        }
    }

    /**
     * Emits the character chunk accumulated by
     * {@link #characters(XMLString, Augmentations)}, if there is one.
     *
     * <p>The chunk is dropped when it is empty or when an ignorable element is
     * open. Otherwise its single leading and trailing space (if present) are
     * stripped, a separating space is inserted before it when required, the
     * text is appended to the document content, repositioning info is recorded
     * when enabled, and the end offset of every open element is moved to the
     * new end of the content.
     */
    public void charactersAction() throws XNIException {
        if (!this.readCharacterStatus) {
            return;
        }
        this.readCharacterStatus = false;
        if (this.contentBuffer.length() == 0) {
            return;
        }
        if (this.ignorableTagLevels > 0) {
            return;
        }
        boolean thisChunkStartsWithWS = Character.isWhitespace(this.contentBuffer.charAt(0));
        if (thisChunkStartsWithWS) {
            this.contentBuffer.deleteCharAt(0);
        }
        if (this.contentBuffer.length() == 0) {
            // The chunk was whitespace only: just remember that fact for the next chunk.
            this.previousChunkEndedWithWS = thisChunkStartsWithWS;
            return;
        }
        boolean trailingWhitespace = Character.isWhitespace(this.contentBuffer.charAt(this.contentBuffer.length() - 1));
        if (trailingWhitespace) {
            this.contentBuffer.setLength(this.contentBuffer.length() - 1);
        }
        int tmpDocContentSize = this.tmpDocContent.length();
        boolean incrementStartIndex = false;
        if (tmpDocContentSize != 0 && !Character.isWhitespace(this.tmpDocContent.charAt(tmpDocContentSize - 1)) && (this.previousChunkEndedWithWS || thisChunkStartsWithWS || this.addSpaceOnUnpack)) {
            this.tmpDocContent.append(' ');
            incrementStartIndex = true;
        }
        this.tmpDocContent.append((CharSequence)this.contentBuffer);
        if (this.reposInfo != null) {
            long actualStartOffset = this.charactersStartOffset;
            if (thisChunkStartsWithWS) {
                // Skip over the whitespace that was stripped from the chunk.
                actualStartOffset = this.fixStartOffsetForWhitespace(actualStartOffset);
            }
            int extractedPos = tmpDocContentSize;
            if (incrementStartIndex) {
                ++extractedPos;
            }
            this.addRepositioningInfo(this.contentBuffer.length(), (int)actualStartOffset, extractedPos);
        }
        Long end = Long.valueOf(this.tmpDocContent.length());
        for (CustomObject open : this.stack) {
            // An element that has no content yet must start after the
            // separating space rather than on it.
            if (incrementStartIndex && open.getStart().equals(open.getEnd())) {
                open.setStart(Long.valueOf(open.getStart() + 1L));
            }
            open.setEnd(end);
        }
        this.previousChunkEndedWithWS = trailingWhitespace;
    }

    /**
     * Handles the end of a regular (non-empty) element.
     *
     * @param element the element name
     * @param augs ignored
     */
    @Override
    public void endElement(QName element, Augmentations augs) throws XNIException {
        this.endElement(element, augs, false);
    }

    /**
     * Handles an empty element by treating it as a start immediately followed
     * by an end.
     *
     * @param element the element name
     * @param attributes the attributes of the element
     * @param augs passed through to the start and end handling
     */
    @Override
    public void emptyElement(QName element, XMLAttributes attributes, Augmentations augs) throws XNIException {
        this.startElement(element, attributes, augs);
        this.endElement(element, augs, true);
    }

    /**
     * Handles the end of an element: flushes pending text, pops the element on
     * top of the stack and queues it for annotation creation. An element that
     * spans no text but was not reported as an empty element gets the feature
     * {@code isEmptyAndSpan = "true"}. The end-tag layout rules are applied
     * only when the element spans some text.
     *
     * @param element the element name; only its local part is used
     * @param augs ignored
     * @param wasEmptyElement {@code true} when called for an empty element
     */
    public void endElement(QName element, Augmentations augs, boolean wasEmptyElement) throws XNIException {
        this.charactersAction();
        CustomObject obj = null;
        if (this.ignorableTags.contains(element.localpart)) {
            --this.ignorableTagLevels;
        }
        if (!this.stack.isEmpty()) {
            obj = this.stack.pop();
            if (obj.getStart().equals(obj.getEnd()) && !wasEmptyElement) {
                obj.getFM().put("isEmptyAndSpan", "true");
            }
            this.colector.add(obj);
        }
        if (obj != null && obj.getStart().longValue() != obj.getEnd().longValue()) {
            this.customizeAppearanceOfDocumentWithEndTag(element.localpart);
        }
    }

    /**
     * Finishes the parse: installs the extracted text as the document content
     * and adds one annotation per collected element, in the order in which the
     * elements were started. Elements with invalid offsets are reported and
     * discarded.
     *
     * @param augs ignored
     */
    @Override
    public void endDocument(Augmentations augs) throws XNIException {
        this.doc.setContent(new DocumentContentImpl(this.tmpDocContent.toString()));
        if (this.basicAS == null) {
            this.basicAS = this.doc.getAnnotations("Original markups");
        }
        // CustomObjects compare by id, i.e. by creation order.
        Collections.sort(this.colector);
        while (!this.colector.isEmpty()) {
            CustomObject obj = this.colector.removeFirst();
            try {
                this.basicAS.add(obj.getStart(), obj.getEnd(), obj.getElemName(), obj.getFM());
            }
            catch (InvalidOffsetException e) {
                Err.prln("Error creating an annot :" + obj + " Discarded...");
            }
        }
        this.fireStatusChangedEvent("Total elements : " + this.elements);
    }

    /** Reports a recoverable parse error by printing its stack trace to the GATE error stream. */
    @Override
    public void error(String domain, String key, XMLParseException e) {
        e.printStackTrace(Err.getPrintWriter());
    }

    /** Propagates a fatal parse error to the caller by rethrowing it. */
    @Override
    public void fatalError(String domain, String key, XMLParseException e) throws XNIException {
        throw e;
    }

    /** Flushes pending text; the processing instruction itself is ignored. */
    @Override
    public void processingInstruction(String target, XMLString data, Augmentations augs) throws XNIException {
        this.charactersAction();
    }

    /** Flushes pending text; the comment itself is ignored. */
    @Override
    public void comment(XMLString content, Augmentations augs) throws XNIException {
        this.charactersAction();
    }

    /** Flushes pending text at the start of a CDATA section. */
    @Override
    public void startCDATA(Augmentations augs) throws XNIException {
        this.charactersAction();
    }

    /** Flushes pending text at the end of a CDATA section. */
    @Override
    public void endCDATA(Augmentations augs) throws XNIException {
        this.charactersAction();
    }

    /**
     * Computes the offset that follows a leading whitespace located at the
     * given offset. If {@link #ampCodingInfo} has an entry whose original
     * position is exactly that offset, the entry's original length is skipped;
     * otherwise a single character is skipped.
     *
     * @param wsOffset offset of the whitespace
     * @return the offset just past the whitespace
     */
    private long fixStartOffsetForWhitespace(long wsOffset) {
        int wsPosInfoIndex = Collections.binarySearch(this.ampCodingInfo, wsOffset, POSITION_INFO_COMPARATOR);
        if (wsPosInfoIndex < 0) {
            return wsOffset + 1L;
        }
        return wsOffset + ((RepositioningInfo.PositionInfo)this.ampCodingInfo.get(wsPosInfoIndex)).getOriginalLength();
    }

    /**
     * Records in {@link #reposInfo} how a chunk of extracted text maps back to
     * the original text, splitting the chunk around every
     * {@link #ampCodingInfo} entry that falls inside it.
     *
     * @param contentLength length of the chunk in the extracted text
     * @param pos start offset of the chunk in the original text
     * @param extractedPos start offset of the chunk in the extracted text
     */
    public void addRepositioningInfo(int contentLength, int pos, int extractedPos) {
        long offsetInExtracted;
        long remainingLen;
        long startPos = pos;
        // Accumulated difference between original and current lengths of the
        // substitutions processed so far.
        long correction = 0L;
        for (int i = 0; i < this.ampCodingInfo.size(); ++i) {
            RepositioningInfo.PositionInfo pi = (RepositioningInfo.PositionInfo)this.ampCodingInfo.get(i);
            long substituteStart = pi.getOriginalPosition();
            if (substituteStart < startPos) continue;
            if (substituteStart > (long)(pos + contentLength) + correction) break;
            remainingLen = substituteStart - (startPos + correction);
            offsetInExtracted = startPos - (long)pos;
            if (remainingLen > 0L) {
                // Plain text between the previous substitution and this one.
                this.reposInfo.addPositionInfo(startPos + correction, remainingLen, (long)extractedPos + offsetInExtracted, remainingLen);
            }
            // The substitution itself.
            this.reposInfo.addPositionInfo(substituteStart, pi.getOriginalLength(), (long)extractedPos + offsetInExtracted + remainingLen, pi.getCurrentLength());
            startPos = startPos + remainingLen + pi.getCurrentLength();
            correction += pi.getOriginalLength() - pi.getCurrentLength();
        }
        // Plain text left after the last substitution.
        offsetInExtracted = startPos - (long)pos;
        remainingLen = (long)contentLength - offsetInExtracted;
        if (remainingLen > 0L) {
            this.reposInfo.addPositionInfo(startPos + correction, remainingLen, (long)extractedPos + offsetInExtracted, remainingLen);
        }
    }

    /**
     * Inserts line breaks into the extracted text when certain elements start
     * ({@code p}, {@code br}, {@code div}) and, if anything was inserted,
     * extends every open element to the new end of the content.
     *
     * @param tagName local name of the element being started
     */
    protected void customizeAppearanceOfDocumentWithStartTag(String tagName) {
        boolean modification = false;
        int tmpDocContentSize = this.tmpDocContent.length();
        if ("p".equals(tagName) && tmpDocContentSize >= 2 && '\n' != this.tmpDocContent.charAt(tmpDocContentSize - 2)) {
            this.tmpDocContent.append("\n");
            modification = true;
        }
        if ("br".equals(tagName)) {
            this.tmpDocContent.append("\n");
            modification = true;
        }
        if ("div".equals(tagName) && tmpDocContentSize > 0 && this.tmpDocContent.charAt(tmpDocContentSize - 1) != '\n') {
            this.tmpDocContent.append("\n");
            modification = true;
        }
        if (modification) {
            Long end = Long.valueOf(this.tmpDocContent.length());
            for (CustomObject open : this.stack) {
                open.setEnd(end);
            }
        }
    }

    /**
     * Inserts line breaks into the extracted text when certain elements end
     * (paragraphs, headings, table rows, {@code center}, list items,
     * {@code div}, {@code title}) and, if anything was inserted, extends every
     * still-open element to the new end of the content.
     *
     * @param tagName local name of the element being ended
     */
    protected void customizeAppearanceOfDocumentWithEndTag(String tagName) {
        boolean modification = false;
        if ("p".equals(tagName) || "h1".equals(tagName) || "h2".equals(tagName) || "h3".equals(tagName) || "h4".equals(tagName) || "h5".equals(tagName) || "h6".equals(tagName) || "tr".equals(tagName) || "center".equals(tagName) || "li".equals(tagName)) {
            this.tmpDocContent.append("\n");
            modification = true;
        }
        if ("div".equals(tagName) && this.tmpDocContent.length() > 0 && this.tmpDocContent.charAt(this.tmpDocContent.length() - 1) != '\n') {
            this.tmpDocContent.append("\n");
            modification = true;
        }
        if ("title".equals(tagName)) {
            this.tmpDocContent.append("\n\n");
            modification = true;
        }
        if (modification) {
            Long end = Long.valueOf(this.tmpDocContent.length());
            for (CustomObject open : this.stack) {
                open.setEnd(end);
            }
        }
    }

    /** Sets the object that receives repositioning info; {@code null} disables its collection. */
    public void setRepositioningInfo(RepositioningInfo info) {
        this.reposInfo = info;
    }

    /** Returns the object that receives repositioning info, or {@code null} if none is set. */
    public RepositioningInfo getRepositioningInfo() {
        return this.reposInfo;
    }

    /** Sets the position infos consulted while computing repositioning info. */
    public void setAmpCodingInfo(RepositioningInfo info) {
        this.ampCodingInfo = info;
    }

    /** Returns the position infos consulted while computing repositioning info. */
    public RepositioningInfo getAmpCodingInfo() {
        return this.ampCodingInfo;
    }

    /** Replaces the set of element names whose text content is skipped. */
    public void setIgnorableTags(Set<String> newTags) {
        this.ignorableTags = newTags;
    }

    /** Returns the set of element names whose text content is skipped. */
    public Set<String> getIgnorableTags() {
        return this.ignorableTags;
    }

    /** Returns the id that will be given to the next element object created. */
    public int getCustomObjectsId() {
        return this.customObjectsId;
    }

    /** Registers a listener for status messages. */
    public void addStatusListener(StatusListener listener) {
        this.myStatusListeners.add(listener);
    }

    /** Unregisters a listener for status messages. */
    public void removeStatusListener(StatusListener listener) {
        this.myStatusListeners.remove(listener);
    }

    /**
     * Sends a status message to every registered listener.
     *
     * @param text the message
     */
    protected void fireStatusChangedEvent(String text) {
        for (StatusListener listener : this.myStatusListeners) {
            listener.statusChanged(text);
        }
    }

    /** Ignored. */
    @Override
    public void doctypeDecl(String arg0, String arg1, String arg2, Augmentations arg3) throws XNIException {
    }

    /** Ignored. */
    @Override
    public void endGeneralEntity(String arg0, Augmentations arg1) throws XNIException {
    }

    /** Always returns {@code null}; the document source is not retained. */
    @Override
    public XMLDocumentSource getDocumentSource() {
        return null;
    }

    /** Ignored. */
    @Override
    public void ignorableWhitespace(XMLString arg0, Augmentations arg1) throws XNIException {
    }

    /** Ignored; the document source is not retained. */
    @Override
    public void setDocumentSource(XMLDocumentSource arg0) {
    }

    /** Ignored. */
    @Override
    public void startDocument(XMLLocator arg0, String arg1, NamespaceContext arg2, Augmentations arg3) throws XNIException {
    }

    /** Ignored. */
    @Override
    public void startGeneralEntity(String arg0, XMLResourceIdentifier arg1, String arg2, Augmentations arg3) throws XNIException {
    }

    /** Ignored. */
    @Override
    public void textDecl(String arg0, String arg1, Augmentations arg2) throws XNIException {
    }

    /** Ignored. */
    @Override
    public void xmlDecl(String arg0, String arg1, String arg2, Augmentations arg3) throws XNIException {
    }

    /** Ignored; warnings are silently dropped. */
    @Override
    public void warning(String arg0, String arg1, XMLParseException arg2) throws XNIException {
    }

    /**
     * An element seen during parsing: its name, features and the span of
     * extracted text it covers. Each instance receives the next value of the
     * enclosing handler's {@code customObjectsId} counter as its id, and
     * instances are ordered by that id.
     */
    class CustomObject
    implements Comparable<CustomObject> {
        private String elemName = null;
        private FeatureMap fm = null;
        private Long start = null;
        private Long end = null;
        private Long id = null;

        /**
         * @param anElemName the element name
         * @param aFm the features (attributes) of the element
         * @param aStart start offset in the extracted text
         * @param anEnd end offset in the extracted text
         */
        public CustomObject(String anElemName, FeatureMap aFm, Long aStart, Long anEnd) {
            this.elemName = anElemName;
            this.fm = aFm;
            this.start = aStart;
            this.end = anEnd;
            this.id = Long.valueOf(NekoHtmlDocumentHandler.this.customObjectsId++);
        }

        /** Orders objects by id, i.e. by creation order. */
        @Override
        public int compareTo(CustomObject obj) {
            return this.id.compareTo(obj.getId());
        }

        public String getElemName() {
            return this.elemName;
        }

        public FeatureMap getFM() {
            return this.fm;
        }

        public Long getStart() {
            return this.start;
        }

        public Long getEnd() {
            return this.end;
        }

        public Long getId() {
            return this.id;
        }

        public void setElemName(String anElemName) {
            this.elemName = anElemName;
        }

        public void setFM(FeatureMap aFm) {
            this.fm = aFm;
        }

        public void setStart(Long aStart) {
            this.start = aStart;
        }

        public void setEnd(Long anEnd) {
            this.end = anEnd;
        }
    }
}

