/*
 * Decompiled with CFR 0.152.
 */
package org.apache.tika.parser.html;

import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;
import org.apache.tika.config.Field;
import org.apache.tika.detect.EncodingDetector;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractEncodingDetectorParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.html.DefaultHtmlMapper;
import org.apache.tika.parser.html.HtmlHandler;
import org.apache.tika.parser.html.HtmlMapper;
import org.apache.tika.parser.html.XHTMLDowngradeHandler;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Attribute;
import org.jsoup.nodes.DataNode;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Node;
import org.jsoup.nodes.TextNode;
import org.jsoup.select.NodeFilter;
import org.jsoup.select.NodeTraversor;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.AttributesImpl;

/**
 * HTML parser that builds a jsoup {@link Document} from the input and replays
 * it as SAX events through an {@link XHTMLDowngradeHandler} wrapping an
 * {@link HtmlHandler}.
 *
 * <p>Supported media types are {@code text/html}, {@code application/xhtml+xml},
 * {@code application/vnd.wap.xhtml+xml} and {@code application/x-asp}.
 */
public class JSoupParser extends AbstractEncodingDetectorParser {
    private static final long serialVersionUID = 7895315240498733128L;

    /** Charset used when the encoding detector returns {@code null}. */
    public static final Charset DEFAULT_CHARSET = StandardCharsets.US_ASCII;

    private static final MediaType XHTML = MediaType.application("xhtml+xml");
    private static final MediaType WAP_XHTML = MediaType.application("vnd.wap.xhtml+xml");
    private static final MediaType X_ASP = MediaType.application("x-asp");
    private static final Set<MediaType> SUPPORTED_TYPES = Collections.unmodifiableSet(
            new HashSet<MediaType>(Arrays.asList(MediaType.text("html"), XHTML, WAP_XHTML, X_ASP)));

    /** Passed through to {@link HtmlHandler}; {@code false} by default. */
    @Field
    private boolean extractScripts = false;

    /** Creates a parser using the superclass's default encoding detector. */
    public JSoupParser() {
    }

    /**
     * Creates a parser that uses the given encoding detector.
     *
     * @param encodingDetector detector handed to the superclass
     */
    public JSoupParser(EncodingDetector encodingDetector) {
        super(encodingDetector);
    }

    /**
     * Returns the fixed, unmodifiable set of media types this parser handles.
     *
     * @param context ignored
     * @return the supported media types
     */
    public Set<MediaType> getSupportedTypes(ParseContext context) {
        return SUPPORTED_TYPES;
    }

    /**
     * @return the current value of the {@code extractScripts} flag
     */
    public boolean isExtractScripts() {
        return extractScripts;
    }

    /**
     * Sets the flag that is forwarded to {@link HtmlHandler} on each parse.
     *
     * @param extractScripts new flag value
     */
    @Field
    public void setExtractScripts(boolean extractScripts) {
        this.extractScripts = extractScripts;
    }

    /**
     * Detects the stream's charset, records it in the metadata, parses the
     * stream with jsoup and emits the resulting tree as SAX events.
     *
     * <p>{@code Content-Type} is rewritten (with the detected charset) only
     * when it is absent or starts with one of the supported media types;
     * {@code Content-Encoding} is always set to the charset name.
     *
     * @param stream   HTML input
     * @param handler  receiver of the SAX events
     * @param metadata read for the previous {@code Content-Type}; updated with
     *                 the content type and encoding
     * @param context  consulted for an {@link EncodingDetector} and an
     *                 {@link HtmlMapper} override
     * @throws IOException   if reading the stream fails
     * @throws SAXException  if the downstream handler rejects an event
     * @throws TikaException declared for interface compatibility
     */
    public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
            throws IOException, SAXException, TikaException {
        Charset detected = getEncodingDetector(context).detect(stream, metadata);
        Charset charset = detected == null ? DEFAULT_CHARSET : detected;

        String previous = metadata.get("Content-Type");
        MediaType contentType = null;
        if (previous == null || previous.startsWith("text/html")) {
            contentType = new MediaType(MediaType.TEXT_HTML, charset);
        } else if (previous.startsWith("application/xhtml+xml")) {
            contentType = new MediaType(XHTML, charset);
        } else if (previous.startsWith("application/vnd.wap.xhtml+xml")) {
            contentType = new MediaType(WAP_XHTML, charset);
        } else if (previous.startsWith("application/x-asp")) {
            contentType = new MediaType(X_ASP, charset);
        }
        // An unrecognised previous content type is left untouched.
        if (contentType != null) {
            metadata.set("Content-Type", contentType.toString());
        }
        metadata.set("Content-Encoding", charset.name());

        // Empty base URI: relative links are not resolved by jsoup.
        Document document = Jsoup.parse(stream, charset.name(), "");
        emitDocument(document, handler, metadata, context);
    }

    /**
     * Parses an in-memory HTML string and emits it as SAX events. Unlike
     * {@link #parse}, no charset detection is done and the metadata is not
     * modified by this method itself.
     *
     * @param html     HTML markup
     * @param handler  receiver of the SAX events
     * @param metadata handed to {@link HtmlHandler}
     * @param context  consulted for an {@link HtmlMapper} override
     * @throws SAXException if the downstream handler rejects an event
     */
    public void parseString(String html, ContentHandler handler, Metadata metadata, ParseContext context)
            throws SAXException {
        emitDocument(Jsoup.parse(html), handler, metadata, context);
    }

    /**
     * Returns the encoding detector found in the parse context, or this
     * parser's own detector when the context has none.
     *
     * @param parseContext context to consult first
     * @return the detector to use for this parse
     */
    protected EncodingDetector getEncodingDetector(ParseContext parseContext) {
        EncodingDetector fromParseContext = parseContext.get(EncodingDetector.class);
        if (fromParseContext != null) {
            return fromParseContext;
        }
        return getEncodingDetector();
    }

    /**
     * Shared tail of {@link #parse} and {@link #parseString}: switches the
     * document to quirks mode, walks it with a {@link TikaNodeFilter} and
     * brackets the walk with startDocument/endDocument.
     */
    private void emitDocument(Document document, ContentHandler handler, Metadata metadata, ParseContext context)
            throws SAXException {
        HtmlMapper mapper = context.get(HtmlMapper.class, new DefaultHtmlMapper());
        document.quirksMode(Document.QuirksMode.quirks);
        ContentHandler xhtml = new XHTMLDowngradeHandler(
                new HtmlHandler(mapper, handler, metadata, context, extractScripts));
        xhtml.startDocument();
        try {
            NodeTraversor.filter(new TikaNodeFilter(xhtml), document);
        } catch (RuntimeSAXException e) {
            // NodeFilter callbacks cannot throw checked exceptions; unwrap here.
            throw e.getWrapped();
        } finally {
            // Always close the document, even when traversal failed.
            xhtml.endDocument();
        }
    }

    /**
     * Unchecked carrier that tunnels a {@link SAXException} out of the
     * {@link NodeFilter} callbacks. Static so it holds no reference to the
     * parser, and chained so the original exception stays visible as the cause.
     */
    private static final class RuntimeSAXException extends RuntimeException {
        private static final long serialVersionUID = 1L;

        private final SAXException wrapped;

        private RuntimeSAXException(SAXException e) {
            super(e);
            this.wrapped = e;
        }

        /** @return the SAX exception passed to the constructor */
        SAXException getWrapped() {
            return wrapped;
        }
    }

    /**
     * jsoup node visitor that forwards text/data nodes as character events and
     * every other node as a start/end element pair named after
     * {@link Node#nodeName()}. Never skips or stops: every callback returns
     * {@code CONTINUE}.
     */
    private static final class TikaNodeFilter implements NodeFilter {
        private final ContentHandler handler;

        private TikaNodeFilter(ContentHandler handler) {
            this.handler = handler;
        }

        /** Called on entering a node; emits characters or a start element. */
        public NodeFilter.FilterResult head(Node node, int depth) {
            if (node instanceof TextNode) {
                emitCharacters(((TextNode) node).getWholeText());
                return NodeFilter.FilterResult.CONTINUE;
            }
            if (node instanceof DataNode) {
                emitCharacters(((DataNode) node).getWholeData());
                return NodeFilter.FilterResult.CONTINUE;
            }
            AttributesImpl attributes = new AttributesImpl();
            for (Attribute jsoupAttr : node.attributes()) {
                // No namespace and an empty type string; key doubles as local and qualified name.
                attributes.addAttribute("", jsoupAttr.getKey(), jsoupAttr.getKey(), "", jsoupAttr.getValue());
            }
            try {
                handler.startElement("", node.nodeName(), node.nodeName(), attributes);
            } catch (SAXException e) {
                throw new RuntimeSAXException(e);
            }
            return NodeFilter.FilterResult.CONTINUE;
        }

        /** Called on leaving a node; emits the matching end element for non-text nodes. */
        public NodeFilter.FilterResult tail(Node node, int depth) {
            if (node instanceof TextNode || node instanceof DataNode) {
                return NodeFilter.FilterResult.CONTINUE;
            }
            try {
                handler.endElement("", node.nodeName(), node.nodeName());
            } catch (SAXException e) {
                throw new RuntimeSAXException(e);
            }
            return NodeFilter.FilterResult.CONTINUE;
        }

        /** Sends non-null, non-empty text to the handler as a single characters event. */
        private void emitCharacters(String text) {
            if (text == null || text.isEmpty()) {
                return;
            }
            char[] chars = text.toCharArray();
            try {
                handler.characters(chars, 0, chars.length);
            } catch (SAXException e) {
                throw new RuntimeSAXException(e);
            }
        }
    }
}

