/*
 * Decompiled with CFR 0.152.
 */
package gate.corpora;

import gate.Document;
import gate.DocumentFormat;
import gate.FeatureMap;
import gate.Resource;
import gate.corpora.DocumentImpl;
import gate.corpora.MimeType;
import gate.corpora.RepositioningInfo;
import gate.creole.ResourceInstantiationException;
import gate.creole.metadata.AutoInstance;
import gate.creole.metadata.CreoleResource;
import gate.event.StatusListener;
import gate.util.DocumentFormatException;
import gate.xml.XmlDocumentHandler;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.URISyntaxException;
import java.util.Locale;
import java.util.Map;
import org.apache.commons.io.IOUtils;
import org.apache.log4j.Logger;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.CompositeParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

/**
 * GATE document format that hands the document's source stream to an Apache
 * Tika parser and feeds the resulting SAX events into an
 * {@link XmlDocumentHandler}.
 *
 * <p>On {@link #init()} the format registers itself for the
 * {@code application/tika} MIME type and for a fixed list of office/PDF/RTF
 * MIME types together with their usual file suffixes. After a successful parse
 * a handful of Tika metadata entries are copied onto the document's feature
 * map (see {@link #setDocumentFeatures(Metadata, Document)}).
 */
@CreoleResource(name="Apache Tika Document Format", isPrivate=true, autoinstances={@AutoInstance(hidden=true)})
public class TikaFormat
extends DocumentFormat {
    private static final long serialVersionUID = 1L;
    private static final Logger log = Logger.getLogger(TikaFormat.class);

    /** Name of the document feature that carries the MIME type. */
    private static final String MIME_TYPE_FEATURE = "MimeType";
    /** Tika metadata key for the content type. */
    private static final String CONTENT_TYPE = "Content-Type";
    /** Tika metadata key used to pass the file name as a detection hint. */
    private static final String RESOURCE_NAME = "resourceName";
    /** Prefix of the feature names that always receive the Tika value. */
    private static final String TIKA_PREFIX = "TIKA_";

    /**
     * Initialises the resource and registers every MIME type (and file
     * suffix) this format handles.
     *
     * @return this instance
     * @throws ResourceInstantiationException if the superclass initialisation fails
     */
    public Resource init() throws ResourceInstantiationException {
        super.init();
        this.setMimeType(new MimeType("application", "tika"));
        // The format's own MIME type is registered without any file suffix.
        this.assignMime(this.getMimeType());
        this.assignMime(new MimeType("application", "pdf"), "pdf");
        this.assignMime(new MimeType("application", "msword"), "doc");
        this.assignMime(new MimeType("application", "vnd.ms-powerpoint"), "ppt");
        this.assignMime(new MimeType("application", "vnd.ms-excel"), "xls");
        this.assignMime(new MimeType("application", "vnd.openxmlformats-officedocument.wordprocessingml.document"), "docx");
        this.assignMime(new MimeType("application", "vnd.openxmlformats-officedocument.presentationml.presentation"), "pptx");
        this.assignMime(new MimeType("application", "vnd.openxmlformats-officedocument.spreadsheetml.sheet"), "xlsx");
        this.assignMime(new MimeType("application", "vnd.oasis.opendocument.text"), "odt");
        this.assignMime(new MimeType("application", "vnd.oasis.opendocument.presentation"), "odp");
        this.assignMime(new MimeType("application", "vnd.oasis.opendocument.spreadsheet"), "ods");
        this.assignMime(new MimeType("application", "rtf"), "rtf");
        return this;
    }

    /**
     * Registers this format as the handler for {@code mimeType} and maps each
     * of the given file suffixes to that MIME type.
     *
     * @param mimeType the MIME type to register
     * @param fileSuffixes zero or more file suffixes to associate with it
     */
    private void assignMime(MimeType mimeType, String ... fileSuffixes) {
        String mimeString = mimeType.getType() + "/" + mimeType.getSubtype();
        mimeString2ClassHandlerMap.put(mimeString, this);
        mimeString2mimeTypeMap.put(mimeString, mimeType);
        for (String suffix : fileSuffixes) {
            suffixes2mimeTypeMap.put(suffix, mimeType);
        }
    }

    /**
     * @return always {@code true}
     */
    public Boolean supportsRepositioning() {
        return true;
    }

    /**
     * Parses the document without any repositioning information; equivalent
     * to calling the three-argument overload with two {@code null}s.
     *
     * @param document the GATE document to parse
     * @throws DocumentFormatException see
     *         {@link #unpackMarkup(Document, RepositioningInfo, RepositioningInfo)}
     */
    public void unpackMarkup(Document document) throws DocumentFormatException {
        this.unpackMarkup(document, null, null);
    }

    /**
     * Opens the document's source URL, runs the Tika parser over it with an
     * {@link XmlDocumentHandler} as the content handler, and then copies
     * selected Tika metadata onto the document's features.
     *
     * @param document the GATE document to parse; must be non-null and have a source URL
     * @param repositioningInfo passed to the handler's {@code setRepositioningInfo}; may be {@code null}
     * @param repositioningInfo2 passed to the handler's {@code setAmpCodingInfo}; may be {@code null}
     * @throws DocumentFormatException if {@code document} or its source URL is
     *         {@code null}, or wrapping any {@link IOException},
     *         {@link SAXException} or {@link TikaException} raised while parsing
     */
    public void unpackMarkup(Document document, RepositioningInfo repositioningInfo, RepositioningInfo repositioningInfo2) throws DocumentFormatException {
        if (document == null || document.getSourceUrl() == null) {
            throw new DocumentFormatException("GATE document is null or no content found. Nothing to parse!");
        }
        // Relay status messages from the handler to this format's own listeners.
        StatusListener statusListener = new StatusListener(){

            public void statusChanged(String message) {
                TikaFormat.this.fireStatusChanged(message);
            }
        };
        Parser parser = this.createParser();
        XmlDocumentHandler handler = new XmlDocumentHandler(document, this.markupElementsMap, this.element2StringMap);
        Metadata metadata = this.extractParserTips(document);
        handler.addStatusListener(statusListener);
        handler.setRepositioningInfo(repositioningInfo);
        handler.setAmpCodingInfo(repositioningInfo2);
        InputStream inputStream = null;
        try {
            inputStream = document.getSourceUrl().openStream();
            parser.parse(inputStream, handler, metadata, new ParseContext());
            this.setDocumentFeatures(metadata, document);
        }
        catch (IOException e) {
            throw new DocumentFormatException(e);
        }
        catch (SAXException e) {
            throw new DocumentFormatException(e);
        }
        catch (TikaException e) {
            throw new DocumentFormatException(e);
        }
        finally {
            // Always release the stream and detach the listener, even on failure.
            IOUtils.closeQuietly(inputStream);
            handler.removeStatusListener(statusListener);
        }
        if (document instanceof DocumentImpl) {
            ((DocumentImpl)document).setNextAnnotationId(handler.getCustomObjectsId());
        }
    }

    /**
     * Builds the parser used for every document: a {@link CompositeParser}
     * configured with the parsers of Tika's default configuration and an
     * {@link AutoDetectParser} as its fallback.
     *
     * @return a freshly created parser
     */
    private Parser createParser() {
        AutoDetectParser fallback = new AutoDetectParser();
        TikaConfig defaultConfig = TikaConfig.getDefaultConfig();
        CompositeParser compositeParser = new CompositeParser();
        compositeParser.setFallback(fallback);
        compositeParser.setParsers(defaultConfig.getParsers());
        return compositeParser;
    }

    /**
     * Copies the {@code title}, {@code Author}, {@code Comments} and
     * {@code creator} metadata entries onto the document's features, derives
     * {@code AUTHORS} from {@code AUTHOR} when the former is missing, and
     * stores the metadata's {@code Content-Type} as the {@code MimeType}
     * feature.
     *
     * @param metadata the metadata populated by the Tika parse
     * @param document the document whose features are updated
     */
    private void setDocumentFeatures(Metadata metadata, Document document) {
        FeatureMap features = document.getFeatures();
        this.setTikaFeature(metadata, "title", features);
        this.setTikaFeature(metadata, "Author", features);
        this.setTikaFeature(metadata, "Comments", features);
        this.setTikaFeature(metadata, "creator", features);
        // setTikaFeature stores names upper-cased, so the author lives under
        // "AUTHOR"; reading it back as "Author" would copy null into AUTHORS.
        if (features.get("AUTHORS") == null && features.get("AUTHOR") != null) {
            features.put("AUTHORS", features.get("AUTHOR"));
        }
        features.put(MIME_TYPE_FEATURE, metadata.get(CONTENT_TYPE));
    }

    /**
     * Copies one metadata value onto the feature map. Missing or blank values
     * are ignored. The value is always stored under {@code TIKA_<NAME>}; it is
     * additionally stored under {@code <NAME>} only when that key is not
     * already present, so existing features are never overwritten.
     *
     * @param metadata the Tika metadata to read from
     * @param metadataName the metadata key; its upper-cased form is the feature name
     * @param features the feature map to write to
     */
    private void setTikaFeature(Metadata metadata, String metadataName, Map<Object, Object> features) {
        String value = metadata.get(metadataName);
        if (value == null) {
            return;
        }
        value = value.trim();
        if (value.length() == 0) {
            return;
        }
        // Locale.ROOT keeps feature names stable regardless of the default
        // locale (e.g. Turkish would turn "title" into "TİTLE").
        String featureName = metadataName.toUpperCase(Locale.ROOT);
        if (!features.containsKey(featureName)) {
            features.put(featureName, value);
        }
        features.put(TIKA_PREFIX + featureName, value);
    }

    /**
     * Builds the metadata handed to the parser as hints: the document's
     * {@code MimeType} feature (unless it is {@code application/tika}), the
     * MIME type reported by a {@link DocumentImpl}, and, for {@code file}
     * URLs, the file name as {@code resourceName}.
     *
     * @param document the document about to be parsed
     * @return a new metadata object containing whatever hints were available
     */
    private Metadata extractParserTips(Document document) {
        Metadata metadata = new Metadata();
        Object mimeFeature = document.getFeatures().get(MIME_TYPE_FEATURE);
        if (mimeFeature instanceof String && !"application/tika".equals(mimeFeature)) {
            metadata.add(CONTENT_TYPE, (String)mimeFeature);
        }
        if (document instanceof DocumentImpl && ((DocumentImpl)document).getMimeType() != null) {
            metadata.add(CONTENT_TYPE, ((DocumentImpl)document).getMimeType());
        }
        if (document.getSourceUrl() != null && document.getSourceUrl().getProtocol().startsWith("file")) {
            try {
                File file = new File(document.getSourceUrl().toURI());
                metadata.add(RESOURCE_NAME, file.getName());
            }
            catch (URISyntaxException e) {
                // Best effort only: the file name is just a hint, so parsing goes on without it.
                log.debug("Could not extract filename from uri: " + document.getSourceUrl(), e);
            }
            catch (IllegalArgumentException e) {
                // new File(URI) rejects URIs it cannot map to a path; same best-effort handling.
                log.debug("Could not extract filename from uri: " + document.getSourceUrl(), e);
            }
        }
        return metadata;
    }
}

