/*
 * Decompiled with CFR 0.152.
 */
package org.apache.nifi.processors.document;

import java.io.InputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Set;
import org.apache.commons.io.IOUtils;
import org.apache.nifi.annotation.documentation.CapabilityDescription;
import org.apache.nifi.annotation.documentation.Tags;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.flowfile.attributes.CoreAttributes;
import org.apache.nifi.processor.AbstractProcessor;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.Relationship;
import org.apache.nifi.processor.exception.ProcessException;
import org.apache.tika.Tika;

@Tags(value={"extract, document, text"})
@CapabilityDescription(value="Extract text contents from supported binary document formats using Apache Tika")
public class ExtractDocumentText
extends AbstractProcessor {
    private static final String TEXT_PLAIN = "text/plain";
    public static final Relationship REL_ORIGINAL = new Relationship.Builder().name("original").description("Success for original input FlowFiles").build();
    public static final Relationship REL_EXTRACTED = new Relationship.Builder().name("extracted").description("Success for extracted text FlowFiles").build();
    public static final Relationship REL_FAILURE = new Relationship.Builder().name("failure").description("Content extraction failed").build();
    private static final Set<Relationship> RELATIONSHIPS = Set.of(REL_ORIGINAL, REL_EXTRACTED, REL_FAILURE);

    public Set<Relationship> getRelationships() {
        return RELATIONSHIPS;
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
        FlowFile flowFile = session.get();
        if (flowFile == null) {
            return;
        }
        FlowFile extracted = session.create(flowFile);
        boolean error = false;
        try (InputStream is = session.read(flowFile);
             Reader tikaReader = new Tika().parse(is);
             OutputStream os = session.write(extracted);
             OutputStreamWriter writer = new OutputStreamWriter(os);){
            IOUtils.copy((Reader)tikaReader, (Writer)writer);
        }
        catch (Throwable t) {
            error = true;
            this.getLogger().error("Extraction Failed {}", new Object[]{flowFile, t});
            session.remove(extracted);
            session.transfer(flowFile, REL_FAILURE);
        }
        finally {
            if (!error) {
                HashMap<String, String> attributes = new HashMap<String, String>();
                attributes.put(CoreAttributes.MIME_TYPE.key(), TEXT_PLAIN);
                extracted = session.putAllAttributes(extracted, attributes);
                session.transfer(extracted, REL_EXTRACTED);
                session.transfer(flowFile, REL_ORIGINAL);
            }
        }
    }
}

