/*
 * Decompiled with CFR 0.152.
 */
package org.apache.pdfbox.tools;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintStream;
import java.io.Writer;
import java.util.Map;
import java.util.concurrent.Callable;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.pdfbox.Loader;
import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSInputStream;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentCatalog;
import org.apache.pdfbox.pdmodel.PDDocumentNameDictionary;
import org.apache.pdfbox.pdmodel.PDEmbeddedFilesNameTreeNode;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.pdmodel.common.filespecification.PDComplexFileSpecification;
import org.apache.pdfbox.pdmodel.common.filespecification.PDEmbeddedFile;
import org.apache.pdfbox.pdmodel.encryption.AccessPermission;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.pdfbox.text.TextPosition;
import org.apache.pdfbox.tools.AngleCollector;
import org.apache.pdfbox.tools.FilteredTextStripper;
import org.apache.pdfbox.tools.NullWriter;
import org.apache.pdfbox.tools.PDFText2HTML;
import org.apache.pdfbox.tools.Version;
import org.apache.pdfbox.util.Matrix;
import picocli.CommandLine;

/**
 * Command line tool that extracts the text of a PDF document, either as plain
 * text or as HTML, and writes it to a file or to the console.
 *
 * <p>After the main document has been processed, every embedded file whose
 * subtype is {@code application/pdf} is extracted into the same output as well.
 */
@CommandLine.Command(name="extracttext", header={"Extracts the text from a PDF document"}, versionProvider=Version.class, mixinStandardHelpOptions=true)
public final class ExtractText
implements Callable<Integer> {
    private static final Log LOG = LogFactory.getLog(ExtractText.class);
    private static final String STD_ENCODING = "UTF-8";
    private static final PrintStream SYSOUT = System.out;
    private static final PrintStream SYSERR = System.err;
    @CommandLine.Option(names={"-alwaysNext"}, description={"Process next page (if applicable) despite IOException (ignored when -html)"})
    private boolean alwaysNext = false;
    @CommandLine.Option(names={"-console"}, description={"Send text to console instead of file"})
    private boolean toConsole = false;
    @CommandLine.Option(names={"-debug"}, description={"Enables debug output about the time consumption of every stage"})
    private boolean debug = false;
    @CommandLine.Option(names={"-encoding"}, description={"UTF-8 or ISO-8859-1, UTF-16BE, UTF-16LE, etc. (default: ${DEFAULT-VALUE})"})
    private String encoding = "UTF-8";
    @CommandLine.Option(names={"-endPage"}, description={"The last page to extract (1 based, inclusive)"})
    private int endPage = Integer.MAX_VALUE;
    @CommandLine.Option(names={"-html"}, description={"Output in HTML format instead of raw text"})
    private boolean toHTML = false;
    @CommandLine.Option(names={"-ignoreBeads"}, description={"Disables the separation by beads"})
    private boolean ignoreBeads = false;
    @CommandLine.Option(names={"-password"}, description={"the password for the PDF or certificate in keystore."}, arity="0..1", interactive=true)
    private String password = "";
    @CommandLine.Option(names={"-rotationMagic"}, description={"Analyze each page for rotated/skewed text, rotate to 0\u00b0 and extract separately (slower, and ignored when -html)"})
    private boolean rotationMagic = false;
    @CommandLine.Option(names={"-sort"}, description={"Sort the text before writing"})
    private boolean sort = false;
    @CommandLine.Option(names={"-startPage"}, description={"The first page to start extraction (1 based)"})
    private int startPage = 1;
    @CommandLine.Option(names={"-i", "--input"}, description={"the PDF file"}, required=true)
    private File infile;
    @CommandLine.Option(names={"-o", "--output"}, description={"the exported text file"})
    private File outfile;

    /**
     * Command line entry point: parses {@code args} with picocli, runs the
     * command and terminates the JVM with the exit code returned by {@link #call()}.
     *
     * @param args the command line arguments
     */
    public static void main(String[] args) {
        System.setProperty("apple.awt.UIElement", "true");
        int exitCode = new CommandLine(new ExtractText()).execute(args);
        System.exit(exitCode);
    }

    /**
     * Runs the extraction with the options currently set on this instance.
     *
     * @return {@code 0} on success, {@code 1} if the document does not permit
     *         content extraction, {@code 4} if an {@link IOException} occurred
     */
    @Override
    public Integer call() {
        if (this.outfile == null) {
            // Default output path: the input path with its extension swapped.
            String ext = this.toHTML ? ".html" : ".txt";
            String outPath = FilenameUtils.removeExtension(this.infile.getAbsolutePath()) + ext;
            this.outfile = new File(outPath);
        }
        // Start the timer before loading so "Time for loading" covers the load itself.
        long startTime = this.startProcessing("Loading PDF " + this.infile);
        try (PDDocument document = Loader.loadPDF(this.infile, this.password)) {
            AccessPermission ap = document.getCurrentAccessPermission();
            if (!ap.canExtractContent()) {
                // Checked before the output is opened, so an existing output
                // file is not truncated when extraction is refused.
                SYSERR.println("You do not have permission to extract text");
                return 1;
            }
            this.stopProcessing("Time for loading: ", startTime);
            // Must run before the writer is created, otherwise the writer
            // would still be using the encoding this message says is ignored.
            if (this.toHTML && !STD_ENCODING.equals(this.encoding)) {
                this.encoding = STD_ENCODING;
                SYSOUT.println("The encoding parameter is ignored when writing html output.");
            }
            startTime = this.startProcessing("Starting text extraction");
            if (this.debug) {
                SYSERR.println("Writing to " + this.outfile.getAbsolutePath());
            }
            try (Writer output = this.openOutput()) {
                PDFTextStripper stripper;
                if (this.toHTML) {
                    stripper = new PDFText2HTML();
                    stripper.setSortByPosition(this.sort);
                    stripper.setShouldSeparateByBeads(!this.ignoreBeads);
                    stripper.setStartPage(this.startPage);
                    stripper.setEndPage(this.endPage);
                    stripper.writeText(document, output);
                } else {
                    stripper = this.rotationMagic ? new FilteredTextStripper() : new PDFTextStripper();
                    stripper.setSortByPosition(this.sort);
                    stripper.setShouldSeparateByBeads(!this.ignoreBeads);
                    this.extractPages(this.startPage, Math.min(this.endPage, document.getNumberOfPages()), stripper, document, output, this.rotationMagic, this.alwaysNext);
                }
                this.extractEmbeddedPdfs(document, stripper, output);
            }
            this.stopProcessing("Time for extraction: ", startTime);
            return 0;
        }
        catch (IOException ioe) {
            SYSERR.println("Error extracting text for document [" + ioe.getClass().getSimpleName() + "]: " + ioe.getMessage());
            return 4;
        }
    }

    /**
     * Opens the writer the extracted text is sent to, using the configured encoding.
     *
     * <p>Closing the returned writer also closes the stream it wraps, which is
     * {@code System.out} when {@code -console} is set.
     *
     * @return a writer on the console or on {@code outfile}
     * @throws IOException if the encoding is unsupported or the file cannot be opened
     */
    private Writer openOutput() throws IOException {
        if (this.toConsole) {
            return new OutputStreamWriter(SYSOUT, this.encoding);
        }
        return new OutputStreamWriter(new FileOutputStream(this.outfile), this.encoding);
    }

    /**
     * Extracts the text of every embedded file of {@code document} whose subtype
     * is {@code application/pdf} and appends it to {@code output}.
     *
     * @param document the document whose embedded files are inspected
     * @param stripper the stripper already configured for the main document
     * @param output   the writer receiving the extracted text
     * @throws IOException if reading or extracting an embedded document fails
     */
    private void extractEmbeddedPdfs(PDDocument document, PDFTextStripper stripper, Writer output) throws IOException {
        PDDocumentCatalog catalog = document.getDocumentCatalog();
        PDDocumentNameDictionary names = catalog.getNames();
        if (names == null) {
            return;
        }
        PDEmbeddedFilesNameTreeNode embeddedFiles = names.getEmbeddedFiles();
        if (embeddedFiles == null) {
            return;
        }
        Map<String, PDComplexFileSpecification> embeddedFileNames = embeddedFiles.getNames();
        if (embeddedFileNames == null) {
            return;
        }
        for (Map.Entry<String, PDComplexFileSpecification> ent : embeddedFileNames.entrySet()) {
            if (this.debug) {
                SYSERR.println("Processing embedded file " + ent.getKey() + ":");
            }
            PDEmbeddedFile file = ent.getValue().getEmbeddedFile();
            if (file == null || !"application/pdf".equals(file.getSubtype())) {
                continue;
            }
            if (this.debug) {
                SYSERR.println("  is PDF (size=" + file.getSize() + ")");
            }
            // try-with-resources closes both resources and lets any failure
            // propagate to the caller instead of being dropped.
            try (InputStream fis = file.createInputStream();
                 PDDocument subDoc = Loader.loadPDF(fis)) {
                if (this.toHTML) {
                    stripper.writeText(subDoc, output);
                } else {
                    this.extractPages(1, subDoc.getNumberOfPages(), stripper, subDoc, output, this.rotationMagic, this.alwaysNext);
                }
            }
        }
    }

    /**
     * Extracts the pages {@code startPage..endPage} (1 based, inclusive) one page
     * at a time.
     *
     * @param startPage     first page to extract (1 based)
     * @param endPage       last page to extract (1 based, inclusive)
     * @param stripper      the stripper used to write the text
     * @param document      the document to read from
     * @param output        the writer receiving the extracted text
     * @param rotationMagic whether each page is extracted once per detected text angle
     * @param alwaysNext    whether an {@link IOException} on one page is logged and
     *                      skipped instead of aborting the extraction
     * @throws IOException if a page fails and {@code alwaysNext} is {@code false}
     */
    private void extractPages(int startPage, int endPage, PDFTextStripper stripper, PDDocument document, Writer output, boolean rotationMagic, boolean alwaysNext) throws IOException {
        for (int p = startPage; p <= endPage; ++p) {
            stripper.setStartPage(p);
            stripper.setEndPage(p);
            try {
                if (rotationMagic) {
                    this.extractRotatedPage(p, stripper, document, output);
                } else {
                    stripper.writeText(document, output);
                }
            }
            catch (IOException ex) {
                if (!alwaysNext) {
                    throw ex;
                }
                LOG.error("Failed to process page " + p, ex);
            }
        }
    }

    /**
     * Extracts a single page once for every angle reported by an
     * {@link AngleCollector} run over that page. For each angle a content stream
     * holding the inverse rotation is prepended to the page, the text is written,
     * and the prepended stream is removed again.
     *
     * <p>The page's rotation entry is set to 0 during processing and restored
     * afterwards, also when extraction fails.
     *
     * @param pageNumber the page to extract (1 based); the stripper must already
     *                   be limited to this page
     * @param stripper   the stripper used to write the text
     * @param document   the document to read from
     * @param output     the writer receiving the extracted text
     * @throws IOException if analysing or extracting the page fails
     */
    private void extractRotatedPage(int pageNumber, PDFTextStripper stripper, PDDocument document, Writer output) throws IOException {
        PDPage page = document.getPage(pageNumber - 1);
        int rotation = page.getRotation();
        page.setRotation(0);
        try {
            // Dry run into a NullWriter: only the collected angles are used afterwards.
            AngleCollector angleCollector = new AngleCollector();
            angleCollector.setStartPage(pageNumber);
            angleCollector.setEndPage(pageNumber);
            angleCollector.writeText(document, new NullWriter());
            for (int angle : angleCollector.getAngles()) {
                try (PDPageContentStream cs = new PDPageContentStream(document, page, PDPageContentStream.AppendMode.PREPEND, false)) {
                    cs.transform(Matrix.getRotateInstance(-Math.toRadians(angle), 0.0f, 0.0f));
                }
                try {
                    stripper.writeText(document, output);
                }
                finally {
                    // Drop the prepended transformation even if writing failed,
                    // so the page contents are left as they were found.
                    ((COSArray)page.getCOSObject().getItem(COSName.CONTENTS)).remove(0);
                }
            }
        }
        finally {
            page.setRotation(rotation);
        }
    }

    /**
     * Prints {@code message} to stderr when debug output is enabled and returns
     * the current time as the start of a timed stage.
     *
     * @param message the stage description to print
     * @return the current time in milliseconds
     */
    private long startProcessing(String message) {
        if (this.debug) {
            SYSERR.println(message);
        }
        return System.currentTimeMillis();
    }

    /**
     * Prints the time elapsed since {@code startTime}, in seconds, to stderr when
     * debug output is enabled.
     *
     * @param message   the prefix printed before the elapsed time
     * @param startTime the value returned by the matching {@link #startProcessing(String)} call
     */
    private void stopProcessing(String message, long startTime) {
        if (this.debug) {
            long stopTime = System.currentTimeMillis();
            float elapsedTime = (float)(stopTime - startTime) / 1000.0f;
            SYSERR.println(message + elapsedTime + " seconds");
        }
    }

    /**
     * Computes the angle of a text position, rounded to whole degrees, from its
     * text matrix concatenated with the font matrix of its font.
     *
     * @param text the text position to inspect
     * @return the angle in degrees
     */
    static int getAngle(TextPosition text) {
        // Work on a copy so the text position's own matrix is not modified.
        Matrix m = text.getTextMatrix().clone();
        m.concatenate(text.getFont().getFontMatrix());
        return (int)Math.round(Math.toDegrees(Math.atan2(m.getShearY(), m.getScaleY())));
    }
}

