/*
 * Decompiled with CFR 0.152.
 */
package org.springframework.ai.reader.pdf;

import java.awt.Rectangle;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;
import org.apache.pdfbox.io.RandomAccessRead;
import org.apache.pdfbox.io.RandomAccessReadBuffer;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.ai.document.Document;
import org.springframework.ai.document.DocumentReader;
import org.springframework.ai.reader.pdf.config.PdfDocumentReaderConfig;
import org.springframework.ai.reader.pdf.layout.PDFLayoutTextStripperByArea;
import org.springframework.core.io.DefaultResourceLoader;
import org.springframework.core.io.Resource;
import org.springframework.util.CollectionUtils;
import org.springframework.util.StringUtils;

public class PagePdfDocumentReader
implements DocumentReader {
    private final Logger logger = LoggerFactory.getLogger(this.getClass());
    private static final String PDF_PAGE_REGION = "pdfPageRegion";
    public static final String METADATA_START_PAGE_NUMBER = "page_number";
    public static final String METADATA_END_PAGE_NUMBER = "end_page_number";
    public static final String METADATA_FILE_NAME = "file_name";
    private final PDDocument document;
    private PdfDocumentReaderConfig config;
    private String resourceFileName;

    public PagePdfDocumentReader(String resourceUrl) {
        this(new DefaultResourceLoader().getResource(resourceUrl));
    }

    public PagePdfDocumentReader(Resource pdfResource) {
        this(pdfResource, PdfDocumentReaderConfig.defaultConfig());
    }

    public PagePdfDocumentReader(String resourceUrl, PdfDocumentReaderConfig config) {
        this(new DefaultResourceLoader().getResource(resourceUrl), config);
    }

    public PagePdfDocumentReader(Resource pdfResource, PdfDocumentReaderConfig config) {
        try {
            PDFParser pdfParser = new PDFParser((RandomAccessRead)new RandomAccessReadBuffer(pdfResource.getInputStream()));
            this.document = pdfParser.parse();
            this.resourceFileName = pdfResource.getFilename();
            this.config = config;
        }
        catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    public List<Document> get() {
        ArrayList<Document> readDocuments = new ArrayList<Document>();
        try {
            PDFLayoutTextStripperByArea pdfTextStripper = new PDFLayoutTextStripperByArea();
            int pageNumber = 0;
            int pagesPerDocument = 0;
            int startPageNumber = pageNumber;
            ArrayList<String> pageTextGroupList = new ArrayList<String>();
            int totalPages = this.document.getDocumentCatalog().getPages().getCount();
            int logFrequency = totalPages > 10 ? totalPages / 10 : 1;
            int counter = 0;
            for (PDPage page : this.document.getDocumentCatalog().getPages()) {
                if (counter % logFrequency == 0 && counter / logFrequency < 10) {
                    this.logger.info("Processing PDF page: {}", (Object)(counter + 1));
                }
                ++counter;
                if (this.config.pagesPerDocument != 0 && ++pagesPerDocument >= this.config.pagesPerDocument) {
                    pagesPerDocument = 0;
                    String aggregatedPageTextGroup = pageTextGroupList.stream().collect(Collectors.joining());
                    if (StringUtils.hasText((String)aggregatedPageTextGroup)) {
                        readDocuments.add(this.toDocument(aggregatedPageTextGroup, startPageNumber, pageNumber));
                    }
                    pageTextGroupList.clear();
                    startPageNumber = pageNumber + 1;
                }
                int x0 = (int)page.getMediaBox().getLowerLeftX();
                int xW = (int)page.getMediaBox().getWidth();
                int y0 = (int)page.getMediaBox().getLowerLeftY() + this.config.pageTopMargin;
                int yW = (int)page.getMediaBox().getHeight() - (this.config.pageTopMargin + this.config.pageBottomMargin);
                pdfTextStripper.addRegion(PDF_PAGE_REGION, new Rectangle(x0, y0, xW, yW));
                pdfTextStripper.extractRegions(page);
                String pageText = pdfTextStripper.getTextForRegion(PDF_PAGE_REGION);
                if (StringUtils.hasText((String)pageText)) {
                    pageText = this.config.pageExtractedTextFormatter.format(pageText, pageNumber);
                    pageTextGroupList.add(pageText);
                }
                ++pageNumber;
                pdfTextStripper.removeRegion(PDF_PAGE_REGION);
            }
            if (!CollectionUtils.isEmpty(pageTextGroupList)) {
                readDocuments.add(this.toDocument(pageTextGroupList.stream().collect(Collectors.joining()), startPageNumber, pageNumber));
            }
            this.logger.info("Processing {} pages", (Object)totalPages);
            return readDocuments;
        }
        catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    private Document toDocument(String docText, int startPageNumber, int endPageNumber) {
        Document doc = new Document(docText);
        doc.getMetadata().put(METADATA_START_PAGE_NUMBER, startPageNumber);
        if (startPageNumber != endPageNumber) {
            doc.getMetadata().put(METADATA_END_PAGE_NUMBER, endPageNumber);
        }
        doc.getMetadata().put(METADATA_FILE_NAME, this.resourceFileName);
        return doc;
    }
}

