package dev.langchain4j.data.document.parser.apache.pdfbox;

import dev.langchain4j.data.document.Document;
import dev.langchain4j.data.document.DocumentParser;
import java.io.IOException;
import java.io.InputStream;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;

/* loaded from: input_file:dev/langchain4j/data/document/parser/apache/pdfbox/ApachePdfBoxDocumentParser.class */
/**
 * {@link DocumentParser} implementation that extracts the text of a PDF using Apache PDFBox
 * and wraps it in a {@link Document}.
 */
public class ApachePdfBoxDocumentParser implements DocumentParser {

    /**
     * Reads a PDF from the given stream and returns its extracted text as a {@link Document}.
     *
     * @param inputStream stream providing the PDF content
     * @return a document created from the text extracted by {@link PDFTextStripper}
     * @throws RuntimeException wrapping the underlying {@link IOException} if the PDF cannot be
     *     loaded, its text cannot be extracted, or the PDF document cannot be closed
     */
    public Document parse(InputStream inputStream) {
        // try-with-resources guarantees the PDDocument is closed even when text
        // extraction fails; previously close() was skipped on that path.
        try (PDDocument pdfDocument = PDDocument.load(inputStream)) {
            PDFTextStripper stripper = new PDFTextStripper();
            String text = stripper.getText(pdfDocument);
            return Document.from(text);
        } catch (IOException e) {
            // Keep the original contract: surface I/O failures as unchecked, preserving the cause.
            throw new RuntimeException(e);
        }
    }
}
