Java - 使用 PDFBox 将多个图像合并为一个 PDF

标签 java pdfbox

我能够使用以下代码将多个 PDF 文件合并为一个 PDF -

/**
 * Merges the four fixed input files {@code inputPDF/001.pdf} .. {@code inputPDF/004.pdf}
 * into {@code outputImages/merged.pdf}.
 * <p>
 * The sources are handed to the merger as files; the merger opens and closes them itself,
 * so no {@code PDDocument} is loaded here and nothing is left open if the merge fails.
 * I/O failures are reported on standard error and not rethrown.
 */
public void mergePDF() {
        String[] sourcePaths = {
            "inputPDF/001.pdf",
            "inputPDF/002.pdf",
            "inputPDF/003.pdf",
            "inputPDF/004.pdf"
        };
        try {
            PDFMergerUtility pdfMerger = new PDFMergerUtility();
            pdfMerger.setDestinationFileName("outputImages/merged.pdf");
            System.out.println("Destination path set to " + pdfMerger.getDestinationFileName());
            // Sources are appended in array order, which is the page order of the result.
            for (String sourcePath : sourcePaths) {
                pdfMerger.addSource(new File(sourcePath));
            }
            // Merging the documents
            pdfMerger.mergeDocuments();
            System.out.println("Done!");
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

但是,我的要求是将多个图像(JPG、PNG)也合并为一个 PDF。

是否可以使用 PDFBox 将多个图像合并为一个 PDF?

最佳答案

我自己也曾为这个任务费了不少功夫,所以把我的代码贴在这里。合并后的文档符合 PDF/A-1b 标准。

import com.google.common.io.Resources;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Calendar;
import java.util.List;
import javax.xml.transform.TransformerException;
import org.apache.commons.io.FileUtils;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSStream;
import org.apache.pdfbox.io.IOUtils;
import org.apache.pdfbox.io.MemoryUsageSetting;
import org.apache.pdfbox.multipdf.PDFMergerUtility;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentInformation;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.pdmodel.common.PDMetadata;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.graphics.color.PDOutputIntent;
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
import org.apache.pdfbox.preflight.parser.PreflightParser;
import org.apache.xmpbox.XMPMetadata;
import org.apache.xmpbox.schema.DublinCoreSchema;
import org.apache.xmpbox.schema.PDFAIdentificationSchema;
import org.apache.xmpbox.schema.XMPBasicSchema;
import org.apache.xmpbox.type.BadFieldValueException;
import org.apache.xmpbox.xml.XmpSerializer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;


/**
 * Merges a mixed list of PDF and image inputs into a single PDF document.
 * <p>
 * Every input stream is first spooled to a temporary file. Inputs that parse as PDF are
 * merged as they are; every other input is treated as an image and wrapped in a one-page
 * A4 PDF before merging. The merged document carries document information and XMP metadata
 * declaring PDF/A-1b conformance.
 */
public final class PDFMerger {

  private static final Logger LOG = LoggerFactory.getLogger(PDFMerger.class);
  private static final String OUTPUT_CONDITION_IDENTIFIER = "sRGB IEC61966-2.1";
  public static final String DOCUMENT_CREATOR = "Mr. Meeseeks";
  public static final String DOCUMENT_SUBJECT = "Great subject";
  public static final String DOCUMENT_TITLE = "Here goes your title";

  /**
   * Creates a compound PDF document from a list of input documents.
   * <p>
   * The merged document is PDF/A-1b compliant. All source streams are closed before this
   * method returns, whether or not the merge succeeded.
   *
   * @param sources list of source document streams (PDF or image).
   * @return the merged PDF, held in memory in the returned output stream.
   * @throws IOException if anything goes wrong during PDF merge; non-I/O failures are
   *                     wrapped with the message "PDF merge problem".
   */
  public static ByteArrayOutputStream mergeFiles(final List<InputStream> sources) throws IOException {

    Path mergeDirectory = Files.createTempDirectory("merge-" + System.currentTimeMillis());
    try (ByteArrayOutputStream mergedPDFOutputStream = new ByteArrayOutputStream()) {
      LOG.debug("Merging {} source documents into one PDF", sources.size());
      PDFMergerUtility mixedPdfMerger = createMixedPdfMerger(sources, mergedPDFOutputStream, mergeDirectory);
      mergeFileStreams(mergedPDFOutputStream, mixedPdfMerger);
      return mergedPDFOutputStream;
    } catch (IOException e) {
      throw e;
    } catch (Exception e) {
      throw new IOException("PDF merge problem", e);
    } finally {
      // Close the caller's streams first so that a cleanup failure can never leave them open.
      sources.forEach(IOUtils::closeQuietly);
      deleteMergeDirectory(mergeDirectory);
    }
  }

  /**
   * Removes the temporary working directory on a best-effort basis.
   * <p>
   * A failure is only logged: throwing from the caller's {@code finally} block would discard
   * a successfully merged document or hide the exception that made the merge fail.
   */
  private static void deleteMergeDirectory(Path mergeDirectory) {
    try {
      FileUtils.deleteDirectory(mergeDirectory.toFile());
    } catch (IOException e) {
      LOG.warn("Could not delete temporary merge directory {}", mergeDirectory, e);
    }
  }

  /**
   * Attaches document information and XMP metadata to the merger and runs the merge.
   *
   * @param mergedPDFOutputStream destination stream of the merger, used here only to log the size.
   * @param pdfMerger             merger with all sources and the destination already set.
   */
  private static void mergeFileStreams(ByteArrayOutputStream mergedPDFOutputStream, PDFMergerUtility pdfMerger)
      throws IOException, BadFieldValueException, TransformerException {
    LOG.debug("Initialising PDF merge utility");
    // The COS stream backing the XMP metadata has to stay open until the merge has finished.
    try (COSStream cosStream = new COSStream()) {
      // PDF and XMP properties must be identical, otherwise document is not PDF/A compliant
      pdfMerger.setDestinationDocumentInformation(createPDFDocumentInfo());
      pdfMerger.setDestinationMetadata(createXMPMetadata(cosStream));
      pdfMerger.mergeDocuments(MemoryUsageSetting.setupTempFileOnly());
      LOG.debug("PDF merge successful, size = {} bytes", mergedPDFOutputStream.size());
    }
  }

  /**
   * Builds a merger whose sources are the given inputs, in list order, writing to the given stream.
   * <p>
   * Each input is copied into {@code mergeDirectory}; inputs that are not PDFs are converted to
   * single-page PDFs that embed the {@code sRGB.icc} colour profile loaded from the classpath.
   */
  @SuppressWarnings("UnstableApiUsage")
  private static PDFMergerUtility createMixedPdfMerger(List<InputStream> sources, ByteArrayOutputStream mergedPDFOutputStream, Path mergeDirectory) throws IOException {
    PDFMergerUtility pdfMerger = new PDFMergerUtility();
    byte[] colorProfile = org.apache.commons.io.IOUtils.toByteArray(Resources.getResource("sRGB.icc"));
    for (InputStream source : sources) {
      File file = streamToFile(mergeDirectory, source);
      File pdfSource = isPdf(file) ? file : imageToPDDocument(mergeDirectory, file, colorProfile);
      pdfMerger.addSource(pdfSource);
    }
    pdfMerger.setDestinationStream(mergedPDFOutputStream);
    return pdfMerger;
  }

  /** Creates the document information dictionary (title, creator, subject) of the merged PDF. */
  private static PDDocumentInformation createPDFDocumentInfo() {
    LOG.debug("Setting document info (title, author, subject) for merged PDF");
    PDDocumentInformation documentInformation = new PDDocumentInformation();
    documentInformation.setTitle(DOCUMENT_TITLE);
    documentInformation.setCreator(DOCUMENT_CREATOR);
    documentInformation.setSubject(DOCUMENT_SUBJECT);
    return documentInformation;
  }

  /**
   * Creates the XMP metadata of the merged PDF and serialises it into the given COS stream.
   * <p>
   * The packet contains the PDF/A identification (part 1, conformance B), Dublin Core and
   * XMP Basic schemas; title, creator and subject use the same constants as the document
   * information dictionary.
   *
   * @param cosStream stream that receives the serialised XMP packet; owned by the caller.
   * @return metadata object backed by {@code cosStream}.
   */
  private static PDMetadata createXMPMetadata(COSStream cosStream)
      throws BadFieldValueException, TransformerException, IOException {
    LOG.debug("Setting XMP metadata (title, author, subject) for merged PDF");
    XMPMetadata xmpMetadata = XMPMetadata.createXMPMetadata();

    // PDF/A-1b properties
    PDFAIdentificationSchema pdfaSchema = xmpMetadata.createAndAddPFAIdentificationSchema();
    pdfaSchema.setPart(1);
    pdfaSchema.setConformance("B");
    pdfaSchema.setAboutAsSimple("");

    // Dublin Core properties
    DublinCoreSchema dublinCoreSchema = xmpMetadata.createAndAddDublinCoreSchema();
    dublinCoreSchema.setTitle(DOCUMENT_TITLE);
    dublinCoreSchema.addCreator(DOCUMENT_CREATOR);
    dublinCoreSchema.setDescription(DOCUMENT_SUBJECT);

    // XMP Basic properties; one timestamp is shared so all three dates agree
    XMPBasicSchema basicSchema = xmpMetadata.createAndAddXMPBasicSchema();
    Calendar creationDate = Calendar.getInstance();
    basicSchema.setCreateDate(creationDate);
    basicSchema.setModifyDate(creationDate);
    basicSchema.setMetadataDate(creationDate);
    basicSchema.setCreatorTool(DOCUMENT_CREATOR);

    // Create and return XMP data structure in XML format
    try (ByteArrayOutputStream xmpOutputStream = new ByteArrayOutputStream();
         OutputStream cosXMPStream = cosStream.createOutputStream()) {
      new XmpSerializer().serialize(xmpMetadata, xmpOutputStream, true);
      cosXMPStream.write(xmpOutputStream.toByteArray());
    }
    return new PDMetadata(cosStream);
  }

  /**
   * Wraps an image file in a single-page PDF stored in the merge directory.
   *
   * @param mergeDirectory directory in which the generated PDF is created.
   * @param file           image file; its format is detected from the file content.
   * @param colorProfile   ICC profile bytes used for the document's output intent.
   * @return the generated PDF file.
   */
  private static File imageToPDDocument(Path mergeDirectory, File file, byte[] colorProfile) throws IOException {
    try (PDDocument doc = new PDDocument()) {
      PDImageXObject pdImage = PDImageXObject.createFromFileByContent(file, doc);
      drawPage(doc, pdImage);
      doc.getDocumentCatalog().addOutputIntent(createColorScheme(doc, colorProfile));
      File pdfFile = Files.createTempFile(mergeDirectory, String.valueOf(System.currentTimeMillis()), ".tmp").toFile();
      doc.save(pdfFile);
      return pdfFile;
    }
  }

  /**
   * Adds one A4 page showing the image, centred on the page.
   * <p>
   * Images wider than tall get a landscape page, all others a portrait page. The image is
   * scaled down to fit the page while keeping its aspect ratio, and is never scaled up.
   */
  private static void drawPage(PDDocument doc, PDImageXObject pdImage) throws IOException {
    // Drops the image's soft mask entry - presumably because PDF/A-1 does not allow transparency.
    pdImage.getCOSObject().setItem(COSName.SMASK, COSName.NONE);

    boolean isLandscapeMode = pdImage.getWidth() > pdImage.getHeight();
    PDRectangle pageSize = isLandscapeMode
        ? new PDRectangle(PDRectangle.A4.getHeight(), PDRectangle.A4.getWidth())
        : PDRectangle.A4;
    PDPage page = new PDPage(pageSize);

    // Limit the scale to 1 so that small images keep their original size.
    float fitScale = Math.min(pageSize.getWidth() / pdImage.getWidth(), pageSize.getHeight() / pdImage.getHeight());
    float scale = Math.min(fitScale, 1);
    float width = pdImage.getWidth() * scale;
    float height = pdImage.getHeight() * scale;

    // center the image
    float startWidth = (pageSize.getWidth() - width) / 2;
    float startHeight = (pageSize.getHeight() - height) / 2;
    try (PDPageContentStream contentStream = new PDPageContentStream(doc, page)) {
      contentStream.drawImage(pdImage, startWidth, startHeight, width, height);
    }
    doc.addPage(page);
  }

  /** Creates the sRGB output intent built from the given ICC profile bytes. */
  private static PDOutputIntent createColorScheme(PDDocument doc, byte[] colorProfile) throws IOException {
    PDOutputIntent intent = new PDOutputIntent(doc, new ByteArrayInputStream(colorProfile));
    intent.setInfo(OUTPUT_CONDITION_IDENTIFIER);
    intent.setOutputCondition(OUTPUT_CONDITION_IDENTIFIER);
    intent.setOutputConditionIdentifier(OUTPUT_CONDITION_IDENTIFIER);
    intent.setRegistryName("http://www.color.org");
    return intent;
  }

  /**
   * Tells whether the file can be parsed as a PDF.
   * <p>
   * Any parse failure means "not a PDF"; the caller then handles the file as an image.
   */
  private static boolean isPdf(File file) {
    try {
      PreflightParser preflightParser = new PreflightParser(file);
      preflightParser.parse();
      // Close the parsed document so it does not keep the temporary file open.
      try (PDDocument parsedDocument = preflightParser.getPDDocument()) {
        return true;
      }
    } catch (Exception e) {
      LOG.debug("{} could not be parsed as PDF, handling it as an image", file, e);
      return false;
    }
  }

  /**
   * Copies the stream into a new temporary file inside the given directory.
   * <p>
   * The input stream is read to its end but not closed here.
   */
  private static File streamToFile(Path tempDirectory, InputStream in) throws IOException {
    final Path tempFile = Files.createTempFile(tempDirectory, String.valueOf(System.currentTimeMillis()), ".tmp");
    try (OutputStream out = Files.newOutputStream(tempFile)) {
      IOUtils.copy(in, out);
    }
    return tempFile.toFile();
  }
}

也可以看看这个 gist,其中还提供了合并 PDF 文件的选项。

关于Java - 使用 PDFBox 将多个图像合并为一个 PDF,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/39285329/

相关文章:

Java - PDFBox - 带有 JBIG2 图像的 PDF

java - Java 编译器 API 不起作用我一直无法解析类 javax.tools.JavaCompilerTool 和其他类似的类

java - 使用斯坦福 coreNLP 进行中文句子分割

java - spring 工具套件默认 mvc 项目 Web 应用程序

java - 为自定义 View 提供阴影效果

java - PDFBox 将 PDF 转换为 TIFF。减少图像大小(以字节为单位)

java - 如何在线程上实现有保证的 sleep 时间

pdf - 使用 PDFBox 从 PDF 中获取文本行的边界框

Java PDFBox 列出页面的所有指定目的地

java - 将 PDF 评论提取为 HTML