diff --git a/.bruno/Chromium/Convert/HTML to PDF.bru b/.bruno/Chromium/Convert/HTML to PDF.bru
index 7d219d7..34f201c 100644
--- a/.bruno/Chromium/Convert/HTML to PDF.bru
+++ b/.bruno/Chromium/Convert/HTML to PDF.bru
@@ -11,7 +11,7 @@ post {
}
body:multipart-form {
- files: @file(../../test/integration/testdata/page-1-html/index.html)
+ files: @file(../test/integration/testdata/page-1-html/index.html)
~landscape: false
~printBackground: false
~scale: 1.0
@@ -50,6 +50,9 @@ body:multipart-form {
~metadata: {"Author":"Bruno","Title":"Test"}
~userPassword:
~ownerPassword:
+ ~embeds: @file(../test/integration/testdata/embed_1.xml)
+ ~embeds: @file(../test/integration/testdata/embed_2.xml)
+ ~embedsMetadata: {"embed_1.xml":{"mimeType":"text/xml","relationship":"Data"}, "embed_2.xml":{"mimeType":"text/xml","relationship":"Data"}}
~watermarkSource: text
~watermarkExpression: CONFIDENTIAL
~watermarkPages:
diff --git a/.bruno/Chromium/Convert/Markdown to PDF.bru b/.bruno/Chromium/Convert/Markdown to PDF.bru
index ac850fc..3b49f6a 100644
--- a/.bruno/Chromium/Convert/Markdown to PDF.bru
+++ b/.bruno/Chromium/Convert/Markdown to PDF.bru
@@ -51,6 +51,9 @@ body:multipart-form {
~metadata: {"Author":"Bruno","Title":"Test"}
~userPassword:
~ownerPassword:
+ ~embeds: @file(../test/integration/testdata/embed_1.xml)
+ ~embeds: @file(../test/integration/testdata/embed_2.xml)
+ ~embedsMetadata: {"embed_1.xml":{"mimeType":"text/xml","relationship":"Data"}, "embed_2.xml":{"mimeType":"text/xml","relationship":"Data"}}
~watermarkSource: text
~watermarkExpression: CONFIDENTIAL
~watermarkPages:
diff --git a/.bruno/Chromium/Convert/URL to PDF.bru b/.bruno/Chromium/Convert/URL to PDF.bru
index a40f75d..d8fd198 100644
--- a/.bruno/Chromium/Convert/URL to PDF.bru
+++ b/.bruno/Chromium/Convert/URL to PDF.bru
@@ -50,6 +50,9 @@ body:multipart-form {
~metadata: {"Author":"Bruno","Title":"Test"}
~userPassword:
~ownerPassword:
+ ~embeds: @file(../test/integration/testdata/embed_1.xml)
+ ~embeds: @file(../test/integration/testdata/embed_2.xml)
+ ~embedsMetadata: {"embed_1.xml":{"mimeType":"text/xml","relationship":"Data"}, "embed_2.xml":{"mimeType":"text/xml","relationship":"Data"}}
~watermarkSource: text
~watermarkExpression: CONFIDENTIAL
~watermarkPages:
diff --git a/.bruno/LibreOffice/Convert to PDF.bru b/.bruno/LibreOffice/Convert to PDF.bru
index 9081658..105e7a8 100644
--- a/.bruno/LibreOffice/Convert to PDF.bru
+++ b/.bruno/LibreOffice/Convert to PDF.bru
@@ -11,7 +11,7 @@ post {
}
body:multipart-form {
- files: @file(../../test/integration/testdata/page_1.docx)
+ files: @file(../test/integration/testdata/page_1.docx)
~password:
~landscape: false
~nativePageRanges:
@@ -67,6 +67,9 @@ body:multipart-form {
~metadata: {"Author":"Bruno","Title":"Test"}
~userPassword:
~ownerPassword:
+ ~embeds: @file(../test/integration/testdata/embed_1.xml)
+ ~embeds: @file(../test/integration/testdata/embed_2.xml)
+ ~embedsMetadata: {"embed_1.xml":{"mimeType":"text/xml","relationship":"Data"}, "embed_2.xml":{"mimeType":"text/xml","relationship":"Data"}}
~watermarkSource: text
~watermarkExpression: CONFIDENTIAL
~watermarkPages:
diff --git a/.bruno/PDF Engines/Embed/Embed Files.bru b/.bruno/PDF Engines/Embed/Embed Files.bru
index 30bf349..6b45070 100644
--- a/.bruno/PDF Engines/Embed/Embed Files.bru
+++ b/.bruno/PDF Engines/Embed/Embed Files.bru
@@ -11,8 +11,10 @@ post {
}
body:multipart-form {
- files: @file(../../test/integration/testdata/page_1.pdf)
- embeds: @file(../../test/integration/testdata/page_1.pdf)
+ files: @file(../test/integration/testdata/page_1.pdf)
+ embeds: @file(../test/integration/testdata/embed_1.xml)
+ embeds: @file(../test/integration/testdata/embed_2.xml)
+ embedsMetadata: {"embed_1.xml":{"mimeType":"text/xml","relationship":"Data"}, "embed_2.xml":{"mimeType":"text/xml","relationship":"Data"}}
~downloadFrom: [{"url":"https://example.com/attachment.xml","embedded":true}]
}
diff --git a/Makefile b/Makefile
index cef5aee..de1c9f5 100644
--- a/Makefile
+++ b/Makefile
@@ -76,7 +76,8 @@ PDFENGINES_WATERMARK_ENGINES=pdfcpu,pdftk
PDFENGINES_STAMP_ENGINES=pdfcpu,pdftk
PDFENGINES_ENCRYPT_ENGINES=qpdf,pdfcpu,pdftk
PDFENGINES_ROTATE_ENGINES=pdfcpu,pdftk
-PDFENGINES_EMBED_ENGINES=pdfcpu
+PDFENGINES_EMBED_ENGINES=qpdf,pdfcpu
+PDFENGINES_EMBED_METADATA_ENGINES=qpdf
PROMETHEUS_NAMESPACE=gotenberg
PROMETHEUS_COLLECT_INTERVAL=1s
PROMETHEUS_DISABLE_ROUTE_TELEMETRY=true
diff --git a/compose.yaml b/compose.yaml
index 728818b..4c1698f 100644
--- a/compose.yaml
+++ b/compose.yaml
@@ -77,6 +77,7 @@ services:
- "--pdfengines-encrypt-engines=${PDFENGINES_ENCRYPT_ENGINES}"
- "--pdfengines-rotate-engines=${PDFENGINES_ROTATE_ENGINES}"
- "--pdfengines-embed-engines=${PDFENGINES_EMBED_ENGINES}"
+ - "--pdfengines-embed-metadata-engines=${PDFENGINES_EMBED_METADATA_ENGINES}"
- "--pdfengines-disable-routes=${PDFENGINES_DISABLE_ROUTES}"
- "--prometheus-namespace=${PROMETHEUS_NAMESPACE}"
- "--prometheus-collect-interval=${PROMETHEUS_COLLECT_INTERVAL}"
diff --git a/pkg/gotenberg/mocks.go b/pkg/gotenberg/mocks.go
index ad87dfc..2b413dd 100644
--- a/pkg/gotenberg/mocks.go
+++ b/pkg/gotenberg/mocks.go
@@ -45,20 +45,21 @@ func (mod *DebuggableMock) Debug() map[string]any {
//
//nolint:dupl
type PdfEngineMock struct {
- MergeMock func(ctx context.Context, logger *slog.Logger, inputPaths []string, outputPath string) error
- SplitMock func(ctx context.Context, logger *slog.Logger, mode SplitMode, inputPath, outputDirPath string) ([]string, error)
- FlattenMock func(ctx context.Context, logger *slog.Logger, inputPath string) error
- ConvertMock func(ctx context.Context, logger *slog.Logger, formats PdfFormats, inputPath, outputPath string) error
- ReadMetadataMock func(ctx context.Context, logger *slog.Logger, inputPath string) (map[string]any, error)
- PageCountMock func(ctx context.Context, logger *slog.Logger, inputPath string) (int, error)
- WriteMetadataMock func(ctx context.Context, logger *slog.Logger, metadata map[string]any, inputPath string) error
- ReadBookmarksMock func(ctx context.Context, logger *slog.Logger, inputPath string) ([]Bookmark, error)
- EncryptMock func(ctx context.Context, logger *slog.Logger, inputPath, userPassword, ownerPassword string) error
- EmbedFilesMock func(ctx context.Context, logger *slog.Logger, filePaths []string, inputPath string) error
- WriteBookmarksMock func(ctx context.Context, logger *slog.Logger, inputPath string, bookmarks []Bookmark) error
- WatermarkMock func(ctx context.Context, logger *slog.Logger, inputPath string, stamp Stamp) error
- StampMock func(ctx context.Context, logger *slog.Logger, inputPath string, stamp Stamp) error
- RotateMock func(ctx context.Context, logger *slog.Logger, inputPath string, angle int, pages string) error
+ MergeMock func(ctx context.Context, logger *slog.Logger, inputPaths []string, outputPath string) error
+ SplitMock func(ctx context.Context, logger *slog.Logger, mode SplitMode, inputPath, outputDirPath string) ([]string, error)
+ FlattenMock func(ctx context.Context, logger *slog.Logger, inputPath string) error
+ ConvertMock func(ctx context.Context, logger *slog.Logger, formats PdfFormats, inputPath, outputPath string) error
+ ReadMetadataMock func(ctx context.Context, logger *slog.Logger, inputPath string) (map[string]any, error)
+ PageCountMock func(ctx context.Context, logger *slog.Logger, inputPath string) (int, error)
+ WriteMetadataMock func(ctx context.Context, logger *slog.Logger, metadata map[string]any, inputPath string) error
+ ReadBookmarksMock func(ctx context.Context, logger *slog.Logger, inputPath string) ([]Bookmark, error)
+ EncryptMock func(ctx context.Context, logger *slog.Logger, inputPath, userPassword, ownerPassword string) error
+ EmbedFilesMock func(ctx context.Context, logger *slog.Logger, filePaths []string, inputPath string) error
+ EmbedFilesMetadataMock func(ctx context.Context, logger *slog.Logger, metadata map[string]map[string]string, inputPath string) error
+ WriteBookmarksMock func(ctx context.Context, logger *slog.Logger, inputPath string, bookmarks []Bookmark) error
+ WatermarkMock func(ctx context.Context, logger *slog.Logger, inputPath string, stamp Stamp) error
+ StampMock func(ctx context.Context, logger *slog.Logger, inputPath string, stamp Stamp) error
+ RotateMock func(ctx context.Context, logger *slog.Logger, inputPath string, angle int, pages string) error
}
func (engine *PdfEngineMock) Merge(ctx context.Context, logger *slog.Logger, inputPaths []string, outputPath string) error {
@@ -101,6 +102,10 @@ func (engine *PdfEngineMock) EmbedFiles(ctx context.Context, logger *slog.Logger
return engine.EmbedFilesMock(ctx, logger, filePaths, inputPath)
}
+// EmbedFilesMetadata forwards the call, with its arguments unchanged, to the
+// EmbedFilesMetadataMock field and returns its result. The field is invoked
+// unconditionally, so it must be set before this method is called.
+func (engine *PdfEngineMock) EmbedFilesMetadata(ctx context.Context, logger *slog.Logger, metadata map[string]map[string]string, inputPath string) error {
+ return engine.EmbedFilesMetadataMock(ctx, logger, metadata, inputPath)
+}
+
func (engine *PdfEngineMock) WriteBookmarks(ctx context.Context, logger *slog.Logger, inputPath string, bookmarks []Bookmark) error {
return engine.WriteBookmarksMock(ctx, logger, inputPath, bookmarks)
}
diff --git a/pkg/gotenberg/pdfengine.go b/pkg/gotenberg/pdfengine.go
index 0bb99bd..7f0e9d2 100644
--- a/pkg/gotenberg/pdfengine.go
+++ b/pkg/gotenberg/pdfengine.go
@@ -201,6 +201,12 @@ type PdfEngine interface {
// TODO: attachments instead? Rename the route?
EmbedFiles(ctx context.Context, logger *slog.Logger, filePaths []string, inputPath string) error
+ // EmbedFilesMetadata sets metadata (such as MIME type and AFRelationship)
+ // on already-embedded files in a PDF. The metadata map is keyed by
+ // filename, with each value being a map of property names to values
+ // (e.g., "mimeType" and "relationship").
+ EmbedFilesMetadata(ctx context.Context, logger *slog.Logger, metadata map[string]map[string]string, inputPath string) error
+
// Watermark applies a watermark (behind page content) to a PDF file.
Watermark(ctx context.Context, logger *slog.Logger, inputPath string, stamp Stamp) error
diff --git a/pkg/modules/api/formdata.go b/pkg/modules/api/formdata.go
index d920e9f..631f3ab 100644
--- a/pkg/modules/api/formdata.go
+++ b/pkg/modules/api/formdata.go
@@ -1,6 +1,7 @@
package api
import (
+ "encoding/json"
"errors"
"fmt"
"math"
@@ -391,6 +392,38 @@ func (form *FormData) Embeds(target *[]string) *FormData {
return form
}
+// EmbedsMetadata parses the "embedsMetadata" form field (a JSON string) into
+// a map keyed by filename. Each value is a map of property names to values
+// (e.g., "mimeType" and "relationship").
+//
+// The target is left untouched when form.errors is already non-nil, or when
+// the field is missing or its first value is empty. If the value is not valid
+// JSON for a map of string maps, an error is appended to the form and the
+// target is left untouched as well.
+//
+// var metadata map[string]map[string]string
+//
+// ctx.FormData().EmbedsMetadata(&metadata)
+func (form *FormData) EmbedsMetadata(target *map[string]map[string]string) *FormData {
+ // Do not parse anything once the form has recorded an error.
+ if form.errors != nil {
+ return form
+ }
+
+ // The field is optional: absent or empty means "no metadata".
+ val, ok := form.values["embedsMetadata"]
+ if !ok || len(val) == 0 || val[0] == "" {
+ return form
+ }
+
+ // Only the first value of the field is considered.
+ raw := val[0]
+ parsed := make(map[string]map[string]string)
+
+ err := json.Unmarshal([]byte(raw), &parsed)
+ if err != nil {
+ form.append(
+ fmt.Errorf("form field 'embedsMetadata' is invalid: %w", err),
+ )
+ return form
+ }
+
+ *target = parsed
+ return form
+}
+
// MandatoryPaths binds the absolute paths of form data files, according to a
// list of file extensions, to a string slice variable. It populates an error
// if there is no file for given file extensions.
diff --git a/pkg/modules/chromium/routes.go b/pkg/modules/chromium/routes.go
index eb8afb8..3a03568 100644
--- a/pkg/modules/chromium/routes.go
+++ b/pkg/modules/chromium/routes.go
@@ -421,6 +421,7 @@ func convertUrlRoute(chromium Api, engine gotenberg.PdfEngine) api.Route {
stamp := pdfengines.FormDataPdfStamp(form, false)
stampFile := pdfengines.FormDataPdfStampFile(form)
rotateAngle, rotatePages := pdfengines.FormDataPdfRotate(form, false)
+ embedsMetadata := pdfengines.FormDataPdfEmbedsMetadata(form)
var url string
err := form.
@@ -437,7 +438,7 @@ func convertUrlRoute(chromium Api, engine gotenberg.PdfEngine) api.Route {
stamp.Expression = stampFile
}
- err = convertUrl(ctx, chromium, engine, url, options, mode, pdfFormats, metadata, userPassword, ownerPassword, embedPaths, watermark, stamp, rotateAngle, rotatePages)
+ err = convertUrl(ctx, chromium, engine, url, options, mode, pdfFormats, metadata, userPassword, ownerPassword, embedPaths, embedsMetadata, watermark, stamp, rotateAngle, rotatePages)
if err != nil {
return fmt.Errorf("convert URL to PDF: %w", err)
}
@@ -496,6 +497,7 @@ func convertHtmlRoute(chromium Api, engine gotenberg.PdfEngine) api.Route {
stamp := pdfengines.FormDataPdfStamp(form, false)
stampFile := pdfengines.FormDataPdfStampFile(form)
rotateAngle, rotatePages := pdfengines.FormDataPdfRotate(form, false)
+ embedsMetadata := pdfengines.FormDataPdfEmbedsMetadata(form)
var inputPath string
err := form.
@@ -514,7 +516,7 @@ func convertHtmlRoute(chromium Api, engine gotenberg.PdfEngine) api.Route {
url := fmt.Sprintf("file://%s", inputPath)
options.AllowedFilePrefixes = []string{ctx.DirPath()}
- err = convertUrl(ctx, chromium, engine, url, options, mode, pdfFormats, metadata, userPassword, ownerPassword, embedPaths, watermark, stamp, rotateAngle, rotatePages)
+ err = convertUrl(ctx, chromium, engine, url, options, mode, pdfFormats, metadata, userPassword, ownerPassword, embedPaths, embedsMetadata, watermark, stamp, rotateAngle, rotatePages)
if err != nil {
return fmt.Errorf("convert HTML to PDF: %w", err)
}
@@ -575,6 +577,7 @@ func convertMarkdownRoute(chromium Api, engine gotenberg.PdfEngine) api.Route {
stamp := pdfengines.FormDataPdfStamp(form, false)
stampFile := pdfengines.FormDataPdfStampFile(form)
rotateAngle, rotatePages := pdfengines.FormDataPdfRotate(form, false)
+ embedsMetadata := pdfengines.FormDataPdfEmbedsMetadata(form)
var (
inputPath string
@@ -602,7 +605,7 @@ func convertMarkdownRoute(chromium Api, engine gotenberg.PdfEngine) api.Route {
}
options.AllowedFilePrefixes = []string{ctx.DirPath()}
- err = convertUrl(ctx, chromium, engine, url, options, mode, pdfFormats, metadata, userPassword, ownerPassword, embedPaths, watermark, stamp, rotateAngle, rotatePages)
+ err = convertUrl(ctx, chromium, engine, url, options, mode, pdfFormats, metadata, userPassword, ownerPassword, embedPaths, embedsMetadata, watermark, stamp, rotateAngle, rotatePages)
if err != nil {
return fmt.Errorf("convert markdown to PDF: %w", err)
}
@@ -727,7 +730,7 @@ func markdownToHtml(ctx *api.Context, inputPath string, markdownPaths []string)
return fmt.Sprintf("file://%s", inputPath), nil
}
-func convertUrl(ctx *api.Context, chromium Api, engine gotenberg.PdfEngine, url string, options PdfOptions, mode gotenberg.SplitMode, pdfFormats gotenberg.PdfFormats, metadata map[string]any, userPassword, ownerPassword string, embedPaths []string, watermark, stamp gotenberg.Stamp, rotateAngle int, rotatePages string) error {
+func convertUrl(ctx *api.Context, chromium Api, engine gotenberg.PdfEngine, url string, options PdfOptions, mode gotenberg.SplitMode, pdfFormats gotenberg.PdfFormats, metadata map[string]any, userPassword, ownerPassword string, embedPaths []string, embedsMetadata map[string]map[string]string, watermark, stamp gotenberg.Stamp, rotateAngle int, rotatePages string) error {
outputPath := ctx.GeneratePath(".pdf")
// See https://github.com/gotenberg/gotenberg/issues/1130.
filename := ctx.OutputFilename(outputPath)
@@ -831,6 +834,11 @@ func convertUrl(ctx *api.Context, chromium Api, engine gotenberg.PdfEngine, url
return fmt.Errorf("embed files into PDFs: %w", err)
}
+ err = pdfengines.EmbedFilesMetadataStub(ctx, engine, embedsMetadata, convertOutputPaths)
+ if err != nil {
+ return fmt.Errorf("set embeds metadata: %w", err)
+ }
+
err = pdfengines.EncryptPdfStub(ctx, engine, userPassword, ownerPassword, convertOutputPaths)
if err != nil {
return fmt.Errorf("encrypt PDFs: %w", err)
diff --git a/pkg/modules/exiftool/exiftool.go b/pkg/modules/exiftool/exiftool.go
index cde6f6a..1f71c31 100644
--- a/pkg/modules/exiftool/exiftool.go
+++ b/pkg/modules/exiftool/exiftool.go
@@ -531,6 +531,11 @@ func (engine *ExifTool) Rotate(ctx context.Context, logger *slog.Logger, inputPa
return err
}
+// EmbedFilesMetadata is not available in this implementation. It ignores its
+// arguments and always returns an error wrapping
+// gotenberg.ErrPdfEngineMethodNotSupported.
+func (engine *ExifTool) EmbedFilesMetadata(ctx context.Context, logger *slog.Logger, metadata map[string]map[string]string, inputPath string) error {
+ return fmt.Errorf("set embeds metadata with ExifTool: %w", gotenberg.ErrPdfEngineMethodNotSupported)
+}
+
// Interface guards.
var (
_ gotenberg.Module = (*ExifTool)(nil)
diff --git a/pkg/modules/libreoffice/pdfengine/pdfengine.go b/pkg/modules/libreoffice/pdfengine/pdfengine.go
index c9096a0..d451b5a 100644
--- a/pkg/modules/libreoffice/pdfengine/pdfengine.go
+++ b/pkg/modules/libreoffice/pdfengine/pdfengine.go
@@ -115,6 +115,11 @@ func (engine *LibreOfficePdfEngine) EmbedFiles(ctx context.Context, logger *slog
return fmt.Errorf("embed files with LibreOffice: %w", gotenberg.ErrPdfEngineMethodNotSupported)
}
+// EmbedFilesMetadata is not available in this implementation. It ignores its
+// arguments and always returns an error wrapping
+// gotenberg.ErrPdfEngineMethodNotSupported.
+func (engine *LibreOfficePdfEngine) EmbedFilesMetadata(ctx context.Context, logger *slog.Logger, metadata map[string]map[string]string, inputPath string) error {
+ return fmt.Errorf("set embeds metadata with LibreOffice: %w", gotenberg.ErrPdfEngineMethodNotSupported)
+}
+
// Watermark is not available in this implementation.
func (engine *LibreOfficePdfEngine) Watermark(ctx context.Context, logger *slog.Logger, inputPath string, stamp gotenberg.Stamp) error {
return fmt.Errorf("watermark PDF with LibreOffice: %w", gotenberg.ErrPdfEngineMethodNotSupported)
diff --git a/pkg/modules/libreoffice/routes.go b/pkg/modules/libreoffice/routes.go
index f3e996c..896f2a3 100644
--- a/pkg/modules/libreoffice/routes.go
+++ b/pkg/modules/libreoffice/routes.go
@@ -37,6 +37,7 @@ func convertRoute(libreOffice libreofficeapi.Uno, engine gotenberg.PdfEngine) ap
stamp := pdfengines.FormDataPdfStamp(form, false)
stampFile := pdfengines.FormDataPdfStampFile(form)
angle, rotatePages := pdfengines.FormDataPdfRotate(form, false)
+ embedsMetadata := pdfengines.FormDataPdfEmbedsMetadata(form)
zeroValuedSplitMode := gotenberg.SplitMode{}
@@ -495,6 +496,11 @@ func convertRoute(libreOffice libreofficeapi.Uno, engine gotenberg.PdfEngine) ap
return fmt.Errorf("embed files into PDFs: %w", err)
}
+ err = pdfengines.EmbedFilesMetadataStub(ctx, engine, embedsMetadata, outputPaths)
+ if err != nil {
+ return fmt.Errorf("set embeds metadata: %w", err)
+ }
+
err = pdfengines.EncryptPdfStub(ctx, engine, userPassword, ownerPassword, outputPaths)
if err != nil {
return fmt.Errorf("encrypt PDFs: %w", err)
diff --git a/pkg/modules/pdfcpu/pdfcpu.go b/pkg/modules/pdfcpu/pdfcpu.go
index 889cb01..3af7fcc 100644
--- a/pkg/modules/pdfcpu/pdfcpu.go
+++ b/pkg/modules/pdfcpu/pdfcpu.go
@@ -447,6 +447,11 @@ func (engine *PdfCpu) WriteBookmarks(ctx context.Context, logger *slog.Logger, i
return nil
}
+// EmbedFilesMetadata is not available in this implementation. It ignores its
+// arguments and always returns an error wrapping
+// gotenberg.ErrPdfEngineMethodNotSupported.
+func (engine *PdfCpu) EmbedFilesMetadata(ctx context.Context, logger *slog.Logger, metadata map[string]map[string]string, inputPath string) error {
+ return fmt.Errorf("set embeds metadata with pdfcpu: %w", gotenberg.ErrPdfEngineMethodNotSupported)
+}
+
// EmbedFiles embeds files into a PDF. All files are embedded as file attachments
// without modifying the main PDF content.
func (engine *PdfCpu) EmbedFiles(ctx context.Context, logger *slog.Logger, filePaths []string, inputPath string) error {
diff --git a/pkg/modules/pdfengines/multi.go b/pkg/modules/pdfengines/multi.go
index 0c58e4e..5517c24 100644
--- a/pkg/modules/pdfengines/multi.go
+++ b/pkg/modules/pdfengines/multi.go
@@ -22,6 +22,7 @@ type multiPdfEngines struct {
writeMetadataEngines []gotenberg.PdfEngine
passwordEngines []gotenberg.PdfEngine
embedEngines []gotenberg.PdfEngine
+ embedMetadataEngines []gotenberg.PdfEngine
readBookmarksEngines []gotenberg.PdfEngine
writeBookmarksEngines []gotenberg.PdfEngine
watermarkEngines []gotenberg.PdfEngine
@@ -38,6 +39,7 @@ func newMultiPdfEngines(
writeMetadataEngines,
passwordEngines,
embedEngines,
+ embedMetadataEngines,
readBookmarksEngines,
writeBookmarksEngines,
watermarkEngines,
@@ -53,6 +55,7 @@ func newMultiPdfEngines(
writeMetadataEngines: writeMetadataEngines,
passwordEngines: passwordEngines,
embedEngines: embedEngines,
+ embedMetadataEngines: embedMetadataEngines,
readBookmarksEngines: readBookmarksEngines,
writeBookmarksEngines: writeBookmarksEngines,
watermarkEngines: watermarkEngines,
@@ -603,6 +606,43 @@ func (multi *multiPdfEngines) Rotate(ctx context.Context, logger *slog.Logger, i
return err
}
+// EmbedFilesMetadata sets metadata on embedded files using the first available
+// engine that supports it.
+//
+// The engines in embedMetadataEngines are tried one at a time, in order. The
+// first engine returning a nil error ends the call successfully; failures are
+// accumulated with errors.Join and returned together, wrapped, when no engine
+// succeeds. If the context is done while an engine is running, ctx.Err() is
+// returned immediately.
+//
+//nolint:dupl
+func (multi *multiPdfEngines) EmbedFilesMetadata(ctx context.Context, logger *slog.Logger, metadata map[string]map[string]string, inputPath string) error {
+ tracer := gotenberg.Tracer()
+ ctx, span := tracer.Start(ctx, "pdfengines.EmbedFilesMetadata", trace.WithSpanKind(trace.SpanKindInternal))
+ defer span.End()
+
+ var err error
+ // Capacity of 1 lets a still-running engine goroutine deliver its result
+ // without blocking after this method has returned on ctx.Done().
+ errChan := make(chan error, 1)
+
+ for _, engine := range multi.embedMetadataEngines {
+ go func(engine gotenberg.PdfEngine) {
+ errChan <- engine.EmbedFilesMetadata(ctx, logger, metadata, inputPath)
+ }(engine)
+
+ // Wait for this engine's result before trying the next one.
+ select {
+ case setErr := <-errChan:
+ if setErr != nil {
+ // Remember the failure and fall through to the next engine.
+ err = errors.Join(err, setErr)
+ } else {
+ span.SetStatus(codes.Ok, "")
+ return nil
+ }
+ case <-ctx.Done():
+ return ctx.Err()
+ }
+ }
+
+ // No engine succeeded.
+ err = fmt.Errorf("set embeds metadata using multi PDF engines: %w", err)
+ span.RecordError(err)
+ span.SetStatus(codes.Error, err.Error())
+
+ return err
+}
+
// Interface guards.
var (
_ gotenberg.PdfEngine = (*multiPdfEngines)(nil)
diff --git a/pkg/modules/pdfengines/pdfengines.go b/pkg/modules/pdfengines/pdfengines.go
index 38c3ab0..5f01286 100644
--- a/pkg/modules/pdfengines/pdfengines.go
+++ b/pkg/modules/pdfengines/pdfengines.go
@@ -36,6 +36,7 @@ type PdfEngines struct {
writeMetadataNames []string
encryptNames []string
embedNames []string
+ embedMetadataNames []string
readBookmarksNames []string
writeBookmarksNames []string
watermarkNames []string
@@ -59,6 +60,7 @@ func (mod *PdfEngines) Descriptor() gotenberg.ModuleDescriptor {
fs.StringSlice("pdfengines-write-metadata-engines", []string{"exiftool"}, "Set the PDF engines and their order for the write metadata feature - empty means all")
fs.StringSlice("pdfengines-encrypt-engines", []string{"qpdf", "pdftk", "pdfcpu"}, "Set the PDF engines and their order for the password protection feature - empty means all")
fs.StringSlice("pdfengines-embed-engines", []string{"pdfcpu"}, "Set the PDF engines and their order for the file embedding feature - empty means all")
+ fs.StringSlice("pdfengines-embed-metadata-engines", []string{"qpdf"}, "Set the PDF engines and their order for the embed metadata feature - empty means all")
fs.StringSlice("pdfengines-read-bookmarks-engines", []string{"pdfcpu"}, "Set the PDF engines and their order for the read bookmarks feature - empty means all")
fs.StringSlice("pdfengines-write-bookmarks-engines", []string{"pdfcpu"}, "Set the PDF engines and their order for the write bookmarks feature - empty means all")
fs.StringSlice("pdfengines-watermark-engines", []string{"pdfcpu", "pdftk"}, "Set the PDF engines and their order for the watermark feature - empty means all")
@@ -91,6 +93,7 @@ func (mod *PdfEngines) Provision(ctx *gotenberg.Context) error {
writeMetadataNames := flags.MustStringSlice("pdfengines-write-metadata-engines")
encryptNames := flags.MustStringSlice("pdfengines-encrypt-engines")
embedNames := flags.MustStringSlice("pdfengines-embed-engines")
+ embedMetadataNames := flags.MustStringSlice("pdfengines-embed-metadata-engines")
readBookmarksNames := flags.MustStringSlice("pdfengines-read-bookmarks-engines")
writeBookmarksNames := flags.MustStringSlice("pdfengines-write-bookmarks-engines")
watermarkNames := flags.MustStringSlice("pdfengines-watermark-engines")
@@ -162,6 +165,11 @@ func (mod *PdfEngines) Provision(ctx *gotenberg.Context) error {
mod.embedNames = embedNames
}
+ mod.embedMetadataNames = defaultNames
+ if len(embedMetadataNames) > 0 {
+ mod.embedMetadataNames = embedMetadataNames
+ }
+
mod.readBookmarksNames = defaultNames
if len(readBookmarksNames) > 0 {
mod.readBookmarksNames = readBookmarksNames
@@ -236,6 +244,7 @@ func (mod *PdfEngines) Validate() error {
findNonExistingEngines(mod.writeMetadataNames)
findNonExistingEngines(mod.encryptNames)
findNonExistingEngines(mod.embedNames)
+ findNonExistingEngines(mod.embedMetadataNames)
findNonExistingEngines(mod.readBookmarksNames)
findNonExistingEngines(mod.writeBookmarksNames)
findNonExistingEngines(mod.watermarkNames)
@@ -261,6 +270,7 @@ func (mod *PdfEngines) SystemMessages() []string {
fmt.Sprintf("write metadata engines - %s", strings.Join(mod.writeMetadataNames, " ")),
fmt.Sprintf("encrypt engines - %s", strings.Join(mod.encryptNames, " ")),
fmt.Sprintf("embed engines - %s", strings.Join(mod.embedNames, " ")),
+ fmt.Sprintf("embed metadata engines - %s", strings.Join(mod.embedMetadataNames, " ")),
fmt.Sprintf("read bookmarks engines - %s", strings.Join(mod.readBookmarksNames, " ")),
fmt.Sprintf("write bookmarks engines - %s", strings.Join(mod.writeBookmarksNames, " ")),
fmt.Sprintf("watermark engines - %s", strings.Join(mod.watermarkNames, " ")),
@@ -294,6 +304,7 @@ func (mod *PdfEngines) PdfEngine() (gotenberg.PdfEngine, error) {
engines(mod.writeMetadataNames),
engines(mod.encryptNames),
engines(mod.embedNames),
+ engines(mod.embedMetadataNames),
engines(mod.readBookmarksNames),
engines(mod.writeBookmarksNames),
engines(mod.watermarkNames),
diff --git a/pkg/modules/pdfengines/routes.go b/pkg/modules/pdfengines/routes.go
index b1df965..4365ef7 100644
--- a/pkg/modules/pdfengines/routes.go
+++ b/pkg/modules/pdfengines/routes.go
@@ -443,6 +443,30 @@ func FormDataPdfEmbeds(form *api.FormData) []string {
return embedPaths
}
+// FormDataPdfEmbedsMetadata extracts embeds metadata from form data.
+// The "embedsMetadata" field is a JSON string keyed by filename.
+//
+// The returned map is nil when form.EmbedsMetadata does not assign a value
+// to it.
+func FormDataPdfEmbedsMetadata(form *api.FormData) map[string]map[string]string {
+ var metadata map[string]map[string]string
+ form.EmbedsMetadata(&metadata)
+ return metadata
+}
+
+// EmbedFilesMetadataStub sets metadata on embedded files in PDFs.
+//
+// It is a no-op when metadata is empty. Otherwise the same metadata map is
+// passed to engine.EmbedFilesMetadata for each input path, in order; the
+// first failure stops the loop and is returned wrapped with the offending
+// path.
+func EmbedFilesMetadataStub(ctx *api.Context, engine gotenberg.PdfEngine, metadata map[string]map[string]string, inputPaths []string) error {
+ // Nothing requested: skip the engine entirely.
+ if len(metadata) == 0 {
+ return nil
+ }
+
+ for _, inputPath := range inputPaths {
+ err := engine.EmbedFilesMetadata(ctx, ctx.Log(), metadata, inputPath)
+ if err != nil {
+ return fmt.Errorf("set embeds metadata on PDF '%s': %w", inputPath, err)
+ }
+ }
+
+ return nil
+}
+
// FormDataPdfEncrypt extracts encryption parameters from form data.
func FormDataPdfEncrypt(form *api.FormData) (userPassword, ownerPassword string) {
form.String("userPassword", &userPassword, "")
@@ -638,6 +662,7 @@ func mergeRoute(engine gotenberg.PdfEngine) api.Route {
stamp := FormDataPdfStamp(form, false)
stampFile := FormDataPdfStampFile(form)
angle, rotatePages := FormDataPdfRotate(form, false)
+ embedsMetadata := FormDataPdfEmbedsMetadata(form)
var inputPaths []string
var flatten bool
@@ -754,6 +779,11 @@ func mergeRoute(engine gotenberg.PdfEngine) api.Route {
return fmt.Errorf("embed files into PDFs: %w", err)
}
+ err = EmbedFilesMetadataStub(ctx, engine, embedsMetadata, outputPaths)
+ if err != nil {
+ return fmt.Errorf("set embeds metadata: %w", err)
+ }
+
err = EncryptPdfStub(ctx, engine, userPassword, ownerPassword, outputPaths)
if err != nil {
return fmt.Errorf("encrypt PDFs: %w", err)
@@ -789,6 +819,7 @@ func splitRoute(engine gotenberg.PdfEngine) api.Route {
stamp := FormDataPdfStamp(form, false)
stampFile := FormDataPdfStampFile(form)
angle, rotatePages := FormDataPdfRotate(form, false)
+ embedsMetadata := FormDataPdfEmbedsMetadata(form)
var inputPaths []string
var flatten bool
@@ -856,6 +887,11 @@ func splitRoute(engine gotenberg.PdfEngine) api.Route {
return fmt.Errorf("embed files into PDFs: %w", err)
}
+ err = EmbedFilesMetadataStub(ctx, engine, embedsMetadata, convertOutputPaths)
+ if err != nil {
+ return fmt.Errorf("set embeds metadata: %w", err)
+ }
+
err = EncryptPdfStub(ctx, engine, userPassword, ownerPassword, convertOutputPaths)
if err != nil {
return fmt.Errorf("encrypt PDFs: %w", err)
@@ -1180,6 +1216,7 @@ func embedRoute(engine gotenberg.PdfEngine) api.Route {
form := ctx.FormData()
embedPaths := FormDataPdfEmbeds(form)
+ embedsMetadata := FormDataPdfEmbedsMetadata(form)
var inputPaths []string
err := form.
@@ -1193,6 +1230,11 @@ func embedRoute(engine gotenberg.PdfEngine) api.Route {
return fmt.Errorf("embed files into PDFs: %w", err)
}
+ err = EmbedFilesMetadataStub(ctx, engine, embedsMetadata, inputPaths)
+ if err != nil {
+ return fmt.Errorf("set embeds metadata: %w", err)
+ }
+
err = ctx.AddOutputPaths(inputPaths...)
if err != nil {
return fmt.Errorf("add output paths: %w", err)
diff --git a/pkg/modules/pdftk/pdftk.go b/pkg/modules/pdftk/pdftk.go
index 87c2e10..fad32cf 100644
--- a/pkg/modules/pdftk/pdftk.go
+++ b/pkg/modules/pdftk/pdftk.go
@@ -495,6 +495,11 @@ func (engine *PdfTk) Rotate(ctx context.Context, logger *slog.Logger, inputPath
return nil
}
+// EmbedFilesMetadata is not available in this implementation. It ignores its
+// arguments and always returns an error wrapping
+// gotenberg.ErrPdfEngineMethodNotSupported.
+func (engine *PdfTk) EmbedFilesMetadata(ctx context.Context, logger *slog.Logger, metadata map[string]map[string]string, inputPath string) error {
+ return fmt.Errorf("set embeds metadata with PDFtk: %w", gotenberg.ErrPdfEngineMethodNotSupported)
+}
+
// Interface guards.
var (
_ gotenberg.Module = (*PdfTk)(nil)
diff --git a/pkg/modules/qpdf/qpdf.go b/pkg/modules/qpdf/qpdf.go
index b5bd67a..8a7f76d 100644
--- a/pkg/modules/qpdf/qpdf.go
+++ b/pkg/modules/qpdf/qpdf.go
@@ -3,12 +3,14 @@ package qpdf
import (
"bytes"
"context"
+ "encoding/json"
"errors"
"fmt"
"log/slog"
"os"
"os/exec"
"path/filepath"
+ "strings"
"syscall"
"go.opentelemetry.io/otel/codes"
@@ -348,6 +350,291 @@ func (engine *QPdf) EmbedFiles(ctx context.Context, logger *slog.Logger, filePat
return err
}
+// EmbedFilesMetadata applies per-file metadata to files already embedded in the
+// PDF at inputPath: it dumps the PDF as QPDF JSON, patches the matching Filespec
+// objects, the streams their /EF entries point to and the Catalog /AF array, then
+// applies the update. It returns nil early when metadata is empty or nothing matches.
+func (engine *QPdf) EmbedFilesMetadata(ctx context.Context, logger *slog.Logger, metadata map[string]map[string]string, inputPath string) error {
+ ctx, span := gotenberg.Tracer().Start(ctx, "qpdf.EmbedFilesMetadata",
+ trace.WithSpanKind(trace.SpanKindClient),
+ trace.WithAttributes(semconv.ServerAddress(engine.binPath)),
+ )
+ defer span.End()
+
+ if len(metadata) == 0 { // nothing requested: skip the QPDF round trip
+ span.SetStatus(codes.Ok, "")
+ return nil
+ }
+
+ logger.DebugContext(ctx, fmt.Sprintf("setting embeds metadata on %s with QPDF", inputPath))
+
+ args := append([]string{inputPath}, engine.globalArgs...)
+ args = append(args, "--newline-before-endstream", "--json-output") // stdout is parsed as QPDF JSON below
+
+ output, err := engine.execCaptureOutput(ctx, args...)
+ if err != nil {
+ err = fmt.Errorf("get PDF JSON with QPDF: %w", err)
+ span.RecordError(err)
+ span.SetStatus(codes.Error, err.Error())
+ return err
+ }
+
+ objects, err := parsePdfObjects(output)
+ if err != nil {
+ span.RecordError(err)
+ span.SetStatus(codes.Error, err.Error())
+ return err
+ }
+
+ catalogRef, catalogValue, filespecRefs, updateObjects := patchFilespecMetadata(logger, objects, metadata)
+ if len(filespecRefs) == 0 { // no Filespec matched a metadata key: leave the PDF untouched
+ span.SetStatus(codes.Ok, "")
+ return nil
+ }
+
+ patchCatalogAF(catalogRef, catalogValue, filespecRefs, updateObjects) // adds the Catalog to updateObjects when one was found
+
+ err = engine.writeAndApplyUpdate(ctx, logger, inputPath, updateObjects)
+ if err != nil {
+ span.RecordError(err)
+ span.SetStatus(codes.Error, err.Error())
+ return err
+ }
+
+ span.SetStatus(codes.Ok, "")
+ return nil
+}
+
+// execCaptureOutput runs the QPDF binary with args and returns what it wrote to
+// stdout. It calls exec.CommandContext directly because gotenberg.Cmd does not
+// support capturing stdout (it only pipes to debug logs).
+func (engine *QPdf) execCaptureOutput(ctx context.Context, args ...string) ([]byte, error) {
+ cmd := exec.CommandContext(ctx, engine.binPath, args...) //nolint:gosec
+ cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true} // run QPDF in its own process group
+ return cmd.Output()
+}
+
+// parsePdfObjects decodes QPDF JSON v2 output and returns its objects map, i.e. the second element of the top-level "qpdf" array, with each object left as raw JSON.
+func parsePdfObjects(output []byte) (map[string]json.RawMessage, error) {
+ var pdfJSON struct {
+ Qpdf []json.RawMessage `json:"qpdf"` // element 1 is the objects map; element 0 is not read here
+ }
+ if err := json.Unmarshal(output, &pdfJSON); err != nil {
+ return nil, fmt.Errorf("parse PDF JSON: %w", err)
+ }
+ if len(pdfJSON.Qpdf) < 2 { // the objects map is expected at index 1
+ return nil, fmt.Errorf("unexpected QPDF JSON structure: expected at least 2 elements")
+ }
+
+ var objects map[string]json.RawMessage
+ if err := json.Unmarshal(pdfJSON.Qpdf[1], &objects); err != nil {
+ return nil, fmt.Errorf("parse QPDF objects: %w", err)
+ }
+
+ return objects, nil
+}
+
+// patchFilespecMetadata scans QPDF objects for the /Catalog and for /Filespec
+// dictionaries whose /UF (or, when /UF is empty, /F) name is a key of metadata.
+// For each match it sets /AFRelationship and the stream /Subtype as requested. It
+// returns the catalog ref and value, the matched refs and the update objects map.
+func patchFilespecMetadata(logger *slog.Logger, objects map[string]json.RawMessage, metadata map[string]map[string]string) (string, map[string]any, []string, map[string]any) {
+ updateObjects := make(map[string]any)
+ var catalogRef string
+ var catalogValue map[string]any
+ var filespecRefs []string
+
+ for ref, raw := range objects {
+ var obj map[string]json.RawMessage
+ if err := json.Unmarshal(raw, &obj); err != nil {
+ continue // not a JSON object: silently skipped
+ }
+
+ valueRaw, hasValue := obj["value"]
+ if !hasValue { // only entries with a "value" key are inspected here
+ continue
+ }
+
+ var value map[string]any
+ if err := json.Unmarshal(valueRaw, &value); err != nil {
+ continue // value is not a dictionary: silently skipped
+ }
+
+ typeVal, _ := value["/Type"].(string) // empty when /Type is missing or not a string
+
+ if typeVal == "/Catalog" {
+ catalogRef = ref
+ catalogValue = value
+ }
+
+ if typeVal == "/Filespec" {
+ uf, _ := value["/UF"].(string)
+ if uf == "" {
+ uf, _ = value["/F"].(string)
+ }
+
+ cleanUf := stripQpdfStringPrefix(uf) // metadata keys carry no QPDF type prefix
+
+ meta, exists := metadata[cleanUf]
+ if !exists {
+ continue
+ }
+
+ if rel, ok := meta["relationship"]; ok {
+ value["/AFRelationship"] = "/" + rel // stored as a name, hence the leading slash
+ }
+
+ if mimeType, ok := meta["mimeType"]; ok {
+ if ef, ok := value["/EF"].(map[string]any); ok {
+ efRef, _ := ef["/F"].(string)
+ if efRef != "" {
+ setStreamSubtype(logger, objects, updateObjects, efRef, mimeType)
+ }
+ }
+ }
+
+ filespecRefs = append(filespecRefs, ref) // recorded for every matched Filespec, whichever keys were set
+ updateObjects[ref] = map[string]any{"value": value}
+ }
+ }
+
+ return catalogRef, catalogValue, filespecRefs, updateObjects
+}
+
+// patchCatalogAF appends to the Catalog /AF array every filespec reference it does not already list, then queues the modified Catalog in updateObjects.
+func patchCatalogAF(catalogRef string, catalogValue map[string]any, filespecRefs []string, updateObjects map[string]any) {
+ if catalogRef == "" || catalogValue == nil { // no Catalog to patch
+ return
+ }
+
+ afSet := make(map[string]bool)
+ existingAF, _ := catalogValue["/AF"].([]any) // nil when /AF is absent or not an array; it is then rebuilt from scratch
+ for _, r := range existingAF {
+ if s, ok := r.(string); ok {
+ afSet[s] = true
+ }
+ }
+ for _, ref := range filespecRefs {
+ // Object references in values use "9 0 R" format,
+ // not the "obj:9 0 R" key format.
+ valRef := strings.TrimPrefix(ref, "obj:")
+ if !afSet[valRef] { // afSet only covers pre-existing entries, not refs appended in this loop
+ existingAF = append(existingAF, valRef)
+ }
+ }
+ catalogValue["/AF"] = existingAF
+ updateObjects[catalogRef] = map[string]any{"value": catalogValue}
+}
+
+// writeAndApplyUpdate serializes updateObjects as a QPDF JSON v2 document into a
+// temp file created next to inputPath, then runs QPDF with --update-from-json and --replace-input.
+func (engine *QPdf) writeAndApplyUpdate(ctx context.Context, logger *slog.Logger, inputPath string, updateObjects map[string]any) error {
+ updateJSON := map[string]any{
+ "qpdf": []any{
+ map[string]any{
+ "jsonversion": 2,
+ "pushedinheritedpageresources": false,
+ "calledgetallpages": false,
+ "maxobjectid": 0,
+ },
+ updateObjects,
+ },
+ }
+
+ jsonBytes, err := json.Marshal(updateJSON)
+ if err != nil {
+ return fmt.Errorf("marshal update JSON: %w", err)
+ }
+
+ tmpFile, err := os.CreateTemp(filepath.Dir(inputPath), "qpdf-embeds-metadata-*.json")
+ if err != nil {
+ return fmt.Errorf("create temp file for update JSON: %w", err)
+ }
+ defer os.Remove(tmpFile.Name()) // best-effort cleanup; the removal error is ignored
+
+ if _, err := tmpFile.Write(jsonBytes); err != nil {
+ tmpFile.Close() // already failing: the close error is dropped
+ return fmt.Errorf("write update JSON: %w", err)
+ }
+ if err := tmpFile.Close(); err != nil { // closed before QPDF is asked to read it
+ return fmt.Errorf("close temp file: %w", err)
+ }
+
+ updateArgs := make([]string, 0, 5+len(engine.globalArgs))
+ updateArgs = append(updateArgs, inputPath)
+ updateArgs = append(updateArgs, engine.globalArgs...)
+ updateArgs = append(updateArgs, "--newline-before-endstream")
+ updateArgs = append(updateArgs, "--update-from-json="+tmpFile.Name())
+ updateArgs = append(updateArgs, "--replace-input")
+
+ cmd, err := gotenberg.CommandContext(ctx, logger, engine.binPath, updateArgs...)
+ if err != nil {
+ return fmt.Errorf("create command for JSON update: %w", err)
+ }
+
+ _, err = cmd.Exec()
+ if err != nil {
+ return fmt.Errorf("update embeds metadata with QPDF: %w", err)
+ }
+
+ return nil
+}
+
+// setStreamSubtype looks up the stream object that ref points to and queues it in
+// updateObjects with /Subtype set to "/"+mimeType; on any failure it logs a warning and queues nothing.
+func setStreamSubtype(logger *slog.Logger, objects map[string]json.RawMessage, updateObjects map[string]any, ref, mimeType string) {
+ objKey := ref
+ if !strings.HasPrefix(objKey, "obj:") { // objects is keyed with an "obj:" prefix; accept refs with or without it
+ objKey = "obj:" + objKey
+ }
+ raw, ok := objects[objKey]
+ if !ok {
+ logger.Warn(fmt.Sprintf("set stream subtype on %s: object not found", ref))
+ return
+ }
+
+ var obj map[string]json.RawMessage
+ if err := json.Unmarshal(raw, &obj); err != nil {
+ logger.Warn(fmt.Sprintf("set stream subtype on %s: unmarshal object: %s", ref, err))
+ return
+ }
+
+ streamRaw, ok := obj["stream"]
+ if !ok {
+ logger.Warn(fmt.Sprintf("set stream subtype on %s: no stream key", ref))
+ return
+ }
+
+ var stream map[string]any
+ if err := json.Unmarshal(streamRaw, &stream); err != nil {
+ logger.Warn(fmt.Sprintf("set stream subtype on %s: unmarshal stream: %s", ref, err))
+ return
+ }
+
+ dict, ok := stream["dict"].(map[string]any)
+ if !ok {
+ logger.Warn(fmt.Sprintf("set stream subtype on %s: stream dict is not a map", ref))
+ return
+ }
+
+ // QPDF JSON uses literal name syntax; it handles PDF name
+ // encoding internally when writing the binary PDF.
+ dict["/Subtype"] = "/" + mimeType
+ stream["dict"] = dict
+ updateObjects[objKey] = map[string]any{"stream": stream} // the whole decoded stream entry is queued, not just the dict
+}
+
+// stripQpdfStringPrefix removes the type prefix that QPDF adds to JSON string values:
+// "u:" (Unicode), "b:" (binary) or "e:" (encoded). Strings without one are returned unchanged.
+func stripQpdfStringPrefix(s string) string {
+ for _, prefix := range []string{"u:", "b:", "e:"} {
+ if strings.HasPrefix(s, prefix) {
+ return s[len(prefix):] // only the leading prefix is removed; the rest is returned verbatim
+ }
+ }
+ return s
+}
+
// Watermark is not available in this implementation.
func (engine *QPdf) Watermark(ctx context.Context, logger *slog.Logger, inputPath string, stamp gotenberg.Stamp) error {
_, span := gotenberg.Tracer().Start(ctx, "qpdf.Watermark",
diff --git a/pkg/modules/qpdf/qpdf_test.go b/pkg/modules/qpdf/qpdf_test.go
new file mode 100644
index 0000000..1c61569
--- /dev/null
+++ b/pkg/modules/qpdf/qpdf_test.go
@@ -0,0 +1,271 @@
+package qpdf
+
+import (
+ "encoding/json"
+ "log/slog"
+ "os"
+ "testing"
+)
+
+func TestStripQpdfStringPrefix(t *testing.T) { // table-driven check of stripQpdfStringPrefix
+ tests := []struct {
+ name string
+ input string
+ expected string
+ }{
+ {"unicode prefix", "u:factur-x.xml", "factur-x.xml"},
+ {"binary prefix", "b:binary.bin", "binary.bin"},
+ {"encoded prefix", "e:encoded.txt", "encoded.txt"},
+ {"no prefix", "plain.xml", "plain.xml"},
+ {"empty string", "", ""},
+ {"prefix only", "u:", ""},
+ {"colon in value", "u:file:name.xml", "file:name.xml"}, // later colons must survive
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ got := stripQpdfStringPrefix(tt.input)
+ if got != tt.expected {
+ t.Errorf("stripQpdfStringPrefix(%q) = %q, want %q", tt.input, got, tt.expected)
+ }
+ })
+ }
+}
+
+func TestParsePdfObjects(t *testing.T) { // table-driven check of parsePdfObjects on valid and malformed input
+ tests := []struct {
+ name string
+ input string
+ wantKeys []string // keys that must be present; extra keys are not checked
+ wantError bool
+ }{
+ {
+ name: "valid QPDF JSON v2",
+ input: `{"qpdf":[{"jsonversion":2},{"obj:1 0 R":{"value":{"/Type":"/Catalog"}}}]}`,
+ wantKeys: []string{"obj:1 0 R"},
+ },
+ {
+ name: "invalid JSON",
+ input: `not json`,
+ wantError: true,
+ },
+ {
+ name: "empty qpdf array",
+ input: `{"qpdf":[]}`,
+ wantError: true,
+ },
+ {
+ name: "only header element",
+ input: `{"qpdf":[{"jsonversion":2}]}`,
+ wantError: true,
+ },
+ {
+ name: "multiple objects",
+ input: `{"qpdf":[{},{"obj:1 0 R":{"value":{}},"obj:2 0 R":{"value":{}}}]}`,
+ wantKeys: []string{"obj:1 0 R", "obj:2 0 R"},
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ objects, err := parsePdfObjects([]byte(tt.input))
+ if tt.wantError {
+ if err == nil {
+ t.Error("expected error, got nil")
+ }
+ return // the returned map is not inspected on the error path
+ }
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
+ for _, key := range tt.wantKeys {
+ if _, ok := objects[key]; !ok {
+ t.Errorf("expected key %q in objects", key)
+ }
+ }
+ })
+ }
+}
+
+func TestPatchFilespecMetadata(t *testing.T) { // exercises patchFilespecMetadata on hand-built object maps
+ logger := slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelError})) // warnings stay silent
+
+ t.Run("sets AFRelationship on matching Filespec", func(t *testing.T) {
+ objects := map[string]json.RawMessage{
+ "obj:1 0 R": json.RawMessage(`{"value":{"/Type":"/Catalog"}}`),
+ "obj:2 0 R": json.RawMessage(`{"value":{"/Type":"/Filespec","/UF":"u:factur-x.xml"}}`),
+ }
+ metadata := map[string]map[string]string{
+ "factur-x.xml": {"relationship": "Data"},
+ }
+
+ catalogRef, _, filespecRefs, updateObjects := patchFilespecMetadata(logger, objects, metadata)
+
+ if catalogRef != "obj:1 0 R" {
+ t.Errorf("catalogRef = %q, want %q", catalogRef, "obj:1 0 R")
+ }
+ if len(filespecRefs) != 1 || filespecRefs[0] != "obj:2 0 R" {
+ t.Errorf("filespecRefs = %v, want [obj:2 0 R]", filespecRefs)
+ }
+ updated, ok := updateObjects["obj:2 0 R"]
+ if !ok {
+ t.Fatal("expected obj:2 0 R in updateObjects")
+ }
+ value := updated.(map[string]any)["value"].(map[string]any)
+ if value["/AFRelationship"] != "/Data" {
+ t.Errorf("/AFRelationship = %v, want /Data", value["/AFRelationship"])
+ }
+ })
+
+ t.Run("skips Filespec with no matching metadata", func(t *testing.T) {
+ objects := map[string]json.RawMessage{
+ "obj:1 0 R": json.RawMessage(`{"value":{"/Type":"/Filespec","/UF":"u:other.xml"}}`),
+ }
+ metadata := map[string]map[string]string{
+ "factur-x.xml": {"relationship": "Data"},
+ }
+
+ _, _, filespecRefs, _ := patchFilespecMetadata(logger, objects, metadata)
+ if len(filespecRefs) != 0 {
+ t.Errorf("filespecRefs = %v, want empty", filespecRefs)
+ }
+ })
+
+ t.Run("falls back to /F when /UF is absent", func(t *testing.T) {
+ objects := map[string]json.RawMessage{
+ "obj:1 0 R": json.RawMessage(`{"value":{"/Type":"/Filespec","/F":"u:factur-x.xml"}}`),
+ }
+ metadata := map[string]map[string]string{
+ "factur-x.xml": {"relationship": "Alternative"},
+ }
+
+ _, _, filespecRefs, updateObjects := patchFilespecMetadata(logger, objects, metadata)
+ if len(filespecRefs) != 1 {
+ t.Fatalf("filespecRefs = %v, want 1 entry", filespecRefs)
+ }
+ value := updateObjects["obj:1 0 R"].(map[string]any)["value"].(map[string]any)
+ if value["/AFRelationship"] != "/Alternative" {
+ t.Errorf("/AFRelationship = %v, want /Alternative", value["/AFRelationship"])
+ }
+ })
+
+ t.Run("sets stream Subtype via EF reference", func(t *testing.T) {
+ objects := map[string]json.RawMessage{
+ "obj:2 0 R": json.RawMessage(`{"value":{"/Type":"/Filespec","/UF":"u:factur-x.xml","/EF":{"/F":"3 0 R"}}}`),
+ "obj:3 0 R": json.RawMessage(`{"stream":{"dict":{"/Type":"/EmbeddedFile"}}}`),
+ }
+ metadata := map[string]map[string]string{
+ "factur-x.xml": {"mimeType": "text/xml"},
+ }
+
+ _, _, _, updateObjects := patchFilespecMetadata(logger, objects, metadata)
+ streamObj, ok := updateObjects["obj:3 0 R"] // the stream is queued under its "obj:" key
+ if !ok {
+ t.Fatal("expected obj:3 0 R in updateObjects")
+ }
+ stream := streamObj.(map[string]any)["stream"].(map[string]any)
+ dict := stream["dict"].(map[string]any)
+ if dict["/Subtype"] != "/text/xml" {
+ t.Errorf("/Subtype = %v, want /text/xml", dict["/Subtype"])
+ }
+ })
+}
+
+func TestPatchCatalogAF(t *testing.T) { // exercises patchCatalogAF: append, de-duplication and the no-catalog case
+ t.Run("adds filespec refs to AF array", func(t *testing.T) {
+ catalogValue := map[string]any{"/Type": "/Catalog"}
+ updateObjects := make(map[string]any)
+
+ patchCatalogAF("obj:1 0 R", catalogValue, []string{"obj:2 0 R", "obj:3 0 R"}, updateObjects)
+
+ af, ok := catalogValue["/AF"].([]any)
+ if !ok {
+ t.Fatal("expected /AF to be []any")
+ }
+ if len(af) != 2 {
+ t.Fatalf("/AF has %d entries, want 2", len(af))
+ }
+ if af[0] != "2 0 R" || af[1] != "3 0 R" { // entries lose the "obj:" key prefix
+ t.Errorf("/AF = %v, want [2 0 R, 3 0 R]", af)
+ }
+ })
+
+ t.Run("does not duplicate existing refs", func(t *testing.T) {
+ catalogValue := map[string]any{
+ "/Type": "/Catalog",
+ "/AF": []any{"2 0 R"},
+ }
+ updateObjects := make(map[string]any)
+
+ patchCatalogAF("obj:1 0 R", catalogValue, []string{"obj:2 0 R", "obj:3 0 R"}, updateObjects)
+
+ af := catalogValue["/AF"].([]any)
+ if len(af) != 2 { // "2 0 R" was already listed, so only "3 0 R" is added
+ t.Fatalf("/AF has %d entries, want 2", len(af))
+ }
+ })
+
+ t.Run("no-op when catalogRef is empty", func(t *testing.T) {
+ updateObjects := make(map[string]any)
+ patchCatalogAF("", nil, []string{"obj:2 0 R"}, updateObjects)
+ if len(updateObjects) != 0 {
+ t.Error("expected no updates for empty catalogRef")
+ }
+ })
+}
+
+func TestSetStreamSubtype(t *testing.T) { // exercises setStreamSubtype: success, key normalization and the skip paths
+ logger := slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelError})) // warnings stay silent
+
+ t.Run("sets Subtype in stream dict", func(t *testing.T) {
+ objects := map[string]json.RawMessage{
+ "obj:3 0 R": json.RawMessage(`{"stream":{"dict":{"/Type":"/EmbeddedFile"}}}`),
+ }
+ updateObjects := make(map[string]any)
+
+ setStreamSubtype(logger, objects, updateObjects, "obj:3 0 R", "text/xml")
+
+ streamObj := updateObjects["obj:3 0 R"].(map[string]any)["stream"].(map[string]any)
+ dict := streamObj["dict"].(map[string]any)
+ if dict["/Subtype"] != "/text/xml" {
+ t.Errorf("/Subtype = %v, want /text/xml", dict["/Subtype"])
+ }
+ })
+
+ t.Run("auto-adds obj: prefix to ref", func(t *testing.T) {
+ objects := map[string]json.RawMessage{
+ "obj:5 0 R": json.RawMessage(`{"stream":{"dict":{}}}`),
+ }
+ updateObjects := make(map[string]any)
+
+ setStreamSubtype(logger, objects, updateObjects, "5 0 R", "application/pdf") // ref given without the "obj:" prefix
+
+ if _, ok := updateObjects["obj:5 0 R"]; !ok {
+ t.Error("expected obj:5 0 R in updateObjects")
+ }
+ })
+
+ t.Run("warns on missing object", func(t *testing.T) {
+ objects := map[string]json.RawMessage{}
+ updateObjects := make(map[string]any)
+
+ setStreamSubtype(logger, objects, updateObjects, "obj:99 0 R", "text/xml")
+
+ if len(updateObjects) != 0 { // only the absence of an update is asserted, not the log line
+ t.Error("expected no updates for missing object")
+ }
+ })
+
+ t.Run("warns on object without stream key", func(t *testing.T) {
+ objects := map[string]json.RawMessage{
+ "obj:3 0 R": json.RawMessage(`{"value":{"/Type":"/Page"}}`),
+ }
+ updateObjects := make(map[string]any)
+
+ setStreamSubtype(logger, objects, updateObjects, "obj:3 0 R", "text/xml")
+
+ if len(updateObjects) != 0 {
+ t.Error("expected no updates for non-stream object")
+ }
+ })
+}
diff --git a/test/integration/features/pdfengines_embed.feature b/test/integration/features/pdfengines_embed.feature
index 4bf9ca7..18543e2 100644
--- a/test/integration/features/pdfengines_embed.feature
+++ b/test/integration/features/pdfengines_embed.feature
@@ -17,6 +17,21 @@ Feature: /forms/pdfengines/embed
Then the response PDF(s) should have the "embed_1.xml" file embedded
Then the response PDF(s) should have the "embed_2.xml" file embedded
+ Scenario: POST /forms/pdfengines/embed with metadata
+ Given I have a default Gotenberg container
+ When I make a "POST" request to Gotenberg at the "/forms/pdfengines/embed" endpoint with the following form data and header(s):
+ | files | testdata/page_1.pdf | file |
+ | embeds | testdata/embed_1.xml | file |
+ | embeds | testdata/embed_2.xml | file |
+ | embedsMetadata | {"embed_1.xml":{"mimeType":"text/xml","relationship":"Data"},"embed_2.xml":{"mimeType":"text/xml","relationship":"Alternative"}} | field |
+ Then the response status code should be 200
+ And the response header "Content-Type" should be "application/pdf"
+ And there should be 1 PDF(s) in the response
+ And the response PDF(s) should have the "embed_1.xml" file embedded
+ And the response PDF(s) should have the "embed_1.xml" file embedded with relationship "Data"
+ And the response PDF(s) should have the "embed_2.xml" file embedded
+ And the response PDF(s) should have the "embed_2.xml" file embedded with relationship "Alternative"
+
@download-from
Scenario: POST /forms/pdfengines/embed with (Download From)
Given I have a default Gotenberg container
diff --git a/test/integration/scenario/containers.go b/test/integration/scenario/containers.go
index 90abab4..6ee0ac1 100644
--- a/test/integration/scenario/containers.go
+++ b/test/integration/scenario/containers.go
@@ -9,6 +9,7 @@ import (
"github.com/moby/moby/api/types/container"
"github.com/testcontainers/testcontainers-go"
+ "github.com/testcontainers/testcontainers-go/exec"
"github.com/testcontainers/testcontainers-go/network"
"github.com/testcontainers/testcontainers-go/wait"
)
@@ -138,7 +139,7 @@ func execCommandInIntegrationToolsContainer(ctx context.Context, cmd []string, p
}
}(c, ctx)
- _, output, err := c.Exec(ctx, cmd)
+ _, output, err := c.Exec(ctx, cmd, exec.Multiplexed())
if err != nil {
return "", fmt.Errorf("exec %q: %w", cmd, err)
}
diff --git a/test/integration/scenario/scenario.go b/test/integration/scenario/scenario.go
index 95dd136..419f77c 100644
--- a/test/integration/scenario/scenario.go
+++ b/test/integration/scenario/scenario.go
@@ -1262,6 +1262,68 @@ func (s *scenario) thePdfsShouldHaveEmbeddedFile(ctx context.Context, kind, shou
return nil
}
+// thePdfsShouldHaveEmbeddedFileWithRelationship asserts that every PDF under
+// the scenario's teststore directory embeds a file named embed whose
+// AFRelationship is relationship, based on veraPDF's embeddedFile feature
+// report. kind is captured by the step expression but is not used here.
+func (s *scenario) thePdfsShouldHaveEmbeddedFileWithRelationship(ctx context.Context, kind, embed, relationship string) error {
+	dirPath := s.teststoreDir
+
+	_, err := os.Stat(dirPath)
+	if os.IsNotExist(err) {
+		return fmt.Errorf("directory %q does not exist", dirPath)
+	}
+
+	var paths []string
+	err = filepath.Walk(dirPath, func(path string, info os.FileInfo, pathErr error) error {
+		if pathErr != nil {
+			return pathErr
+		}
+		if strings.EqualFold(filepath.Ext(info.Name()), ".pdf") {
+			paths = append(paths, path)
+		}
+		return nil
+	})
+	if err != nil {
+		return fmt.Errorf("walk %q: %w", dirPath, err)
+	}
+
+	for _, path := range paths {
+		cmd := []string{
+			"verapdf", "--off", "--loglevel", "0",
+			"--extract", "embeddedFile",
+			filepath.Base(path),
+		}
+
+		output, err := execCommandInIntegrationToolsContainer(ctx, cmd, path)
+		if err != nil {
+			return fmt.Errorf("exec %q: %w", cmd, err)
+		}
+
+		// Match on the report's XML elements, one <embeddedFile> block per file.
+		fileNameTag := fmt.Sprintf("<fileName>%s</fileName>", embed)
+		relationshipTag := fmt.Sprintf("<afRelationship>%s</afRelationship>", relationship)
+		blocks := strings.Split(output, "</embeddedFile>")
+		found := false
+		for _, block := range blocks {
+			if !strings.Contains(block, fileNameTag) {
+				continue
+			}
+			if !strings.Contains(block, relationshipTag) {
+				return fmt.Errorf("embedded file %q missing afRelationship %q", embed, relationship)
+			}
+			found = true
+			break
+		}
+
+		if !found {
+			return fmt.Errorf("embedded file %q not found in verapdf output", embed)
+		}
+	}
+
+	return nil
+}
+
func InitializeScenario(ctx *godog.ScenarioContext) {
s := &scenario{}
ctx.Before(func(ctx context.Context, sc *godog.Scenario) (context.Context, error) {
@@ -1300,6 +1362,7 @@ func InitializeScenario(ctx *godog.ScenarioContext) {
ctx.Then(`^the (response|webhook request) PDF\(s\) (should|should NOT) be flatten$`, s.thePdfsShouldBeFlatten)
ctx.Then(`^the (response|webhook request) PDF\(s\) (should|should NOT) be encrypted`, s.thePdfsShouldBeEncrypted)
ctx.Then(`^the (response|webhook request) PDF\(s\) (should|should NOT) have the "([^"]*)" file embedded$`, s.thePdfsShouldHaveEmbeddedFile)
+ ctx.Then(`^the (response|webhook request) PDF\(s\) should have the "([^"]*)" file embedded with relationship "([^"]*)"$`, s.thePdfsShouldHaveEmbeddedFileWithRelationship)
ctx.Then(`^the "([^"]*)" PDF should have (\d+) page\(s\)$`, s.thePdfShouldHavePages)
ctx.Then(`^the "([^"]*)" PDF (should|should NOT) be set to landscape orientation$`, s.thePdfShouldBeSetToLandscapeOrientation)
ctx.Then(`^the "([^"]*)" PDF (should|should NOT) have the following content at page (\d+):$`, s.thePdfShouldHaveTheFollowingContentAtPage)