feat: add embeds metadata

This commit is contained in:
hubert.lenoir
2026-04-03 18:23:27 +02:00
committed by Julien Neuhart
parent eff9444294
commit 3187980ead
24 changed files with 848 additions and 24 deletions
+4 -1
View File
@@ -11,7 +11,7 @@ post {
}
body:multipart-form {
files: @file(../../test/integration/testdata/page-1-html/index.html)
files: @file(../test/integration/testdata/page-1-html/index.html)
~landscape: false
~printBackground: false
~scale: 1.0
@@ -50,6 +50,9 @@ body:multipart-form {
~metadata: {"Author":"Bruno","Title":"Test"}
~userPassword:
~ownerPassword:
~embeds: @file(../test/integration/testdata/embed_1.xml)
~embeds: @file(../test/integration/testdata/embed_2.xml)
~embedsMetadata: {"embed_1.xml":{"mimeType":"text/xml","relationship":"Data"}, "embed_2.xml":{"mimeType":"text/xml","relationship":"Data"}}
~watermarkSource: text
~watermarkExpression: CONFIDENTIAL
~watermarkPages:
@@ -51,6 +51,9 @@ body:multipart-form {
~metadata: {"Author":"Bruno","Title":"Test"}
~userPassword:
~ownerPassword:
~embeds: @file(../test/integration/testdata/embed_1.xml)
~embeds: @file(../test/integration/testdata/embed_2.xml)
~embedsMetadata: {"embed_1.xml":{"mimeType":"text/xml","relationship":"Data"}, "embed_2.xml":{"mimeType":"text/xml","relationship":"Data"}}
~watermarkSource: text
~watermarkExpression: CONFIDENTIAL
~watermarkPages:
+3
View File
@@ -50,6 +50,9 @@ body:multipart-form {
~metadata: {"Author":"Bruno","Title":"Test"}
~userPassword:
~ownerPassword:
~embeds: @file(../test/integration/testdata/embed_1.xml)
~embeds: @file(../test/integration/testdata/embed_2.xml)
~embedsMetadata: {"embed_1.xml":{"mimeType":"text/xml","relationship":"Data"}, "embed_2.xml":{"mimeType":"text/xml","relationship":"Data"}}
~watermarkSource: text
~watermarkExpression: CONFIDENTIAL
~watermarkPages:
+4 -1
View File
@@ -11,7 +11,7 @@ post {
}
body:multipart-form {
files: @file(../../test/integration/testdata/page_1.docx)
files: @file(../test/integration/testdata/page_1.docx)
~password:
~landscape: false
~nativePageRanges:
@@ -67,6 +67,9 @@ body:multipart-form {
~metadata: {"Author":"Bruno","Title":"Test"}
~userPassword:
~ownerPassword:
~embeds: @file(../test/integration/testdata/embed_1.xml)
~embeds: @file(../test/integration/testdata/embed_2.xml)
~embedsMetadata: {"embed_1.xml":{"mimeType":"text/xml","relationship":"Data"}, "embed_2.xml":{"mimeType":"text/xml","relationship":"Data"}}
~watermarkSource: text
~watermarkExpression: CONFIDENTIAL
~watermarkPages:
+4 -2
View File
@@ -11,8 +11,10 @@ post {
}
body:multipart-form {
files: @file(../../test/integration/testdata/page_1.pdf)
embeds: @file(../../test/integration/testdata/page_1.pdf)
files: @file(../test/integration/testdata/page_1.pdf)
embeds: @file(../test/integration/testdata/embed_1.xml)
embeds: @file(../test/integration/testdata/embed_2.xml)
embedsMetadata: {"embed_1.xml":{"mimeType":"text/xml","relationship":"Data"}, "embed_2.xml":{"mimeType":"text/xml","relationship":"Data"}}
~downloadFrom: [{"url":"https://example.com/attachment.xml","embedded":true}]
}
+2 -1
View File
@@ -76,7 +76,8 @@ PDFENGINES_WATERMARK_ENGINES=pdfcpu,pdftk
PDFENGINES_STAMP_ENGINES=pdfcpu,pdftk
PDFENGINES_ENCRYPT_ENGINES=qpdf,pdfcpu,pdftk
PDFENGINES_ROTATE_ENGINES=pdfcpu,pdftk
PDFENGINES_EMBED_ENGINES=pdfcpu
PDFENGINES_EMBED_ENGINES=qpdf,pdfcpu
PDFENGINES_EMBED_METADATA_ENGINES=qpdf
PROMETHEUS_NAMESPACE=gotenberg
PROMETHEUS_COLLECT_INTERVAL=1s
PROMETHEUS_DISABLE_ROUTE_TELEMETRY=true
+1
View File
@@ -77,6 +77,7 @@ services:
- "--pdfengines-encrypt-engines=${PDFENGINES_ENCRYPT_ENGINES}"
- "--pdfengines-rotate-engines=${PDFENGINES_ROTATE_ENGINES}"
- "--pdfengines-embed-engines=${PDFENGINES_EMBED_ENGINES}"
- "--pdfengines-embed-metadata-engines=${PDFENGINES_EMBED_METADATA_ENGINES}"
- "--pdfengines-disable-routes=${PDFENGINES_DISABLE_ROUTES}"
- "--prometheus-namespace=${PROMETHEUS_NAMESPACE}"
- "--prometheus-collect-interval=${PROMETHEUS_COLLECT_INTERVAL}"
+19 -14
View File
@@ -45,20 +45,21 @@ func (mod *DebuggableMock) Debug() map[string]any {
//
//nolint:dupl
type PdfEngineMock struct {
MergeMock func(ctx context.Context, logger *slog.Logger, inputPaths []string, outputPath string) error
SplitMock func(ctx context.Context, logger *slog.Logger, mode SplitMode, inputPath, outputDirPath string) ([]string, error)
FlattenMock func(ctx context.Context, logger *slog.Logger, inputPath string) error
ConvertMock func(ctx context.Context, logger *slog.Logger, formats PdfFormats, inputPath, outputPath string) error
ReadMetadataMock func(ctx context.Context, logger *slog.Logger, inputPath string) (map[string]any, error)
PageCountMock func(ctx context.Context, logger *slog.Logger, inputPath string) (int, error)
WriteMetadataMock func(ctx context.Context, logger *slog.Logger, metadata map[string]any, inputPath string) error
ReadBookmarksMock func(ctx context.Context, logger *slog.Logger, inputPath string) ([]Bookmark, error)
EncryptMock func(ctx context.Context, logger *slog.Logger, inputPath, userPassword, ownerPassword string) error
EmbedFilesMock func(ctx context.Context, logger *slog.Logger, filePaths []string, inputPath string) error
WriteBookmarksMock func(ctx context.Context, logger *slog.Logger, inputPath string, bookmarks []Bookmark) error
WatermarkMock func(ctx context.Context, logger *slog.Logger, inputPath string, stamp Stamp) error
StampMock func(ctx context.Context, logger *slog.Logger, inputPath string, stamp Stamp) error
RotateMock func(ctx context.Context, logger *slog.Logger, inputPath string, angle int, pages string) error
MergeMock func(ctx context.Context, logger *slog.Logger, inputPaths []string, outputPath string) error
SplitMock func(ctx context.Context, logger *slog.Logger, mode SplitMode, inputPath, outputDirPath string) ([]string, error)
FlattenMock func(ctx context.Context, logger *slog.Logger, inputPath string) error
ConvertMock func(ctx context.Context, logger *slog.Logger, formats PdfFormats, inputPath, outputPath string) error
ReadMetadataMock func(ctx context.Context, logger *slog.Logger, inputPath string) (map[string]any, error)
PageCountMock func(ctx context.Context, logger *slog.Logger, inputPath string) (int, error)
WriteMetadataMock func(ctx context.Context, logger *slog.Logger, metadata map[string]any, inputPath string) error
ReadBookmarksMock func(ctx context.Context, logger *slog.Logger, inputPath string) ([]Bookmark, error)
EncryptMock func(ctx context.Context, logger *slog.Logger, inputPath, userPassword, ownerPassword string) error
EmbedFilesMock func(ctx context.Context, logger *slog.Logger, filePaths []string, inputPath string) error
EmbedFilesMetadataMock func(ctx context.Context, logger *slog.Logger, metadata map[string]map[string]string, inputPath string) error
WriteBookmarksMock func(ctx context.Context, logger *slog.Logger, inputPath string, bookmarks []Bookmark) error
WatermarkMock func(ctx context.Context, logger *slog.Logger, inputPath string, stamp Stamp) error
StampMock func(ctx context.Context, logger *slog.Logger, inputPath string, stamp Stamp) error
RotateMock func(ctx context.Context, logger *slog.Logger, inputPath string, angle int, pages string) error
}
func (engine *PdfEngineMock) Merge(ctx context.Context, logger *slog.Logger, inputPaths []string, outputPath string) error {
@@ -101,6 +102,10 @@ func (engine *PdfEngineMock) EmbedFiles(ctx context.Context, logger *slog.Logger
return engine.EmbedFilesMock(ctx, logger, filePaths, inputPath)
}
// EmbedFilesMetadata forwards the call, with its arguments untouched, to the
// EmbedFilesMetadataMock function field and returns its result.
func (engine *PdfEngineMock) EmbedFilesMetadata(ctx context.Context, logger *slog.Logger, metadata map[string]map[string]string, inputPath string) error {
	mock := engine.EmbedFilesMetadataMock

	return mock(ctx, logger, metadata, inputPath)
}
// WriteBookmarks forwards the call, with its arguments untouched, to the
// WriteBookmarksMock function field and returns its result.
func (engine *PdfEngineMock) WriteBookmarks(ctx context.Context, logger *slog.Logger, inputPath string, bookmarks []Bookmark) error {
	mock := engine.WriteBookmarksMock

	return mock(ctx, logger, inputPath, bookmarks)
}
+6
View File
@@ -201,6 +201,12 @@ type PdfEngine interface {
// TODO: attachments instead? Rename the route?
EmbedFiles(ctx context.Context, logger *slog.Logger, filePaths []string, inputPath string) error
// EmbedFilesMetadata sets metadata (such as MIME type and AFRelationship)
// on already-embedded files in a PDF. The metadata map is keyed by
// filename, with each value being a map of property names to values
// (e.g., "mimeType" and "relationship").
EmbedFilesMetadata(ctx context.Context, logger *slog.Logger, metadata map[string]map[string]string, inputPath string) error
// Watermark applies a watermark (behind page content) to a PDF file.
Watermark(ctx context.Context, logger *slog.Logger, inputPath string, stamp Stamp) error
+33
View File
@@ -1,6 +1,7 @@
package api
import (
"encoding/json"
"errors"
"fmt"
"math"
@@ -391,6 +392,38 @@ func (form *FormData) Embeds(target *[]string) *FormData {
return form
}
// EmbedsMetadata decodes the "embedsMetadata" form field, a JSON object keyed
// by filename whose values are maps of property names to values (e.g.,
// "mimeType" and "relationship"), into target.
//
// The target is left untouched when the form already carries errors or when
// the field is missing or empty. A value that is not valid JSON for this
// shape is recorded as a form error.
//
//	var metadata map[string]map[string]string
//
//	ctx.FormData().EmbedsMetadata(&metadata)
func (form *FormData) EmbedsMetadata(target *map[string]map[string]string) *FormData {
	if form.errors != nil {
		return form
	}

	values, present := form.values["embedsMetadata"]
	if !present || len(values) == 0 {
		return form
	}

	// Only the first value of the field is considered.
	payload := values[0]
	if payload == "" {
		return form
	}

	decoded := map[string]map[string]string{}
	if err := json.Unmarshal([]byte(payload), &decoded); err != nil {
		form.append(
			fmt.Errorf("form field 'embedsMetadata' is invalid: %w", err),
		)
		return form
	}

	*target = decoded
	return form
}
// MandatoryPaths binds the absolute paths of form data files, according to a
// list of file extensions, to a string slice variable. It populates an error
// if there is no file for given file extensions.
+12 -4
View File
@@ -421,6 +421,7 @@ func convertUrlRoute(chromium Api, engine gotenberg.PdfEngine) api.Route {
stamp := pdfengines.FormDataPdfStamp(form, false)
stampFile := pdfengines.FormDataPdfStampFile(form)
rotateAngle, rotatePages := pdfengines.FormDataPdfRotate(form, false)
embedsMetadata := pdfengines.FormDataPdfEmbedsMetadata(form)
var url string
err := form.
@@ -437,7 +438,7 @@ func convertUrlRoute(chromium Api, engine gotenberg.PdfEngine) api.Route {
stamp.Expression = stampFile
}
err = convertUrl(ctx, chromium, engine, url, options, mode, pdfFormats, metadata, userPassword, ownerPassword, embedPaths, watermark, stamp, rotateAngle, rotatePages)
err = convertUrl(ctx, chromium, engine, url, options, mode, pdfFormats, metadata, userPassword, ownerPassword, embedPaths, embedsMetadata, watermark, stamp, rotateAngle, rotatePages)
if err != nil {
return fmt.Errorf("convert URL to PDF: %w", err)
}
@@ -496,6 +497,7 @@ func convertHtmlRoute(chromium Api, engine gotenberg.PdfEngine) api.Route {
stamp := pdfengines.FormDataPdfStamp(form, false)
stampFile := pdfengines.FormDataPdfStampFile(form)
rotateAngle, rotatePages := pdfengines.FormDataPdfRotate(form, false)
embedsMetadata := pdfengines.FormDataPdfEmbedsMetadata(form)
var inputPath string
err := form.
@@ -514,7 +516,7 @@ func convertHtmlRoute(chromium Api, engine gotenberg.PdfEngine) api.Route {
url := fmt.Sprintf("file://%s", inputPath)
options.AllowedFilePrefixes = []string{ctx.DirPath()}
err = convertUrl(ctx, chromium, engine, url, options, mode, pdfFormats, metadata, userPassword, ownerPassword, embedPaths, watermark, stamp, rotateAngle, rotatePages)
err = convertUrl(ctx, chromium, engine, url, options, mode, pdfFormats, metadata, userPassword, ownerPassword, embedPaths, embedsMetadata, watermark, stamp, rotateAngle, rotatePages)
if err != nil {
return fmt.Errorf("convert HTML to PDF: %w", err)
}
@@ -575,6 +577,7 @@ func convertMarkdownRoute(chromium Api, engine gotenberg.PdfEngine) api.Route {
stamp := pdfengines.FormDataPdfStamp(form, false)
stampFile := pdfengines.FormDataPdfStampFile(form)
rotateAngle, rotatePages := pdfengines.FormDataPdfRotate(form, false)
embedsMetadata := pdfengines.FormDataPdfEmbedsMetadata(form)
var (
inputPath string
@@ -602,7 +605,7 @@ func convertMarkdownRoute(chromium Api, engine gotenberg.PdfEngine) api.Route {
}
options.AllowedFilePrefixes = []string{ctx.DirPath()}
err = convertUrl(ctx, chromium, engine, url, options, mode, pdfFormats, metadata, userPassword, ownerPassword, embedPaths, watermark, stamp, rotateAngle, rotatePages)
err = convertUrl(ctx, chromium, engine, url, options, mode, pdfFormats, metadata, userPassword, ownerPassword, embedPaths, embedsMetadata, watermark, stamp, rotateAngle, rotatePages)
if err != nil {
return fmt.Errorf("convert markdown to PDF: %w", err)
}
@@ -727,7 +730,7 @@ func markdownToHtml(ctx *api.Context, inputPath string, markdownPaths []string)
return fmt.Sprintf("file://%s", inputPath), nil
}
func convertUrl(ctx *api.Context, chromium Api, engine gotenberg.PdfEngine, url string, options PdfOptions, mode gotenberg.SplitMode, pdfFormats gotenberg.PdfFormats, metadata map[string]any, userPassword, ownerPassword string, embedPaths []string, watermark, stamp gotenberg.Stamp, rotateAngle int, rotatePages string) error {
func convertUrl(ctx *api.Context, chromium Api, engine gotenberg.PdfEngine, url string, options PdfOptions, mode gotenberg.SplitMode, pdfFormats gotenberg.PdfFormats, metadata map[string]any, userPassword, ownerPassword string, embedPaths []string, embedsMetadata map[string]map[string]string, watermark, stamp gotenberg.Stamp, rotateAngle int, rotatePages string) error {
outputPath := ctx.GeneratePath(".pdf")
// See https://github.com/gotenberg/gotenberg/issues/1130.
filename := ctx.OutputFilename(outputPath)
@@ -831,6 +834,11 @@ func convertUrl(ctx *api.Context, chromium Api, engine gotenberg.PdfEngine, url
return fmt.Errorf("embed files into PDFs: %w", err)
}
err = pdfengines.EmbedFilesMetadataStub(ctx, engine, embedsMetadata, convertOutputPaths)
if err != nil {
return fmt.Errorf("set embeds metadata: %w", err)
}
err = pdfengines.EncryptPdfStub(ctx, engine, userPassword, ownerPassword, convertOutputPaths)
if err != nil {
return fmt.Errorf("encrypt PDFs: %w", err)
+5
View File
@@ -531,6 +531,11 @@ func (engine *ExifTool) Rotate(ctx context.Context, logger *slog.Logger, inputPa
return err
}
// EmbedFilesMetadata is not implemented by the ExifTool engine: it ignores
// its arguments and always returns an error wrapping
// gotenberg.ErrPdfEngineMethodNotSupported.
func (engine *ExifTool) EmbedFilesMetadata(ctx context.Context, logger *slog.Logger, metadata map[string]map[string]string, inputPath string) error {
	unsupported := gotenberg.ErrPdfEngineMethodNotSupported

	return fmt.Errorf("set embeds metadata with ExifTool: %w", unsupported)
}
// Interface guards.
var (
_ gotenberg.Module = (*ExifTool)(nil)
@@ -115,6 +115,11 @@ func (engine *LibreOfficePdfEngine) EmbedFiles(ctx context.Context, logger *slog
return fmt.Errorf("embed files with LibreOffice: %w", gotenberg.ErrPdfEngineMethodNotSupported)
}
// EmbedFilesMetadata is not implemented by the LibreOffice engine: it ignores
// its arguments and always returns an error wrapping
// gotenberg.ErrPdfEngineMethodNotSupported.
func (engine *LibreOfficePdfEngine) EmbedFilesMetadata(ctx context.Context, logger *slog.Logger, metadata map[string]map[string]string, inputPath string) error {
	unsupported := gotenberg.ErrPdfEngineMethodNotSupported

	return fmt.Errorf("set embeds metadata with LibreOffice: %w", unsupported)
}
// Watermark is not available in this implementation.
func (engine *LibreOfficePdfEngine) Watermark(ctx context.Context, logger *slog.Logger, inputPath string, stamp gotenberg.Stamp) error {
return fmt.Errorf("watermark PDF with LibreOffice: %w", gotenberg.ErrPdfEngineMethodNotSupported)
+6
View File
@@ -37,6 +37,7 @@ func convertRoute(libreOffice libreofficeapi.Uno, engine gotenberg.PdfEngine) ap
stamp := pdfengines.FormDataPdfStamp(form, false)
stampFile := pdfengines.FormDataPdfStampFile(form)
angle, rotatePages := pdfengines.FormDataPdfRotate(form, false)
embedsMetadata := pdfengines.FormDataPdfEmbedsMetadata(form)
zeroValuedSplitMode := gotenberg.SplitMode{}
@@ -495,6 +496,11 @@ func convertRoute(libreOffice libreofficeapi.Uno, engine gotenberg.PdfEngine) ap
return fmt.Errorf("embed files into PDFs: %w", err)
}
err = pdfengines.EmbedFilesMetadataStub(ctx, engine, embedsMetadata, outputPaths)
if err != nil {
return fmt.Errorf("set embeds metadata: %w", err)
}
err = pdfengines.EncryptPdfStub(ctx, engine, userPassword, ownerPassword, outputPaths)
if err != nil {
return fmt.Errorf("encrypt PDFs: %w", err)
+5
View File
@@ -447,6 +447,11 @@ func (engine *PdfCpu) WriteBookmarks(ctx context.Context, logger *slog.Logger, i
return nil
}
// EmbedFilesMetadata is not implemented by the pdfcpu engine: it ignores its
// arguments and always returns an error wrapping
// gotenberg.ErrPdfEngineMethodNotSupported.
func (engine *PdfCpu) EmbedFilesMetadata(ctx context.Context, logger *slog.Logger, metadata map[string]map[string]string, inputPath string) error {
	unsupported := gotenberg.ErrPdfEngineMethodNotSupported

	return fmt.Errorf("set embeds metadata with pdfcpu: %w", unsupported)
}
// EmbedFiles embeds files into a PDF. All files are embedded as file attachments
// without modifying the main PDF content.
func (engine *PdfCpu) EmbedFiles(ctx context.Context, logger *slog.Logger, filePaths []string, inputPath string) error {
+40
View File
@@ -22,6 +22,7 @@ type multiPdfEngines struct {
writeMetadataEngines []gotenberg.PdfEngine
passwordEngines []gotenberg.PdfEngine
embedEngines []gotenberg.PdfEngine
embedMetadataEngines []gotenberg.PdfEngine
readBookmarksEngines []gotenberg.PdfEngine
writeBookmarksEngines []gotenberg.PdfEngine
watermarkEngines []gotenberg.PdfEngine
@@ -38,6 +39,7 @@ func newMultiPdfEngines(
writeMetadataEngines,
passwordEngines,
embedEngines,
embedMetadataEngines,
readBookmarksEngines,
writeBookmarksEngines,
watermarkEngines,
@@ -53,6 +55,7 @@ func newMultiPdfEngines(
writeMetadataEngines: writeMetadataEngines,
passwordEngines: passwordEngines,
embedEngines: embedEngines,
embedMetadataEngines: embedMetadataEngines,
readBookmarksEngines: readBookmarksEngines,
writeBookmarksEngines: writeBookmarksEngines,
watermarkEngines: watermarkEngines,
@@ -603,6 +606,43 @@ func (multi *multiPdfEngines) Rotate(ctx context.Context, logger *slog.Logger, i
return err
}
// EmbedFilesMetadata tries the configured embed-metadata engines one after
// the other and returns nil as soon as one of them succeeds. Engine failures
// are accumulated with errors.Join and returned, wrapped, once every engine
// has been tried. If the context is done while an engine is still running,
// the context error is returned as is.
//
//nolint:dupl
func (multi *multiPdfEngines) EmbedFilesMetadata(ctx context.Context, logger *slog.Logger, metadata map[string]map[string]string, inputPath string) error {
	ctx, span := gotenberg.Tracer().Start(ctx, "pdfengines.EmbedFilesMetadata", trace.WithSpanKind(trace.SpanKindInternal))
	defer span.End()

	var joined error

	// Buffered so that an engine finishing after we stopped listening can
	// still deliver its result without blocking.
	results := make(chan error, 1)

	for _, candidate := range multi.embedMetadataEngines {
		go func(e gotenberg.PdfEngine) {
			results <- e.EmbedFilesMetadata(ctx, logger, metadata, inputPath)
		}(candidate)

		select {
		case <-ctx.Done():
			return ctx.Err()
		case engineErr := <-results:
			if engineErr == nil {
				span.SetStatus(codes.Ok, "")
				return nil
			}
			joined = errors.Join(joined, engineErr)
		}
	}

	joined = fmt.Errorf("set embeds metadata using multi PDF engines: %w", joined)
	span.RecordError(joined)
	span.SetStatus(codes.Error, joined.Error())

	return joined
}
// Interface guards.
var (
_ gotenberg.PdfEngine = (*multiPdfEngines)(nil)
+11
View File
@@ -36,6 +36,7 @@ type PdfEngines struct {
writeMetadataNames []string
encryptNames []string
embedNames []string
embedMetadataNames []string
readBookmarksNames []string
writeBookmarksNames []string
watermarkNames []string
@@ -59,6 +60,7 @@ func (mod *PdfEngines) Descriptor() gotenberg.ModuleDescriptor {
fs.StringSlice("pdfengines-write-metadata-engines", []string{"exiftool"}, "Set the PDF engines and their order for the write metadata feature - empty means all")
fs.StringSlice("pdfengines-encrypt-engines", []string{"qpdf", "pdftk", "pdfcpu"}, "Set the PDF engines and their order for the password protection feature - empty means all")
fs.StringSlice("pdfengines-embed-engines", []string{"pdfcpu"}, "Set the PDF engines and their order for the file embedding feature - empty means all")
fs.StringSlice("pdfengines-embed-metadata-engines", []string{"qpdf"}, "Set the PDF engines and their order for the embed metadata feature - empty means all")
fs.StringSlice("pdfengines-read-bookmarks-engines", []string{"pdfcpu"}, "Set the PDF engines and their order for the read bookmarks feature - empty means all")
fs.StringSlice("pdfengines-write-bookmarks-engines", []string{"pdfcpu"}, "Set the PDF engines and their order for the write bookmarks feature - empty means all")
fs.StringSlice("pdfengines-watermark-engines", []string{"pdfcpu", "pdftk"}, "Set the PDF engines and their order for the watermark feature - empty means all")
@@ -91,6 +93,7 @@ func (mod *PdfEngines) Provision(ctx *gotenberg.Context) error {
writeMetadataNames := flags.MustStringSlice("pdfengines-write-metadata-engines")
encryptNames := flags.MustStringSlice("pdfengines-encrypt-engines")
embedNames := flags.MustStringSlice("pdfengines-embed-engines")
embedMetadataNames := flags.MustStringSlice("pdfengines-embed-metadata-engines")
readBookmarksNames := flags.MustStringSlice("pdfengines-read-bookmarks-engines")
writeBookmarksNames := flags.MustStringSlice("pdfengines-write-bookmarks-engines")
watermarkNames := flags.MustStringSlice("pdfengines-watermark-engines")
@@ -162,6 +165,11 @@ func (mod *PdfEngines) Provision(ctx *gotenberg.Context) error {
mod.embedNames = embedNames
}
mod.embedMetadataNames = defaultNames
if len(embedMetadataNames) > 0 {
mod.embedMetadataNames = embedMetadataNames
}
mod.readBookmarksNames = defaultNames
if len(readBookmarksNames) > 0 {
mod.readBookmarksNames = readBookmarksNames
@@ -236,6 +244,7 @@ func (mod *PdfEngines) Validate() error {
findNonExistingEngines(mod.writeMetadataNames)
findNonExistingEngines(mod.encryptNames)
findNonExistingEngines(mod.embedNames)
findNonExistingEngines(mod.embedMetadataNames)
findNonExistingEngines(mod.readBookmarksNames)
findNonExistingEngines(mod.writeBookmarksNames)
findNonExistingEngines(mod.watermarkNames)
@@ -261,6 +270,7 @@ func (mod *PdfEngines) SystemMessages() []string {
fmt.Sprintf("write metadata engines - %s", strings.Join(mod.writeMetadataNames, " ")),
fmt.Sprintf("encrypt engines - %s", strings.Join(mod.encryptNames, " ")),
fmt.Sprintf("embed engines - %s", strings.Join(mod.embedNames, " ")),
fmt.Sprintf("embed metadata engines - %s", strings.Join(mod.embedMetadataNames, " ")),
fmt.Sprintf("read bookmarks engines - %s", strings.Join(mod.readBookmarksNames, " ")),
fmt.Sprintf("write bookmarks engines - %s", strings.Join(mod.writeBookmarksNames, " ")),
fmt.Sprintf("watermark engines - %s", strings.Join(mod.watermarkNames, " ")),
@@ -294,6 +304,7 @@ func (mod *PdfEngines) PdfEngine() (gotenberg.PdfEngine, error) {
engines(mod.writeMetadataNames),
engines(mod.encryptNames),
engines(mod.embedNames),
engines(mod.embedMetadataNames),
engines(mod.readBookmarksNames),
engines(mod.writeBookmarksNames),
engines(mod.watermarkNames),
+42
View File
@@ -443,6 +443,30 @@ func FormDataPdfEmbeds(form *api.FormData) []string {
return embedPaths
}
// FormDataPdfEmbedsMetadata binds the "embedsMetadata" form field (a JSON
// string keyed by filename) and returns the resulting map. The returned map
// is nil when the form does not populate it.
func FormDataPdfEmbedsMetadata(form *api.FormData) map[string]map[string]string {
	var embedsMetadata map[string]map[string]string

	// The chained *FormData return value is not needed here; parsing
	// problems are recorded on the form itself.
	form.EmbedsMetadata(&embedsMetadata)

	return embedsMetadata
}
// EmbedFilesMetadataStub applies the given embeds metadata to each PDF in
// inputPaths, in order, through the engine. It does nothing when metadata is
// empty and stops at the first PDF for which the engine returns an error.
func EmbedFilesMetadataStub(ctx *api.Context, engine gotenberg.PdfEngine, metadata map[string]map[string]string, inputPaths []string) error {
	if len(metadata) == 0 {
		return nil
	}

	for i := range inputPaths {
		pdfPath := inputPaths[i]

		if err := engine.EmbedFilesMetadata(ctx, ctx.Log(), metadata, pdfPath); err != nil {
			return fmt.Errorf("set embeds metadata on PDF '%s': %w", pdfPath, err)
		}
	}

	return nil
}
// FormDataPdfEncrypt extracts encryption parameters from form data.
func FormDataPdfEncrypt(form *api.FormData) (userPassword, ownerPassword string) {
form.String("userPassword", &userPassword, "")
@@ -638,6 +662,7 @@ func mergeRoute(engine gotenberg.PdfEngine) api.Route {
stamp := FormDataPdfStamp(form, false)
stampFile := FormDataPdfStampFile(form)
angle, rotatePages := FormDataPdfRotate(form, false)
embedsMetadata := FormDataPdfEmbedsMetadata(form)
var inputPaths []string
var flatten bool
@@ -754,6 +779,11 @@ func mergeRoute(engine gotenberg.PdfEngine) api.Route {
return fmt.Errorf("embed files into PDFs: %w", err)
}
err = EmbedFilesMetadataStub(ctx, engine, embedsMetadata, outputPaths)
if err != nil {
return fmt.Errorf("set embeds metadata: %w", err)
}
err = EncryptPdfStub(ctx, engine, userPassword, ownerPassword, outputPaths)
if err != nil {
return fmt.Errorf("encrypt PDFs: %w", err)
@@ -789,6 +819,7 @@ func splitRoute(engine gotenberg.PdfEngine) api.Route {
stamp := FormDataPdfStamp(form, false)
stampFile := FormDataPdfStampFile(form)
angle, rotatePages := FormDataPdfRotate(form, false)
embedsMetadata := FormDataPdfEmbedsMetadata(form)
var inputPaths []string
var flatten bool
@@ -856,6 +887,11 @@ func splitRoute(engine gotenberg.PdfEngine) api.Route {
return fmt.Errorf("embed files into PDFs: %w", err)
}
err = EmbedFilesMetadataStub(ctx, engine, embedsMetadata, convertOutputPaths)
if err != nil {
return fmt.Errorf("set embeds metadata: %w", err)
}
err = EncryptPdfStub(ctx, engine, userPassword, ownerPassword, convertOutputPaths)
if err != nil {
return fmt.Errorf("encrypt PDFs: %w", err)
@@ -1180,6 +1216,7 @@ func embedRoute(engine gotenberg.PdfEngine) api.Route {
form := ctx.FormData()
embedPaths := FormDataPdfEmbeds(form)
embedsMetadata := FormDataPdfEmbedsMetadata(form)
var inputPaths []string
err := form.
@@ -1193,6 +1230,11 @@ func embedRoute(engine gotenberg.PdfEngine) api.Route {
return fmt.Errorf("embed files into PDFs: %w", err)
}
err = EmbedFilesMetadataStub(ctx, engine, embedsMetadata, inputPaths)
if err != nil {
return fmt.Errorf("set embeds metadata: %w", err)
}
err = ctx.AddOutputPaths(inputPaths...)
if err != nil {
return fmt.Errorf("add output paths: %w", err)
+5
View File
@@ -495,6 +495,11 @@ func (engine *PdfTk) Rotate(ctx context.Context, logger *slog.Logger, inputPath
return nil
}
// EmbedFilesMetadata is not implemented by the PDFtk engine: it ignores its
// arguments and always returns an error wrapping
// gotenberg.ErrPdfEngineMethodNotSupported.
func (engine *PdfTk) EmbedFilesMetadata(ctx context.Context, logger *slog.Logger, metadata map[string]map[string]string, inputPath string) error {
	unsupported := gotenberg.ErrPdfEngineMethodNotSupported

	return fmt.Errorf("set embeds metadata with PDFtk: %w", unsupported)
}
// Interface guards.
var (
_ gotenberg.Module = (*PdfTk)(nil)
+287
View File
@@ -3,12 +3,14 @@ package qpdf
import (
"bytes"
"context"
"encoding/json"
"errors"
"fmt"
"log/slog"
"os"
"os/exec"
"path/filepath"
"strings"
"syscall"
"go.opentelemetry.io/otel/codes"
@@ -348,6 +350,291 @@ func (engine *QPdf) EmbedFiles(ctx context.Context, logger *slog.Logger, filePat
return err
}
// EmbedFilesMetadata sets metadata on files already embedded in the PDF at
// inputPath. It works in three steps:
//
//  1. dump the PDF as QPDF JSON (--json-output) and parse its objects,
//  2. patch the Filespec objects named in metadata (/AFRelationship, and
//     /Subtype on the embedded file stream) and make the Catalog /AF array
//     reference them,
//  3. apply the patched objects back to the PDF via --update-from-json.
//
// It is a no-op when metadata is empty or when no Filespec in the PDF matches
// a metadata key.
func (engine *QPdf) EmbedFilesMetadata(ctx context.Context, logger *slog.Logger, metadata map[string]map[string]string, inputPath string) error {
	ctx, span := gotenberg.Tracer().Start(ctx, "qpdf.EmbedFilesMetadata",
		trace.WithSpanKind(trace.SpanKindClient),
		trace.WithAttributes(semconv.ServerAddress(engine.binPath)),
	)
	defer span.End()

	// fail marks the span as errored and hands the error back unchanged.
	fail := func(err error) error {
		span.RecordError(err)
		span.SetStatus(codes.Error, err.Error())
		return err
	}

	if len(metadata) == 0 {
		span.SetStatus(codes.Ok, "")
		return nil
	}

	logger.DebugContext(ctx, fmt.Sprintf("setting embeds metadata on %s with QPDF", inputPath))

	dumpArgs := make([]string, 0, len(engine.globalArgs)+3)
	dumpArgs = append(dumpArgs, inputPath)
	dumpArgs = append(dumpArgs, engine.globalArgs...)
	dumpArgs = append(dumpArgs, "--newline-before-endstream", "--json-output")

	dump, err := engine.execCaptureOutput(ctx, dumpArgs...)
	if err != nil {
		return fail(fmt.Errorf("get PDF JSON with QPDF: %w", err))
	}

	objects, err := parsePdfObjects(dump)
	if err != nil {
		return fail(err)
	}

	catalogRef, catalogValue, filespecRefs, updateObjects := patchFilespecMetadata(logger, objects, metadata)
	if len(filespecRefs) == 0 {
		// Nothing in the PDF matched the requested filenames.
		span.SetStatus(codes.Ok, "")
		return nil
	}

	patchCatalogAF(catalogRef, catalogValue, filespecRefs, updateObjects)

	if err = engine.writeAndApplyUpdate(ctx, logger, inputPath, updateObjects); err != nil {
		return fail(err)
	}

	span.SetStatus(codes.Ok, "")
	return nil
}
// execCaptureOutput runs QPDF with the given arguments and returns its
// stdout. This uses exec.CommandContext directly because gotenberg.Cmd does
// not support capturing stdout (it only pipes to debug logs).
//
// When QPDF exits with a non-zero status, whatever it wrote to stderr is
// appended to the returned error so that the cause is not reduced to a bare
// "exit status N". The original error stays in the chain, so errors.As with
// *exec.ExitError keeps working. No output is returned alongside an error.
func (engine *QPdf) execCaptureOutput(ctx context.Context, args ...string) ([]byte, error) {
	cmd := exec.CommandContext(ctx, engine.binPath, args...) //nolint:gosec
	cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}

	output, err := cmd.Output()
	if err == nil {
		return output, nil
	}

	// cmd.Output fills ExitError.Stderr because cmd.Stderr was left nil.
	var exitErr *exec.ExitError
	if errors.As(err, &exitErr) {
		if stderr := bytes.TrimSpace(exitErr.Stderr); len(stderr) > 0 {
			return nil, fmt.Errorf("%w: %s", err, stderr)
		}
	}

	return nil, err
}
// parsePdfObjects decodes QPDF JSON v2 output and returns the objects map,
// i.e. the second element of the top-level "qpdf" array, with each object
// left as raw JSON. It fails when the output is not valid JSON, when the
// "qpdf" array has fewer than two elements, or when the second element is not
// a JSON object.
func parsePdfObjects(output []byte) (map[string]json.RawMessage, error) {
	var envelope struct {
		Qpdf []json.RawMessage `json:"qpdf"`
	}

	err := json.Unmarshal(output, &envelope)
	if err != nil {
		return nil, fmt.Errorf("parse PDF JSON: %w", err)
	}

	sections := envelope.Qpdf
	if len(sections) < 2 {
		return nil, fmt.Errorf("unexpected QPDF JSON structure: expected at least 2 elements")
	}

	var objects map[string]json.RawMessage
	err = json.Unmarshal(sections[1], &objects)
	if err != nil {
		return nil, fmt.Errorf("parse QPDF objects: %w", err)
	}

	return objects, nil
}
// patchFilespecMetadata walks the QPDF objects looking for the document
// Catalog and for Filespec dictionaries whose filename (/UF, falling back to
// /F) matches a key of metadata.
//
// For every matching Filespec it:
//   - sets /AFRelationship from the "relationship" property, accepting the
//     value with or without a leading "/" and ignoring an empty value, so
//     that neither "//Data" nor a bare "/" name is ever written;
//   - hands the "mimeType" property to setStreamSubtype for the stream
//     referenced by /EF -> /F;
//   - records the patched dictionary in the returned update objects.
//
// Objects that cannot be decoded as a dictionary with a "value" key are
// skipped. It returns the catalog reference, the catalog value, the
// references of the patched Filespecs and the update objects map.
//
// NOTE(review): Go map iteration order is unspecified, so the order of the
// returned Filespec references can differ between runs.
func patchFilespecMetadata(logger *slog.Logger, objects map[string]json.RawMessage, metadata map[string]map[string]string) (string, map[string]any, []string, map[string]any) {
	updateObjects := make(map[string]any)

	var (
		catalogRef   string
		catalogValue map[string]any
		filespecRefs []string
	)

	for ref, raw := range objects {
		var obj map[string]json.RawMessage
		if err := json.Unmarshal(raw, &obj); err != nil {
			continue
		}

		valueRaw, hasValue := obj["value"]
		if !hasValue {
			continue
		}

		var value map[string]any
		if err := json.Unmarshal(valueRaw, &value); err != nil {
			continue
		}

		typeVal, _ := value["/Type"].(string)
		switch typeVal {
		case "/Catalog":
			catalogRef = ref
			catalogValue = value

		case "/Filespec":
			uf, _ := value["/UF"].(string)
			if uf == "" {
				uf, _ = value["/F"].(string)
			}

			meta, exists := metadata[stripQpdfStringPrefix(uf)]
			if !exists {
				continue
			}

			// A missing key yields "", which is skipped like an empty value.
			if rel := strings.TrimPrefix(meta["relationship"], "/"); rel != "" {
				value["/AFRelationship"] = "/" + rel
			}

			if mimeType, ok := meta["mimeType"]; ok {
				if ef, ok := value["/EF"].(map[string]any); ok {
					if efRef, _ := ef["/F"].(string); efRef != "" {
						setStreamSubtype(logger, objects, updateObjects, efRef, mimeType)
					}
				}
			}

			filespecRefs = append(filespecRefs, ref)
			updateObjects[ref] = map[string]any{"value": value}
		}
	}

	return catalogRef, catalogValue, filespecRefs, updateObjects
}
// patchCatalogAF ensures the Catalog /AF array references every Filespec in
// filespecRefs exactly once and registers the patched Catalog in
// updateObjects under catalogRef.
//
// It does nothing when no Catalog was found (empty catalogRef or nil
// catalogValue) or when filespecRefs is empty. References already present in
// /AF, and references repeated in filespecRefs, are not added again.
//
// NOTE(review): an existing /AF that is not an inline array (for instance an
// indirect reference to an array) is replaced by a new array holding only
// filespecRefs - confirm whether such PDFs need to be supported.
func patchCatalogAF(catalogRef string, catalogValue map[string]any, filespecRefs []string, updateObjects map[string]any) {
	if catalogRef == "" || catalogValue == nil || len(filespecRefs) == 0 {
		return
	}

	existingAF, _ := catalogValue["/AF"].([]any)

	seen := make(map[string]struct{}, len(existingAF)+len(filespecRefs))
	for _, entry := range existingAF {
		if s, ok := entry.(string); ok {
			seen[s] = struct{}{}
		}
	}

	for _, ref := range filespecRefs {
		// Object references in values use "9 0 R" format,
		// not the "obj:9 0 R" key format.
		valRef := strings.TrimPrefix(ref, "obj:")
		if _, dup := seen[valRef]; dup {
			continue
		}

		seen[valRef] = struct{}{}
		existingAF = append(existingAF, valRef)
	}

	catalogValue["/AF"] = existingAF
	updateObjects[catalogRef] = map[string]any{"value": catalogValue}
}
// writeAndApplyUpdate serializes updateObjects as a QPDF JSON v2 document,
// stores it in a temporary file created next to inputPath, and runs QPDF on
// inputPath with --update-from-json pointing at that file and
// --replace-input. The temporary file is removed when the function returns.
func (engine *QPdf) writeAndApplyUpdate(ctx context.Context, logger *slog.Logger, inputPath string, updateObjects map[string]any) error {
	header := map[string]any{
		"jsonversion":                  2,
		"pushedinheritedpageresources": false,
		"calledgetallpages":            false,
		"maxobjectid":                  0,
	}

	payload, err := json.Marshal(map[string]any{
		"qpdf": []any{header, updateObjects},
	})
	if err != nil {
		return fmt.Errorf("marshal update JSON: %w", err)
	}

	patchFile, err := os.CreateTemp(filepath.Dir(inputPath), "qpdf-embeds-metadata-*.json")
	if err != nil {
		return fmt.Errorf("create temp file for update JSON: %w", err)
	}
	patchPath := patchFile.Name()
	defer os.Remove(patchPath)

	_, err = patchFile.Write(payload)
	if err != nil {
		patchFile.Close()
		return fmt.Errorf("write update JSON: %w", err)
	}

	// Close before QPDF reads the file so the content is fully written out.
	err = patchFile.Close()
	if err != nil {
		return fmt.Errorf("close temp file: %w", err)
	}

	qpdfArgs := make([]string, 0, 5+len(engine.globalArgs))
	qpdfArgs = append(qpdfArgs, inputPath)
	qpdfArgs = append(qpdfArgs, engine.globalArgs...)
	qpdfArgs = append(qpdfArgs,
		"--newline-before-endstream",
		"--update-from-json="+patchPath,
		"--replace-input",
	)

	cmd, err := gotenberg.CommandContext(ctx, logger, engine.binPath, qpdfArgs...)
	if err != nil {
		return fmt.Errorf("create command for JSON update: %w", err)
	}

	if _, err = cmd.Exec(); err != nil {
		return fmt.Errorf("update embeds metadata with QPDF: %w", err)
	}

	return nil
}
// setStreamSubtype looks up the stream object designated by ref in objects
// and records, in updateObjects, a copy of its stream whose dict has /Subtype
// set to "/" + mimeType.
//
// ref may be given with or without the "obj:" key prefix. When the object is
// missing, cannot be decoded, has no "stream" key, or its stream has no dict
// map, a warning is logged and updateObjects is left untouched.
func setStreamSubtype(logger *slog.Logger, objects map[string]json.RawMessage, updateObjects map[string]any, ref, mimeType string) {
	// Normalize to the "obj:N G R" key format used by the objects map.
	key := "obj:" + strings.TrimPrefix(ref, "obj:")

	rawObject, found := objects[key]
	if !found {
		logger.Warn(fmt.Sprintf("set stream subtype on %s: object not found", ref))
		return
	}

	var fields map[string]json.RawMessage
	if decodeErr := json.Unmarshal(rawObject, &fields); decodeErr != nil {
		logger.Warn(fmt.Sprintf("set stream subtype on %s: unmarshal object: %s", ref, decodeErr))
		return
	}

	rawStream, found := fields["stream"]
	if !found {
		logger.Warn(fmt.Sprintf("set stream subtype on %s: no stream key", ref))
		return
	}

	var streamFields map[string]any
	if decodeErr := json.Unmarshal(rawStream, &streamFields); decodeErr != nil {
		logger.Warn(fmt.Sprintf("set stream subtype on %s: unmarshal stream: %s", ref, decodeErr))
		return
	}

	streamDict, isMap := streamFields["dict"].(map[string]any)
	if !isMap {
		logger.Warn(fmt.Sprintf("set stream subtype on %s: stream dict is not a map", ref))
		return
	}

	// QPDF JSON uses literal name syntax; it handles PDF name
	// encoding internally when writing the binary PDF.
	// streamDict is the very map held by streamFields, so the assignment
	// below is visible through streamFields as well.
	streamDict["/Subtype"] = "/" + mimeType

	updateObjects[key] = map[string]any{"stream": streamFields}
}
// stripQpdfStringPrefix returns s without its leading QPDF JSON string type
// marker. The recognized markers are "u:" (Unicode), "b:" (binary) and "e:"
// (encoded); at most one marker is removed, and a string that starts with
// none of them is returned unchanged.
func stripQpdfStringPrefix(s string) string {
	switch {
	case strings.HasPrefix(s, "u:"),
		strings.HasPrefix(s, "b:"),
		strings.HasPrefix(s, "e:"):
		// Every recognized marker is exactly two bytes long.
		return s[2:]
	default:
		return s
	}
}
// Watermark is not available in this implementation.
func (engine *QPdf) Watermark(ctx context.Context, logger *slog.Logger, inputPath string, stamp gotenberg.Stamp) error {
_, span := gotenberg.Tracer().Start(ctx, "qpdf.Watermark",
+271
View File
@@ -0,0 +1,271 @@
package qpdf
import (
"encoding/json"
"log/slog"
"os"
"testing"
)
// TestStripQpdfStringPrefix checks that stripQpdfStringPrefix removes a single
// leading "u:", "b:" or "e:" marker and leaves other inputs untouched.
func TestStripQpdfStringPrefix(t *testing.T) {
	cases := []struct {
		name     string
		input    string
		expected string
	}{
		{name: "unicode prefix", input: "u:factur-x.xml", expected: "factur-x.xml"},
		{name: "binary prefix", input: "b:binary.bin", expected: "binary.bin"},
		{name: "encoded prefix", input: "e:encoded.txt", expected: "encoded.txt"},
		{name: "no prefix", input: "plain.xml", expected: "plain.xml"},
		{name: "empty string", input: "", expected: ""},
		{name: "prefix only", input: "u:", expected: ""},
		{name: "colon in value", input: "u:file:name.xml", expected: "file:name.xml"},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			if got := stripQpdfStringPrefix(tc.input); got != tc.expected {
				t.Errorf("stripQpdfStringPrefix(%q) = %q, want %q", tc.input, got, tc.expected)
			}
		})
	}
}
// TestParsePdfObjects feeds parsePdfObjects valid and malformed QPDF JSON
// documents and checks that it either fails or exposes the expected object
// keys.
func TestParsePdfObjects(t *testing.T) {
	cases := []struct {
		name      string
		input     string
		wantKeys  []string
		wantError bool
	}{
		{
			name:     "valid QPDF JSON v2",
			input:    `{"qpdf":[{"jsonversion":2},{"obj:1 0 R":{"value":{"/Type":"/Catalog"}}}]}`,
			wantKeys: []string{"obj:1 0 R"},
		},
		{
			name:      "invalid JSON",
			input:     `not json`,
			wantError: true,
		},
		{
			name:      "empty qpdf array",
			input:     `{"qpdf":[]}`,
			wantError: true,
		},
		{
			name:      "only header element",
			input:     `{"qpdf":[{"jsonversion":2}]}`,
			wantError: true,
		},
		{
			name:     "multiple objects",
			input:    `{"qpdf":[{},{"obj:1 0 R":{"value":{}},"obj:2 0 R":{"value":{}}}]}`,
			wantKeys: []string{"obj:1 0 R", "obj:2 0 R"},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			objects, err := parsePdfObjects([]byte(tc.input))

			switch {
			case tc.wantError && err == nil:
				t.Error("expected error, got nil")
				return
			case tc.wantError:
				// The failure was expected; the returned objects are not inspected.
				return
			case err != nil:
				t.Fatalf("unexpected error: %v", err)
			}

			for _, wanted := range tc.wantKeys {
				if _, present := objects[wanted]; !present {
					t.Errorf("expected key %q in objects", wanted)
				}
			}
		})
	}
}
// TestPatchFilespecMetadata exercises patchFilespecMetadata: matching of
// Filespec objects by /UF or /F, the /AFRelationship value written to the
// Filespec, and the /Subtype written to the stream referenced by /EF.
func TestPatchFilespecMetadata(t *testing.T) {
	// Only errors are printed so that expected warnings do not clutter the output.
	handler := slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelError})
	logger := slog.New(handler)

	t.Run("sets AFRelationship on matching Filespec", func(t *testing.T) {
		const wantCatalogRef = "obj:1 0 R"

		pdfObjects := map[string]json.RawMessage{
			"obj:1 0 R": json.RawMessage(`{"value":{"/Type":"/Catalog"}}`),
			"obj:2 0 R": json.RawMessage(`{"value":{"/Type":"/Filespec","/UF":"u:factur-x.xml"}}`),
		}
		embedsMetadata := map[string]map[string]string{
			"factur-x.xml": {"relationship": "Data"},
		}

		gotCatalogRef, _, gotRefs, updates := patchFilespecMetadata(logger, pdfObjects, embedsMetadata)

		if gotCatalogRef != wantCatalogRef {
			t.Errorf("catalogRef = %q, want %q", gotCatalogRef, wantCatalogRef)
		}
		if len(gotRefs) != 1 || gotRefs[0] != "obj:2 0 R" {
			t.Errorf("filespecRefs = %v, want [obj:2 0 R]", gotRefs)
		}

		filespec, found := updates["obj:2 0 R"]
		if !found {
			t.Fatal("expected obj:2 0 R in updateObjects")
		}
		wrapper := filespec.(map[string]any)
		dict := wrapper["value"].(map[string]any)
		if dict["/AFRelationship"] != "/Data" {
			t.Errorf("/AFRelationship = %v, want /Data", dict["/AFRelationship"])
		}
	})

	t.Run("skips Filespec with no matching metadata", func(t *testing.T) {
		pdfObjects := map[string]json.RawMessage{
			"obj:1 0 R": json.RawMessage(`{"value":{"/Type":"/Filespec","/UF":"u:other.xml"}}`),
		}
		embedsMetadata := map[string]map[string]string{
			"factur-x.xml": {"relationship": "Data"},
		}

		_, _, gotRefs, _ := patchFilespecMetadata(logger, pdfObjects, embedsMetadata)

		if len(gotRefs) != 0 {
			t.Errorf("filespecRefs = %v, want empty", gotRefs)
		}
	})

	t.Run("falls back to /F when /UF is absent", func(t *testing.T) {
		pdfObjects := map[string]json.RawMessage{
			"obj:1 0 R": json.RawMessage(`{"value":{"/Type":"/Filespec","/F":"u:factur-x.xml"}}`),
		}
		embedsMetadata := map[string]map[string]string{
			"factur-x.xml": {"relationship": "Alternative"},
		}

		_, _, gotRefs, updates := patchFilespecMetadata(logger, pdfObjects, embedsMetadata)

		if len(gotRefs) != 1 {
			t.Fatalf("filespecRefs = %v, want 1 entry", gotRefs)
		}
		wrapper := updates["obj:1 0 R"].(map[string]any)
		dict := wrapper["value"].(map[string]any)
		if dict["/AFRelationship"] != "/Alternative" {
			t.Errorf("/AFRelationship = %v, want /Alternative", dict["/AFRelationship"])
		}
	})

	t.Run("sets stream Subtype via EF reference", func(t *testing.T) {
		pdfObjects := map[string]json.RawMessage{
			"obj:2 0 R": json.RawMessage(`{"value":{"/Type":"/Filespec","/UF":"u:factur-x.xml","/EF":{"/F":"3 0 R"}}}`),
			"obj:3 0 R": json.RawMessage(`{"stream":{"dict":{"/Type":"/EmbeddedFile"}}}`),
		}
		embedsMetadata := map[string]map[string]string{
			"factur-x.xml": {"mimeType": "text/xml"},
		}

		_, _, _, updates := patchFilespecMetadata(logger, pdfObjects, embedsMetadata)

		embedded, found := updates["obj:3 0 R"]
		if !found {
			t.Fatal("expected obj:3 0 R in updateObjects")
		}
		wrapper := embedded.(map[string]any)
		streamFields := wrapper["stream"].(map[string]any)
		streamDict := streamFields["dict"].(map[string]any)
		if streamDict["/Subtype"] != "/text/xml" {
			t.Errorf("/Subtype = %v, want /text/xml", streamDict["/Subtype"])
		}
	})
}
// TestPatchCatalogAF verifies that patchCatalogAF appends filespec references
// to the Catalog /AF array without repeating entries already present, and
// that it records nothing when no catalog reference is given.
func TestPatchCatalogAF(t *testing.T) {
	t.Run("adds filespec refs to AF array", func(t *testing.T) {
		catalog := map[string]any{"/Type": "/Catalog"}
		updates := map[string]any{}
		refs := []string{"obj:2 0 R", "obj:3 0 R"}

		patchCatalogAF("obj:1 0 R", catalog, refs, updates)

		associated, isSlice := catalog["/AF"].([]any)
		if !isSlice {
			t.Fatal("expected /AF to be []any")
		}
		if n := len(associated); n != 2 {
			t.Fatalf("/AF has %d entries, want 2", n)
		}
		if associated[0] != "2 0 R" || associated[1] != "3 0 R" {
			t.Errorf("/AF = %v, want [2 0 R, 3 0 R]", associated)
		}
	})

	t.Run("does not duplicate existing refs", func(t *testing.T) {
		catalog := map[string]any{
			"/Type": "/Catalog",
			"/AF":   []any{"2 0 R"},
		}
		updates := map[string]any{}
		refs := []string{"obj:2 0 R", "obj:3 0 R"}

		patchCatalogAF("obj:1 0 R", catalog, refs, updates)

		associated := catalog["/AF"].([]any)
		if n := len(associated); n != 2 {
			t.Fatalf("/AF has %d entries, want 2", n)
		}
	})

	t.Run("no-op when catalogRef is empty", func(t *testing.T) {
		updates := map[string]any{}

		patchCatalogAF("", nil, []string{"obj:2 0 R"}, updates)

		if len(updates) != 0 {
			t.Error("expected no updates for empty catalogRef")
		}
	})
}
// TestSetStreamSubtype covers setStreamSubtype: writing /Subtype into a stream
// dict, accepting a reference without the "obj:" prefix, and leaving the
// update set empty when the object is missing or is not a stream.
func TestSetStreamSubtype(t *testing.T) {
	// Only errors are printed so that expected warnings do not clutter the output.
	handler := slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelError})
	logger := slog.New(handler)

	t.Run("sets Subtype in stream dict", func(t *testing.T) {
		pdfObjects := map[string]json.RawMessage{
			"obj:3 0 R": json.RawMessage(`{"stream":{"dict":{"/Type":"/EmbeddedFile"}}}`),
		}
		updates := map[string]any{}

		setStreamSubtype(logger, pdfObjects, updates, "obj:3 0 R", "text/xml")

		wrapper := updates["obj:3 0 R"].(map[string]any)
		streamFields := wrapper["stream"].(map[string]any)
		streamDict := streamFields["dict"].(map[string]any)
		if streamDict["/Subtype"] != "/text/xml" {
			t.Errorf("/Subtype = %v, want /text/xml", streamDict["/Subtype"])
		}
	})

	t.Run("auto-adds obj: prefix to ref", func(t *testing.T) {
		pdfObjects := map[string]json.RawMessage{
			"obj:5 0 R": json.RawMessage(`{"stream":{"dict":{}}}`),
		}
		updates := map[string]any{}

		setStreamSubtype(logger, pdfObjects, updates, "5 0 R", "application/pdf")

		if _, found := updates["obj:5 0 R"]; !found {
			t.Error("expected obj:5 0 R in updateObjects")
		}
	})

	t.Run("warns on missing object", func(t *testing.T) {
		pdfObjects := map[string]json.RawMessage{}
		updates := map[string]any{}

		setStreamSubtype(logger, pdfObjects, updates, "obj:99 0 R", "text/xml")

		if len(updates) != 0 {
			t.Error("expected no updates for missing object")
		}
	})

	t.Run("warns on object without stream key", func(t *testing.T) {
		pdfObjects := map[string]json.RawMessage{
			"obj:3 0 R": json.RawMessage(`{"value":{"/Type":"/Page"}}`),
		}
		updates := map[string]any{}

		setStreamSubtype(logger, pdfObjects, updates, "obj:3 0 R", "text/xml")

		if len(updates) != 0 {
			t.Error("expected no updates for non-stream object")
		}
	})
}
@@ -17,6 +17,21 @@ Feature: /forms/pdfengines/embed
Then the response PDF(s) should have the "embed_1.xml" file embedded
Then the response PDF(s) should have the "embed_2.xml" file embedded
Scenario: POST /forms/pdfengines/embed with metadata
Given I have a default Gotenberg container
When I make a "POST" request to Gotenberg at the "/forms/pdfengines/embed" endpoint with the following form data and header(s):
| files | testdata/page_1.pdf | file |
| embeds | testdata/embed_1.xml | file |
| embeds | testdata/embed_2.xml | file |
| embedsMetadata | {"embed_1.xml":{"mimeType":"text/xml","relationship":"Data"},"embed_2.xml":{"mimeType":"text/xml","relationship":"Alternative"}} | field |
Then the response status code should be 200
And the response header "Content-Type" should be "application/pdf"
And there should be 1 PDF(s) in the response
And the response PDF(s) should have the "embed_1.xml" file embedded
And the response PDF(s) should have the "embed_1.xml" file embedded with relationship "Data"
And the response PDF(s) should have the "embed_2.xml" file embedded
And the response PDF(s) should have the "embed_2.xml" file embedded with relationship "Alternative"
@download-from
Scenario: POST /forms/pdfengines/embed with (Download From)
Given I have a default Gotenberg container
+2 -1
View File
@@ -9,6 +9,7 @@ import (
"github.com/moby/moby/api/types/container"
"github.com/testcontainers/testcontainers-go"
"github.com/testcontainers/testcontainers-go/exec"
"github.com/testcontainers/testcontainers-go/network"
"github.com/testcontainers/testcontainers-go/wait"
)
@@ -138,7 +139,7 @@ func execCommandInIntegrationToolsContainer(ctx context.Context, cmd []string, p
}
}(c, ctx)
_, output, err := c.Exec(ctx, cmd)
_, output, err := c.Exec(ctx, cmd, exec.Multiplexed())
if err != nil {
return "", fmt.Errorf("exec %q: %w", cmd, err)
}
+63
View File
@@ -1262,6 +1262,68 @@ func (s *scenario) thePdfsShouldHaveEmbeddedFile(ctx context.Context, kind, shou
return nil
}
// thePdfsShouldHaveEmbeddedFileWithRelationship asserts that every PDF found
// under the scenario's teststore directory embeds a file named embed whose
// afRelationship equals relationship.
//
// Each PDF is inspected by running veraPDF's embeddedFile feature extraction
// in the integration tools container; the output is split on the closing
// </embeddedFile> tag and the first block mentioning the file name must also
// contain the expected relationship. The kind argument is captured by the
// step expression but is not used here.
func (s *scenario) thePdfsShouldHaveEmbeddedFileWithRelationship(ctx context.Context, kind, embed, relationship string) error {
	root := s.teststoreDir
	if _, statErr := os.Stat(root); os.IsNotExist(statErr) {
		return fmt.Errorf("directory %q does not exist", root)
	}

	// Collect every file with a .pdf extension (case-insensitive).
	var pdfPaths []string
	collect := func(path string, info os.FileInfo, walkErr error) error {
		if walkErr != nil {
			return walkErr
		}
		if strings.EqualFold(filepath.Ext(info.Name()), ".pdf") {
			pdfPaths = append(pdfPaths, path)
		}
		return nil
	}
	if err := filepath.Walk(root, collect); err != nil {
		return fmt.Errorf("walk %q: %w", root, err)
	}

	// The XML fragments looked for are the same for every PDF.
	wantName := fmt.Sprintf("<fileName>%s</fileName>", embed)
	wantRelationship := fmt.Sprintf("<afRelationship>%s</afRelationship>", relationship)

	for _, pdfPath := range pdfPaths {
		cmd := []string{
			"verapdf",
			"--off",
			"--loglevel",
			"0",
			"--extract",
			"embeddedFile",
			filepath.Base(pdfPath),
		}

		output, err := execCommandInIntegrationToolsContainer(ctx, cmd, pdfPath)
		if err != nil {
			return fmt.Errorf("exec %q: %w", cmd, err)
		}

		matched := false
		for _, block := range strings.Split(output, "</embeddedFile>") {
			if strings.Contains(block, wantName) {
				if !strings.Contains(block, wantRelationship) {
					return fmt.Errorf("embedded file %q missing afRelationship %q", embed, relationship)
				}
				matched = true
				break
			}
		}
		if !matched {
			return fmt.Errorf("embedded file %q not found in verapdf output", embed)
		}
	}

	return nil
}
func InitializeScenario(ctx *godog.ScenarioContext) {
s := &scenario{}
ctx.Before(func(ctx context.Context, sc *godog.Scenario) (context.Context, error) {
@@ -1300,6 +1362,7 @@ func InitializeScenario(ctx *godog.ScenarioContext) {
ctx.Then(`^the (response|webhook request) PDF\(s\) (should|should NOT) be flatten$`, s.thePdfsShouldBeFlatten)
ctx.Then(`^the (response|webhook request) PDF\(s\) (should|should NOT) be encrypted`, s.thePdfsShouldBeEncrypted)
ctx.Then(`^the (response|webhook request) PDF\(s\) (should|should NOT) have the "([^"]*)" file embedded$`, s.thePdfsShouldHaveEmbeddedFile)
ctx.Then(`^the (response|webhook request) PDF\(s\) should have the "([^"]*)" file embedded with relationship "([^"]*)"$`, s.thePdfsShouldHaveEmbeddedFileWithRelationship)
ctx.Then(`^the "([^"]*)" PDF should have (\d+) page\(s\)$`, s.thePdfShouldHavePages)
ctx.Then(`^the "([^"]*)" PDF (should|should NOT) be set to landscape orientation$`, s.thePdfShouldBeSetToLandscapeOrientation)
ctx.Then(`^the "([^"]*)" PDF (should|should NOT) have the following content at page (\d+):$`, s.thePdfShouldHaveTheFollowingContentAtPage)