mirror of
https://github.com/gotenberg/gotenberg.git
synced 2026-07-02 00:17:40 +08:00
feat: add embeds metadata
This commit is contained in:
committed by
Julien Neuhart
parent
eff9444294
commit
3187980ead
@@ -11,7 +11,7 @@ post {
|
||||
}
|
||||
|
||||
body:multipart-form {
|
||||
files: @file(../../test/integration/testdata/page-1-html/index.html)
|
||||
files: @file(../test/integration/testdata/page-1-html/index.html)
|
||||
~landscape: false
|
||||
~printBackground: false
|
||||
~scale: 1.0
|
||||
@@ -50,6 +50,9 @@ body:multipart-form {
|
||||
~metadata: {"Author":"Bruno","Title":"Test"}
|
||||
~userPassword:
|
||||
~ownerPassword:
|
||||
~embeds: @file(../test/integration/testdata/embed_1.xml)
|
||||
~embeds: @file(../test/integration/testdata/embed_2.xml)
|
||||
~embedsMetadata: {"embed_1.xml":{"mimeType":"text/xml","relationship":"Data"}, "embed_2.xml":{"mimeType":"text/xml","relationship":"Data"}}
|
||||
~watermarkSource: text
|
||||
~watermarkExpression: CONFIDENTIAL
|
||||
~watermarkPages:
|
||||
|
||||
@@ -51,6 +51,9 @@ body:multipart-form {
|
||||
~metadata: {"Author":"Bruno","Title":"Test"}
|
||||
~userPassword:
|
||||
~ownerPassword:
|
||||
~embeds: @file(../test/integration/testdata/embed_1.xml)
|
||||
~embeds: @file(../test/integration/testdata/embed_2.xml)
|
||||
~embedsMetadata: {"embed_1.xml":{"mimeType":"text/xml","relationship":"Data"}, "embed_2.xml":{"mimeType":"text/xml","relationship":"Data"}}
|
||||
~watermarkSource: text
|
||||
~watermarkExpression: CONFIDENTIAL
|
||||
~watermarkPages:
|
||||
|
||||
@@ -50,6 +50,9 @@ body:multipart-form {
|
||||
~metadata: {"Author":"Bruno","Title":"Test"}
|
||||
~userPassword:
|
||||
~ownerPassword:
|
||||
~embeds: @file(../test/integration/testdata/embed_1.xml)
|
||||
~embeds: @file(../test/integration/testdata/embed_2.xml)
|
||||
~embedsMetadata: {"embed_1.xml":{"mimeType":"text/xml","relationship":"Data"}, "embed_2.xml":{"mimeType":"text/xml","relationship":"Data"}}
|
||||
~watermarkSource: text
|
||||
~watermarkExpression: CONFIDENTIAL
|
||||
~watermarkPages:
|
||||
|
||||
@@ -11,7 +11,7 @@ post {
|
||||
}
|
||||
|
||||
body:multipart-form {
|
||||
files: @file(../../test/integration/testdata/page_1.docx)
|
||||
files: @file(../test/integration/testdata/page_1.docx)
|
||||
~password:
|
||||
~landscape: false
|
||||
~nativePageRanges:
|
||||
@@ -67,6 +67,9 @@ body:multipart-form {
|
||||
~metadata: {"Author":"Bruno","Title":"Test"}
|
||||
~userPassword:
|
||||
~ownerPassword:
|
||||
~embeds: @file(../test/integration/testdata/embed_1.xml)
|
||||
~embeds: @file(../test/integration/testdata/embed_2.xml)
|
||||
~embedsMetadata: {"embed_1.xml":{"mimeType":"text/xml","relationship":"Data"}, "embed_2.xml":{"mimeType":"text/xml","relationship":"Data"}}
|
||||
~watermarkSource: text
|
||||
~watermarkExpression: CONFIDENTIAL
|
||||
~watermarkPages:
|
||||
|
||||
@@ -11,8 +11,10 @@ post {
|
||||
}
|
||||
|
||||
body:multipart-form {
|
||||
files: @file(../../test/integration/testdata/page_1.pdf)
|
||||
embeds: @file(../../test/integration/testdata/page_1.pdf)
|
||||
files: @file(../test/integration/testdata/page_1.pdf)
|
||||
embeds: @file(../test/integration/testdata/embed_1.xml)
|
||||
embeds: @file(../test/integration/testdata/embed_2.xml)
|
||||
embedsMetadata: {"embed_1.xml":{"mimeType":"text/xml","relationship":"Data"}, "embed_2.xml":{"mimeType":"text/xml","relationship":"Data"}}
|
||||
~downloadFrom: [{"url":"https://example.com/attachment.xml","embedded":true}]
|
||||
}
|
||||
|
||||
|
||||
@@ -76,7 +76,8 @@ PDFENGINES_WATERMARK_ENGINES=pdfcpu,pdftk
|
||||
PDFENGINES_STAMP_ENGINES=pdfcpu,pdftk
|
||||
PDFENGINES_ENCRYPT_ENGINES=qpdf,pdfcpu,pdftk
|
||||
PDFENGINES_ROTATE_ENGINES=pdfcpu,pdftk
|
||||
PDFENGINES_EMBED_ENGINES=pdfcpu
|
||||
PDFENGINES_EMBED_ENGINES=qpdf,pdfcpu
|
||||
PDFENGINES_EMBED_METADATA_ENGINES=qpdf
|
||||
PROMETHEUS_NAMESPACE=gotenberg
|
||||
PROMETHEUS_COLLECT_INTERVAL=1s
|
||||
PROMETHEUS_DISABLE_ROUTE_TELEMETRY=true
|
||||
|
||||
@@ -77,6 +77,7 @@ services:
|
||||
- "--pdfengines-encrypt-engines=${PDFENGINES_ENCRYPT_ENGINES}"
|
||||
- "--pdfengines-rotate-engines=${PDFENGINES_ROTATE_ENGINES}"
|
||||
- "--pdfengines-embed-engines=${PDFENGINES_EMBED_ENGINES}"
|
||||
- "--pdfengines-embed-metadata-engines=${PDFENGINES_EMBED_METADATA_ENGINES}"
|
||||
- "--pdfengines-disable-routes=${PDFENGINES_DISABLE_ROUTES}"
|
||||
- "--prometheus-namespace=${PROMETHEUS_NAMESPACE}"
|
||||
- "--prometheus-collect-interval=${PROMETHEUS_COLLECT_INTERVAL}"
|
||||
|
||||
+19
-14
@@ -45,20 +45,21 @@ func (mod *DebuggableMock) Debug() map[string]any {
|
||||
//
|
||||
//nolint:dupl
|
||||
type PdfEngineMock struct {
|
||||
MergeMock func(ctx context.Context, logger *slog.Logger, inputPaths []string, outputPath string) error
|
||||
SplitMock func(ctx context.Context, logger *slog.Logger, mode SplitMode, inputPath, outputDirPath string) ([]string, error)
|
||||
FlattenMock func(ctx context.Context, logger *slog.Logger, inputPath string) error
|
||||
ConvertMock func(ctx context.Context, logger *slog.Logger, formats PdfFormats, inputPath, outputPath string) error
|
||||
ReadMetadataMock func(ctx context.Context, logger *slog.Logger, inputPath string) (map[string]any, error)
|
||||
PageCountMock func(ctx context.Context, logger *slog.Logger, inputPath string) (int, error)
|
||||
WriteMetadataMock func(ctx context.Context, logger *slog.Logger, metadata map[string]any, inputPath string) error
|
||||
ReadBookmarksMock func(ctx context.Context, logger *slog.Logger, inputPath string) ([]Bookmark, error)
|
||||
EncryptMock func(ctx context.Context, logger *slog.Logger, inputPath, userPassword, ownerPassword string) error
|
||||
EmbedFilesMock func(ctx context.Context, logger *slog.Logger, filePaths []string, inputPath string) error
|
||||
WriteBookmarksMock func(ctx context.Context, logger *slog.Logger, inputPath string, bookmarks []Bookmark) error
|
||||
WatermarkMock func(ctx context.Context, logger *slog.Logger, inputPath string, stamp Stamp) error
|
||||
StampMock func(ctx context.Context, logger *slog.Logger, inputPath string, stamp Stamp) error
|
||||
RotateMock func(ctx context.Context, logger *slog.Logger, inputPath string, angle int, pages string) error
|
||||
MergeMock func(ctx context.Context, logger *slog.Logger, inputPaths []string, outputPath string) error
|
||||
SplitMock func(ctx context.Context, logger *slog.Logger, mode SplitMode, inputPath, outputDirPath string) ([]string, error)
|
||||
FlattenMock func(ctx context.Context, logger *slog.Logger, inputPath string) error
|
||||
ConvertMock func(ctx context.Context, logger *slog.Logger, formats PdfFormats, inputPath, outputPath string) error
|
||||
ReadMetadataMock func(ctx context.Context, logger *slog.Logger, inputPath string) (map[string]any, error)
|
||||
PageCountMock func(ctx context.Context, logger *slog.Logger, inputPath string) (int, error)
|
||||
WriteMetadataMock func(ctx context.Context, logger *slog.Logger, metadata map[string]any, inputPath string) error
|
||||
ReadBookmarksMock func(ctx context.Context, logger *slog.Logger, inputPath string) ([]Bookmark, error)
|
||||
EncryptMock func(ctx context.Context, logger *slog.Logger, inputPath, userPassword, ownerPassword string) error
|
||||
EmbedFilesMock func(ctx context.Context, logger *slog.Logger, filePaths []string, inputPath string) error
|
||||
EmbedFilesMetadataMock func(ctx context.Context, logger *slog.Logger, metadata map[string]map[string]string, inputPath string) error
|
||||
WriteBookmarksMock func(ctx context.Context, logger *slog.Logger, inputPath string, bookmarks []Bookmark) error
|
||||
WatermarkMock func(ctx context.Context, logger *slog.Logger, inputPath string, stamp Stamp) error
|
||||
StampMock func(ctx context.Context, logger *slog.Logger, inputPath string, stamp Stamp) error
|
||||
RotateMock func(ctx context.Context, logger *slog.Logger, inputPath string, angle int, pages string) error
|
||||
}
|
||||
|
||||
func (engine *PdfEngineMock) Merge(ctx context.Context, logger *slog.Logger, inputPaths []string, outputPath string) error {
|
||||
@@ -101,6 +102,10 @@ func (engine *PdfEngineMock) EmbedFiles(ctx context.Context, logger *slog.Logger
|
||||
return engine.EmbedFilesMock(ctx, logger, filePaths, inputPath)
|
||||
}
|
||||
|
||||
func (engine *PdfEngineMock) EmbedFilesMetadata(ctx context.Context, logger *slog.Logger, metadata map[string]map[string]string, inputPath string) error {
|
||||
return engine.EmbedFilesMetadataMock(ctx, logger, metadata, inputPath)
|
||||
}
|
||||
|
||||
func (engine *PdfEngineMock) WriteBookmarks(ctx context.Context, logger *slog.Logger, inputPath string, bookmarks []Bookmark) error {
|
||||
return engine.WriteBookmarksMock(ctx, logger, inputPath, bookmarks)
|
||||
}
|
||||
|
||||
@@ -201,6 +201,12 @@ type PdfEngine interface {
|
||||
// TODO: attachments instead? Rename the route?
|
||||
EmbedFiles(ctx context.Context, logger *slog.Logger, filePaths []string, inputPath string) error
|
||||
|
||||
// EmbedFilesMetadata sets metadata (such as MIME type and AFRelationship)
|
||||
// on already-embedded files in a PDF. The metadata map is keyed by
|
||||
// filename, with each value being a map of property names to values
|
||||
// (e.g., "mimeType" and "relationship").
|
||||
EmbedFilesMetadata(ctx context.Context, logger *slog.Logger, metadata map[string]map[string]string, inputPath string) error
|
||||
|
||||
// Watermark applies a watermark (behind page content) to a PDF file.
|
||||
Watermark(ctx context.Context, logger *slog.Logger, inputPath string, stamp Stamp) error
|
||||
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package api
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"math"
|
||||
@@ -391,6 +392,38 @@ func (form *FormData) Embeds(target *[]string) *FormData {
|
||||
return form
|
||||
}
|
||||
|
||||
// EmbedsMetadata parses the "embedsMetadata" form field (a JSON string) into
|
||||
// a map keyed by filename. Each value is a map of property names to values
|
||||
// (e.g., "mimeType" and "relationship").
|
||||
//
|
||||
// var metadata map[string]map[string]string
|
||||
//
|
||||
// ctx.FormData().EmbedsMetadata(&metadata)
|
||||
func (form *FormData) EmbedsMetadata(target *map[string]map[string]string) *FormData {
|
||||
if form.errors != nil {
|
||||
return form
|
||||
}
|
||||
|
||||
val, ok := form.values["embedsMetadata"]
|
||||
if !ok || len(val) == 0 || val[0] == "" {
|
||||
return form
|
||||
}
|
||||
|
||||
raw := val[0]
|
||||
parsed := make(map[string]map[string]string)
|
||||
|
||||
err := json.Unmarshal([]byte(raw), &parsed)
|
||||
if err != nil {
|
||||
form.append(
|
||||
fmt.Errorf("form field 'embedsMetadata' is invalid: %w", err),
|
||||
)
|
||||
return form
|
||||
}
|
||||
|
||||
*target = parsed
|
||||
return form
|
||||
}
|
||||
|
||||
// MandatoryPaths binds the absolute paths of form data files, according to a
|
||||
// list of file extensions, to a string slice variable. It populates an error
|
||||
// if there is no file for given file extensions.
|
||||
|
||||
@@ -421,6 +421,7 @@ func convertUrlRoute(chromium Api, engine gotenberg.PdfEngine) api.Route {
|
||||
stamp := pdfengines.FormDataPdfStamp(form, false)
|
||||
stampFile := pdfengines.FormDataPdfStampFile(form)
|
||||
rotateAngle, rotatePages := pdfengines.FormDataPdfRotate(form, false)
|
||||
embedsMetadata := pdfengines.FormDataPdfEmbedsMetadata(form)
|
||||
|
||||
var url string
|
||||
err := form.
|
||||
@@ -437,7 +438,7 @@ func convertUrlRoute(chromium Api, engine gotenberg.PdfEngine) api.Route {
|
||||
stamp.Expression = stampFile
|
||||
}
|
||||
|
||||
err = convertUrl(ctx, chromium, engine, url, options, mode, pdfFormats, metadata, userPassword, ownerPassword, embedPaths, watermark, stamp, rotateAngle, rotatePages)
|
||||
err = convertUrl(ctx, chromium, engine, url, options, mode, pdfFormats, metadata, userPassword, ownerPassword, embedPaths, embedsMetadata, watermark, stamp, rotateAngle, rotatePages)
|
||||
if err != nil {
|
||||
return fmt.Errorf("convert URL to PDF: %w", err)
|
||||
}
|
||||
@@ -496,6 +497,7 @@ func convertHtmlRoute(chromium Api, engine gotenberg.PdfEngine) api.Route {
|
||||
stamp := pdfengines.FormDataPdfStamp(form, false)
|
||||
stampFile := pdfengines.FormDataPdfStampFile(form)
|
||||
rotateAngle, rotatePages := pdfengines.FormDataPdfRotate(form, false)
|
||||
embedsMetadata := pdfengines.FormDataPdfEmbedsMetadata(form)
|
||||
|
||||
var inputPath string
|
||||
err := form.
|
||||
@@ -514,7 +516,7 @@ func convertHtmlRoute(chromium Api, engine gotenberg.PdfEngine) api.Route {
|
||||
|
||||
url := fmt.Sprintf("file://%s", inputPath)
|
||||
options.AllowedFilePrefixes = []string{ctx.DirPath()}
|
||||
err = convertUrl(ctx, chromium, engine, url, options, mode, pdfFormats, metadata, userPassword, ownerPassword, embedPaths, watermark, stamp, rotateAngle, rotatePages)
|
||||
err = convertUrl(ctx, chromium, engine, url, options, mode, pdfFormats, metadata, userPassword, ownerPassword, embedPaths, embedsMetadata, watermark, stamp, rotateAngle, rotatePages)
|
||||
if err != nil {
|
||||
return fmt.Errorf("convert HTML to PDF: %w", err)
|
||||
}
|
||||
@@ -575,6 +577,7 @@ func convertMarkdownRoute(chromium Api, engine gotenberg.PdfEngine) api.Route {
|
||||
stamp := pdfengines.FormDataPdfStamp(form, false)
|
||||
stampFile := pdfengines.FormDataPdfStampFile(form)
|
||||
rotateAngle, rotatePages := pdfengines.FormDataPdfRotate(form, false)
|
||||
embedsMetadata := pdfengines.FormDataPdfEmbedsMetadata(form)
|
||||
|
||||
var (
|
||||
inputPath string
|
||||
@@ -602,7 +605,7 @@ func convertMarkdownRoute(chromium Api, engine gotenberg.PdfEngine) api.Route {
|
||||
}
|
||||
|
||||
options.AllowedFilePrefixes = []string{ctx.DirPath()}
|
||||
err = convertUrl(ctx, chromium, engine, url, options, mode, pdfFormats, metadata, userPassword, ownerPassword, embedPaths, watermark, stamp, rotateAngle, rotatePages)
|
||||
err = convertUrl(ctx, chromium, engine, url, options, mode, pdfFormats, metadata, userPassword, ownerPassword, embedPaths, embedsMetadata, watermark, stamp, rotateAngle, rotatePages)
|
||||
if err != nil {
|
||||
return fmt.Errorf("convert markdown to PDF: %w", err)
|
||||
}
|
||||
@@ -727,7 +730,7 @@ func markdownToHtml(ctx *api.Context, inputPath string, markdownPaths []string)
|
||||
return fmt.Sprintf("file://%s", inputPath), nil
|
||||
}
|
||||
|
||||
func convertUrl(ctx *api.Context, chromium Api, engine gotenberg.PdfEngine, url string, options PdfOptions, mode gotenberg.SplitMode, pdfFormats gotenberg.PdfFormats, metadata map[string]any, userPassword, ownerPassword string, embedPaths []string, watermark, stamp gotenberg.Stamp, rotateAngle int, rotatePages string) error {
|
||||
func convertUrl(ctx *api.Context, chromium Api, engine gotenberg.PdfEngine, url string, options PdfOptions, mode gotenberg.SplitMode, pdfFormats gotenberg.PdfFormats, metadata map[string]any, userPassword, ownerPassword string, embedPaths []string, embedsMetadata map[string]map[string]string, watermark, stamp gotenberg.Stamp, rotateAngle int, rotatePages string) error {
|
||||
outputPath := ctx.GeneratePath(".pdf")
|
||||
// See https://github.com/gotenberg/gotenberg/issues/1130.
|
||||
filename := ctx.OutputFilename(outputPath)
|
||||
@@ -831,6 +834,11 @@ func convertUrl(ctx *api.Context, chromium Api, engine gotenberg.PdfEngine, url
|
||||
return fmt.Errorf("embed files into PDFs: %w", err)
|
||||
}
|
||||
|
||||
err = pdfengines.EmbedFilesMetadataStub(ctx, engine, embedsMetadata, convertOutputPaths)
|
||||
if err != nil {
|
||||
return fmt.Errorf("set embeds metadata: %w", err)
|
||||
}
|
||||
|
||||
err = pdfengines.EncryptPdfStub(ctx, engine, userPassword, ownerPassword, convertOutputPaths)
|
||||
if err != nil {
|
||||
return fmt.Errorf("encrypt PDFs: %w", err)
|
||||
|
||||
@@ -531,6 +531,11 @@ func (engine *ExifTool) Rotate(ctx context.Context, logger *slog.Logger, inputPa
|
||||
return err
|
||||
}
|
||||
|
||||
// EmbedFilesMetadata is not available in this implementation.
|
||||
func (engine *ExifTool) EmbedFilesMetadata(ctx context.Context, logger *slog.Logger, metadata map[string]map[string]string, inputPath string) error {
|
||||
return fmt.Errorf("set embeds metadata with ExifTool: %w", gotenberg.ErrPdfEngineMethodNotSupported)
|
||||
}
|
||||
|
||||
// Interface guards.
|
||||
var (
|
||||
_ gotenberg.Module = (*ExifTool)(nil)
|
||||
|
||||
@@ -115,6 +115,11 @@ func (engine *LibreOfficePdfEngine) EmbedFiles(ctx context.Context, logger *slog
|
||||
return fmt.Errorf("embed files with LibreOffice: %w", gotenberg.ErrPdfEngineMethodNotSupported)
|
||||
}
|
||||
|
||||
// EmbedFilesMetadata is not available in this implementation.
|
||||
func (engine *LibreOfficePdfEngine) EmbedFilesMetadata(ctx context.Context, logger *slog.Logger, metadata map[string]map[string]string, inputPath string) error {
|
||||
return fmt.Errorf("set embeds metadata with LibreOffice: %w", gotenberg.ErrPdfEngineMethodNotSupported)
|
||||
}
|
||||
|
||||
// Watermark is not available in this implementation.
|
||||
func (engine *LibreOfficePdfEngine) Watermark(ctx context.Context, logger *slog.Logger, inputPath string, stamp gotenberg.Stamp) error {
|
||||
return fmt.Errorf("watermark PDF with LibreOffice: %w", gotenberg.ErrPdfEngineMethodNotSupported)
|
||||
|
||||
@@ -37,6 +37,7 @@ func convertRoute(libreOffice libreofficeapi.Uno, engine gotenberg.PdfEngine) ap
|
||||
stamp := pdfengines.FormDataPdfStamp(form, false)
|
||||
stampFile := pdfengines.FormDataPdfStampFile(form)
|
||||
angle, rotatePages := pdfengines.FormDataPdfRotate(form, false)
|
||||
embedsMetadata := pdfengines.FormDataPdfEmbedsMetadata(form)
|
||||
|
||||
zeroValuedSplitMode := gotenberg.SplitMode{}
|
||||
|
||||
@@ -495,6 +496,11 @@ func convertRoute(libreOffice libreofficeapi.Uno, engine gotenberg.PdfEngine) ap
|
||||
return fmt.Errorf("embed files into PDFs: %w", err)
|
||||
}
|
||||
|
||||
err = pdfengines.EmbedFilesMetadataStub(ctx, engine, embedsMetadata, outputPaths)
|
||||
if err != nil {
|
||||
return fmt.Errorf("set embeds metadata: %w", err)
|
||||
}
|
||||
|
||||
err = pdfengines.EncryptPdfStub(ctx, engine, userPassword, ownerPassword, outputPaths)
|
||||
if err != nil {
|
||||
return fmt.Errorf("encrypt PDFs: %w", err)
|
||||
|
||||
@@ -447,6 +447,11 @@ func (engine *PdfCpu) WriteBookmarks(ctx context.Context, logger *slog.Logger, i
|
||||
return nil
|
||||
}
|
||||
|
||||
// EmbedFilesMetadata is not available in this implementation.
|
||||
func (engine *PdfCpu) EmbedFilesMetadata(ctx context.Context, logger *slog.Logger, metadata map[string]map[string]string, inputPath string) error {
|
||||
return fmt.Errorf("set embeds metadata with pdfcpu: %w", gotenberg.ErrPdfEngineMethodNotSupported)
|
||||
}
|
||||
|
||||
// EmbedFiles embeds files into a PDF. All files are embedded as file attachments
|
||||
// without modifying the main PDF content.
|
||||
func (engine *PdfCpu) EmbedFiles(ctx context.Context, logger *slog.Logger, filePaths []string, inputPath string) error {
|
||||
|
||||
@@ -22,6 +22,7 @@ type multiPdfEngines struct {
|
||||
writeMetadataEngines []gotenberg.PdfEngine
|
||||
passwordEngines []gotenberg.PdfEngine
|
||||
embedEngines []gotenberg.PdfEngine
|
||||
embedMetadataEngines []gotenberg.PdfEngine
|
||||
readBookmarksEngines []gotenberg.PdfEngine
|
||||
writeBookmarksEngines []gotenberg.PdfEngine
|
||||
watermarkEngines []gotenberg.PdfEngine
|
||||
@@ -38,6 +39,7 @@ func newMultiPdfEngines(
|
||||
writeMetadataEngines,
|
||||
passwordEngines,
|
||||
embedEngines,
|
||||
embedMetadataEngines,
|
||||
readBookmarksEngines,
|
||||
writeBookmarksEngines,
|
||||
watermarkEngines,
|
||||
@@ -53,6 +55,7 @@ func newMultiPdfEngines(
|
||||
writeMetadataEngines: writeMetadataEngines,
|
||||
passwordEngines: passwordEngines,
|
||||
embedEngines: embedEngines,
|
||||
embedMetadataEngines: embedMetadataEngines,
|
||||
readBookmarksEngines: readBookmarksEngines,
|
||||
writeBookmarksEngines: writeBookmarksEngines,
|
||||
watermarkEngines: watermarkEngines,
|
||||
@@ -603,6 +606,43 @@ func (multi *multiPdfEngines) Rotate(ctx context.Context, logger *slog.Logger, i
|
||||
return err
|
||||
}
|
||||
|
||||
// EmbedFilesMetadata sets metadata on embedded files using the first available
|
||||
// engine that supports it.
|
||||
//
|
||||
//nolint:dupl
|
||||
func (multi *multiPdfEngines) EmbedFilesMetadata(ctx context.Context, logger *slog.Logger, metadata map[string]map[string]string, inputPath string) error {
|
||||
tracer := gotenberg.Tracer()
|
||||
ctx, span := tracer.Start(ctx, "pdfengines.EmbedFilesMetadata", trace.WithSpanKind(trace.SpanKindInternal))
|
||||
defer span.End()
|
||||
|
||||
var err error
|
||||
errChan := make(chan error, 1)
|
||||
|
||||
for _, engine := range multi.embedMetadataEngines {
|
||||
go func(engine gotenberg.PdfEngine) {
|
||||
errChan <- engine.EmbedFilesMetadata(ctx, logger, metadata, inputPath)
|
||||
}(engine)
|
||||
|
||||
select {
|
||||
case setErr := <-errChan:
|
||||
if setErr != nil {
|
||||
err = errors.Join(err, setErr)
|
||||
} else {
|
||||
span.SetStatus(codes.Ok, "")
|
||||
return nil
|
||||
}
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
}
|
||||
}
|
||||
|
||||
err = fmt.Errorf("set embeds metadata using multi PDF engines: %w", err)
|
||||
span.RecordError(err)
|
||||
span.SetStatus(codes.Error, err.Error())
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
// Interface guards.
|
||||
var (
|
||||
_ gotenberg.PdfEngine = (*multiPdfEngines)(nil)
|
||||
|
||||
@@ -36,6 +36,7 @@ type PdfEngines struct {
|
||||
writeMetadataNames []string
|
||||
encryptNames []string
|
||||
embedNames []string
|
||||
embedMetadataNames []string
|
||||
readBookmarksNames []string
|
||||
writeBookmarksNames []string
|
||||
watermarkNames []string
|
||||
@@ -59,6 +60,7 @@ func (mod *PdfEngines) Descriptor() gotenberg.ModuleDescriptor {
|
||||
fs.StringSlice("pdfengines-write-metadata-engines", []string{"exiftool"}, "Set the PDF engines and their order for the write metadata feature - empty means all")
|
||||
fs.StringSlice("pdfengines-encrypt-engines", []string{"qpdf", "pdftk", "pdfcpu"}, "Set the PDF engines and their order for the password protection feature - empty means all")
|
||||
fs.StringSlice("pdfengines-embed-engines", []string{"pdfcpu"}, "Set the PDF engines and their order for the file embedding feature - empty means all")
|
||||
fs.StringSlice("pdfengines-embed-metadata-engines", []string{"qpdf"}, "Set the PDF engines and their order for the embed metadata feature - empty means all")
|
||||
fs.StringSlice("pdfengines-read-bookmarks-engines", []string{"pdfcpu"}, "Set the PDF engines and their order for the read bookmarks feature - empty means all")
|
||||
fs.StringSlice("pdfengines-write-bookmarks-engines", []string{"pdfcpu"}, "Set the PDF engines and their order for the write bookmarks feature - empty means all")
|
||||
fs.StringSlice("pdfengines-watermark-engines", []string{"pdfcpu", "pdftk"}, "Set the PDF engines and their order for the watermark feature - empty means all")
|
||||
@@ -91,6 +93,7 @@ func (mod *PdfEngines) Provision(ctx *gotenberg.Context) error {
|
||||
writeMetadataNames := flags.MustStringSlice("pdfengines-write-metadata-engines")
|
||||
encryptNames := flags.MustStringSlice("pdfengines-encrypt-engines")
|
||||
embedNames := flags.MustStringSlice("pdfengines-embed-engines")
|
||||
embedMetadataNames := flags.MustStringSlice("pdfengines-embed-metadata-engines")
|
||||
readBookmarksNames := flags.MustStringSlice("pdfengines-read-bookmarks-engines")
|
||||
writeBookmarksNames := flags.MustStringSlice("pdfengines-write-bookmarks-engines")
|
||||
watermarkNames := flags.MustStringSlice("pdfengines-watermark-engines")
|
||||
@@ -162,6 +165,11 @@ func (mod *PdfEngines) Provision(ctx *gotenberg.Context) error {
|
||||
mod.embedNames = embedNames
|
||||
}
|
||||
|
||||
mod.embedMetadataNames = defaultNames
|
||||
if len(embedMetadataNames) > 0 {
|
||||
mod.embedMetadataNames = embedMetadataNames
|
||||
}
|
||||
|
||||
mod.readBookmarksNames = defaultNames
|
||||
if len(readBookmarksNames) > 0 {
|
||||
mod.readBookmarksNames = readBookmarksNames
|
||||
@@ -236,6 +244,7 @@ func (mod *PdfEngines) Validate() error {
|
||||
findNonExistingEngines(mod.writeMetadataNames)
|
||||
findNonExistingEngines(mod.encryptNames)
|
||||
findNonExistingEngines(mod.embedNames)
|
||||
findNonExistingEngines(mod.embedMetadataNames)
|
||||
findNonExistingEngines(mod.readBookmarksNames)
|
||||
findNonExistingEngines(mod.writeBookmarksNames)
|
||||
findNonExistingEngines(mod.watermarkNames)
|
||||
@@ -261,6 +270,7 @@ func (mod *PdfEngines) SystemMessages() []string {
|
||||
fmt.Sprintf("write metadata engines - %s", strings.Join(mod.writeMetadataNames, " ")),
|
||||
fmt.Sprintf("encrypt engines - %s", strings.Join(mod.encryptNames, " ")),
|
||||
fmt.Sprintf("embed engines - %s", strings.Join(mod.embedNames, " ")),
|
||||
fmt.Sprintf("embed metadata engines - %s", strings.Join(mod.embedMetadataNames, " ")),
|
||||
fmt.Sprintf("read bookmarks engines - %s", strings.Join(mod.readBookmarksNames, " ")),
|
||||
fmt.Sprintf("write bookmarks engines - %s", strings.Join(mod.writeBookmarksNames, " ")),
|
||||
fmt.Sprintf("watermark engines - %s", strings.Join(mod.watermarkNames, " ")),
|
||||
@@ -294,6 +304,7 @@ func (mod *PdfEngines) PdfEngine() (gotenberg.PdfEngine, error) {
|
||||
engines(mod.writeMetadataNames),
|
||||
engines(mod.encryptNames),
|
||||
engines(mod.embedNames),
|
||||
engines(mod.embedMetadataNames),
|
||||
engines(mod.readBookmarksNames),
|
||||
engines(mod.writeBookmarksNames),
|
||||
engines(mod.watermarkNames),
|
||||
|
||||
@@ -443,6 +443,30 @@ func FormDataPdfEmbeds(form *api.FormData) []string {
|
||||
return embedPaths
|
||||
}
|
||||
|
||||
// FormDataPdfEmbedsMetadata extracts embeds metadata from form data.
|
||||
// The "embedsMetadata" field is a JSON string keyed by filename.
|
||||
func FormDataPdfEmbedsMetadata(form *api.FormData) map[string]map[string]string {
|
||||
var metadata map[string]map[string]string
|
||||
form.EmbedsMetadata(&metadata)
|
||||
return metadata
|
||||
}
|
||||
|
||||
// EmbedFilesMetadataStub sets metadata on embedded files in PDFs.
|
||||
func EmbedFilesMetadataStub(ctx *api.Context, engine gotenberg.PdfEngine, metadata map[string]map[string]string, inputPaths []string) error {
|
||||
if len(metadata) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
for _, inputPath := range inputPaths {
|
||||
err := engine.EmbedFilesMetadata(ctx, ctx.Log(), metadata, inputPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("set embeds metadata on PDF '%s': %w", inputPath, err)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// FormDataPdfEncrypt extracts encryption parameters from form data.
|
||||
func FormDataPdfEncrypt(form *api.FormData) (userPassword, ownerPassword string) {
|
||||
form.String("userPassword", &userPassword, "")
|
||||
@@ -638,6 +662,7 @@ func mergeRoute(engine gotenberg.PdfEngine) api.Route {
|
||||
stamp := FormDataPdfStamp(form, false)
|
||||
stampFile := FormDataPdfStampFile(form)
|
||||
angle, rotatePages := FormDataPdfRotate(form, false)
|
||||
embedsMetadata := FormDataPdfEmbedsMetadata(form)
|
||||
|
||||
var inputPaths []string
|
||||
var flatten bool
|
||||
@@ -754,6 +779,11 @@ func mergeRoute(engine gotenberg.PdfEngine) api.Route {
|
||||
return fmt.Errorf("embed files into PDFs: %w", err)
|
||||
}
|
||||
|
||||
err = EmbedFilesMetadataStub(ctx, engine, embedsMetadata, outputPaths)
|
||||
if err != nil {
|
||||
return fmt.Errorf("set embeds metadata: %w", err)
|
||||
}
|
||||
|
||||
err = EncryptPdfStub(ctx, engine, userPassword, ownerPassword, outputPaths)
|
||||
if err != nil {
|
||||
return fmt.Errorf("encrypt PDFs: %w", err)
|
||||
@@ -789,6 +819,7 @@ func splitRoute(engine gotenberg.PdfEngine) api.Route {
|
||||
stamp := FormDataPdfStamp(form, false)
|
||||
stampFile := FormDataPdfStampFile(form)
|
||||
angle, rotatePages := FormDataPdfRotate(form, false)
|
||||
embedsMetadata := FormDataPdfEmbedsMetadata(form)
|
||||
|
||||
var inputPaths []string
|
||||
var flatten bool
|
||||
@@ -856,6 +887,11 @@ func splitRoute(engine gotenberg.PdfEngine) api.Route {
|
||||
return fmt.Errorf("embed files into PDFs: %w", err)
|
||||
}
|
||||
|
||||
err = EmbedFilesMetadataStub(ctx, engine, embedsMetadata, convertOutputPaths)
|
||||
if err != nil {
|
||||
return fmt.Errorf("set embeds metadata: %w", err)
|
||||
}
|
||||
|
||||
err = EncryptPdfStub(ctx, engine, userPassword, ownerPassword, convertOutputPaths)
|
||||
if err != nil {
|
||||
return fmt.Errorf("encrypt PDFs: %w", err)
|
||||
@@ -1180,6 +1216,7 @@ func embedRoute(engine gotenberg.PdfEngine) api.Route {
|
||||
|
||||
form := ctx.FormData()
|
||||
embedPaths := FormDataPdfEmbeds(form)
|
||||
embedsMetadata := FormDataPdfEmbedsMetadata(form)
|
||||
|
||||
var inputPaths []string
|
||||
err := form.
|
||||
@@ -1193,6 +1230,11 @@ func embedRoute(engine gotenberg.PdfEngine) api.Route {
|
||||
return fmt.Errorf("embed files into PDFs: %w", err)
|
||||
}
|
||||
|
||||
err = EmbedFilesMetadataStub(ctx, engine, embedsMetadata, inputPaths)
|
||||
if err != nil {
|
||||
return fmt.Errorf("set embeds metadata: %w", err)
|
||||
}
|
||||
|
||||
err = ctx.AddOutputPaths(inputPaths...)
|
||||
if err != nil {
|
||||
return fmt.Errorf("add output paths: %w", err)
|
||||
|
||||
@@ -495,6 +495,11 @@ func (engine *PdfTk) Rotate(ctx context.Context, logger *slog.Logger, inputPath
|
||||
return nil
|
||||
}
|
||||
|
||||
// EmbedFilesMetadata is not available in this implementation.
|
||||
func (engine *PdfTk) EmbedFilesMetadata(ctx context.Context, logger *slog.Logger, metadata map[string]map[string]string, inputPath string) error {
|
||||
return fmt.Errorf("set embeds metadata with PDFtk: %w", gotenberg.ErrPdfEngineMethodNotSupported)
|
||||
}
|
||||
|
||||
// Interface guards.
|
||||
var (
|
||||
_ gotenberg.Module = (*PdfTk)(nil)
|
||||
|
||||
@@ -3,12 +3,14 @@ package qpdf
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"syscall"
|
||||
|
||||
"go.opentelemetry.io/otel/codes"
|
||||
@@ -348,6 +350,291 @@ func (engine *QPdf) EmbedFiles(ctx context.Context, logger *slog.Logger, filePat
|
||||
return err
|
||||
}
|
||||
|
||||
// EmbedFilesMetadata sets metadata on already-embedded files in a PDF using
|
||||
// QPDF's JSON manipulation. It sets /AFRelationship on Filespec objects,
|
||||
// /Subtype on EmbeddedFile streams, and ensures the Catalog /AF array
|
||||
// references the Filespec objects.
|
||||
func (engine *QPdf) EmbedFilesMetadata(ctx context.Context, logger *slog.Logger, metadata map[string]map[string]string, inputPath string) error {
|
||||
ctx, span := gotenberg.Tracer().Start(ctx, "qpdf.EmbedFilesMetadata",
|
||||
trace.WithSpanKind(trace.SpanKindClient),
|
||||
trace.WithAttributes(semconv.ServerAddress(engine.binPath)),
|
||||
)
|
||||
defer span.End()
|
||||
|
||||
if len(metadata) == 0 {
|
||||
span.SetStatus(codes.Ok, "")
|
||||
return nil
|
||||
}
|
||||
|
||||
logger.DebugContext(ctx, fmt.Sprintf("setting embeds metadata on %s with QPDF", inputPath))
|
||||
|
||||
args := append([]string{inputPath}, engine.globalArgs...)
|
||||
args = append(args, "--newline-before-endstream", "--json-output")
|
||||
|
||||
output, err := engine.execCaptureOutput(ctx, args...)
|
||||
if err != nil {
|
||||
err = fmt.Errorf("get PDF JSON with QPDF: %w", err)
|
||||
span.RecordError(err)
|
||||
span.SetStatus(codes.Error, err.Error())
|
||||
return err
|
||||
}
|
||||
|
||||
objects, err := parsePdfObjects(output)
|
||||
if err != nil {
|
||||
span.RecordError(err)
|
||||
span.SetStatus(codes.Error, err.Error())
|
||||
return err
|
||||
}
|
||||
|
||||
catalogRef, catalogValue, filespecRefs, updateObjects := patchFilespecMetadata(logger, objects, metadata)
|
||||
if len(filespecRefs) == 0 {
|
||||
span.SetStatus(codes.Ok, "")
|
||||
return nil
|
||||
}
|
||||
|
||||
patchCatalogAF(catalogRef, catalogValue, filespecRefs, updateObjects)
|
||||
|
||||
err = engine.writeAndApplyUpdate(ctx, logger, inputPath, updateObjects)
|
||||
if err != nil {
|
||||
span.RecordError(err)
|
||||
span.SetStatus(codes.Error, err.Error())
|
||||
return err
|
||||
}
|
||||
|
||||
span.SetStatus(codes.Ok, "")
|
||||
return nil
|
||||
}
|
||||
|
||||
// execCaptureOutput runs QPDF and returns its stdout. This uses
|
||||
// exec.CommandContext directly because gotenberg.Cmd does not support
|
||||
// capturing stdout (it only pipes to debug logs).
|
||||
func (engine *QPdf) execCaptureOutput(ctx context.Context, args ...string) ([]byte, error) {
|
||||
cmd := exec.CommandContext(ctx, engine.binPath, args...) //nolint:gosec
|
||||
cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
|
||||
return cmd.Output()
|
||||
}
|
||||
|
||||
// parsePdfObjects decodes QPDF JSON v2 output and returns the objects map,
// i.e. the second element of the top-level "qpdf" array, keyed by object
// reference with each entry left as raw JSON. It fails when the payload is
// not valid JSON, when the "qpdf" array has fewer than two elements, or when
// the second element is not a JSON object.
func parsePdfObjects(output []byte) (map[string]json.RawMessage, error) {
	envelope := struct {
		Qpdf []json.RawMessage `json:"qpdf"`
	}{}

	err := json.Unmarshal(output, &envelope)
	if err != nil {
		return nil, fmt.Errorf("parse PDF JSON: %w", err)
	}

	// Element 0 is the header; element 1 carries the objects.
	const objectsIndex = 1
	if len(envelope.Qpdf) <= objectsIndex {
		return nil, fmt.Errorf("unexpected QPDF JSON structure: expected at least 2 elements")
	}

	var objects map[string]json.RawMessage
	err = json.Unmarshal(envelope.Qpdf[objectsIndex], &objects)
	if err != nil {
		return nil, fmt.Errorf("parse QPDF objects: %w", err)
	}

	return objects, nil
}
|
||||
|
||||
// patchFilespecMetadata walks QPDF objects to find Filespecs matching the
|
||||
// metadata keys. It sets /AFRelationship and /Subtype on matching objects
|
||||
// and returns the catalog reference, catalog value, filespec references,
|
||||
// and the update objects map.
|
||||
func patchFilespecMetadata(logger *slog.Logger, objects map[string]json.RawMessage, metadata map[string]map[string]string) (string, map[string]any, []string, map[string]any) {
|
||||
updateObjects := make(map[string]any)
|
||||
var catalogRef string
|
||||
var catalogValue map[string]any
|
||||
var filespecRefs []string
|
||||
|
||||
for ref, raw := range objects {
|
||||
var obj map[string]json.RawMessage
|
||||
if err := json.Unmarshal(raw, &obj); err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
valueRaw, hasValue := obj["value"]
|
||||
if !hasValue {
|
||||
continue
|
||||
}
|
||||
|
||||
var value map[string]any
|
||||
if err := json.Unmarshal(valueRaw, &value); err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
typeVal, _ := value["/Type"].(string)
|
||||
|
||||
if typeVal == "/Catalog" {
|
||||
catalogRef = ref
|
||||
catalogValue = value
|
||||
}
|
||||
|
||||
if typeVal == "/Filespec" {
|
||||
uf, _ := value["/UF"].(string)
|
||||
if uf == "" {
|
||||
uf, _ = value["/F"].(string)
|
||||
}
|
||||
|
||||
cleanUf := stripQpdfStringPrefix(uf)
|
||||
|
||||
meta, exists := metadata[cleanUf]
|
||||
if !exists {
|
||||
continue
|
||||
}
|
||||
|
||||
if rel, ok := meta["relationship"]; ok {
|
||||
value["/AFRelationship"] = "/" + rel
|
||||
}
|
||||
|
||||
if mimeType, ok := meta["mimeType"]; ok {
|
||||
if ef, ok := value["/EF"].(map[string]any); ok {
|
||||
efRef, _ := ef["/F"].(string)
|
||||
if efRef != "" {
|
||||
setStreamSubtype(logger, objects, updateObjects, efRef, mimeType)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
filespecRefs = append(filespecRefs, ref)
|
||||
updateObjects[ref] = map[string]any{"value": value}
|
||||
}
|
||||
}
|
||||
|
||||
return catalogRef, catalogValue, filespecRefs, updateObjects
|
||||
}
|
||||
|
||||
// patchCatalogAF ensures the Catalog /AF array references all filespec
// objects and registers the patched Catalog in updateObjects under
// catalogRef.
//
// catalogValue is modified in place: its "/AF" entry is replaced by the
// existing array (if it was one) extended with every reference from
// filespecRefs that is not already listed. Each reference is added at most
// once, even when filespecRefs itself repeats an entry. The function does
// nothing when catalogRef is empty or catalogValue is nil.
func patchCatalogAF(catalogRef string, catalogValue map[string]any, filespecRefs []string, updateObjects map[string]any) {
	if catalogRef == "" || catalogValue == nil {
		return
	}

	af, _ := catalogValue["/AF"].([]any)

	seen := make(map[string]struct{}, len(af)+len(filespecRefs))
	for _, entry := range af {
		if s, ok := entry.(string); ok {
			seen[s] = struct{}{}
		}
	}

	for _, ref := range filespecRefs {
		// Object references in values use "9 0 R" format,
		// not the "obj:9 0 R" key format.
		valRef := strings.TrimPrefix(ref, "obj:")
		if _, dup := seen[valRef]; dup {
			continue
		}
		// Remember the reference so a repeated entry in filespecRefs is
		// not appended a second time.
		seen[valRef] = struct{}{}
		af = append(af, valRef)
	}

	catalogValue["/AF"] = af
	updateObjects[catalogRef] = map[string]any{"value": catalogValue}
}
|
||||
|
||||
// writeAndApplyUpdate marshals the update objects as QPDF JSON v2, writes
|
||||
// them to a temp file, and applies the update via --update-from-json.
|
||||
func (engine *QPdf) writeAndApplyUpdate(ctx context.Context, logger *slog.Logger, inputPath string, updateObjects map[string]any) error {
|
||||
updateJSON := map[string]any{
|
||||
"qpdf": []any{
|
||||
map[string]any{
|
||||
"jsonversion": 2,
|
||||
"pushedinheritedpageresources": false,
|
||||
"calledgetallpages": false,
|
||||
"maxobjectid": 0,
|
||||
},
|
||||
updateObjects,
|
||||
},
|
||||
}
|
||||
|
||||
jsonBytes, err := json.Marshal(updateJSON)
|
||||
if err != nil {
|
||||
return fmt.Errorf("marshal update JSON: %w", err)
|
||||
}
|
||||
|
||||
tmpFile, err := os.CreateTemp(filepath.Dir(inputPath), "qpdf-embeds-metadata-*.json")
|
||||
if err != nil {
|
||||
return fmt.Errorf("create temp file for update JSON: %w", err)
|
||||
}
|
||||
defer os.Remove(tmpFile.Name())
|
||||
|
||||
if _, err := tmpFile.Write(jsonBytes); err != nil {
|
||||
tmpFile.Close()
|
||||
return fmt.Errorf("write update JSON: %w", err)
|
||||
}
|
||||
if err := tmpFile.Close(); err != nil {
|
||||
return fmt.Errorf("close temp file: %w", err)
|
||||
}
|
||||
|
||||
updateArgs := make([]string, 0, 5+len(engine.globalArgs))
|
||||
updateArgs = append(updateArgs, inputPath)
|
||||
updateArgs = append(updateArgs, engine.globalArgs...)
|
||||
updateArgs = append(updateArgs, "--newline-before-endstream")
|
||||
updateArgs = append(updateArgs, "--update-from-json="+tmpFile.Name())
|
||||
updateArgs = append(updateArgs, "--replace-input")
|
||||
|
||||
cmd, err := gotenberg.CommandContext(ctx, logger, engine.binPath, updateArgs...)
|
||||
if err != nil {
|
||||
return fmt.Errorf("create command for JSON update: %w", err)
|
||||
}
|
||||
|
||||
_, err = cmd.Exec()
|
||||
if err != nil {
|
||||
return fmt.Errorf("update embeds metadata with QPDF: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// setStreamSubtype looks up the stream object identified by ref (with or
// without the "obj:" key prefix), sets /Subtype in its dict to "/"+mimeType,
// and stores the patched stream in updateObjects under the "obj:"-prefixed
// key. When the object is missing, cannot be decoded, has no "stream"
// member, or its "dict" is not a JSON object, a warning is logged and
// updateObjects is left untouched.
func setStreamSubtype(logger *slog.Logger, objects map[string]json.RawMessage, updateObjects map[string]any, ref, mimeType string) {
	// Every warning is phrased relative to the reference as the caller gave it.
	skip := func(reason string) {
		logger.Warn(fmt.Sprintf("set stream subtype on %s: %s", ref, reason))
	}

	// Normalize to the "obj:N G R" form used for keys of the objects map.
	key := "obj:" + strings.TrimPrefix(ref, "obj:")

	raw, found := objects[key]
	if !found {
		skip("object not found")
		return
	}

	var envelope map[string]json.RawMessage
	if err := json.Unmarshal(raw, &envelope); err != nil {
		skip("unmarshal object: " + err.Error())
		return
	}

	rawStream, found := envelope["stream"]
	if !found {
		skip("no stream key")
		return
	}

	var stream map[string]any
	if err := json.Unmarshal(rawStream, &stream); err != nil {
		skip("unmarshal stream: " + err.Error())
		return
	}

	dict, isMap := stream["dict"].(map[string]any)
	if !isMap {
		skip("stream dict is not a map")
		return
	}

	// QPDF JSON uses literal name syntax; it handles PDF name
	// encoding internally when writing the binary PDF.
	// dict is the same map that stream["dict"] holds, so the assignment is
	// visible through stream without storing it back.
	dict["/Subtype"] = "/" + mimeType
	updateObjects[key] = map[string]any{"stream": stream}
}
|
||||
|
||||
// stripQpdfStringPrefix drops the two-character type marker that may lead a
// QPDF JSON string value: "u:" (Unicode), "b:" (binary) or "e:" (encoded).
// Only one leading marker is removed; any other input is returned unchanged.
func stripQpdfStringPrefix(s string) string {
	if len(s) < 2 || s[1] != ':' {
		return s
	}

	switch s[0] {
	case 'u', 'b', 'e':
		return s[2:]
	default:
		return s
	}
}
|
||||
|
||||
// Watermark is not available in this implementation.
|
||||
func (engine *QPdf) Watermark(ctx context.Context, logger *slog.Logger, inputPath string, stamp gotenberg.Stamp) error {
|
||||
_, span := gotenberg.Tracer().Start(ctx, "qpdf.Watermark",
|
||||
|
||||
@@ -0,0 +1,271 @@
|
||||
package qpdf
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"log/slog"
|
||||
"os"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestStripQpdfStringPrefix(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
input string
|
||||
expected string
|
||||
}{
|
||||
{"unicode prefix", "u:factur-x.xml", "factur-x.xml"},
|
||||
{"binary prefix", "b:binary.bin", "binary.bin"},
|
||||
{"encoded prefix", "e:encoded.txt", "encoded.txt"},
|
||||
{"no prefix", "plain.xml", "plain.xml"},
|
||||
{"empty string", "", ""},
|
||||
{"prefix only", "u:", ""},
|
||||
{"colon in value", "u:file:name.xml", "file:name.xml"},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
got := stripQpdfStringPrefix(tt.input)
|
||||
if got != tt.expected {
|
||||
t.Errorf("stripQpdfStringPrefix(%q) = %q, want %q", tt.input, got, tt.expected)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestParsePdfObjects(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
input string
|
||||
wantKeys []string
|
||||
wantError bool
|
||||
}{
|
||||
{
|
||||
name: "valid QPDF JSON v2",
|
||||
input: `{"qpdf":[{"jsonversion":2},{"obj:1 0 R":{"value":{"/Type":"/Catalog"}}}]}`,
|
||||
wantKeys: []string{"obj:1 0 R"},
|
||||
},
|
||||
{
|
||||
name: "invalid JSON",
|
||||
input: `not json`,
|
||||
wantError: true,
|
||||
},
|
||||
{
|
||||
name: "empty qpdf array",
|
||||
input: `{"qpdf":[]}`,
|
||||
wantError: true,
|
||||
},
|
||||
{
|
||||
name: "only header element",
|
||||
input: `{"qpdf":[{"jsonversion":2}]}`,
|
||||
wantError: true,
|
||||
},
|
||||
{
|
||||
name: "multiple objects",
|
||||
input: `{"qpdf":[{},{"obj:1 0 R":{"value":{}},"obj:2 0 R":{"value":{}}}]}`,
|
||||
wantKeys: []string{"obj:1 0 R", "obj:2 0 R"},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
objects, err := parsePdfObjects([]byte(tt.input))
|
||||
if tt.wantError {
|
||||
if err == nil {
|
||||
t.Error("expected error, got nil")
|
||||
}
|
||||
return
|
||||
}
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
for _, key := range tt.wantKeys {
|
||||
if _, ok := objects[key]; !ok {
|
||||
t.Errorf("expected key %q in objects", key)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestPatchFilespecMetadata(t *testing.T) {
|
||||
logger := slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelError}))
|
||||
|
||||
t.Run("sets AFRelationship on matching Filespec", func(t *testing.T) {
|
||||
objects := map[string]json.RawMessage{
|
||||
"obj:1 0 R": json.RawMessage(`{"value":{"/Type":"/Catalog"}}`),
|
||||
"obj:2 0 R": json.RawMessage(`{"value":{"/Type":"/Filespec","/UF":"u:factur-x.xml"}}`),
|
||||
}
|
||||
metadata := map[string]map[string]string{
|
||||
"factur-x.xml": {"relationship": "Data"},
|
||||
}
|
||||
|
||||
catalogRef, _, filespecRefs, updateObjects := patchFilespecMetadata(logger, objects, metadata)
|
||||
|
||||
if catalogRef != "obj:1 0 R" {
|
||||
t.Errorf("catalogRef = %q, want %q", catalogRef, "obj:1 0 R")
|
||||
}
|
||||
if len(filespecRefs) != 1 || filespecRefs[0] != "obj:2 0 R" {
|
||||
t.Errorf("filespecRefs = %v, want [obj:2 0 R]", filespecRefs)
|
||||
}
|
||||
updated, ok := updateObjects["obj:2 0 R"]
|
||||
if !ok {
|
||||
t.Fatal("expected obj:2 0 R in updateObjects")
|
||||
}
|
||||
value := updated.(map[string]any)["value"].(map[string]any)
|
||||
if value["/AFRelationship"] != "/Data" {
|
||||
t.Errorf("/AFRelationship = %v, want /Data", value["/AFRelationship"])
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("skips Filespec with no matching metadata", func(t *testing.T) {
|
||||
objects := map[string]json.RawMessage{
|
||||
"obj:1 0 R": json.RawMessage(`{"value":{"/Type":"/Filespec","/UF":"u:other.xml"}}`),
|
||||
}
|
||||
metadata := map[string]map[string]string{
|
||||
"factur-x.xml": {"relationship": "Data"},
|
||||
}
|
||||
|
||||
_, _, filespecRefs, _ := patchFilespecMetadata(logger, objects, metadata)
|
||||
if len(filespecRefs) != 0 {
|
||||
t.Errorf("filespecRefs = %v, want empty", filespecRefs)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("falls back to /F when /UF is absent", func(t *testing.T) {
|
||||
objects := map[string]json.RawMessage{
|
||||
"obj:1 0 R": json.RawMessage(`{"value":{"/Type":"/Filespec","/F":"u:factur-x.xml"}}`),
|
||||
}
|
||||
metadata := map[string]map[string]string{
|
||||
"factur-x.xml": {"relationship": "Alternative"},
|
||||
}
|
||||
|
||||
_, _, filespecRefs, updateObjects := patchFilespecMetadata(logger, objects, metadata)
|
||||
if len(filespecRefs) != 1 {
|
||||
t.Fatalf("filespecRefs = %v, want 1 entry", filespecRefs)
|
||||
}
|
||||
value := updateObjects["obj:1 0 R"].(map[string]any)["value"].(map[string]any)
|
||||
if value["/AFRelationship"] != "/Alternative" {
|
||||
t.Errorf("/AFRelationship = %v, want /Alternative", value["/AFRelationship"])
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("sets stream Subtype via EF reference", func(t *testing.T) {
|
||||
objects := map[string]json.RawMessage{
|
||||
"obj:2 0 R": json.RawMessage(`{"value":{"/Type":"/Filespec","/UF":"u:factur-x.xml","/EF":{"/F":"3 0 R"}}}`),
|
||||
"obj:3 0 R": json.RawMessage(`{"stream":{"dict":{"/Type":"/EmbeddedFile"}}}`),
|
||||
}
|
||||
metadata := map[string]map[string]string{
|
||||
"factur-x.xml": {"mimeType": "text/xml"},
|
||||
}
|
||||
|
||||
_, _, _, updateObjects := patchFilespecMetadata(logger, objects, metadata)
|
||||
streamObj, ok := updateObjects["obj:3 0 R"]
|
||||
if !ok {
|
||||
t.Fatal("expected obj:3 0 R in updateObjects")
|
||||
}
|
||||
stream := streamObj.(map[string]any)["stream"].(map[string]any)
|
||||
dict := stream["dict"].(map[string]any)
|
||||
if dict["/Subtype"] != "/text/xml" {
|
||||
t.Errorf("/Subtype = %v, want /text/xml", dict["/Subtype"])
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func TestPatchCatalogAF(t *testing.T) {
|
||||
t.Run("adds filespec refs to AF array", func(t *testing.T) {
|
||||
catalogValue := map[string]any{"/Type": "/Catalog"}
|
||||
updateObjects := make(map[string]any)
|
||||
|
||||
patchCatalogAF("obj:1 0 R", catalogValue, []string{"obj:2 0 R", "obj:3 0 R"}, updateObjects)
|
||||
|
||||
af, ok := catalogValue["/AF"].([]any)
|
||||
if !ok {
|
||||
t.Fatal("expected /AF to be []any")
|
||||
}
|
||||
if len(af) != 2 {
|
||||
t.Fatalf("/AF has %d entries, want 2", len(af))
|
||||
}
|
||||
if af[0] != "2 0 R" || af[1] != "3 0 R" {
|
||||
t.Errorf("/AF = %v, want [2 0 R, 3 0 R]", af)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("does not duplicate existing refs", func(t *testing.T) {
|
||||
catalogValue := map[string]any{
|
||||
"/Type": "/Catalog",
|
||||
"/AF": []any{"2 0 R"},
|
||||
}
|
||||
updateObjects := make(map[string]any)
|
||||
|
||||
patchCatalogAF("obj:1 0 R", catalogValue, []string{"obj:2 0 R", "obj:3 0 R"}, updateObjects)
|
||||
|
||||
af := catalogValue["/AF"].([]any)
|
||||
if len(af) != 2 {
|
||||
t.Fatalf("/AF has %d entries, want 2", len(af))
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("no-op when catalogRef is empty", func(t *testing.T) {
|
||||
updateObjects := make(map[string]any)
|
||||
patchCatalogAF("", nil, []string{"obj:2 0 R"}, updateObjects)
|
||||
if len(updateObjects) != 0 {
|
||||
t.Error("expected no updates for empty catalogRef")
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func TestSetStreamSubtype(t *testing.T) {
|
||||
logger := slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelError}))
|
||||
|
||||
t.Run("sets Subtype in stream dict", func(t *testing.T) {
|
||||
objects := map[string]json.RawMessage{
|
||||
"obj:3 0 R": json.RawMessage(`{"stream":{"dict":{"/Type":"/EmbeddedFile"}}}`),
|
||||
}
|
||||
updateObjects := make(map[string]any)
|
||||
|
||||
setStreamSubtype(logger, objects, updateObjects, "obj:3 0 R", "text/xml")
|
||||
|
||||
streamObj := updateObjects["obj:3 0 R"].(map[string]any)["stream"].(map[string]any)
|
||||
dict := streamObj["dict"].(map[string]any)
|
||||
if dict["/Subtype"] != "/text/xml" {
|
||||
t.Errorf("/Subtype = %v, want /text/xml", dict["/Subtype"])
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("auto-adds obj: prefix to ref", func(t *testing.T) {
|
||||
objects := map[string]json.RawMessage{
|
||||
"obj:5 0 R": json.RawMessage(`{"stream":{"dict":{}}}`),
|
||||
}
|
||||
updateObjects := make(map[string]any)
|
||||
|
||||
setStreamSubtype(logger, objects, updateObjects, "5 0 R", "application/pdf")
|
||||
|
||||
if _, ok := updateObjects["obj:5 0 R"]; !ok {
|
||||
t.Error("expected obj:5 0 R in updateObjects")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("warns on missing object", func(t *testing.T) {
|
||||
objects := map[string]json.RawMessage{}
|
||||
updateObjects := make(map[string]any)
|
||||
|
||||
setStreamSubtype(logger, objects, updateObjects, "obj:99 0 R", "text/xml")
|
||||
|
||||
if len(updateObjects) != 0 {
|
||||
t.Error("expected no updates for missing object")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("warns on object without stream key", func(t *testing.T) {
|
||||
objects := map[string]json.RawMessage{
|
||||
"obj:3 0 R": json.RawMessage(`{"value":{"/Type":"/Page"}}`),
|
||||
}
|
||||
updateObjects := make(map[string]any)
|
||||
|
||||
setStreamSubtype(logger, objects, updateObjects, "obj:3 0 R", "text/xml")
|
||||
|
||||
if len(updateObjects) != 0 {
|
||||
t.Error("expected no updates for non-stream object")
|
||||
}
|
||||
})
|
||||
}
|
||||
@@ -17,6 +17,21 @@ Feature: /forms/pdfengines/embed
|
||||
Then the response PDF(s) should have the "embed_1.xml" file embedded
|
||||
Then the response PDF(s) should have the "embed_2.xml" file embedded
|
||||
|
||||
Scenario: POST /forms/pdfengines/embed with metadata
|
||||
Given I have a default Gotenberg container
|
||||
When I make a "POST" request to Gotenberg at the "/forms/pdfengines/embed" endpoint with the following form data and header(s):
|
||||
| files | testdata/page_1.pdf | file |
|
||||
| embeds | testdata/embed_1.xml | file |
|
||||
| embeds | testdata/embed_2.xml | file |
|
||||
| embedsMetadata | {"embed_1.xml":{"mimeType":"text/xml","relationship":"Data"},"embed_2.xml":{"mimeType":"text/xml","relationship":"Alternative"}} | field |
|
||||
Then the response status code should be 200
|
||||
And the response header "Content-Type" should be "application/pdf"
|
||||
And there should be 1 PDF(s) in the response
|
||||
And the response PDF(s) should have the "embed_1.xml" file embedded
|
||||
And the response PDF(s) should have the "embed_1.xml" file embedded with relationship "Data"
|
||||
And the response PDF(s) should have the "embed_2.xml" file embedded
|
||||
And the response PDF(s) should have the "embed_2.xml" file embedded with relationship "Alternative"
|
||||
|
||||
@download-from
|
||||
Scenario: POST /forms/pdfengines/embed with (Download From)
|
||||
Given I have a default Gotenberg container
|
||||
|
||||
@@ -9,6 +9,7 @@ import (
|
||||
|
||||
"github.com/moby/moby/api/types/container"
|
||||
"github.com/testcontainers/testcontainers-go"
|
||||
"github.com/testcontainers/testcontainers-go/exec"
|
||||
"github.com/testcontainers/testcontainers-go/network"
|
||||
"github.com/testcontainers/testcontainers-go/wait"
|
||||
)
|
||||
@@ -138,7 +139,7 @@ func execCommandInIntegrationToolsContainer(ctx context.Context, cmd []string, p
|
||||
}
|
||||
}(c, ctx)
|
||||
|
||||
_, output, err := c.Exec(ctx, cmd)
|
||||
_, output, err := c.Exec(ctx, cmd, exec.Multiplexed())
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("exec %q: %w", cmd, err)
|
||||
}
|
||||
|
||||
@@ -1262,6 +1262,68 @@ func (s *scenario) thePdfsShouldHaveEmbeddedFile(ctx context.Context, kind, shou
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *scenario) thePdfsShouldHaveEmbeddedFileWithRelationship(ctx context.Context, kind, embed, relationship string) error {
|
||||
dirPath := s.teststoreDir
|
||||
|
||||
_, err := os.Stat(dirPath)
|
||||
if os.IsNotExist(err) {
|
||||
return fmt.Errorf("directory %q does not exist", dirPath)
|
||||
}
|
||||
|
||||
var paths []string
|
||||
err = filepath.Walk(dirPath, func(path string, info os.FileInfo, pathErr error) error {
|
||||
if pathErr != nil {
|
||||
return pathErr
|
||||
}
|
||||
if strings.EqualFold(filepath.Ext(info.Name()), ".pdf") {
|
||||
paths = append(paths, path)
|
||||
}
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
return fmt.Errorf("walk %q: %w", dirPath, err)
|
||||
}
|
||||
|
||||
for _, path := range paths {
|
||||
cmd := []string{
|
||||
"verapdf",
|
||||
"--off",
|
||||
"--loglevel",
|
||||
"0",
|
||||
"--extract",
|
||||
"embeddedFile",
|
||||
filepath.Base(path),
|
||||
}
|
||||
|
||||
output, err := execCommandInIntegrationToolsContainer(ctx, cmd, path)
|
||||
if err != nil {
|
||||
return fmt.Errorf("exec %q: %w", cmd, err)
|
||||
}
|
||||
|
||||
fileNameTag := fmt.Sprintf("<fileName>%s</fileName>", embed)
|
||||
relationshipTag := fmt.Sprintf("<afRelationship>%s</afRelationship>", relationship)
|
||||
|
||||
blocks := strings.Split(output, "</embeddedFile>")
|
||||
found := false
|
||||
for _, block := range blocks {
|
||||
if !strings.Contains(block, fileNameTag) {
|
||||
continue
|
||||
}
|
||||
if !strings.Contains(block, relationshipTag) {
|
||||
return fmt.Errorf("embedded file %q missing afRelationship %q", embed, relationship)
|
||||
}
|
||||
found = true
|
||||
break
|
||||
}
|
||||
|
||||
if !found {
|
||||
return fmt.Errorf("embedded file %q not found in verapdf output", embed)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func InitializeScenario(ctx *godog.ScenarioContext) {
|
||||
s := &scenario{}
|
||||
ctx.Before(func(ctx context.Context, sc *godog.Scenario) (context.Context, error) {
|
||||
@@ -1300,6 +1362,7 @@ func InitializeScenario(ctx *godog.ScenarioContext) {
|
||||
ctx.Then(`^the (response|webhook request) PDF\(s\) (should|should NOT) be flatten$`, s.thePdfsShouldBeFlatten)
|
||||
ctx.Then(`^the (response|webhook request) PDF\(s\) (should|should NOT) be encrypted`, s.thePdfsShouldBeEncrypted)
|
||||
ctx.Then(`^the (response|webhook request) PDF\(s\) (should|should NOT) have the "([^"]*)" file embedded$`, s.thePdfsShouldHaveEmbeddedFile)
|
||||
ctx.Then(`^the (response|webhook request) PDF\(s\) should have the "([^"]*)" file embedded with relationship "([^"]*)"$`, s.thePdfsShouldHaveEmbeddedFileWithRelationship)
|
||||
ctx.Then(`^the "([^"]*)" PDF should have (\d+) page\(s\)$`, s.thePdfShouldHavePages)
|
||||
ctx.Then(`^the "([^"]*)" PDF (should|should NOT) be set to landscape orientation$`, s.thePdfShouldBeSetToLandscapeOrientation)
|
||||
ctx.Then(`^the "([^"]*)" PDF (should|should NOT) have the following content at page (\d+):$`, s.thePdfShouldHaveTheFollowingContentAtPage)
|
||||
|
||||
Reference in New Issue
Block a user