diff --git a/Makefile b/Makefile index 52aa35e..3e66342 100644 --- a/Makefile +++ b/Makefile @@ -63,6 +63,7 @@ PDFENGINES_READ_METADATA_ENGINES=exiftool PDFENGINES_WRITE_METADATA_ENGINES=exiftool PDFENGINES_ENCRYPT_ENGINES=qpdf,pdfcpu,pdftk PDFENGINES_DISABLE_ROUTES=false +PDFENGINES_EMBED_ENGINES=pdfcpu PROMETHEUS_NAMESPACE=gotenberg PROMETHEUS_COLLECT_INTERVAL=1s PROMETHEUS_DISABLE_ROUTE_LOGGING=false @@ -137,6 +138,7 @@ run: ## Start a Gotenberg container --pdfengines-write-metadata-engines=$(PDFENGINES_WRITE_METADATA_ENGINES) \ --pdfengines-encrypt-engines=$(PDFENGINES_ENCRYPT_ENGINES) \ --pdfengines-disable-routes=$(PDFENGINES_DISABLE_ROUTES) \ + --pdfengines-embed-engines=$(PDFENGINES_EMBED_ENGINES) \ --prometheus-namespace=$(PROMETHEUS_NAMESPACE) \ --prometheus-collect-interval=$(PROMETHEUS_COLLECT_INTERVAL) \ --prometheus-disable-route-logging=$(PROMETHEUS_DISABLE_ROUTE_LOGGING) \ diff --git a/pkg/gotenberg/mocks.go b/pkg/gotenberg/mocks.go index faa8574..4478256 100644 --- a/pkg/gotenberg/mocks.go +++ b/pkg/gotenberg/mocks.go @@ -53,6 +53,7 @@ type PdfEngineMock struct { ReadMetadataMock func(ctx context.Context, logger *zap.Logger, inputPath string) (map[string]interface{}, error) WriteMetadataMock func(ctx context.Context, logger *zap.Logger, metadata map[string]interface{}, inputPath string) error EncryptMock func(ctx context.Context, logger *zap.Logger, inputPath, userPassword, ownerPassword string) error + EmbedFilesMock func(ctx context.Context, logger *zap.Logger, filePaths []string, inputPath string) error } func (engine *PdfEngineMock) Merge(ctx context.Context, logger *zap.Logger, inputPaths []string, outputPath string) error { @@ -83,6 +84,10 @@ func (engine *PdfEngineMock) Encrypt(ctx context.Context, logger *zap.Logger, in return engine.EncryptMock(ctx, logger, inputPath, userPassword, ownerPassword) } +func (engine *PdfEngineMock) EmbedFiles(ctx context.Context, logger *zap.Logger, filePaths []string, inputPath string) error { + return 
engine.EmbedFilesMock(ctx, logger, filePaths, inputPath) +} + // PdfEngineProviderMock is a mock for the [PdfEngineProvider] interface. type PdfEngineProviderMock struct { PdfEngineMock func() (PdfEngine, error) diff --git a/pkg/gotenberg/pdfengine.go b/pkg/gotenberg/pdfengine.go index e1e37ed..5e9dceb 100644 --- a/pkg/gotenberg/pdfengine.go +++ b/pkg/gotenberg/pdfengine.go @@ -143,6 +143,10 @@ type PdfEngine interface { // The ownerPassword provides full access to the document. // If the ownerPassword is empty, it defaults to the userPassword. Encrypt(ctx context.Context, logger *zap.Logger, inputPath, userPassword, ownerPassword string) error + + // EmbedFiles embeds files into a PDF. All files are embedded as file attachments + // without modifying the main PDF content. + EmbedFiles(ctx context.Context, logger *zap.Logger, filePaths []string, inputPath string) error } // PdfEngineProvider offers an interface to instantiate a [PdfEngine]. diff --git a/pkg/modules/api/context.go b/pkg/modules/api/context.go index 61c6dcb..4b390da 100644 --- a/pkg/modules/api/context.go +++ b/pkg/modules/api/context.go @@ -38,11 +38,12 @@ var ( // Context is the request context for a "multipart/form-data" requests. type Context struct { - dirPath string - values map[string][]string - files map[string]string - outputPaths []string - cancelled bool + dirPath string + values map[string][]string + files map[string]string + filesByField map[string][]string + outputPaths []string + cancelled bool logger *zap.Logger echoCtx echo.Context @@ -79,6 +80,9 @@ type downloadFrom struct { // ExtraHttpHeaders are the HTTP headers to send alongside. ExtraHttpHeaders map[string]string `json:"extraHttpHeaders"` + + // Embedded marks the downloaded file as a file to embed, i.e., it is tracked as if it had been uploaded under the "embeds" form field. + Embedded bool `json:"embedded"` } // newContext returns a [Context] by parsing a "multipart/form-data" request.
@@ -184,6 +188,7 @@ func newContext(echoCtx echo.Context, logger *zap.Logger, fs *gotenberg.FileSyst ctx.dirPath = dirPath ctx.values = form.Value ctx.files = make(map[string]string) + ctx.filesByField = make(map[string][]string) // First, try to download files listed in the "downloadFrom" form field, if // any. @@ -318,6 +323,9 @@ func newContext(echoCtx echo.Context, logger *zap.Logger, fs *gotenberg.FileSyst } ctx.files[filename] = path + if dl.Embedded { + ctx.filesByField[EmbedsFormField] = append(ctx.filesByField[EmbedsFormField], path) + } return nil }) @@ -373,17 +381,22 @@ func newContext(echoCtx echo.Context, logger *zap.Logger, fs *gotenberg.FileSyst } // Then, copy the form files, if any. - for _, files := range form.File { + for fieldName, files := range form.File { for _, fh := range files { err = copyToDisk(fh) if err != nil { return ctx, cancel, fmt.Errorf("copy to disk: %w", err) } + // Track files by field name + filename := norm.NFC.String(filepath.Base(fh.Filename)) + filePath := ctx.files[filename] + ctx.filesByField[fieldName] = append(ctx.filesByField[fieldName], filePath) } } ctx.Log().Debug(fmt.Sprintf("form fields: %+v", ctx.values)) ctx.Log().Debug(fmt.Sprintf("form files: %+v", ctx.files)) + ctx.Log().Debug(fmt.Sprintf("form files by field: %+v", ctx.filesByField)) ctx.Log().Debug(fmt.Sprintf("total bytes: %d", totalBytesRead.Load())) return ctx, cancel, err @@ -397,9 +410,10 @@ func (ctx *Context) Request() *http.Request { // FormData return a [FormData]. 
func (ctx *Context) FormData() *FormData { return &FormData{ - values: ctx.values, - files: ctx.files, - errors: nil, + values: ctx.values, + files: ctx.files, + filesByField: ctx.filesByField, + errors: nil, } } diff --git a/pkg/modules/api/formdata.go b/pkg/modules/api/formdata.go index fd6b664..d2d5bf7 100644 --- a/pkg/modules/api/formdata.go +++ b/pkg/modules/api/formdata.go @@ -6,6 +6,7 @@ import ( "net/http" "os" "path/filepath" + "slices" "sort" "strconv" "strings" @@ -16,14 +17,20 @@ import ( "github.com/gotenberg/gotenberg/v8/pkg/gotenberg" ) +// EmbedsFormField represents the form field name for embedding files. +const ( + EmbedsFormField string = "embeds" +) + // FormData is a helper for validating and hydrating values from a // "multipart/form-data" request. // // form := ctx.FormData() type FormData struct { - values map[string][]string - files map[string]string - errors error + values map[string][]string + files map[string]string + filesByField map[string][]string + errors error } // Validate returns nil or an error related to the [FormData] values, with a @@ -358,6 +365,26 @@ func (form *FormData) Paths(extensions []string, target *[]string) *FormData { return form.paths(extensions, target) } +// Embeds binds the absolute paths of form data files that should be +// embedded in the PDF. Only files uploaded with the "embeds" field name +// will be included. +// +// var embeds []string +// +// ctx.FormData().Embeds(&embeds) +func (form *FormData) Embeds(target *[]string) *FormData { + if form.errors != nil { + return form + } + + // Get files from the "embeds" field + if paths, ok := form.filesByField[EmbedsFormField]; ok { + *target = append(*target, paths...) + } + + return form +} + // MandatoryPaths binds the absolute paths of form data files, according to a // list of file extensions, to a string slice variable. It populates an error // if there is no file for given file extensions. 
@@ -381,8 +408,15 @@ func (form *FormData) MandatoryPaths(extensions []string, target *[]string) *For // paths bind the absolute paths of form data files, according to a list of // file extensions, to a string slice variable. +// embeds are excluded. func (form *FormData) paths(extensions []string, target *[]string) *FormData { + embeds, ok := form.filesByField[EmbedsFormField] + for filename, path := range form.files { + if ok && slices.Contains(embeds, path) { + continue + } + for _, ext := range extensions { // See https://github.com/gotenberg/gotenberg/issues/228. if strings.ToLower(filepath.Ext(filename)) == ext { diff --git a/pkg/modules/api/formdata_test.go b/pkg/modules/api/formdata_test.go index e3ce8f4..ca57583 100644 --- a/pkg/modules/api/formdata_test.go +++ b/pkg/modules/api/formdata_test.go @@ -1612,6 +1612,24 @@ func TestFormData_Paths(t *testing.T) { }, expectCount: 2, }, + { + scenario: "files except embeds", + form: &FormData{ + files: map[string]string{ + "foo.pdf": "/foo.pdf", + "embed_1.pdf": "/embed_1.pdf", + "embed_2.xml": "/embed_2.xml", + }, + filesByField: map[string][]string{ + "embeds": {"/embed_1.pdf", "/embed_2.xml"}, + }, + }, + extensions: []string{".pdf"}, + expect: []string{ + "/foo.pdf", + }, + expectCount: 1, + }, } { t.Run(tc.scenario, func(t *testing.T) { var actual []string @@ -1740,3 +1758,28 @@ func TestFormData_mustAssign(t *testing.T) { var target []string form.mustAssign("foo", "foo", &target) } + +func TestFormData_Embeds(t *testing.T) { + expected := []string{"/bar.xml", "/baz.xml"} + + var actual []string + form := &FormData{ + files: map[string]string{ + "foo.pdf": "/foo.pdf", + "bar.xml": "/bar.xml", + "baz.xml": "/baz.xml", + }, + filesByField: map[string][]string{ + "embeds": {"/bar.xml", "/baz.xml"}, + }, + } + form.Embeds(&actual) + + if len(actual) != len(expected) { + t.Errorf("expected %d embeds but got %d", len(expected), len(actual)) + } + + if !reflect.DeepEqual(actual, expected) { + t.Errorf("expected %v 
but got %v", expected, actual) + } +} diff --git a/pkg/modules/chromium/routes.go b/pkg/modules/chromium/routes.go index e096eab..a756bc1 100644 --- a/pkg/modules/chromium/routes.go +++ b/pkg/modules/chromium/routes.go @@ -361,6 +361,7 @@ func convertUrlRoute(chromium Api, engine gotenberg.PdfEngine) api.Route { pdfFormats := pdfengines.FormDataPdfFormats(form) metadata := pdfengines.FormDataPdfMetadata(form, false) userPassword, ownerPassword := pdfengines.FormDataPdfEncrypt(form) + embedPaths := pdfengines.FormDataPdfEmbeds(form) var url string err := form. @@ -370,7 +371,7 @@ func convertUrlRoute(chromium Api, engine gotenberg.PdfEngine) api.Route { return fmt.Errorf("validate form data: %w", err) } - err = convertUrl(ctx, chromium, engine, url, options, mode, pdfFormats, metadata, userPassword, ownerPassword) + err = convertUrl(ctx, chromium, engine, url, options, mode, pdfFormats, metadata, userPassword, ownerPassword, embedPaths) if err != nil { return fmt.Errorf("convert URL to PDF: %w", err) } @@ -423,6 +424,7 @@ func convertHtmlRoute(chromium Api, engine gotenberg.PdfEngine) api.Route { pdfFormats := pdfengines.FormDataPdfFormats(form) metadata := pdfengines.FormDataPdfMetadata(form, false) userPassword, ownerPassword := pdfengines.FormDataPdfEncrypt(form) + embedPaths := pdfengines.FormDataPdfEmbeds(form) var inputPath string err := form. 
@@ -433,7 +435,7 @@ func convertHtmlRoute(chromium Api, engine gotenberg.PdfEngine) api.Route { } url := fmt.Sprintf("file://%s", inputPath) - err = convertUrl(ctx, chromium, engine, url, options, mode, pdfFormats, metadata, userPassword, ownerPassword) + err = convertUrl(ctx, chromium, engine, url, options, mode, pdfFormats, metadata, userPassword, ownerPassword, embedPaths) if err != nil { return fmt.Errorf("convert HTML to PDF: %w", err) } @@ -487,6 +489,7 @@ func convertMarkdownRoute(chromium Api, engine gotenberg.PdfEngine) api.Route { pdfFormats := pdfengines.FormDataPdfFormats(form) metadata := pdfengines.FormDataPdfMetadata(form, false) userPassword, ownerPassword := pdfengines.FormDataPdfEncrypt(form) + embedPaths := pdfengines.FormDataPdfEmbeds(form) var ( inputPath string @@ -506,7 +509,7 @@ func convertMarkdownRoute(chromium Api, engine gotenberg.PdfEngine) api.Route { return fmt.Errorf("transform markdown file(s) to HTML: %w", err) } - err = convertUrl(ctx, chromium, engine, url, options, mode, pdfFormats, metadata, userPassword, ownerPassword) + err = convertUrl(ctx, chromium, engine, url, options, mode, pdfFormats, metadata, userPassword, ownerPassword, embedPaths) if err != nil { return fmt.Errorf("convert markdown to PDF: %w", err) } @@ -630,7 +633,7 @@ func markdownToHtml(ctx *api.Context, inputPath string, markdownPaths []string) return fmt.Sprintf("file://%s", inputPath), nil } -func convertUrl(ctx *api.Context, chromium Api, engine gotenberg.PdfEngine, url string, options PdfOptions, mode gotenberg.SplitMode, pdfFormats gotenberg.PdfFormats, metadata map[string]interface{}, userPassword, ownerPassword string) error { +func convertUrl(ctx *api.Context, chromium Api, engine gotenberg.PdfEngine, url string, options PdfOptions, mode gotenberg.SplitMode, pdfFormats gotenberg.PdfFormats, metadata map[string]interface{}, userPassword, ownerPassword string, embedPaths []string) error { outputPath := ctx.GeneratePath(".pdf") // See 
https://github.com/gotenberg/gotenberg/issues/1130. filename := ctx.OutputFilename(outputPath) @@ -687,6 +690,11 @@ func convertUrl(ctx *api.Context, chromium Api, engine gotenberg.PdfEngine, url return fmt.Errorf("write metadata: %w", err) } + err = pdfengines.EmbedFilesStub(ctx, engine, embedPaths, convertOutputPaths) + if err != nil { + return fmt.Errorf("embed files into PDFs: %w", err) + } + err = pdfengines.EncryptPdfStub(ctx, engine, userPassword, ownerPassword, convertOutputPaths) if err != nil { return fmt.Errorf("encrypt PDFs: %w", err) diff --git a/pkg/modules/exiftool/exiftool.go b/pkg/modules/exiftool/exiftool.go index ae33e88..d00127c 100644 --- a/pkg/modules/exiftool/exiftool.go +++ b/pkg/modules/exiftool/exiftool.go @@ -181,6 +181,11 @@ func (engine *ExifTool) Encrypt(ctx context.Context, logger *zap.Logger, inputPa return fmt.Errorf("encrypt PDF using ExifTool: %w", gotenberg.ErrPdfEncryptionNotSupported) } +// EmbedFiles is not available in this implementation. +func (engine *ExifTool) EmbedFiles(ctx context.Context, logger *zap.Logger, filePaths []string, inputPath string) error { + return fmt.Errorf("embed files with ExifTool: %w", gotenberg.ErrPdfEngineMethodNotSupported) +} + // Interface guards. var ( _ gotenberg.Module = (*ExifTool)(nil) diff --git a/pkg/modules/libreoffice/pdfengine/pdfengine.go b/pkg/modules/libreoffice/pdfengine/pdfengine.go index e47d9b8..e5a5c66 100644 --- a/pkg/modules/libreoffice/pdfengine/pdfengine.go +++ b/pkg/modules/libreoffice/pdfengine/pdfengine.go @@ -96,6 +96,11 @@ func (engine *LibreOfficePdfEngine) Encrypt(ctx context.Context, logger *zap.Log return fmt.Errorf("encrypt PDF using LibreOffice: %w", gotenberg.ErrPdfEngineMethodNotSupported) } +// EmbedFiles is not available in this implementation. 
+func (engine *LibreOfficePdfEngine) EmbedFiles(ctx context.Context, logger *zap.Logger, filePaths []string, inputPath string) error { + return fmt.Errorf("embed files with LibreOffice: %w", gotenberg.ErrPdfEngineMethodNotSupported) +} + // Interface guards. var ( _ gotenberg.Module = (*LibreOfficePdfEngine)(nil) diff --git a/pkg/modules/libreoffice/routes.go b/pkg/modules/libreoffice/routes.go index 7acaea7..cc96a4d 100644 --- a/pkg/modules/libreoffice/routes.go +++ b/pkg/modules/libreoffice/routes.go @@ -31,6 +31,7 @@ func convertRoute(libreOffice libreofficeapi.Uno, engine gotenberg.PdfEngine) ap pdfFormats := pdfengines.FormDataPdfFormats(form) metadata := pdfengines.FormDataPdfMetadata(form, false) userPassword, ownerPassword := pdfengines.FormDataPdfEncrypt(form) + embedPaths := pdfengines.FormDataPdfEmbeds(form) zeroValuedSplitMode := gotenberg.SplitMode{} @@ -264,6 +265,11 @@ func convertRoute(libreOffice libreofficeapi.Uno, engine gotenberg.PdfEngine) ap } } + err = pdfengines.EmbedFilesStub(ctx, engine, embedPaths, outputPaths) + if err != nil { + return fmt.Errorf("embed files into PDFs: %w", err) + } + err = pdfengines.EncryptPdfStub(ctx, engine, userPassword, ownerPassword, outputPaths) if err != nil { return fmt.Errorf("encrypt PDFs: %w", err) diff --git a/pkg/modules/pdfcpu/pdfcpu.go b/pkg/modules/pdfcpu/pdfcpu.go index 97e25f5..37f0d25 100644 --- a/pkg/modules/pdfcpu/pdfcpu.go +++ b/pkg/modules/pdfcpu/pdfcpu.go @@ -171,6 +171,34 @@ func (engine *PdfCpu) WriteMetadata(ctx context.Context, logger *zap.Logger, met return fmt.Errorf("write PDF metadata with pdfcpu: %w", gotenberg.ErrPdfEngineMethodNotSupported) } +// EmbedFiles embeds files into a PDF. All files are embedded as file attachments +// without modifying the main PDF content. 
+func (engine *PdfCpu) EmbedFiles(ctx context.Context, logger *zap.Logger, filePaths []string, inputPath string) error { + if len(filePaths) == 0 { + return nil + } + + logger.Debug(fmt.Sprintf("embedding %d file(s) to %s: %v", len(filePaths), inputPath, filePaths)) + + args := []string{ + "attachments", "add", + inputPath, + } + args = append(args, filePaths...) + + cmd, err := gotenberg.CommandContext(ctx, logger, engine.binPath, args...) + if err != nil { + return fmt.Errorf("create command for attaching files: %w", err) + } + + _, err = cmd.Exec() + if err != nil { + return fmt.Errorf("attach files with pdfcpu: %w", err) + } + + return nil +} + // Encrypt adds password protection to a PDF file using pdfcpu. func (engine *PdfCpu) Encrypt(ctx context.Context, logger *zap.Logger, inputPath, userPassword, ownerPassword string) error { if userPassword == "" { diff --git a/pkg/modules/pdfengines/multi.go b/pkg/modules/pdfengines/multi.go index 78a5277..5726a2b 100644 --- a/pkg/modules/pdfengines/multi.go +++ b/pkg/modules/pdfengines/multi.go @@ -19,6 +19,7 @@ type multiPdfEngines struct { readMetadataEngines []gotenberg.PdfEngine writeMetadataEngines []gotenberg.PdfEngine passwordEngines []gotenberg.PdfEngine + embedEngines []gotenberg.PdfEngine } func newMultiPdfEngines( @@ -28,7 +29,8 @@ func newMultiPdfEngines( convertEngines, readMetadataEngines, writeMetadataEngines, - passwordEngines []gotenberg.PdfEngine, + passwordEngines, + embedEngines []gotenberg.PdfEngine, ) *multiPdfEngines { return &multiPdfEngines{ mergeEngines: mergeEngines, @@ -38,6 +40,7 @@ func newMultiPdfEngines( readMetadataEngines: readMetadataEngines, writeMetadataEngines: writeMetadataEngines, passwordEngines: passwordEngines, + embedEngines: embedEngines, } } @@ -238,6 +241,31 @@ func (multi *multiPdfEngines) Encrypt(ctx context.Context, logger *zap.Logger, i return fmt.Errorf("encrypt PDF using multi PDF engines: %w", err) } +// EmbedFiles embeds files into a PDF using the first available 
+// engine that supports file embedding. +func (multi *multiPdfEngines) EmbedFiles(ctx context.Context, logger *zap.Logger, filePaths []string, inputPath string) error { + var err error + errChan := make(chan error, 1) + + for _, engine := range multi.embedEngines { + go func(engine gotenberg.PdfEngine) { + errChan <- engine.EmbedFiles(ctx, logger, filePaths, inputPath) + }(engine) + + select { + case embedErr := <-errChan: + errored := multierr.AppendInto(&err, embedErr) + if !errored { + return nil + } + case <-ctx.Done(): + return ctx.Err() + } + } + + return fmt.Errorf("embed files into PDF using multi PDF engines: %w", err) +} + // Interface guards. var ( _ gotenberg.PdfEngine = (*multiPdfEngines)(nil) diff --git a/pkg/modules/pdfengines/pdfengines.go b/pkg/modules/pdfengines/pdfengines.go index 8e3bc17..6fd70ec 100644 --- a/pkg/modules/pdfengines/pdfengines.go +++ b/pkg/modules/pdfengines/pdfengines.go @@ -34,6 +34,7 @@ type PdfEngines struct { readMetadataNames []string writeMetadataNames []string encryptNames []string + embedNames []string engines []gotenberg.PdfEngine disableRoutes bool } @@ -51,6 +52,7 @@ func (mod *PdfEngines) Descriptor() gotenberg.ModuleDescriptor { fs.StringSlice("pdfengines-read-metadata-engines", []string{"exiftool"}, "Set the PDF engines and their order for the read metadata feature - empty means all") fs.StringSlice("pdfengines-write-metadata-engines", []string{"exiftool"}, "Set the PDF engines and their order for the write metadata feature - empty means all") fs.StringSlice("pdfengines-encrypt-engines", []string{"qpdf", "pdftk", "pdfcpu"}, "Set the PDF engines and their order for the password protection feature - empty means all") + fs.StringSlice("pdfengines-embed-engines", []string{"pdfcpu"}, "Set the PDF engines and their order for the file embedding feature - empty means all") fs.Bool("pdfengines-disable-routes", false, "Disable the routes") // Deprecated flags. 
@@ -77,6 +79,7 @@ func (mod *PdfEngines) Provision(ctx *gotenberg.Context) error { readMetadataNames := flags.MustStringSlice("pdfengines-read-metadata-engines") writeMetadataNames := flags.MustStringSlice("pdfengines-write-metadata-engines") encryptNames := flags.MustStringSlice("pdfengines-encrypt-engines") + embedNames := flags.MustStringSlice("pdfengines-embed-engines") mod.disableRoutes = flags.MustBool("pdfengines-disable-routes") engines, err := ctx.Modules(new(gotenberg.PdfEngine)) @@ -138,6 +141,11 @@ func (mod *PdfEngines) Provision(ctx *gotenberg.Context) error { mod.encryptNames = encryptNames } + mod.embedNames = defaultNames + if len(embedNames) > 0 { + mod.embedNames = embedNames + } + return nil } @@ -192,6 +200,7 @@ func (mod *PdfEngines) Validate() error { findNonExistingEngines(mod.readMetadataNames) findNonExistingEngines(mod.writeMetadataNames) findNonExistingEngines(mod.encryptNames) + findNonExistingEngines(mod.embedNames) if len(nonExistingEngines) == 0 { return nil @@ -238,6 +247,7 @@ func (mod *PdfEngines) PdfEngine() (gotenberg.PdfEngine, error) { engines(mod.readMetadataNames), engines(mod.writeMetadataNames), engines(mod.encryptNames), + engines(mod.embedNames), ), nil } @@ -262,6 +272,7 @@ func (mod *PdfEngines) Routes() ([]api.Route, error) { readMetadataRoute(engine), writeMetadataRoute(engine), encryptRoute(engine), + embedRoute(engine), }, nil } diff --git a/pkg/modules/pdfengines/routes.go b/pkg/modules/pdfengines/routes.go index ce9135c..27e536c 100644 --- a/pkg/modules/pdfengines/routes.go +++ b/pkg/modules/pdfengines/routes.go @@ -254,6 +254,14 @@ func WriteMetadataStub(ctx *api.Context, engine gotenberg.PdfEngine, metadata ma return nil } +// FormDataPdfEmbeds extracts embedded file paths from form data. +// Only files uploaded with the "embeds" field name are included. 
+func FormDataPdfEmbeds(form *api.FormData) []string { + var embedPaths []string + form.Embeds(&embedPaths) + return embedPaths +} + // FormDataPdfEncrypt extracts encryption parameters from form data. func FormDataPdfEncrypt(form *api.FormData) (userPassword, ownerPassword string) { form.String("userPassword", &userPassword, "") @@ -277,6 +285,22 @@ func EncryptPdfStub(ctx *api.Context, engine gotenberg.PdfEngine, userPassword, return nil } +// EmbedFilesStub embeds files into PDF files. +func EmbedFilesStub(ctx *api.Context, engine gotenberg.PdfEngine, embedPaths []string, inputPaths []string) error { + if len(embedPaths) == 0 { + return nil + } + + for _, inputPath := range inputPaths { + err := engine.EmbedFiles(ctx, ctx.Log(), embedPaths, inputPath) + if err != nil { + return fmt.Errorf("embed files into PDF '%s': %w", inputPath, err) + } + } + + return nil +} + // mergeRoute returns an [api.Route] which can merge PDFs. func mergeRoute(engine gotenberg.PdfEngine) api.Route { return api.Route{ @@ -290,6 +314,7 @@ func mergeRoute(engine gotenberg.PdfEngine) api.Route { pdfFormats := FormDataPdfFormats(form) metadata := FormDataPdfMetadata(form, false) userPassword, ownerPassword := FormDataPdfEncrypt(form) + embedPaths := FormDataPdfEmbeds(form) var inputPaths []string var flatten bool @@ -324,6 +349,11 @@ func mergeRoute(engine gotenberg.PdfEngine) api.Route { } } + err = EmbedFilesStub(ctx, engine, embedPaths, outputPaths) + if err != nil { + return fmt.Errorf("embed files into PDFs: %w", err) + } + err = EncryptPdfStub(ctx, engine, userPassword, ownerPassword, outputPaths) if err != nil { return fmt.Errorf("encrypt PDFs: %w", err) @@ -353,6 +383,7 @@ func splitRoute(engine gotenberg.PdfEngine) api.Route { pdfFormats := FormDataPdfFormats(form) metadata := FormDataPdfMetadata(form, false) userPassword, ownerPassword := FormDataPdfEncrypt(form) + embedPaths := FormDataPdfEmbeds(form) var inputPaths []string var flatten bool @@ -386,6 +417,11 @@ func 
splitRoute(engine gotenberg.PdfEngine) api.Route { } } + err = EmbedFilesStub(ctx, engine, embedPaths, convertOutputPaths) + if err != nil { + return fmt.Errorf("embed files into PDFs: %w", err) + } + err = EncryptPdfStub(ctx, engine, userPassword, ownerPassword, convertOutputPaths) if err != nil { return fmt.Errorf("encrypt PDFs: %w", err) @@ -620,3 +656,38 @@ func encryptRoute(engine gotenberg.PdfEngine) api.Route { }, } } + +// embedRoute returns an [api.Route] which can add embedded files to PDFs. +func embedRoute(engine gotenberg.PdfEngine) api.Route { + return api.Route{ + Method: http.MethodPost, + Path: "/forms/pdfengines/embed", + IsMultipart: true, + Handler: func(c echo.Context) error { + ctx := c.Get("context").(*api.Context) + + form := ctx.FormData() + embedPaths := FormDataPdfEmbeds(form) + + var inputPaths []string + err := form. + MandatoryPaths([]string{".pdf"}, &inputPaths). + Validate() + if err != nil { + return fmt.Errorf("validate form data: %w", err) + } + + err = EmbedFilesStub(ctx, engine, embedPaths, inputPaths) + if err != nil { + return fmt.Errorf("embed files into PDFs: %w", err) + } + + err = ctx.AddOutputPaths(inputPaths...) + if err != nil { + return fmt.Errorf("add output paths: %w", err) + } + + return nil + }, + } +} diff --git a/pkg/modules/pdftk/pdftk.go b/pkg/modules/pdftk/pdftk.go index 655cbd4..f5c112b 100644 --- a/pkg/modules/pdftk/pdftk.go +++ b/pkg/modules/pdftk/pdftk.go @@ -183,6 +183,11 @@ func (engine *PdfTk) Encrypt(ctx context.Context, logger *zap.Logger, inputPath, return nil } +// EmbedFiles is not available in this implementation. +func (engine *PdfTk) EmbedFiles(ctx context.Context, logger *zap.Logger, filePaths []string, inputPath string) error { + return fmt.Errorf("embed files with PDFtk: %w", gotenberg.ErrPdfEngineMethodNotSupported) +} + // Interface guards. 
var ( _ gotenberg.Module = (*PdfTk)(nil) diff --git a/pkg/modules/qpdf/qpdf.go b/pkg/modules/qpdf/qpdf.go index f2aa2e3..c850798 100644 --- a/pkg/modules/qpdf/qpdf.go +++ b/pkg/modules/qpdf/qpdf.go @@ -201,6 +201,11 @@ func (engine *QPdf) Encrypt(ctx context.Context, logger *zap.Logger, inputPath, return nil } +// EmbedFiles is not available in this implementation. +func (engine *QPdf) EmbedFiles(ctx context.Context, logger *zap.Logger, filePaths []string, inputPath string) error { + return fmt.Errorf("embed files with QPDF: %w", gotenberg.ErrPdfEngineMethodNotSupported) +} + var ( _ gotenberg.Module = (*QPdf)(nil) _ gotenberg.Provisioner = (*QPdf)(nil) diff --git a/test/integration/features/chromium_convert_html.feature b/test/integration/features/chromium_convert_html.feature index 9b6ea65..32eb2c0 100644 --- a/test/integration/features/chromium_convert_html.feature +++ b/test/integration/features/chromium_convert_html.feature @@ -942,3 +942,19 @@ Feature: /forms/chromium/convert/html | files | testdata/page-1-html/index.html | file | Then the response status code should be 200 Then the response header "Content-Type" should be "application/pdf" + + @embed + Scenario: POST /forms/chromium/convert/html (Embeds) + Given I have a default Gotenberg container + When I make a "POST" request to Gotenberg at the "/forms/chromium/convert/html" endpoint with the following form data and header(s): + | files | testdata/page-1-html/index.html | file | + | embeds | testdata/embed_1.xml | file | + | embeds | testdata/embed_2.xml | file | + | Gotenberg-Output-Filename | foo | header | + Then the response status code should be 200 + And the response header "Content-Type" should be "application/pdf" + And there should be 1 PDF(s) in the response + And there should be the following file(s) in the response: + | foo.pdf | + And the "foo.pdf" PDF should have the "embed_1.xml" file embedded in it + And the "foo.pdf" PDF should have the "embed_2.xml" file embedded in it diff --git 
a/test/integration/features/chromium_convert_markdown.feature b/test/integration/features/chromium_convert_markdown.feature index dd415ca..e70eb85 100644 --- a/test/integration/features/chromium_convert_markdown.feature +++ b/test/integration/features/chromium_convert_markdown.feature @@ -1075,3 +1075,20 @@ Feature: /forms/chromium/convert/markdown | files | testdata/page-1-markdown/page_1.md | file | Then the response status code should be 200 Then the response header "Content-Type" should be "application/pdf" + + @embed + Scenario: POST /forms/chromium/convert/markdown (Embeds) + Given I have a default Gotenberg container + When I make a "POST" request to Gotenberg at the "/forms/chromium/convert/markdown" endpoint with the following form data and header(s): + | files | testdata/page-1-markdown/index.html | file | + | files | testdata/page-1-markdown/page_1.md | file | + | embeds | testdata/embed_1.xml | file | + | embeds | testdata/embed_2.xml | file | + | Gotenberg-Output-Filename | foo | header | + Then the response status code should be 200 + And the response header "Content-Type" should be "application/pdf" + And there should be 1 PDF(s) in the response + And there should be the following file(s) in the response: + | foo.pdf | + And the "foo.pdf" PDF should have the "embed_1.xml" file embedded in it + And the "foo.pdf" PDF should have the "embed_2.xml" file embedded in it diff --git a/test/integration/features/chromium_convert_url.feature b/test/integration/features/chromium_convert_url.feature index 319f1aa..786caa8 100644 --- a/test/integration/features/chromium_convert_url.feature +++ b/test/integration/features/chromium_convert_url.feature @@ -1039,3 +1039,20 @@ Feature: /forms/chromium/convert/url | url | http://host.docker.internal:%d/html/testdata/page-1-html/index.html | field | Then the response status code should be 200 Then the response header "Content-Type" should be "application/pdf" + + @embed + Scenario: POST /forms/chromium/convert/url
(Embeds) + Given I have a default Gotenberg container + And I have a static server + When I make a "POST" request to Gotenberg at the "/forms/chromium/convert/url" endpoint with the following form data and header(s): + | embeds | testdata/embed_1.xml | file | + | embeds | testdata/embed_2.xml | file | + | url | http://host.docker.internal:%d/html/testdata/page-1-html/index.html | field | + | Gotenberg-Output-Filename | foo | header | + Then the response status code should be 200 + And the response header "Content-Type" should be "application/pdf" + And there should be 1 PDF(s) in the response + And there should be the following file(s) in the response: + | foo.pdf | + And the "foo.pdf" PDF should have the "embed_1.xml" file embedded in it + And the "foo.pdf" PDF should have the "embed_2.xml" file embedded in it diff --git a/test/integration/features/libreoffice_convert.feature b/test/integration/features/libreoffice_convert.feature index 20c903e..84ffb04 100644 --- a/test/integration/features/libreoffice_convert.feature +++ b/test/integration/features/libreoffice_convert.feature @@ -652,3 +652,19 @@ Feature: /forms/libreoffice/convert | files | testdata/page_1.docx | file | Then the response status code should be 200 Then the response header "Content-Type" should be "application/pdf" + + @embed + Scenario: POST /forms/libreoffice/convert (Embeds) + Given I have a default Gotenberg container + When I make a "POST" request to Gotenberg at the "/forms/libreoffice/convert" endpoint with the following form data and header(s): + | files | testdata/page_1.docx | file | + | embeds | testdata/embed_1.xml | file | + | embeds | testdata/embed_2.xml | file | + | Gotenberg-Output-Filename | foo | header | + Then the response status code should be 200 + And the response header "Content-Type" should be "application/pdf" + And there should be 1 PDF(s) in the response + And there should be the following file(s) in the response: + | foo.pdf | + And the "foo.pdf" PDF should
have the "embed_1.xml" file embedded in it + And the "foo.pdf" PDF should have the "embed_2.xml" file embedded in it diff --git a/test/integration/features/pdfengines_embeds.feature b/test/integration/features/pdfengines_embeds.feature new file mode 100644 index 0000000..4ae4046 --- /dev/null +++ b/test/integration/features/pdfengines_embeds.feature @@ -0,0 +1,32 @@ +@embed +Feature: /forms/pdfengines/embed + + Scenario: POST /forms/pdfengines/embed + Given I have a default Gotenberg container + When I make a "POST" request to Gotenberg at the "/forms/pdfengines/embed" endpoint with the following form data and header(s): + | files | testdata/page_1.pdf | file | + | embeds | testdata/embed_1.xml | file | + | embeds | testdata/embed_2.xml | file | + | embeds | testdata/page_2.pdf | file | + Then the response status code should be 200 + And the response header "Content-Type" should be "application/pdf" + And there should be 1 PDF(s) in the response + And there should be the following file(s) in the response: + | page_1.pdf | + And the "page_1.pdf" PDF should have the "embed_1.xml" file embedded in it + And the "page_1.pdf" PDF should have the "embed_2.xml" file embedded in it + And the "page_1.pdf" PDF should have the "page_2.pdf" file embedded in it + + Scenario: POST /forms/pdfengines/embed (Download From) + Given I have a default Gotenberg container + And I have a static server + When I make a "POST" request to Gotenberg at the "/forms/pdfengines/embed" endpoint with the following form data and header(s): + | files | testdata/page_1.pdf | file | + | downloadFrom | [{"url":"http://host.docker.internal:%d/static/testdata/embed_1.xml", "embedded": true},{"url":"http://host.docker.internal:%d/static/testdata/embed_2.xml", "embedded": false}] | field | + Then the response status code should be 200 + And the response header "Content-Type" should be "application/pdf" + And there should be 1 PDF(s) in the response + And there should be the following file(s) in the
response: + | page_1.pdf | + And the "page_1.pdf" PDF should have the "embed_1.xml" file embedded in it + And the "page_1.pdf" PDF should NOT have the "embed_2.xml" file embedded in it diff --git a/test/integration/features/pdfengines_merge.feature b/test/integration/features/pdfengines_merge.feature index 54005dd..25a6110 100644 --- a/test/integration/features/pdfengines_merge.feature +++ b/test/integration/features/pdfengines_merge.feature @@ -368,3 +368,16 @@ Feature: /forms/pdfengines/merge | files | testdata/page_2.pdf | file | Then the response status code should be 200 Then the response header "Content-Type" should be "application/pdf" + + @embed + Scenario: POST /forms/pdfengines/merge (Embeds) + Given I have a default Gotenberg container + When I make a "POST" request to Gotenberg at the "/forms/pdfengines/merge" endpoint with the following form data and header(s): + | files | testdata/page_1.pdf | file | + | files | testdata/page_2.pdf | file | + | embeds | testdata/embed_1.xml | file | + | embeds | testdata/embed_2.xml | file | + | Gotenberg-Output-Filename | foo | header | + Then the response status code should be 200 + And the "foo.pdf" PDF should have the "embed_1.xml" file embedded in it + And the "foo.pdf" PDF should have the "embed_2.xml" file embedded in it diff --git a/test/integration/features/pdfengines_split.feature b/test/integration/features/pdfengines_split.feature index 3566f52..d988510 100644 --- a/test/integration/features/pdfengines_split.feature +++ b/test/integration/features/pdfengines_split.feature @@ -650,3 +650,23 @@ Feature: /forms/pdfengines/split | splitSpan | 2 | field | Then the response status code should be 200 Then the response header "Content-Type" should be "application/zip" + + @embed + Scenario: POST /forms/pdfengines/split (Embeds) + Given I have a default Gotenberg container + When I make a "POST" request to Gotenberg at the "/forms/pdfengines/split" endpoint with the following form data and header(s): + | 
files | testdata/pages_3.pdf | file | + | embeds | testdata/embed_1.xml | file | + | embeds | testdata/embed_2.xml | file | + | splitMode | intervals | field | + | splitSpan | 2 | field | + Then the response status code should be 200 + And the response header "Content-Type" should be "application/zip" + And there should be 2 PDF(s) in the response + And there should be the following file(s) in the response: + | pages_3_0.pdf | + | pages_3_1.pdf | + And the "pages_3_0.pdf" PDF should have the "embed_1.xml" file embedded in it + And the "pages_3_0.pdf" PDF should have the "embed_2.xml" file embedded in it + And the "pages_3_1.pdf" PDF should have the "embed_1.xml" file embedded in it + And the "pages_3_1.pdf" PDF should have the "embed_2.xml" file embedded in it diff --git a/test/integration/scenario/scenario.go b/test/integration/scenario/scenario.go index 4852e38..222bd58 100644 --- a/test/integration/scenario/scenario.go +++ b/test/integration/scenario/scenario.go @@ -735,6 +735,41 @@ func (s *scenario) thePdfShouldBeSetToLandscapeOrientation(ctx context.Context, return nil } +func (s *scenario) thePdfShouldHaveTheFollowingEmbedsInIt(ctx context.Context, name, should string, embed string) error { + path, err := s.getPath(name) + if err != nil { + return fmt.Errorf("get path %q: %w", name, err) + } + invert := should == "should NOT" + + cmd := []string{ + "verapdf", + "--off", + "--loglevel", + "0", + "--extract", + "embeddedFile", + name, + } + + output, err := execCommandInIntegrationToolsContainer(ctx, cmd, path) + if err != nil { + return fmt.Errorf("exec %q: %w", cmd, err) + } + + found := strings.Contains(output, fmt.Sprintf("%s", embed)) + + if invert && found { + return fmt.Errorf("embed %q found", embed) + } + + if !invert && !found { + return fmt.Errorf("embed %q not found", embed) + } + + return nil +} + func (s *scenario) thePdfShouldHaveTheFollowingContentAtPage(ctx context.Context, name, kind string, page int, expected *godog.DocString) error { var 
path string if !strings.HasPrefix(name, "*_") { @@ -927,6 +962,7 @@ func InitializeScenario(ctx *godog.ScenarioContext) { ctx.Then(`^the "([^"]*)" PDF should have (\d+) page\(s\)$`, s.thePdfShouldHavePages) ctx.Then(`^the "([^"]*)" PDF (should|should NOT) be set to landscape orientation$`, s.thePdfShouldBeSetToLandscapeOrientation) ctx.Then(`^the "([^"]*)" PDF (should|should NOT) have the following content at page (\d+):$`, s.thePdfShouldHaveTheFollowingContentAtPage) + ctx.Then(`^the "([^"]*)" PDF (should|should NOT) have the "([^"]*)" file embedded in it$`, s.thePdfShouldHaveTheFollowingEmbedsInIt) ctx.After(func(ctx context.Context, sc *godog.Scenario, err error) (context.Context, error) { if s.gotenbergContainer != nil { errTerminate := s.gotenbergContainer.Terminate(ctx, testcontainers.StopTimeout(0)) @@ -950,3 +986,14 @@ func InitializeScenario(ctx *godog.ScenarioContext) { return ctx, nil }) } + +func (s *scenario) getPath(name string) (string, error) { + path := fmt.Sprintf("%s/%s/%s", s.workdir, s.resp.Header().Get("Gotenberg-Trace"), name) + + _, err := os.Stat(path) + if os.IsNotExist(err) { + return "", fmt.Errorf("PDF %q does not exist", path) + } + + return path, nil +} diff --git a/test/integration/testdata/embed_1.xml b/test/integration/testdata/embed_1.xml new file mode 100644 index 0000000..acd3c47 --- /dev/null +++ b/test/integration/testdata/embed_1.xml @@ -0,0 +1,5 @@ + + test 1.1 + test 1.2 + test 1.3 + \ No newline at end of file diff --git a/test/integration/testdata/embed_2.xml b/test/integration/testdata/embed_2.xml new file mode 100644 index 0000000..44ff7b5 --- /dev/null +++ b/test/integration/testdata/embed_2.xml @@ -0,0 +1,5 @@ + + test 2.1 + test 2.2 + test 2.3 + \ No newline at end of file