diff --git a/.bruno/Chromium/Convert/HTML to PDF.bru b/.bruno/Chromium/Convert/HTML to PDF.bru index f1cf2a7..d340368 100644 --- a/.bruno/Chromium/Convert/HTML to PDF.bru +++ b/.bruno/Chromium/Convert/HTML to PDF.bru @@ -53,7 +53,10 @@ body:multipart-form { ~embeds: @file(../test/integration/testdata/embed_1.xml) ~embeds: @file(../test/integration/testdata/embed_2.xml) ~embedsMetadata: {"embed_1.xml":{"mimeType":"text/xml","relationship":"Data"}, "embed_2.xml":{"mimeType":"text/xml","relationship":"Data"}} - ~facturx: {"conformanceLevel":"EN 16931","documentType":"INVOICE","documentFileName":"factur-x.xml","version":"1.0"} + ~facturxXml: @file(../test/integration/testdata/embed_1.xml) + ~facturxConformanceLevel: EN 16931 + ~facturxDocumentType: INVOICE + ~facturxVersion: 1.0 ~watermarkSource: text ~watermarkExpression: CONFIDENTIAL ~watermarkPages: diff --git a/.bruno/Chromium/Convert/Markdown to PDF.bru b/.bruno/Chromium/Convert/Markdown to PDF.bru index 0f06526..ca5e90a 100644 --- a/.bruno/Chromium/Convert/Markdown to PDF.bru +++ b/.bruno/Chromium/Convert/Markdown to PDF.bru @@ -54,7 +54,10 @@ body:multipart-form { ~embeds: @file(../test/integration/testdata/embed_1.xml) ~embeds: @file(../test/integration/testdata/embed_2.xml) ~embedsMetadata: {"embed_1.xml":{"mimeType":"text/xml","relationship":"Data"}, "embed_2.xml":{"mimeType":"text/xml","relationship":"Data"}} - ~facturx: {"conformanceLevel":"EN 16931","documentType":"INVOICE","documentFileName":"factur-x.xml","version":"1.0"} + ~facturxXml: @file(../test/integration/testdata/embed_1.xml) + ~facturxConformanceLevel: EN 16931 + ~facturxDocumentType: INVOICE + ~facturxVersion: 1.0 ~watermarkSource: text ~watermarkExpression: CONFIDENTIAL ~watermarkPages: diff --git a/.bruno/Chromium/Convert/URL to PDF.bru b/.bruno/Chromium/Convert/URL to PDF.bru index ef48091..7dd5d5a 100644 --- a/.bruno/Chromium/Convert/URL to PDF.bru +++ b/.bruno/Chromium/Convert/URL to PDF.bru @@ -53,7 +53,10 @@ body:multipart-form { 
~embeds: @file(../test/integration/testdata/embed_1.xml) ~embeds: @file(../test/integration/testdata/embed_2.xml) ~embedsMetadata: {"embed_1.xml":{"mimeType":"text/xml","relationship":"Data"}, "embed_2.xml":{"mimeType":"text/xml","relationship":"Data"}} - ~facturx: {"conformanceLevel":"EN 16931","documentType":"INVOICE","documentFileName":"factur-x.xml","version":"1.0"} + ~facturxXml: @file(../test/integration/testdata/embed_1.xml) + ~facturxConformanceLevel: EN 16931 + ~facturxDocumentType: INVOICE + ~facturxVersion: 1.0 ~watermarkSource: text ~watermarkExpression: CONFIDENTIAL ~watermarkPages: diff --git a/.bruno/LibreOffice/Convert to PDF.bru b/.bruno/LibreOffice/Convert to PDF.bru index 51e5282..5816480 100644 --- a/.bruno/LibreOffice/Convert to PDF.bru +++ b/.bruno/LibreOffice/Convert to PDF.bru @@ -70,7 +70,10 @@ body:multipart-form { ~embeds: @file(../test/integration/testdata/embed_1.xml) ~embeds: @file(../test/integration/testdata/embed_2.xml) ~embedsMetadata: {"embed_1.xml":{"mimeType":"text/xml","relationship":"Data"}, "embed_2.xml":{"mimeType":"text/xml","relationship":"Data"}} - ~facturx: {"conformanceLevel":"EN 16931","documentType":"INVOICE","documentFileName":"factur-x.xml","version":"1.0"} + ~facturxXml: @file(../test/integration/testdata/embed_1.xml) + ~facturxConformanceLevel: EN 16931 + ~facturxDocumentType: INVOICE + ~facturxVersion: 1.0 ~watermarkSource: text ~watermarkExpression: CONFIDENTIAL ~watermarkPages: diff --git a/.bruno/PDF Engines/Embed/Embed Files.bru b/.bruno/PDF Engines/Embed/Embed Files.bru index bb99286..746d7e4 100644 --- a/.bruno/PDF Engines/Embed/Embed Files.bru +++ b/.bruno/PDF Engines/Embed/Embed Files.bru @@ -15,7 +15,10 @@ body:multipart-form { embeds: @file(../test/integration/testdata/embed_1.xml) embeds: @file(../test/integration/testdata/embed_2.xml) embedsMetadata: {"embed_1.xml":{"mimeType":"text/xml","relationship":"Data"}, "embed_2.xml":{"mimeType":"text/xml","relationship":"Data"}} - ~facturx: 
{"conformanceLevel":"EN 16931","documentType":"INVOICE","documentFileName":"factur-x.xml","version":"1.0"} + ~facturxXml: @file(../test/integration/testdata/embed_1.xml) + ~facturxConformanceLevel: EN 16931 + ~facturxDocumentType: INVOICE + ~facturxVersion: 1.0 ~downloadFrom: [{"url":"https://example.com/attachment.xml","embedded":true}] } diff --git a/.bruno/PDF Engines/Factur-X/Inject Factur-X XMP.bru b/.bruno/PDF Engines/Factur-X/Inject Factur-X XMP.bru index 59cc6f8..52df33c 100644 --- a/.bruno/PDF Engines/Factur-X/Inject Factur-X XMP.bru +++ b/.bruno/PDF Engines/Factur-X/Inject Factur-X XMP.bru @@ -12,7 +12,10 @@ post { body:multipart-form { files: @file(../../test/integration/testdata/page_1.pdf) - facturx: {"conformanceLevel":"EN 16931","documentType":"INVOICE","documentFileName":"factur-x.xml","version":"1.0"} + facturxXml: @file(../../test/integration/testdata/embed_1.xml) + facturxConformanceLevel: EN 16931 + ~facturxDocumentType: INVOICE + ~facturxVersion: 1.0 } headers { diff --git a/.bruno/PDF Engines/Merge/Merge PDFs.bru b/.bruno/PDF Engines/Merge/Merge PDFs.bru index 31c99a0..f01d18a 100644 --- a/.bruno/PDF Engines/Merge/Merge PDFs.bru +++ b/.bruno/PDF Engines/Merge/Merge PDFs.bru @@ -31,7 +31,10 @@ body:multipart-form { ~stampOptions: {"scale":"0.5 abs","rot":"45"} ~rotateAngle: 90 ~rotatePages: - ~facturx: {"conformanceLevel":"EN 16931","documentType":"INVOICE","documentFileName":"factur-x.xml","version":"1.0"} + ~facturxXml: @file(../../test/integration/testdata/embed_1.xml) + ~facturxConformanceLevel: EN 16931 + ~facturxDocumentType: INVOICE + ~facturxVersion: 1.0 } headers { diff --git a/.bruno/PDF Engines/Split/Split PDF.bru b/.bruno/PDF Engines/Split/Split PDF.bru index 94ab7de..d83a239 100644 --- a/.bruno/PDF Engines/Split/Split PDF.bru +++ b/.bruno/PDF Engines/Split/Split PDF.bru @@ -31,7 +31,10 @@ body:multipart-form { ~stampOptions: {"scale":"0.5 abs","rot":"45"} ~rotateAngle: 90 ~rotatePages: - ~facturx: {"conformanceLevel":"EN 
16931","documentType":"INVOICE","documentFileName":"factur-x.xml","version":"1.0"} + ~facturxXml: @file(../../test/integration/testdata/embed_1.xml) + ~facturxConformanceLevel: EN 16931 + ~facturxDocumentType: INVOICE + ~facturxVersion: 1.0 } headers { diff --git a/pkg/gotenberg/mocks.go b/pkg/gotenberg/mocks.go index 616db36..024f504 100644 --- a/pkg/gotenberg/mocks.go +++ b/pkg/gotenberg/mocks.go @@ -45,22 +45,23 @@ func (mod *DebuggableMock) Debug() map[string]any { // //nolint:dupl type PdfEngineMock struct { - MergeMock func(ctx context.Context, logger *slog.Logger, inputPaths []string, outputPath string) error - SplitMock func(ctx context.Context, logger *slog.Logger, mode SplitMode, inputPath, outputDirPath string) ([]string, error) - FlattenMock func(ctx context.Context, logger *slog.Logger, inputPath string) error - ConvertMock func(ctx context.Context, logger *slog.Logger, formats PdfFormats, inputPath, outputPath string) error - ReadMetadataMock func(ctx context.Context, logger *slog.Logger, inputPath string) (map[string]any, error) - PageCountMock func(ctx context.Context, logger *slog.Logger, inputPath string) (int, error) - WriteMetadataMock func(ctx context.Context, logger *slog.Logger, metadata map[string]any, inputPath string) error - ReadBookmarksMock func(ctx context.Context, logger *slog.Logger, inputPath string) ([]Bookmark, error) - EncryptMock func(ctx context.Context, logger *slog.Logger, inputPath, userPassword, ownerPassword string) error - EmbedFilesMock func(ctx context.Context, logger *slog.Logger, filePaths []string, inputPath string) error - EmbedFilesMetadataMock func(ctx context.Context, logger *slog.Logger, metadata map[string]map[string]string, inputPath string) error - WriteBookmarksMock func(ctx context.Context, logger *slog.Logger, inputPath string, bookmarks []Bookmark) error - WatermarkMock func(ctx context.Context, logger *slog.Logger, inputPath string, stamp Stamp) error - StampMock func(ctx context.Context, logger 
*slog.Logger, inputPath string, stamp Stamp) error - RotateMock func(ctx context.Context, logger *slog.Logger, inputPath string, angle int, pages string) error - InjectFacturXXMPMock func(ctx context.Context, logger *slog.Logger, facturX FacturX, inputPath string) error + MergeMock func(ctx context.Context, logger *slog.Logger, inputPaths []string, outputPath string) error + SplitMock func(ctx context.Context, logger *slog.Logger, mode SplitMode, inputPath, outputDirPath string) ([]string, error) + FlattenMock func(ctx context.Context, logger *slog.Logger, inputPath string) error + ConvertMock func(ctx context.Context, logger *slog.Logger, formats PdfFormats, inputPath, outputPath string) error + ReadMetadataMock func(ctx context.Context, logger *slog.Logger, inputPath string) (map[string]any, error) + PageCountMock func(ctx context.Context, logger *slog.Logger, inputPath string) (int, error) + WriteMetadataMock func(ctx context.Context, logger *slog.Logger, metadata map[string]any, inputPath string) error + ReadBookmarksMock func(ctx context.Context, logger *slog.Logger, inputPath string) ([]Bookmark, error) + EncryptMock func(ctx context.Context, logger *slog.Logger, inputPath, userPassword, ownerPassword string) error + EmbedFilesMock func(ctx context.Context, logger *slog.Logger, filePaths []string, inputPath string) error + EmbedFilesMetadataMock func(ctx context.Context, logger *slog.Logger, metadata map[string]map[string]string, inputPath string) error + WriteBookmarksMock func(ctx context.Context, logger *slog.Logger, inputPath string, bookmarks []Bookmark) error + WatermarkMock func(ctx context.Context, logger *slog.Logger, inputPath string, stamp Stamp) error + StampMock func(ctx context.Context, logger *slog.Logger, inputPath string, stamp Stamp) error + RotateMock func(ctx context.Context, logger *slog.Logger, inputPath string, angle int, pages string) error + InjectFacturXXMPMock func(ctx context.Context, logger *slog.Logger, facturX FacturX, inputPath 
string) error + ReadPdfAConformanceMock func(ctx context.Context, logger *slog.Logger, inputPath string) (string, string, error) } func (engine *PdfEngineMock) Merge(ctx context.Context, logger *slog.Logger, inputPaths []string, outputPath string) error { @@ -127,6 +128,10 @@ func (engine *PdfEngineMock) InjectFacturXXMP(ctx context.Context, logger *slog. return engine.InjectFacturXXMPMock(ctx, logger, facturX, inputPath) } +func (engine *PdfEngineMock) ReadPdfAConformance(ctx context.Context, logger *slog.Logger, inputPath string) (string, string, error) { + return engine.ReadPdfAConformanceMock(ctx, logger, inputPath) +} + // PdfEngineProviderMock is a mock for the [PdfEngineProvider] interface. type PdfEngineProviderMock struct { PdfEngineMock func() (PdfEngine, error) diff --git a/pkg/gotenberg/pdfengine.go b/pkg/gotenberg/pdfengine.go index 96bae87..23cf436 100644 --- a/pkg/gotenberg/pdfengine.go +++ b/pkg/gotenberg/pdfengine.go @@ -185,6 +185,10 @@ const ( // FacturXDocumentTypeOrderChange represents the ORDER_CHANGE Factur-X document type. FacturXDocumentTypeOrderChange string = "ORDER_CHANGE" + + // FacturXDocumentFileName is the canonical name of the embedded XML invoice + // mandated by the Factur-X standard. Validators expect this exact name. + FacturXDocumentFileName string = "factur-x.xml" ) // FacturX gathers the properties required by the Factur-X/ZUGFeRD standard for @@ -196,8 +200,8 @@ type FacturX struct { // DocumentType is one of the FacturXDocumentType* values. DocumentType string - // DocumentFileName is the name of the embedded XML invoice (e.g., - // "factur-x.xml"). + // DocumentFileName is the name of the embedded XML invoice. It is set + // internally to the canonical [FacturXDocumentFileName], not by the caller. DocumentFileName string // Version is the Factur-X version (e.g., "1.0"). 
@@ -275,6 +279,12 @@ type PdfEngine interface { // registers the fx namespace, the four fx properties, and the matching // PDF/A extension schema so the result stays PDF/A-valid. InjectFacturXXMP(ctx context.Context, logger *slog.Logger, facturX FacturX, inputPath string) error + + // ReadPdfAConformance reads the PDF/A part and conformance (e.g., "3" and + // "B") from the document-level XMP packet (Catalog /Metadata stream, + // pdfaid:part and pdfaid:conformance). It returns empty strings when the + // document carries no PDF/A identification. + ReadPdfAConformance(ctx context.Context, logger *slog.Logger, inputPath string) (part string, conformance string, err error) } // PdfEngineProvider offers an interface to instantiate a [PdfEngine]. diff --git a/pkg/modules/api/formdata.go b/pkg/modules/api/formdata.go index 302efe1..fa2ddc2 100644 --- a/pkg/modules/api/formdata.go +++ b/pkg/modules/api/formdata.go @@ -26,6 +26,10 @@ const ( // StampFormField represents the form field name for the stamp file. StampFormField string = "stamp" + + // FacturXXmlFormField represents the form field name for the Factur-X CII + // invoice XML file. + FacturXXmlFormField string = "facturxXml" ) // FormData is a helper for validating and hydrating values from a @@ -424,89 +428,6 @@ func (form *FormData) EmbedsMetadata(target *map[string]map[string]string) *Form return form } -// FacturX parses the "facturx" form field (a JSON string) into a -// [gotenberg.FacturX]. The "conformanceLevel" property is mandatory; the -// "documentType", "version", and "documentFileName" properties default to -// "INVOICE", "1.0", and "factur-x.xml" respectively. It leaves the target -// untouched when the field is absent. 
-// -// var facturX gotenberg.FacturX -// -// ctx.FormData().FacturX(&facturX, false) -func (form *FormData) FacturX(target *gotenberg.FacturX, mandatory bool) *FormData { - if form.errors != nil { - return form - } - - val, ok := form.values["facturx"] - if !ok || len(val) == 0 || val[0] == "" { - if mandatory { - form.append(fmt.Errorf("form field '%s' is required", "facturx")) - } - return form - } - - var parsed struct { - ConformanceLevel string `json:"conformanceLevel"` - DocumentType string `json:"documentType"` - DocumentFileName string `json:"documentFileName"` - Version string `json:"version"` - } - - err := json.Unmarshal([]byte(val[0]), &parsed) - if err != nil { - form.append(fmt.Errorf("form field 'facturx' is invalid: %w", err)) - return form - } - - facturX := gotenberg.FacturX{ - ConformanceLevel: parsed.ConformanceLevel, - DocumentType: parsed.DocumentType, - DocumentFileName: parsed.DocumentFileName, - Version: parsed.Version, - } - - if facturX.DocumentType == "" { - facturX.DocumentType = gotenberg.FacturXDocumentTypeInvoice - } - - if facturX.Version == "" { - facturX.Version = "1.0" - } - - if facturX.DocumentFileName == "" { - facturX.DocumentFileName = "factur-x.xml" - } - - switch facturX.ConformanceLevel { - case gotenberg.FacturXConformanceMinimum, - gotenberg.FacturXConformanceBasicWL, - gotenberg.FacturXConformanceBasic, - gotenberg.FacturXConformanceEN16931, - gotenberg.FacturXConformanceExtended, - gotenberg.FacturXConformanceXRechnung: - case "": - form.append(errors.New("form field 'facturx' is invalid: 'conformanceLevel' is required")) - return form - default: - form.append(fmt.Errorf("form field 'facturx' is invalid: unsupported 'conformanceLevel' '%s'", facturX.ConformanceLevel)) - return form - } - - switch facturX.DocumentType { - case gotenberg.FacturXDocumentTypeInvoice, - gotenberg.FacturXDocumentTypeOrder, - gotenberg.FacturXDocumentTypeOrderResponse, - gotenberg.FacturXDocumentTypeOrderChange: - default: - 
form.append(fmt.Errorf("form field 'facturx' is invalid: unsupported 'documentType' '%s'", facturX.DocumentType)) - return form - } - - *target = facturX - return form -} - // MandatoryPaths binds the absolute paths of form data files, according to a // list of file extensions, to a string slice variable. It populates an error // if there is no file for given file extensions. @@ -558,13 +479,28 @@ func (form *FormData) Stamp(target *string) *FormData { return form } +// FacturXXml binds the absolute path of the uploaded Factur-X CII invoice +// XML. Only a file uploaded with the "facturxXml" field name is included. +func (form *FormData) FacturXXml(target *string) *FormData { + if form.errors != nil { + return form + } + + if paths, ok := form.filesByField[FacturXXmlFormField]; ok && len(paths) > 0 { + *target = paths[0] + } + + return form +} + // paths bind the absolute paths of form data files, according to a list of // file extensions, to a string slice variable. -// embeds, watermark, and stamp files are excluded. +// embeds, watermark, stamp, and facturxXml files are excluded. func (form *FormData) paths(extensions []string, target *[]string) *FormData { embeds, ok := form.filesByField[EmbedsFormField] watermarks, wmOk := form.filesByField[WatermarkFormField] stamps, stOk := form.filesByField[StampFormField] + facturxXmls, fxOk := form.filesByField[FacturXXmlFormField] // Collect (originalFilename, diskPath) pairs so that we can sort by // original filename rather than by UUID-based disk name. @@ -588,6 +524,10 @@ func (form *FormData) paths(extensions []string, target *[]string) *FormData { continue } + if fxOk && slices.Contains(facturxXmls, path) { + continue + } + for _, ext := range extensions { // See https://github.com/gotenberg/gotenberg/issues/228. 
if strings.ToLower(filepath.Ext(filename)) == ext { diff --git a/pkg/modules/api/formdata_test.go b/pkg/modules/api/formdata_test.go index fbdab38..391c9d7 100644 --- a/pkg/modules/api/formdata_test.go +++ b/pkg/modules/api/formdata_test.go @@ -7,8 +7,6 @@ import ( "reflect" "testing" "time" - - "github.com/gotenberg/gotenberg/v8/pkg/gotenberg" ) func TestFormData_Validate(t *testing.T) { @@ -1786,110 +1784,56 @@ func TestFormData_Embeds(t *testing.T) { } } -func TestFormData_FacturX(t *testing.T) { +func TestFormData_FacturXXml(t *testing.T) { for _, tc := range []struct { - scenario string - form *FormData - mandatory bool - expect gotenberg.FacturX - expectErr bool + scenario string + form *FormData + expect string }{ { - scenario: "key does not exist, not mandatory", - form: &FormData{}, - mandatory: false, - expect: gotenberg.FacturX{}, + scenario: "no facturxXml file", + form: &FormData{}, + expect: "", }, { - scenario: "key does not exist, mandatory", - form: &FormData{}, - mandatory: true, - expectErr: true, - }, - { - scenario: "all fields provided", + scenario: "facturxXml file present", form: &FormData{ - values: map[string][]string{ - "facturx": {`{"conformanceLevel":"EXTENDED","documentType":"ORDER","documentFileName":"order.xml","version":"2.0"}`}, + filesByField: map[string][]string{ + FacturXXmlFormField: {"/tmp/abc/12345.xml"}, }, }, - expect: gotenberg.FacturX{ - ConformanceLevel: gotenberg.FacturXConformanceExtended, - DocumentType: gotenberg.FacturXDocumentTypeOrder, - DocumentFileName: "order.xml", - Version: "2.0", - }, - }, - { - scenario: "only conformance level, defaults applied", - form: &FormData{ - values: map[string][]string{ - "facturx": {`{"conformanceLevel":"EN 16931"}`}, - }, - }, - expect: gotenberg.FacturX{ - ConformanceLevel: gotenberg.FacturXConformanceEN16931, - DocumentType: gotenberg.FacturXDocumentTypeInvoice, - DocumentFileName: "factur-x.xml", - Version: "1.0", - }, - }, - { - scenario: "invalid JSON", - form: &FormData{ - 
values: map[string][]string{ - "facturx": {`{not json`}, - }, - }, - expectErr: true, - }, - { - scenario: "missing conformance level", - form: &FormData{ - values: map[string][]string{ - "facturx": {`{"documentType":"INVOICE"}`}, - }, - }, - expectErr: true, - }, - { - scenario: "unsupported conformance level", - form: &FormData{ - values: map[string][]string{ - "facturx": {`{"conformanceLevel":"FOO"}`}, - }, - }, - expectErr: true, - }, - { - scenario: "unsupported document type", - form: &FormData{ - values: map[string][]string{ - "facturx": {`{"conformanceLevel":"BASIC","documentType":"RECEIPT"}`}, - }, - }, - expectErr: true, + expect: "/tmp/abc/12345.xml", }, } { t.Run(tc.scenario, func(t *testing.T) { - var actual gotenberg.FacturX + var actual string - tc.form.FacturX(&actual, tc.mandatory) + tc.form.FacturXXml(&actual) - if tc.expectErr { - if tc.form.errors == nil { - t.Error("expected an error but got none") - } - return - } - - if tc.form.errors != nil { - t.Errorf("expected no error but got: %v", tc.form.errors) - } - - if !reflect.DeepEqual(actual, tc.expect) { - t.Errorf("expected %+v but got %+v", tc.expect, actual) + if actual != tc.expect { + t.Errorf("expected %q but got %q", tc.expect, actual) } }) } } + +// TestFormData_paths_excludesFacturXXml verifies that an uploaded facturxXml is +// never picked up as an input document by paths(). 
+func TestFormData_paths_excludesFacturXXml(t *testing.T) { + form := &FormData{ + files: map[string]string{ + "document.xml": "/tmp/abc/document.xml", + "factur-x.xml": "/tmp/abc/invoice.xml", + }, + filesByField: map[string][]string{ + FacturXXmlFormField: {"/tmp/abc/invoice.xml"}, + }, + } + + var paths []string + form.paths([]string{".xml"}, &paths) + + if len(paths) != 1 || paths[0] != "/tmp/abc/document.xml" { + t.Errorf("expected only the non-Factur-X .xml document, got %+v", paths) + } +} diff --git a/pkg/modules/chromium/routes.go b/pkg/modules/chromium/routes.go index bbe7650..1198e43 100644 --- a/pkg/modules/chromium/routes.go +++ b/pkg/modules/chromium/routes.go @@ -454,7 +454,7 @@ func convertUrlRoute(chromium Api, engine gotenberg.PdfEngine) api.Route { stampFile := pdfengines.FormDataPdfStampFile(form) rotateAngle, rotatePages := pdfengines.FormDataPdfRotate(form, false) embedsMetadata := pdfengines.FormDataPdfEmbedsMetadata(form) - facturX := pdfengines.FormDataPdfFacturX(form, false) + facturX, facturxXmlPath := pdfengines.FormDataPdfFacturX(form) var url string err := form. 
@@ -478,7 +478,7 @@ func convertUrlRoute(chromium Api, engine gotenberg.PdfEngine) api.Route { return fmt.Errorf("validate stamp: %w", err) } - err = convertUrl(ctx, chromium, engine, url, options, mode, pdfFormats, metadata, userPassword, ownerPassword, embedPaths, embedsMetadata, facturX, watermark, stamp, rotateAngle, rotatePages) + err = convertUrl(ctx, chromium, engine, url, options, mode, pdfFormats, metadata, userPassword, ownerPassword, embedPaths, embedsMetadata, facturX, facturxXmlPath, watermark, stamp, rotateAngle, rotatePages) if err != nil { return fmt.Errorf("convert URL to PDF: %w", err) } @@ -543,7 +543,7 @@ func convertHtmlRoute(chromium Api, engine gotenberg.PdfEngine) api.Route { stampFile := pdfengines.FormDataPdfStampFile(form) rotateAngle, rotatePages := pdfengines.FormDataPdfRotate(form, false) embedsMetadata := pdfengines.FormDataPdfEmbedsMetadata(form) - facturX := pdfengines.FormDataPdfFacturX(form, false) + facturX, facturxXmlPath := pdfengines.FormDataPdfFacturX(form) var inputPath string err := form. 
@@ -564,7 +564,7 @@ func convertHtmlRoute(chromium Api, engine gotenberg.PdfEngine) api.Route { url := fmt.Sprintf("file://%s", inputPath) options.AllowedFilePrefixes = []string{ctx.DirPath()} - err = convertUrl(ctx, chromium, engine, url, options, mode, pdfFormats, metadata, userPassword, ownerPassword, embedPaths, embedsMetadata, facturX, watermark, stamp, rotateAngle, rotatePages) + err = convertUrl(ctx, chromium, engine, url, options, mode, pdfFormats, metadata, userPassword, ownerPassword, embedPaths, embedsMetadata, facturX, facturxXmlPath, watermark, stamp, rotateAngle, rotatePages) if err != nil { return fmt.Errorf("convert HTML to PDF: %w", err) } @@ -626,7 +626,7 @@ func convertMarkdownRoute(chromium Api, engine gotenberg.PdfEngine) api.Route { stampFile := pdfengines.FormDataPdfStampFile(form) rotateAngle, rotatePages := pdfengines.FormDataPdfRotate(form, false) embedsMetadata := pdfengines.FormDataPdfEmbedsMetadata(form) - facturX := pdfengines.FormDataPdfFacturX(form, false) + facturX, facturxXmlPath := pdfengines.FormDataPdfFacturX(form) var ( inputPath string @@ -656,7 +656,7 @@ func convertMarkdownRoute(chromium Api, engine gotenberg.PdfEngine) api.Route { } options.AllowedFilePrefixes = []string{ctx.DirPath()} - err = convertUrl(ctx, chromium, engine, url, options, mode, pdfFormats, metadata, userPassword, ownerPassword, embedPaths, embedsMetadata, facturX, watermark, stamp, rotateAngle, rotatePages) + err = convertUrl(ctx, chromium, engine, url, options, mode, pdfFormats, metadata, userPassword, ownerPassword, embedPaths, embedsMetadata, facturX, facturxXmlPath, watermark, stamp, rotateAngle, rotatePages) if err != nil { return fmt.Errorf("convert markdown to PDF: %w", err) } @@ -781,7 +781,7 @@ func markdownToHtml(ctx *api.Context, inputPath string, markdownPaths []string) return fmt.Sprintf("file://%s", inputPath), nil } -func convertUrl(ctx *api.Context, chromium Api, engine gotenberg.PdfEngine, url string, options PdfOptions, mode 
gotenberg.SplitMode, pdfFormats gotenberg.PdfFormats, metadata map[string]any, userPassword, ownerPassword string, embedPaths []string, embedsMetadata map[string]map[string]string, facturX gotenberg.FacturX, watermark, stamp gotenberg.Stamp, rotateAngle int, rotatePages string) error { +func convertUrl(ctx *api.Context, chromium Api, engine gotenberg.PdfEngine, url string, options PdfOptions, mode gotenberg.SplitMode, pdfFormats gotenberg.PdfFormats, metadata map[string]any, userPassword, ownerPassword string, embedPaths []string, embedsMetadata map[string]map[string]string, facturX gotenberg.FacturX, facturxXmlPath string, watermark, stamp gotenberg.Stamp, rotateAngle int, rotatePages string) error { outputPath := ctx.GeneratePath(".pdf") // See https://github.com/gotenberg/gotenberg/issues/1130. filename := ctx.OutputFilename(outputPath) @@ -848,6 +848,11 @@ func convertUrl(ctx *api.Context, chromium Api, engine gotenberg.PdfEngine, url return err } + err = pdfengines.ValidateFacturXCompat(facturX, facturxXmlPath, pdfFormats) + if err != nil { + return err + } + outputPaths, err := pdfengines.SplitPdfStub(ctx, engine, mode, []string{outputPath}) if err != nil { return fmt.Errorf("split PDF: %w", err) @@ -868,6 +873,8 @@ func convertUrl(ctx *api.Context, chromium Api, engine gotenberg.PdfEngine, url return fmt.Errorf("rotate PDFs: %w", err) } + pdfFormats = pdfengines.FacturXPdfFormats(ctx, engine, facturX, pdfFormats, true, nil) + convertOutputPaths, err := pdfengines.ConvertStub(ctx, engine, pdfFormats, outputPaths) if err != nil { return fmt.Errorf("convert PDF(s): %w", err) @@ -890,9 +897,9 @@ func convertUrl(ctx *api.Context, chromium Api, engine gotenberg.PdfEngine, url return fmt.Errorf("set embeds metadata: %w", err) } - err = pdfengines.InjectFacturXXMPStub(ctx, engine, facturX, convertOutputPaths) + err = pdfengines.ApplyFacturXStub(ctx, engine, facturX, facturxXmlPath, convertOutputPaths) if err != nil { - return fmt.Errorf("inject Factur-X XMP: %w", 
err) + return fmt.Errorf("apply Factur-X: %w", err) } err = pdfengines.EncryptPdfStub(ctx, engine, userPassword, ownerPassword, convertOutputPaths) diff --git a/pkg/modules/exiftool/exiftool.go b/pkg/modules/exiftool/exiftool.go index 5e0755e..a5bab08 100644 --- a/pkg/modules/exiftool/exiftool.go +++ b/pkg/modules/exiftool/exiftool.go @@ -528,6 +528,11 @@ func (engine *ExifTool) InjectFacturXXMP(ctx context.Context, logger *slog.Logge return fmt.Errorf("inject Factur-X XMP with ExifTool: %w", gotenberg.ErrPdfEngineMethodNotSupported) } +// ReadPdfAConformance is not available in this implementation. +func (engine *ExifTool) ReadPdfAConformance(ctx context.Context, logger *slog.Logger, inputPath string) (string, string, error) { + return "", "", fmt.Errorf("read PDF/A conformance with ExifTool: %w", gotenberg.ErrPdfEngineMethodNotSupported) +} + // Interface guards. var ( _ gotenberg.Module = (*ExifTool)(nil) diff --git a/pkg/modules/libreoffice/pdfengine/pdfengine.go b/pkg/modules/libreoffice/pdfengine/pdfengine.go index d4a7a69..5d26aea 100644 --- a/pkg/modules/libreoffice/pdfengine/pdfengine.go +++ b/pkg/modules/libreoffice/pdfengine/pdfengine.go @@ -140,6 +140,11 @@ func (engine *LibreOfficePdfEngine) InjectFacturXXMP(ctx context.Context, logger return fmt.Errorf("inject Factur-X XMP with LibreOffice: %w", gotenberg.ErrPdfEngineMethodNotSupported) } +// ReadPdfAConformance is not available in this implementation. +func (engine *LibreOfficePdfEngine) ReadPdfAConformance(ctx context.Context, logger *slog.Logger, inputPath string) (string, string, error) { + return "", "", fmt.Errorf("read PDF/A conformance with LibreOffice: %w", gotenberg.ErrPdfEngineMethodNotSupported) +} + // Interface guards. 
var ( _ gotenberg.Module = (*LibreOfficePdfEngine)(nil) diff --git a/pkg/modules/libreoffice/routes.go b/pkg/modules/libreoffice/routes.go index 54b596d..4edd12f 100644 --- a/pkg/modules/libreoffice/routes.go +++ b/pkg/modules/libreoffice/routes.go @@ -38,7 +38,7 @@ func convertRoute(libreOffice libreofficeapi.Uno, engine gotenberg.PdfEngine) ap stampFile := pdfengines.FormDataPdfStampFile(form) angle, rotatePages := pdfengines.FormDataPdfRotate(form, false) embedsMetadata := pdfengines.FormDataPdfEmbedsMetadata(form) - facturX := pdfengines.FormDataPdfFacturX(form, false) + facturX, facturxXmlPath := pdfengines.FormDataPdfFacturX(form) zeroValuedSplitMode := gotenberg.SplitMode{} @@ -319,8 +319,17 @@ func convertRoute(libreOffice libreofficeapi.Uno, engine gotenberg.PdfEngine) ap return err } + err = pdfengines.ValidateFacturXCompat(facturX, facturxXmlPath, pdfFormats) + if err != nil { + return err + } + + // Factur-X requires PDF/A-3; default to PDF/A-3b when no format was + // requested. The conversion runs as a post-processing step below. 
+ pdfFormats = pdfengines.FacturXPdfFormats(ctx, engine, facturX, pdfFormats, true, nil) + hasPostProcessing := watermark.Source != "" || stamp.Source != "" || angle != 0 || - len(embedPaths) > 0 || len(metadata) > 0 || flatten + len(embedPaths) > 0 || len(metadata) > 0 || flatten || facturX.ConformanceLevel != "" outputPaths := make([]string, len(inputPaths)) for i, inputPath := range inputPaths { @@ -504,9 +513,9 @@ func convertRoute(libreOffice libreofficeapi.Uno, engine gotenberg.PdfEngine) ap return fmt.Errorf("set embeds metadata: %w", err) } - err = pdfengines.InjectFacturXXMPStub(ctx, engine, facturX, outputPaths) + err = pdfengines.ApplyFacturXStub(ctx, engine, facturX, facturxXmlPath, outputPaths) if err != nil { - return fmt.Errorf("inject Factur-X XMP: %w", err) + return fmt.Errorf("apply Factur-X: %w", err) } err = pdfengines.EncryptPdfStub(ctx, engine, userPassword, ownerPassword, outputPaths) diff --git a/pkg/modules/pdfcpu/pdfcpu.go b/pkg/modules/pdfcpu/pdfcpu.go index 8f1c8b3..50d696a 100644 --- a/pkg/modules/pdfcpu/pdfcpu.go +++ b/pkg/modules/pdfcpu/pdfcpu.go @@ -457,6 +457,11 @@ func (engine *PdfCpu) InjectFacturXXMP(ctx context.Context, logger *slog.Logger, return fmt.Errorf("inject Factur-X XMP with pdfcpu: %w", gotenberg.ErrPdfEngineMethodNotSupported) } +// ReadPdfAConformance is not available in this implementation. +func (engine *PdfCpu) ReadPdfAConformance(ctx context.Context, logger *slog.Logger, inputPath string) (string, string, error) { + return "", "", fmt.Errorf("read PDF/A conformance with pdfcpu: %w", gotenberg.ErrPdfEngineMethodNotSupported) +} + // EmbedFiles embeds files into a PDF. All files are embedded as file attachments // without modifying the main PDF content. 
func (engine *PdfCpu) EmbedFiles(ctx context.Context, logger *slog.Logger, filePaths []string, inputPath string) error { diff --git a/pkg/modules/pdfengines/multi.go b/pkg/modules/pdfengines/multi.go index 41ed58c..d0e34a1 100644 --- a/pkg/modules/pdfengines/multi.go +++ b/pkg/modules/pdfengines/multi.go @@ -334,6 +334,23 @@ func (multi *multiPdfEngines) InjectFacturXXMP(ctx context.Context, logger *slog ) } +// ReadPdfAConformance reads the PDF/A part and conformance using the first +// available engine that supports it. +func (multi *multiPdfEngines) ReadPdfAConformance(ctx context.Context, logger *slog.Logger, inputPath string) (string, string, error) { + type pdfaConf struct { + part string + conformance string + } + result, err := runWithFallback(ctx, "pdfengines.ReadPdfAConformance", multi.facturXEngines, + func(ctx context.Context, engine gotenberg.PdfEngine) (pdfaConf, error) { + part, conformance, err := engine.ReadPdfAConformance(ctx, logger, inputPath) + return pdfaConf{part: part, conformance: conformance}, err + }, + func(err error) error { return fmt.Errorf("read PDF/A conformance with multi PDF engines: %w", err) }, + ) + return result.part, result.conformance, err +} + // Interface guards. var ( _ gotenberg.PdfEngine = (*multiPdfEngines)(nil) diff --git a/pkg/modules/pdfengines/routes.go b/pkg/modules/pdfengines/routes.go index 1a49f53..14f5f4b 100644 --- a/pkg/modules/pdfengines/routes.go +++ b/pkg/modules/pdfengines/routes.go @@ -467,12 +467,187 @@ func EmbedFilesMetadataStub(ctx *api.Context, engine gotenberg.PdfEngine, metada return nil } -// FormDataPdfFacturX extracts Factur-X parameters from form data. -// The "facturx" field is a JSON string with the four fx properties. -func FormDataPdfFacturX(form *api.FormData, mandatory bool) gotenberg.FacturX { - var facturX gotenberg.FacturX - form.FacturX(&facturX, mandatory) - return facturX +// FormDataPdfFacturX extracts the Factur-X parameters and the invoice XML path +// from form data. 
Factur-X is requested when both facturxConformanceLevel and +// facturxXml are provided. The embedded XML always takes the canonical +// [gotenberg.FacturXDocumentFileName] name. +func FormDataPdfFacturX(form *api.FormData) (gotenberg.FacturX, string) { + var ( + facturxXmlPath string + conformanceLevel string + documentType string + version string + ) + + form. + FacturXXml(&facturxXmlPath). + Custom("facturxConformanceLevel", func(value string) error { + conformanceLevel = value + switch value { + case "", + gotenberg.FacturXConformanceMinimum, + gotenberg.FacturXConformanceBasicWL, + gotenberg.FacturXConformanceBasic, + gotenberg.FacturXConformanceEN16931, + gotenberg.FacturXConformanceExtended, + gotenberg.FacturXConformanceXRechnung: + return nil + default: + return fmt.Errorf("unsupported conformance level '%s'", value) + } + }). + Custom("facturxDocumentType", func(value string) error { + if value == "" { + documentType = gotenberg.FacturXDocumentTypeInvoice + return nil + } + documentType = value + switch value { + case gotenberg.FacturXDocumentTypeInvoice, + gotenberg.FacturXDocumentTypeOrder, + gotenberg.FacturXDocumentTypeOrderResponse, + gotenberg.FacturXDocumentTypeOrderChange: + return nil + default: + return fmt.Errorf("unsupported document type '%s'", value) + } + }). + String("facturxVersion", &version, "1.0") + + return gotenberg.FacturX{ + ConformanceLevel: conformanceLevel, + DocumentType: documentType, + DocumentFileName: gotenberg.FacturXDocumentFileName, + Version: version, + }, facturxXmlPath +} + +// isPdfA3 reports whether the format is a PDF/A-3 variant, the only family that +// allows the embedded files Factur-X requires. +func isPdfA3(pdfA string) bool { + return pdfA == gotenberg.PdfA3a || pdfA == gotenberg.PdfA3b || pdfA == gotenberg.PdfA3u +} + +// ValidateFacturXCompat enforces the Factur-X pairing and PDF/A-3 rules. 
It +// returns a 400 error when the request is half-specified, or when an explicit +// PDF/A format is not a PDF/A-3 variant. +func ValidateFacturXCompat(facturX gotenberg.FacturX, facturxXmlPath string, pdfFormats gotenberg.PdfFormats) error { + if facturX.ConformanceLevel == "" && facturxXmlPath == "" { + return nil + } + + if facturX.ConformanceLevel == "" { + return api.WrapError( + errors.New("facturxConformanceLevel is required when facturxXml is provided"), + api.NewSentinelHttpError(http.StatusBadRequest, "Invalid form data: 'facturxConformanceLevel' is required when 'facturxXml' is provided"), + ) + } + + if facturxXmlPath == "" { + return api.WrapError( + errors.New("facturxXml is required when facturxConformanceLevel is set"), + api.NewSentinelHttpError(http.StatusBadRequest, "Invalid form data: 'facturxXml' file is required when 'facturxConformanceLevel' is set"), + ) + } + + if pdfFormats.PdfA != "" && !isPdfA3(pdfFormats.PdfA) { + return api.WrapError( + fmt.Errorf("Factur-X requires PDF/A-3, got '%s'", pdfFormats.PdfA), + api.NewSentinelHttpError(http.StatusBadRequest, fmt.Sprintf("Invalid form data: Factur-X requires a PDF/A-3 variant (PDF/A-3a, PDF/A-3b, or PDF/A-3u), got '%s'", pdfFormats.PdfA)), + ) + } + + return nil +} + +// FacturXPdfFormats returns the PDF/A formats to convert to so the output meets +// Factur-X's PDF/A-3 requirement. It returns pdfFormats unchanged when Factur-X +// is not requested or the caller already asked for a PDF/A-3 variant. Otherwise +// it defaults to PDF/A-3b, except for pre-existing PDFs (sourceDoc false) that +// already carry PDF/A-3, which are left untouched. 
+func FacturXPdfFormats(ctx *api.Context, engine gotenberg.PdfEngine, facturX gotenberg.FacturX, pdfFormats gotenberg.PdfFormats, sourceDoc bool, inputPaths []string) gotenberg.PdfFormats { + if facturX.ConformanceLevel == "" || isPdfA3(pdfFormats.PdfA) { + return pdfFormats + } + + if sourceDoc { + pdfFormats.PdfA = gotenberg.PdfA3b + return pdfFormats + } + + // Pre-existing PDFs: keep an already-PDF/A-3 input as-is, otherwise default + // to PDF/A-3b. + for _, inputPath := range inputPaths { + part, _, err := engine.ReadPdfAConformance(ctx, ctx.Log(), inputPath) + if err != nil { + ctx.Log().DebugContext(ctx, fmt.Sprintf("read PDF/A conformance of '%s', assuming not PDF/A-3: %s", inputPath, err)) + part = "" + } + if part != "3" { + pdfFormats.PdfA = gotenberg.PdfA3b + return pdfFormats + } + } + + return pdfFormats +} + +// ApplyFacturXStub turns each input PDF into a Factur-X document: it embeds the +// CII invoice XML under the canonical name with AFRelationship "Alternative", +// then injects the fx XMP metadata. The inputs must already be PDF/A-3 (see +// [FacturXPdfFormats]). It is a no-op when Factur-X is not requested. 
+func ApplyFacturXStub(ctx *api.Context, engine gotenberg.PdfEngine, facturX gotenberg.FacturX, facturxXmlPath string, inputPaths []string) error { + if facturX.ConformanceLevel == "" { + return nil + } + + err := embedFacturXXml(ctx, engine, facturxXmlPath, inputPaths) + if err != nil { + return err + } + + metadata := map[string]map[string]string{ + facturX.DocumentFileName: { + "mimeType": "text/xml", + "relationship": "Alternative", + }, + } + err = EmbedFilesMetadataStub(ctx, engine, metadata, inputPaths) + if err != nil { + return fmt.Errorf("set Factur-X embed metadata: %w", err) + } + + err = InjectFacturXXMPStub(ctx, engine, facturX, inputPaths) + if err != nil { + return err + } + + return nil +} + +// embedFacturXXml embeds the Factur-X invoice XML into each PDF under the +// canonical [gotenberg.FacturXDocumentFileName] name, regardless of the +// uploaded file name. +func embedFacturXXml(ctx *api.Context, engine gotenberg.PdfEngine, facturxXmlPath string, inputPaths []string) error { + embedDir, err := ctx.CreateSubDirectory(uuid.New().String()) + if err != nil { + return fmt.Errorf("create Factur-X embed subdirectory: %w", err) + } + + canonicalPath := fmt.Sprintf("%s/%s", embedDir, gotenberg.FacturXDocumentFileName) + err = os.Symlink(facturxXmlPath, canonicalPath) + if err != nil { + return fmt.Errorf("symlink Factur-X invoice XML: %w", err) + } + + for _, inputPath := range inputPaths { + err = engine.EmbedFiles(ctx, ctx.Log(), []string{canonicalPath}, inputPath) + if err != nil { + return fmt.Errorf("embed Factur-X invoice XML into PDF '%s': %w", inputPath, err) + } + } + + return nil } // InjectFacturXXMPStub injects Factur-X XMP metadata into PDF files. 
If the @@ -732,7 +907,7 @@ func mergeRoute(engine gotenberg.PdfEngine) api.Route { stampFile := FormDataPdfStampFile(form) angle, rotatePages := FormDataPdfRotate(form, false) embedsMetadata := FormDataPdfEmbedsMetadata(form) - facturX := FormDataPdfFacturX(form, false) + facturX, facturxXmlPath := FormDataPdfFacturX(form) var inputPaths []string var flatten bool @@ -760,6 +935,11 @@ func mergeRoute(engine gotenberg.PdfEngine) api.Route { return err } + err = ValidateFacturXCompat(facturX, facturxXmlPath, pdfFormats) + if err != nil { + return err + } + outputPath := ctx.GeneratePath(".pdf") err = engine.Merge(ctx, ctx.Log(), inputPaths, outputPath) if err != nil { @@ -790,6 +970,8 @@ func mergeRoute(engine gotenberg.PdfEngine) api.Route { } } + pdfFormats = FacturXPdfFormats(ctx, engine, facturX, pdfFormats, false, outputPaths) + outputPaths, err = ConvertStub(ctx, engine, pdfFormats, outputPaths) if err != nil { return fmt.Errorf("convert PDF: %w", err) @@ -856,9 +1038,9 @@ func mergeRoute(engine gotenberg.PdfEngine) api.Route { return fmt.Errorf("set embeds metadata: %w", err) } - err = InjectFacturXXMPStub(ctx, engine, facturX, outputPaths) + err = ApplyFacturXStub(ctx, engine, facturX, facturxXmlPath, outputPaths) if err != nil { - return fmt.Errorf("inject Factur-X XMP: %w", err) + return fmt.Errorf("apply Factur-X: %w", err) } err = EncryptPdfStub(ctx, engine, userPassword, ownerPassword, outputPaths) @@ -897,7 +1079,7 @@ func splitRoute(engine gotenberg.PdfEngine) api.Route { stampFile := FormDataPdfStampFile(form) angle, rotatePages := FormDataPdfRotate(form, false) embedsMetadata := FormDataPdfEmbedsMetadata(form) - facturX := FormDataPdfFacturX(form, false) + facturX, facturxXmlPath := FormDataPdfFacturX(form) var inputPaths []string var flatten bool @@ -923,6 +1105,11 @@ func splitRoute(engine gotenberg.PdfEngine) api.Route { return err } + err = ValidateFacturXCompat(facturX, facturxXmlPath, pdfFormats) + if err != nil { + return err + } + outputPaths, 
err := SplitPdfStub(ctx, engine, mode, inputPaths) if err != nil { return fmt.Errorf("split PDFs: %w", err) @@ -950,6 +1137,8 @@ func splitRoute(engine gotenberg.PdfEngine) api.Route { } } + pdfFormats = FacturXPdfFormats(ctx, engine, facturX, pdfFormats, false, outputPaths) + convertOutputPaths, err := ConvertStub(ctx, engine, pdfFormats, outputPaths) if err != nil { return fmt.Errorf("convert PDFs: %w", err) @@ -972,9 +1161,9 @@ func splitRoute(engine gotenberg.PdfEngine) api.Route { return fmt.Errorf("set embeds metadata: %w", err) } - err = InjectFacturXXMPStub(ctx, engine, facturX, convertOutputPaths) + err = ApplyFacturXStub(ctx, engine, facturX, facturxXmlPath, convertOutputPaths) if err != nil { - return fmt.Errorf("inject Factur-X XMP: %w", err) + return fmt.Errorf("apply Factur-X: %w", err) } err = EncryptPdfStub(ctx, engine, userPassword, ownerPassword, convertOutputPaths) @@ -1302,7 +1491,7 @@ func embedRoute(engine gotenberg.PdfEngine) api.Route { form := ctx.FormData() embedPaths := FormDataPdfEmbeds(form) embedsMetadata := FormDataPdfEmbedsMetadata(form) - facturX := FormDataPdfFacturX(form, false) + facturX, facturxXmlPath := FormDataPdfFacturX(form) var inputPaths []string err := form. @@ -1311,22 +1500,36 @@ func embedRoute(engine gotenberg.PdfEngine) api.Route { if err != nil { return fmt.Errorf("validate form data: %w", err) } - err = EmbedFilesStub(ctx, engine, embedPaths, inputPaths) + + err = ValidateFacturXCompat(facturX, facturxXmlPath, gotenberg.PdfFormats{}) + if err != nil { + return err + } + + // Factur-X requires PDF/A-3. Convert when needed; a no-op otherwise, + // so a plain embed request keeps its inputs untouched. 
+ pdfFormats := FacturXPdfFormats(ctx, engine, facturX, gotenberg.PdfFormats{}, false, inputPaths) + outputPaths, err := ConvertStub(ctx, engine, pdfFormats, inputPaths) + if err != nil { + return fmt.Errorf("convert PDFs: %w", err) + } + + err = EmbedFilesStub(ctx, engine, embedPaths, outputPaths) if err != nil { return fmt.Errorf("embed files into PDFs: %w", err) } - err = EmbedFilesMetadataStub(ctx, engine, embedsMetadata, inputPaths) + err = EmbedFilesMetadataStub(ctx, engine, embedsMetadata, outputPaths) if err != nil { return fmt.Errorf("set embeds metadata: %w", err) } - err = InjectFacturXXMPStub(ctx, engine, facturX, inputPaths) + err = ApplyFacturXStub(ctx, engine, facturX, facturxXmlPath, outputPaths) if err != nil { - return fmt.Errorf("inject Factur-X XMP: %w", err) + return fmt.Errorf("apply Factur-X: %w", err) } - err = ctx.AddOutputPaths(inputPaths...) + err = ctx.AddOutputPaths(outputPaths...) if err != nil { return fmt.Errorf("add output paths: %w", err) } @@ -1457,8 +1660,9 @@ func rotateRoute(engine gotenberg.PdfEngine) api.Route { } } -// facturXRoute returns an [api.Route] which injects Factur-X/ZUGFeRD XMP -// metadata into PDF/A-3 files. +// facturXRoute returns an [api.Route] which turns existing PDFs into Factur-X +// documents: it ensures PDF/A-3, embeds the CII invoice XML, and injects the fx +// XMP metadata. func facturXRoute(engine gotenberg.PdfEngine) api.Route { return api.Route{ Method: http.MethodPost, @@ -1468,7 +1672,8 @@ func facturXRoute(engine gotenberg.PdfEngine) api.Route { ctx := c.Get("context").(*api.Context) form := ctx.FormData() - facturX := FormDataPdfFacturX(form, true) + pdfFormats := FormDataPdfFormats(form) + facturX, facturxXmlPath := FormDataPdfFacturX(form) var inputPaths []string err := form. 
@@ -1478,12 +1683,32 @@ func facturXRoute(engine gotenberg.PdfEngine) api.Route { return fmt.Errorf("validate form data: %w", err) } - err = InjectFacturXXMPStub(ctx, engine, facturX, inputPaths) - if err != nil { - return fmt.Errorf("inject Factur-X XMP into PDFs: %w", err) + // Factur-X is the whole point of this route, so both fields are + // mandatory here. + if facturX.ConformanceLevel == "" || facturxXmlPath == "" { + return api.WrapError( + errors.New("facturxConformanceLevel and facturxXml are required"), + api.NewSentinelHttpError(http.StatusBadRequest, "Invalid form data: 'facturxConformanceLevel' and 'facturxXml' are both required"), + ) } - err = ctx.AddOutputPaths(inputPaths...) + err = ValidateFacturXCompat(facturX, facturxXmlPath, pdfFormats) + if err != nil { + return err + } + + pdfFormats = FacturXPdfFormats(ctx, engine, facturX, pdfFormats, false, inputPaths) + outputPaths, err := ConvertStub(ctx, engine, pdfFormats, inputPaths) + if err != nil { + return fmt.Errorf("convert PDFs: %w", err) + } + + err = ApplyFacturXStub(ctx, engine, facturX, facturxXmlPath, outputPaths) + if err != nil { + return fmt.Errorf("apply Factur-X: %w", err) + } + + err = ctx.AddOutputPaths(outputPaths...) if err != nil { return fmt.Errorf("add output paths: %w", err) } diff --git a/pkg/modules/pdftk/pdftk.go b/pkg/modules/pdftk/pdftk.go index f6a7421..dcc5767 100644 --- a/pkg/modules/pdftk/pdftk.go +++ b/pkg/modules/pdftk/pdftk.go @@ -505,6 +505,11 @@ func (engine *PdfTk) InjectFacturXXMP(ctx context.Context, logger *slog.Logger, return fmt.Errorf("inject Factur-X XMP with PDFtk: %w", gotenberg.ErrPdfEngineMethodNotSupported) } +// ReadPdfAConformance is not available in this implementation. +func (engine *PdfTk) ReadPdfAConformance(ctx context.Context, logger *slog.Logger, inputPath string) (string, string, error) { + return "", "", fmt.Errorf("read PDF/A conformance with PDFtk: %w", gotenberg.ErrPdfEngineMethodNotSupported) +} + // Interface guards. 
var ( _ gotenberg.Module = (*PdfTk)(nil) diff --git a/pkg/modules/qpdf/facturx_test.go b/pkg/modules/qpdf/facturx_test.go index 031921a..f7a6623 100644 --- a/pkg/modules/qpdf/facturx_test.go +++ b/pkg/modules/qpdf/facturx_test.go @@ -247,3 +247,53 @@ func assertContains(t *testing.T, haystack, needle string) { t.Errorf("expected output to contain %q", needle) } } + +func TestParsePdfAId(t *testing.T) { + for _, tc := range []struct { + scenario string + xmp string + expectPart string + expectConform string + }{ + { + scenario: "element form", + xmp: `<pdfaid:part>3</pdfaid:part><pdfaid:conformance>B</pdfaid:conformance>`, + expectPart: "3", + expectConform: "B", + }, + { + scenario: "attribute form", + xmp: `<rdf:Description pdfaid:part="3" pdfaid:conformance="U"/>`, + expectPart: "3", + expectConform: "U", + }, + { + scenario: "part 2", + xmp: `<pdfaid:part>2</pdfaid:part><pdfaid:conformance>B</pdfaid:conformance>`, + expectPart: "2", + expectConform: "B", + }, + { + scenario: "no pdfa identification", + xmp: `foo`, + expectPart: "", + expectConform: "", + }, + { + scenario: "empty packet", + xmp: "", + expectPart: "", + expectConform: "", + }, + } { + t.Run(tc.scenario, func(t *testing.T) { + part, conformance := parsePdfAId(tc.xmp) + if part != tc.expectPart { + t.Errorf("expected part %q but got %q", tc.expectPart, part) + } + if conformance != tc.expectConform { + t.Errorf("expected conformance %q but got %q", tc.expectConform, conformance) + } + }) + } +} diff --git a/pkg/modules/qpdf/qpdf.go b/pkg/modules/qpdf/qpdf.go index 3b000e4..753dac3 100644 --- a/pkg/modules/qpdf/qpdf.go +++ b/pkg/modules/qpdf/qpdf.go @@ -12,6 +12,7 @@ import ( "os" "os/exec" "path/filepath" + "regexp" "strings" "syscall" @@ -729,6 +730,68 @@ func (engine *QPdf) InjectFacturXXMP(ctx context.Context, logger *slog.Logger, f return nil } +// ReadPdfAConformance reads the PDF/A part and conformance from the +// document-level XMP packet (pdfaid:part and pdfaid:conformance) using QPDF's +// JSON output. It returns empty strings when the document carries no XMP +// metadata stream or no PDF/A identification.
+func (engine *QPdf) ReadPdfAConformance(ctx context.Context, logger *slog.Logger, inputPath string) (string, string, error) { + ctx, span := gotenberg.Tracer().Start(ctx, "qpdf.ReadPdfAConformance", + trace.WithSpanKind(trace.SpanKindClient), + trace.WithAttributes(semconv.ServerAddress(engine.binPath)), + ) + defer span.End() + + logger.DebugContext(ctx, fmt.Sprintf("reading PDF/A conformance from %s with QPDF", inputPath)) + + args := append([]string{inputPath}, engine.globalArgs...) + args = append(args, "--json-output", "--json-stream-data=inline") + + output, err := engine.execCaptureOutput(ctx, args...) + if err != nil { + err = fmt.Errorf("get PDF JSON with QPDF: %w", err) + span.RecordError(err) + span.SetStatus(codes.Error, err.Error()) + return "", "", err + } + + objects, err := parsePdfObjects(output) + if err != nil { + span.RecordError(err) + span.SetStatus(codes.Error, err.Error()) + return "", "", err + } + + _, _, xmp, err := findMetadataStream(objects) + if err != nil { + // No XMP metadata stream means no PDF/A identification. + logger.DebugContext(ctx, fmt.Sprintf("no XMP metadata stream in %s: %s", inputPath, err)) + span.SetStatus(codes.Ok, "") + return "", "", nil + } + + part, conformance := parsePdfAId(xmp) + span.SetStatus(codes.Ok, "") + return part, conformance, nil +} + +var ( + pdfaIdPartRe = regexp.MustCompile(`pdfaid:part[\s>="']*([0-9]+)`) + pdfaIdConformanceRe = regexp.MustCompile(`pdfaid:conformance[\s>="']*([A-Za-z]+)`) +) + +// parsePdfAId extracts the PDF/A part and conformance from an XMP packet. It +// handles both the element (<pdfaid:part>3</pdfaid:part>) and attribute +// (pdfaid:part="3") serializations. +func parsePdfAId(xmp string) (part string, conformance string) { + if m := pdfaIdPartRe.FindStringSubmatch(xmp); m != nil { + part = m[1] + } + if m := pdfaIdConformanceRe.FindStringSubmatch(xmp); m != nil { + conformance = m[1] + } + return part, conformance +} + // validateFacturX checks the Factur-X fields against the supported values.
func validateFacturX(facturX gotenberg.FacturX) error { switch facturX.ConformanceLevel { @@ -930,7 +993,7 @@ func facturXSchemaLi() string { DocumentType Text external - INVOICE + The type of the embedded Factur-X document Version diff --git a/test/integration/features/libreoffice_convert.feature b/test/integration/features/libreoffice_convert.feature index 9411275..61e7562 100644 --- a/test/integration/features/libreoffice_convert.feature +++ b/test/integration/features/libreoffice_convert.feature @@ -714,27 +714,29 @@ Feature: /forms/libreoffice/convert Then the response PDF(s) should have the "embed_1.xml" file embedded Then the response PDF(s) should have the "embed_2.xml" file embedded + # A Factur-X request supplies the invoice XML via facturxXml plus the + # facturxConformanceLevel; Gotenberg owns the PDF/A-3, the Alternative + # relationship, and the canonical factur-x.xml name. No explicit pdfa here + # exercises the automatic PDF/A-3b default for a source document. @convert - @embed @factur-x Scenario: POST /forms/libreoffice/convert (Factur-X / ZUGFeRD) Given I have a default Gotenberg container When I make a "POST" request to Gotenberg at the "/forms/libreoffice/convert" endpoint with the following form data and header(s): - | files | testdata/page_1.docx | file | - | pdfa | PDF/A-3b | field | - | embeds | testdata/embed_1.xml | file | - | embedsMetadata | {"embed_1.xml":{"mimeType":"text/xml","relationship":"Alternative"}} | field | - | facturx | {"conformanceLevel":"EN 16931"} | field | - | Gotenberg-Output-Filename | foo | header | + | files | testdata/page_1.docx | file | + | facturxXml | testdata/embed_1.xml | file | + | facturxConformanceLevel | EN 16931 | field | + | Gotenberg-Output-Filename | foo | header | Then the response status code should be 200 Then the response header "Content-Type" should be "application/pdf" Then there should be 1 PDF(s) in the response Then the response PDF(s) should be valid "PDF/A-3b" with a tolerance of 0 failed 
rule(s) - Then the response PDF(s) should have the "embed_1.xml" file embedded with relationship "Alternative" + Then the response PDF(s) should have the "factur-x.xml" file embedded with relationship "Alternative" Then the response PDF(s) should declare Factur-X XMP with conformance level "EN 16931" + # The base PDF is already PDF/A-3b: detection keeps it as-is, no reconversion. @factur-x - Scenario: POST /forms/pdfengines/factur-x (Standalone) + Scenario: POST /forms/pdfengines/factur-x (Standalone, already PDF/A-3) Given I have a default Gotenberg container When I make a "POST" request to Gotenberg at the "/forms/libreoffice/convert" endpoint with the following form data and header(s): | files | testdata/page_1.docx | file | @@ -742,15 +744,45 @@ Feature: /forms/libreoffice/convert | Gotenberg-Output-Filename | base | header | Then the response status code should be 200 When I make a "POST" request to Gotenberg at the "/forms/pdfengines/factur-x" endpoint with the following form data and header(s): - | files | teststore/base.pdf | file | - | facturx | {"conformanceLevel":"BASIC","documentType":"ORDER"} | field | - | Gotenberg-Output-Filename | foo | header | + | files | teststore/base.pdf | file | + | facturxXml | testdata/embed_1.xml | file | + | facturxConformanceLevel | BASIC | field | + | facturxDocumentType | ORDER | field | + | Gotenberg-Output-Filename | foo | header | Then the response status code should be 200 Then the response header "Content-Type" should be "application/pdf" Then there should be 1 PDF(s) in the response Then the response PDF(s) should be valid "PDF/A-3b" with a tolerance of 0 failed rule(s) + Then the response PDF(s) should have the "factur-x.xml" file embedded with relationship "Alternative" Then the response PDF(s) should declare Factur-X XMP with conformance level "BASIC" + # The base PDF is not PDF/A: detection converts it to PDF/A-3b automatically. 
+ @factur-x + Scenario: POST /forms/pdfengines/factur-x (Standalone, converts non-PDF/A input) + Given I have a default Gotenberg container + When I make a "POST" request to Gotenberg at the "/forms/libreoffice/convert" endpoint with the following form data and header(s): + | files | testdata/page_1.docx | file | + | Gotenberg-Output-Filename | plain | header | + Then the response status code should be 200 + When I make a "POST" request to Gotenberg at the "/forms/pdfengines/factur-x" endpoint with the following form data and header(s): + | files | teststore/plain.pdf | file | + | facturxXml | testdata/embed_1.xml | file | + | facturxConformanceLevel | EN 16931 | field | + | Gotenberg-Output-Filename | foo | header | + Then the response status code should be 200 + Then the response PDF(s) should be valid "PDF/A-3b" with a tolerance of 0 failed rule(s) + Then the response PDF(s) should have the "factur-x.xml" file embedded with relationship "Alternative" + Then the response PDF(s) should declare Factur-X XMP with conformance level "EN 16931" + + # facturxConformanceLevel without facturxXml is a half-specified request. + @factur-x + Scenario: POST /forms/pdfengines/factur-x (Bad Request) + Given I have a default Gotenberg container + When I make a "POST" request to Gotenberg at the "/forms/pdfengines/factur-x" endpoint with the following form data and header(s): + | files | testdata/page_1.pdf | file | + | facturxConformanceLevel | EN 16931 | field | + Then the response status code should be 400 + # FIXME: once decrypt is done, add encrypt and check after the content of the PDF. @convert @metadata