diff --git a/pkg/modules/chromium/chromium.go b/pkg/modules/chromium/chromium.go index 7aaeb80..999b04e 100644 --- a/pkg/modules/chromium/chromium.go +++ b/pkg/modules/chromium/chromium.go @@ -300,7 +300,8 @@ type PdfOptions struct { PreferCssPageSize bool // GenerateDocumentOutline defines whether the document outline should be - // embedded into the PDF. + // embedded into the PDF. Chromium derives the outline from the tagged-PDF + // structure tree, so enabling this implies GenerateTaggedPdf. GenerateDocumentOutline bool // GenerateTaggedPdf defines whether to generate tagged (accessible) diff --git a/pkg/modules/chromium/tasks.go b/pkg/modules/chromium/tasks.go index 59f883b..0f37fee 100644 --- a/pkg/modules/chromium/tasks.go +++ b/pkg/modules/chromium/tasks.go @@ -21,8 +21,29 @@ import ( "github.com/gotenberg/gotenberg/v8/pkg/gotenberg" ) +// resolvePdfOptions applies the cross-option constraints Chromium imposes +// before printing. +// +// Chromium derives the PDF document outline from the tagged-PDF structure +// tree, so [PdfOptions.GenerateDocumentOutline] produces no outline unless +// tagged PDF is also generated. Requesting an outline therefore implies +// tagged PDF. See https://github.com/gotenberg/gotenberg/issues/1579. +func resolvePdfOptions(options PdfOptions) PdfOptions { + if options.GenerateDocumentOutline { + options.GenerateTaggedPdf = true + } + + return options +} + func printToPdfActionFunc(reqCtx context.Context, logger *slog.Logger, outputPath string, options PdfOptions) chromedp.ActionFunc { return func(ctx context.Context) error { + if options.GenerateDocumentOutline && !options.GenerateTaggedPdf { + logger.DebugContext(ctx, "document outline requested, enabling tagged PDF because Chromium derives the outline from the structure tree") + } + + options = resolvePdfOptions(options) + // ctx is the chromedp task context, derived from context.Background(), // so the span is started under reqCtx to keep print_to_pdf in the // conversion trace instead of orphaning it into a new one. diff --git a/pkg/modules/chromium/tasks_test.go b/pkg/modules/chromium/tasks_test.go new file mode 100644 index 0000000..383ac2f --- /dev/null +++ b/pkg/modules/chromium/tasks_test.go @@ -0,0 +1,52 @@ +package chromium + +import "testing" + +func TestResolvePdfOptions(t *testing.T) { + for _, tc := range []struct { + scenario string + generateOutline bool + generateTaggedIn bool + generateTaggedWant bool + }{ + { + scenario: "outline requested forces tagged PDF", + generateOutline: true, + generateTaggedIn: false, + generateTaggedWant: true, + }, + { + scenario: "outline requested keeps tagged PDF on", + generateOutline: true, + generateTaggedIn: true, + generateTaggedWant: true, + }, + { + scenario: "no outline leaves tagged PDF off", + generateOutline: false, + generateTaggedIn: false, + generateTaggedWant: false, + }, + { + scenario: "no outline keeps tagged PDF on", + generateOutline: false, + generateTaggedIn: true, + generateTaggedWant: true, + }, + } { + t.Run(tc.scenario, func(t *testing.T) { + options := DefaultPdfOptions() + options.GenerateDocumentOutline = tc.generateOutline + options.GenerateTaggedPdf = tc.generateTaggedIn + + got := resolvePdfOptions(options) + + if got.GenerateTaggedPdf != tc.generateTaggedWant { + t.Errorf("expected GenerateTaggedPdf=%t, got %t", tc.generateTaggedWant, got.GenerateTaggedPdf) + } + if got.GenerateDocumentOutline != tc.generateOutline { + t.Errorf("expected GenerateDocumentOutline=%t, got %t", tc.generateOutline, got.GenerateDocumentOutline) + } + }) + } +} diff --git a/test/integration/features/chromium_convert_html.feature b/test/integration/features/chromium_convert_html.feature index 97ce549..786cc93 100644 --- a/test/integration/features/chromium_convert_html.feature +++ b/test/integration/features/chromium_convert_html.feature @@ -548,6 +548,25 @@ Feature: /forms/chromium/convert/html Error: Exception 2 """ + Scenario: POST /forms/chromium/convert/html (Document Outline) + Given I have a default Gotenberg container + When I make a "POST" request to Gotenberg at the "/forms/chromium/convert/html" endpoint with the following form data and header(s): + | files | testdata/page-outline-html/index.html | file | + | generateDocumentOutline | true | field | + | Gotenberg-Output-Filename | foo | header | + Then the response status code should be 200 + Then the response header "Content-Type" should be "application/pdf" + Then there should be 1 PDF(s) in the response + # generateTaggedPdf is left unset, yet the outline must still be embedded: + # Gotenberg enables tagged PDF automatically because Chromium derives the + # outline from the structure tree. See issue #1579. + Then the "foo.pdf" PDF should have a document outline + When I make a "POST" request to Gotenberg at the "/forms/chromium/convert/html" endpoint with the following form data and header(s): + | files | testdata/page-outline-html/index.html | file | + | Gotenberg-Output-Filename | bar | header | + Then the response status code should be 200 + Then the "bar.pdf" PDF should NOT have a document outline + Scenario: POST /forms/chromium/convert/html (Bad Request) Given I have a default Gotenberg container When I make a "POST" request to Gotenberg at the "/forms/chromium/convert/html" endpoint with the following form data and header(s): diff --git a/test/integration/scenario/scenario.go b/test/integration/scenario/scenario.go index 1683a27..b7dc67f 100644 --- a/test/integration/scenario/scenario.go +++ b/test/integration/scenario/scenario.go @@ -1077,6 +1077,60 @@ func (s *scenario) thePdfShouldBeSetToLandscapeOrientation(ctx context.Context, return nil } +func (s *scenario) thePdfShouldHaveADocumentOutline(ctx context.Context, name, kind string) error { + var path string + if !strings.HasPrefix(name, "*_") { + path = fmt.Sprintf("%s/%s/%s", s.workdir, s.resp.Header().Get("Gotenberg-Trace"), name) + + _, err := os.Stat(path) + if os.IsNotExist(err) { + return fmt.Errorf("PDF %q does not exist", path) + } + } else { + substr := strings.ReplaceAll(name, "*_", "") + err := filepath.Walk(s.teststoreDir, func(currentPath string, info os.FileInfo, pathErr error) error { + if pathErr != nil { + return pathErr + } + if strings.Contains(info.Name(), substr) { + path = currentPath + return filepath.SkipDir + } + return nil + }) + if err != nil { + return fmt.Errorf("walk %q: %w", s.workdir, err) + } + } + + cmd := []string{ + "verapdf", + "-off", + "--extract", + "outlines", + filepath.Base(path), + } + + output, err := execCommandInIntegrationToolsContainer(ctx, cmd, path) + if err != nil { + return fmt.Errorf("exec %q: %w", cmd, err) + } + + // veraPDF emits an empty features report when the catalog holds no outline. + hasOutline := !strings.Contains(output, "") + invert := kind == "should NOT" + + if !invert && !hasOutline { + return fmt.Errorf("PDF %q has no document outline", path) + } + + if invert && hasOutline { + return fmt.Errorf("PDF %q has a document outline", path) + } + + return nil +} + // pdfPageText extracts the text of a single page from a produced PDF using // pdftotext. name is either a literal filename or a "*_" glob resolved against // the test store. @@ -1541,6 +1595,7 @@ func InitializeScenario(ctx *godog.ScenarioContext) { ctx.Then(`^the (response|webhook request) PDF\(s\) should declare Factur-X XMP with conformance level "([^"]*)"$`, s.thePdfsShouldDeclareFacturXConformanceLevel) ctx.Then(`^the "([^"]*)" PDF should have (\d+) page\(s\)$`, s.thePdfShouldHavePages) ctx.Then(`^the "([^"]*)" PDF (should|should NOT) be set to landscape orientation$`, s.thePdfShouldBeSetToLandscapeOrientation) + ctx.Then(`^the "([^"]*)" PDF (should|should NOT) have a document outline$`, s.thePdfShouldHaveADocumentOutline) ctx.Then(`^the "([^"]*)" PDF (should|should NOT) have the following content at page (\d+):$`, s.thePdfShouldHaveTheFollowingContentAtPage) ctx.Then(`^the "([^"]*)" PDF (should|should NOT) have content matching "([^"]*)" at page (\d+)$`, s.thePdfShouldHaveContentMatchingAtPage) ctx.Then(`^the "([^"]*)" PDF should have (\d+) image\(s\)$`, s.thePdfShouldHaveImages) diff --git a/test/integration/testdata/page-outline-html/index.html b/test/integration/testdata/page-outline-html/index.html new file mode 100644 index 0000000..09f28c7 --- /dev/null +++ b/test/integration/testdata/page-outline-html/index.html @@ -0,0 +1,16 @@ + + + + Outline + + +

Chapter 1

+

Intro.

+

Section 1.1

+

Body.

+

Chapter 2

+

Intro.

+

Section 2.1

+

Body.

+ +