fix(chromium): generateDocumentOutline now implies generateTaggedPdf

This commit is contained in:
Julien Neuhart
2026-06-16 17:06:10 +02:00
parent 98fc403478
commit 7b054da4e7
6 changed files with 165 additions and 1 deletions
+2 -1
View File
@@ -300,7 +300,8 @@ type PdfOptions struct {
PreferCssPageSize bool
// GenerateDocumentOutline defines whether the document outline should be
// embedded into the PDF. Chromium derives the outline from the tagged-PDF
// structure tree, so enabling this implies GenerateTaggedPdf.
GenerateDocumentOutline bool
// GenerateTaggedPdf defines whether to generate tagged (accessible)
+21
View File
@@ -21,8 +21,29 @@ import (
"github.com/gotenberg/gotenberg/v8/pkg/gotenberg"
)
// resolvePdfOptions normalizes interdependent PDF options ahead of printing
// and returns the adjusted copy; the caller's value is left untouched because
// options is received by value.
//
// The only rule applied here: when [PdfOptions.GenerateDocumentOutline] is
// set, [PdfOptions.GenerateTaggedPdf] is switched on as well. Chromium builds
// the document outline out of the tagged-PDF structure tree, so an outline
// request without tagging would yield a PDF with no outline at all.
// See https://github.com/gotenberg/gotenberg/issues/1579.
func resolvePdfOptions(options PdfOptions) PdfOptions {
	// Tagging stays on if it was already requested, and is forced on whenever
	// an outline is requested.
	options.GenerateTaggedPdf = options.GenerateTaggedPdf || options.GenerateDocumentOutline
	return options
}
func printToPdfActionFunc(reqCtx context.Context, logger *slog.Logger, outputPath string, options PdfOptions) chromedp.ActionFunc {
return func(ctx context.Context) error {
if options.GenerateDocumentOutline && !options.GenerateTaggedPdf {
logger.DebugContext(ctx, "document outline requested, enabling tagged PDF because Chromium derives the outline from the structure tree")
}
options = resolvePdfOptions(options)
// ctx is the chromedp task context, derived from context.Background(),
// so the span is started under reqCtx to keep print_to_pdf in the
// conversion trace instead of orphaning it into a new one.
+52
View File
@@ -0,0 +1,52 @@
package chromium
import "testing"
// TestResolvePdfOptions walks the four combinations of the outline and tagged
// PDF flags and checks that resolvePdfOptions turns tagging on only when an
// outline is requested, never turns it off, and never alters the outline flag.
func TestResolvePdfOptions(t *testing.T) {
	type resolveCase struct {
		scenario   string
		outline    bool
		taggedIn   bool
		taggedWant bool
	}

	cases := []resolveCase{
		{scenario: "outline requested forces tagged PDF", outline: true, taggedIn: false, taggedWant: true},
		{scenario: "outline requested keeps tagged PDF on", outline: true, taggedIn: true, taggedWant: true},
		{scenario: "no outline leaves tagged PDF off", outline: false, taggedIn: false, taggedWant: false},
		{scenario: "no outline keeps tagged PDF on", outline: false, taggedIn: true, taggedWant: true},
	}

	for _, c := range cases {
		t.Run(c.scenario, func(t *testing.T) {
			// Start from the package defaults so unrelated fields carry
			// realistic values, then set only the two flags under test.
			input := DefaultPdfOptions()
			input.GenerateDocumentOutline = c.outline
			input.GenerateTaggedPdf = c.taggedIn

			resolved := resolvePdfOptions(input)

			if resolved.GenerateTaggedPdf != c.taggedWant {
				t.Errorf("expected GenerateTaggedPdf=%t, got %t", c.taggedWant, resolved.GenerateTaggedPdf)
			}
			// The outline flag itself must pass through unchanged.
			if resolved.GenerateDocumentOutline != c.outline {
				t.Errorf("expected GenerateDocumentOutline=%t, got %t", c.outline, resolved.GenerateDocumentOutline)
			}
		})
	}
}
@@ -548,6 +548,25 @@ Feature: /forms/chromium/convert/html
Error: Exception 2
"""
Scenario: POST /forms/chromium/convert/html (Document Outline)
Given I have a default Gotenberg container
# First request: ask for a document outline only; generateTaggedPdf is not sent.
When I make a "POST" request to Gotenberg at the "/forms/chromium/convert/html" endpoint with the following form data and header(s):
| files | testdata/page-outline-html/index.html | file |
| generateDocumentOutline | true | field |
| Gotenberg-Output-Filename | foo | header |
Then the response status code should be 200
Then the response header "Content-Type" should be "application/pdf"
Then there should be 1 PDF(s) in the response
# generateTaggedPdf is left unset, yet the outline must still be embedded:
# Gotenberg enables tagged PDF automatically because Chromium derives the
# outline from the structure tree. See issue #1579.
Then the "foo.pdf" PDF should have a document outline
# Control request: the same HTML without generateDocumentOutline, so the
# resulting PDF is expected to carry no outline.
When I make a "POST" request to Gotenberg at the "/forms/chromium/convert/html" endpoint with the following form data and header(s):
| files | testdata/page-outline-html/index.html | file |
| Gotenberg-Output-Filename | bar | header |
Then the response status code should be 200
Then the "bar.pdf" PDF should NOT have a document outline
Scenario: POST /forms/chromium/convert/html (Bad Request)
Given I have a default Gotenberg container
When I make a "POST" request to Gotenberg at the "/forms/chromium/convert/html" endpoint with the following form data and header(s):
+55
View File
@@ -1077,6 +1077,60 @@ func (s *scenario) thePdfShouldBeSetToLandscapeOrientation(ctx context.Context,
return nil
}
// thePdfShouldHaveADocumentOutline asserts that the named PDF does (kind
// "should") or does not (kind "should NOT") contain a document outline.
//
// name is either a literal filename, looked up under the work directory in the
// sub-folder named after the response's Gotenberg-Trace header, or a "*_" glob
// whose remainder is matched as a substring against entry names found while
// walking the test store.
//
// The outline is detected by running veraPDF's feature extraction for
// "outlines" in the integration tools container and checking whether the
// resulting features report is empty.
//
// It returns an error when the PDF cannot be located, when veraPDF cannot be
// executed, or when the outline presence does not match kind.
func (s *scenario) thePdfShouldHaveADocumentOutline(ctx context.Context, name, kind string) error {
	var path string
	if !strings.HasPrefix(name, "*_") {
		path = fmt.Sprintf("%s/%s/%s", s.workdir, s.resp.Header().Get("Gotenberg-Trace"), name)
		if _, err := os.Stat(path); err != nil {
			if os.IsNotExist(err) {
				return fmt.Errorf("PDF %q does not exist", path)
			}
			// Any other stat failure (e.g. permissions) would otherwise
			// surface later as a confusing veraPDF error.
			return fmt.Errorf("stat %q: %w", path, err)
		}
	} else {
		substr := strings.ReplaceAll(name, "*_", "")
		err := filepath.Walk(s.teststoreDir, func(currentPath string, info os.FileInfo, pathErr error) error {
			if pathErr != nil {
				return pathErr
			}
			if strings.Contains(info.Name(), substr) {
				path = currentPath
				return filepath.SkipDir
			}
			return nil
		})
		if err != nil {
			// Report the directory that was actually walked.
			return fmt.Errorf("walk %q: %w", s.teststoreDir, err)
		}
		if path == "" {
			// Without this guard veraPDF would be run against "." and the
			// step would fail (or pass) for the wrong reason.
			return fmt.Errorf("no PDF matching %q found in %q", name, s.teststoreDir)
		}
	}

	cmd := []string{
		"verapdf",
		"-off",
		"--extract",
		"outlines",
		filepath.Base(path),
	}

	// NOTE(review): the helper receives the full host path alongside a command
	// that only names the base file; presumably it makes the file available in
	// the container's working directory — confirm against the helper.
	output, err := execCommandInIntegrationToolsContainer(ctx, cmd, path)
	if err != nil {
		return fmt.Errorf("exec %q: %w", cmd, err)
	}

	// veraPDF emits an empty features report when the catalog holds no outline.
	hasOutline := !strings.Contains(output, "<featuresReport></featuresReport>")

	invert := kind == "should NOT"
	if !invert && !hasOutline {
		return fmt.Errorf("PDF %q has no document outline", path)
	}
	if invert && hasOutline {
		return fmt.Errorf("PDF %q has a document outline", path)
	}

	return nil
}
// pdfPageText extracts the text of a single page from a produced PDF using
// pdftotext. name is either a literal filename or a "*_" glob resolved against
// the test store.
@@ -1541,6 +1595,7 @@ func InitializeScenario(ctx *godog.ScenarioContext) {
ctx.Then(`^the (response|webhook request) PDF\(s\) should declare Factur-X XMP with conformance level "([^"]*)"$`, s.thePdfsShouldDeclareFacturXConformanceLevel)
ctx.Then(`^the "([^"]*)" PDF should have (\d+) page\(s\)$`, s.thePdfShouldHavePages)
ctx.Then(`^the "([^"]*)" PDF (should|should NOT) be set to landscape orientation$`, s.thePdfShouldBeSetToLandscapeOrientation)
ctx.Then(`^the "([^"]*)" PDF (should|should NOT) have a document outline$`, s.thePdfShouldHaveADocumentOutline)
ctx.Then(`^the "([^"]*)" PDF (should|should NOT) have the following content at page (\d+):$`, s.thePdfShouldHaveTheFollowingContentAtPage)
ctx.Then(`^the "([^"]*)" PDF (should|should NOT) have content matching "([^"]*)" at page (\d+)$`, s.thePdfShouldHaveContentMatchingAtPage)
ctx.Then(`^the "([^"]*)" PDF should have (\d+) image\(s\)$`, s.thePdfShouldHaveImages)
+16
View File
@@ -0,0 +1,16 @@
<!doctype html>
<html lang="en">
<head>
<title>Outline</title>
</head>
<body>
<!-- Two-level heading structure (h1 chapters, h2 sections); presumably the
     fixture for the document outline scenario - confirm against the feature
     file before changing the headings. -->
<h1>Chapter 1</h1>
<p>Intro.</p>
<h2>Section 1.1</h2>
<p>Body.</p>
<h1>Chapter 2</h1>
<p>Intro.</p>
<h2>Section 2.1</h2>
<p>Body.</p>
</body>
</html>