mirror of
https://github.com/gotenberg/gotenberg.git
synced 2026-07-02 00:17:40 +08:00
fix(chromium): generateDocumentOutline now implies generateTaggedPdf
This commit is contained in:
@@ -300,7 +300,8 @@ type PdfOptions struct {
|
||||
PreferCssPageSize bool
|
||||
|
||||
// GenerateDocumentOutline defines whether the document outline should be
|
||||
// embedded into the PDF.
|
||||
// embedded into the PDF. Chromium derives the outline from the tagged-PDF
|
||||
// structure tree, so enabling this implies GenerateTaggedPdf.
|
||||
GenerateDocumentOutline bool
|
||||
|
||||
// GenerateTaggedPdf defines whether to generate tagged (accessible)
|
||||
|
||||
@@ -21,8 +21,29 @@ import (
|
||||
"github.com/gotenberg/gotenberg/v8/pkg/gotenberg"
|
||||
)
|
||||
|
||||
// resolvePdfOptions applies the cross-option constraints Chromium imposes
|
||||
// before printing.
|
||||
//
|
||||
// Chromium derives the PDF document outline from the tagged-PDF structure
|
||||
// tree, so [PdfOptions.GenerateDocumentOutline] produces no outline unless
|
||||
// tagged PDF is also generated. Requesting an outline therefore implies
|
||||
// tagged PDF. See https://github.com/gotenberg/gotenberg/issues/1579.
|
||||
func resolvePdfOptions(options PdfOptions) PdfOptions {
|
||||
if options.GenerateDocumentOutline {
|
||||
options.GenerateTaggedPdf = true
|
||||
}
|
||||
|
||||
return options
|
||||
}
|
||||
|
||||
func printToPdfActionFunc(reqCtx context.Context, logger *slog.Logger, outputPath string, options PdfOptions) chromedp.ActionFunc {
|
||||
return func(ctx context.Context) error {
|
||||
if options.GenerateDocumentOutline && !options.GenerateTaggedPdf {
|
||||
logger.DebugContext(ctx, "document outline requested, enabling tagged PDF because Chromium derives the outline from the structure tree")
|
||||
}
|
||||
|
||||
options = resolvePdfOptions(options)
|
||||
|
||||
// ctx is the chromedp task context, derived from context.Background(),
|
||||
// so the span is started under reqCtx to keep print_to_pdf in the
|
||||
// conversion trace instead of orphaning it into a new one.
|
||||
|
||||
@@ -0,0 +1,52 @@
|
||||
package chromium
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestResolvePdfOptions(t *testing.T) {
|
||||
for _, tc := range []struct {
|
||||
scenario string
|
||||
generateOutline bool
|
||||
generateTaggedIn bool
|
||||
generateTaggedWant bool
|
||||
}{
|
||||
{
|
||||
scenario: "outline requested forces tagged PDF",
|
||||
generateOutline: true,
|
||||
generateTaggedIn: false,
|
||||
generateTaggedWant: true,
|
||||
},
|
||||
{
|
||||
scenario: "outline requested keeps tagged PDF on",
|
||||
generateOutline: true,
|
||||
generateTaggedIn: true,
|
||||
generateTaggedWant: true,
|
||||
},
|
||||
{
|
||||
scenario: "no outline leaves tagged PDF off",
|
||||
generateOutline: false,
|
||||
generateTaggedIn: false,
|
||||
generateTaggedWant: false,
|
||||
},
|
||||
{
|
||||
scenario: "no outline keeps tagged PDF on",
|
||||
generateOutline: false,
|
||||
generateTaggedIn: true,
|
||||
generateTaggedWant: true,
|
||||
},
|
||||
} {
|
||||
t.Run(tc.scenario, func(t *testing.T) {
|
||||
options := DefaultPdfOptions()
|
||||
options.GenerateDocumentOutline = tc.generateOutline
|
||||
options.GenerateTaggedPdf = tc.generateTaggedIn
|
||||
|
||||
got := resolvePdfOptions(options)
|
||||
|
||||
if got.GenerateTaggedPdf != tc.generateTaggedWant {
|
||||
t.Errorf("expected GenerateTaggedPdf=%t, got %t", tc.generateTaggedWant, got.GenerateTaggedPdf)
|
||||
}
|
||||
if got.GenerateDocumentOutline != tc.generateOutline {
|
||||
t.Errorf("expected GenerateDocumentOutline=%t, got %t", tc.generateOutline, got.GenerateDocumentOutline)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -548,6 +548,25 @@ Feature: /forms/chromium/convert/html
|
||||
Error: Exception 2
|
||||
"""
|
||||
|
||||
Scenario: POST /forms/chromium/convert/html (Document Outline)
|
||||
Given I have a default Gotenberg container
|
||||
When I make a "POST" request to Gotenberg at the "/forms/chromium/convert/html" endpoint with the following form data and header(s):
|
||||
| files | testdata/page-outline-html/index.html | file |
|
||||
| generateDocumentOutline | true | field |
|
||||
| Gotenberg-Output-Filename | foo | header |
|
||||
Then the response status code should be 200
|
||||
Then the response header "Content-Type" should be "application/pdf"
|
||||
Then there should be 1 PDF(s) in the response
|
||||
# generateTaggedPdf is left unset, yet the outline must still be embedded:
|
||||
# Gotenberg enables tagged PDF automatically because Chromium derives the
|
||||
# outline from the structure tree. See issue #1579.
|
||||
Then the "foo.pdf" PDF should have a document outline
|
||||
When I make a "POST" request to Gotenberg at the "/forms/chromium/convert/html" endpoint with the following form data and header(s):
|
||||
| files | testdata/page-outline-html/index.html | file |
|
||||
| Gotenberg-Output-Filename | bar | header |
|
||||
Then the response status code should be 200
|
||||
Then the "bar.pdf" PDF should NOT have a document outline
|
||||
|
||||
Scenario: POST /forms/chromium/convert/html (Bad Request)
|
||||
Given I have a default Gotenberg container
|
||||
When I make a "POST" request to Gotenberg at the "/forms/chromium/convert/html" endpoint with the following form data and header(s):
|
||||
|
||||
@@ -1077,6 +1077,60 @@ func (s *scenario) thePdfShouldBeSetToLandscapeOrientation(ctx context.Context,
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *scenario) thePdfShouldHaveADocumentOutline(ctx context.Context, name, kind string) error {
|
||||
var path string
|
||||
if !strings.HasPrefix(name, "*_") {
|
||||
path = fmt.Sprintf("%s/%s/%s", s.workdir, s.resp.Header().Get("Gotenberg-Trace"), name)
|
||||
|
||||
_, err := os.Stat(path)
|
||||
if os.IsNotExist(err) {
|
||||
return fmt.Errorf("PDF %q does not exist", path)
|
||||
}
|
||||
} else {
|
||||
substr := strings.ReplaceAll(name, "*_", "")
|
||||
err := filepath.Walk(s.teststoreDir, func(currentPath string, info os.FileInfo, pathErr error) error {
|
||||
if pathErr != nil {
|
||||
return pathErr
|
||||
}
|
||||
if strings.Contains(info.Name(), substr) {
|
||||
path = currentPath
|
||||
return filepath.SkipDir
|
||||
}
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
return fmt.Errorf("walk %q: %w", s.workdir, err)
|
||||
}
|
||||
}
|
||||
|
||||
cmd := []string{
|
||||
"verapdf",
|
||||
"-off",
|
||||
"--extract",
|
||||
"outlines",
|
||||
filepath.Base(path),
|
||||
}
|
||||
|
||||
output, err := execCommandInIntegrationToolsContainer(ctx, cmd, path)
|
||||
if err != nil {
|
||||
return fmt.Errorf("exec %q: %w", cmd, err)
|
||||
}
|
||||
|
||||
// veraPDF emits an empty features report when the catalog holds no outline.
|
||||
hasOutline := !strings.Contains(output, "<featuresReport></featuresReport>")
|
||||
invert := kind == "should NOT"
|
||||
|
||||
if !invert && !hasOutline {
|
||||
return fmt.Errorf("PDF %q has no document outline", path)
|
||||
}
|
||||
|
||||
if invert && hasOutline {
|
||||
return fmt.Errorf("PDF %q has a document outline", path)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// pdfPageText extracts the text of a single page from a produced PDF using
|
||||
// pdftotext. name is either a literal filename or a "*_" glob resolved against
|
||||
// the test store.
|
||||
@@ -1541,6 +1595,7 @@ func InitializeScenario(ctx *godog.ScenarioContext) {
|
||||
ctx.Then(`^the (response|webhook request) PDF\(s\) should declare Factur-X XMP with conformance level "([^"]*)"$`, s.thePdfsShouldDeclareFacturXConformanceLevel)
|
||||
ctx.Then(`^the "([^"]*)" PDF should have (\d+) page\(s\)$`, s.thePdfShouldHavePages)
|
||||
ctx.Then(`^the "([^"]*)" PDF (should|should NOT) be set to landscape orientation$`, s.thePdfShouldBeSetToLandscapeOrientation)
|
||||
ctx.Then(`^the "([^"]*)" PDF (should|should NOT) have a document outline$`, s.thePdfShouldHaveADocumentOutline)
|
||||
ctx.Then(`^the "([^"]*)" PDF (should|should NOT) have the following content at page (\d+):$`, s.thePdfShouldHaveTheFollowingContentAtPage)
|
||||
ctx.Then(`^the "([^"]*)" PDF (should|should NOT) have content matching "([^"]*)" at page (\d+)$`, s.thePdfShouldHaveContentMatchingAtPage)
|
||||
ctx.Then(`^the "([^"]*)" PDF should have (\d+) image\(s\)$`, s.thePdfShouldHaveImages)
|
||||
|
||||
@@ -0,0 +1,16 @@
|
||||
<!doctype html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<title>Outline</title>
|
||||
</head>
|
||||
<body>
|
||||
<h1>Chapter 1</h1>
|
||||
<p>Intro.</p>
|
||||
<h2>Section 1.1</h2>
|
||||
<p>Body.</p>
|
||||
<h1>Chapter 2</h1>
|
||||
<p>Intro.</p>
|
||||
<h2>Section 2.1</h2>
|
||||
<p>Body.</p>
|
||||
</body>
|
||||
</html>
|
||||
Reference in New Issue
Block a user