mirror of
https://github.com/gotenberg/gotenberg.git
synced 2026-07-02 00:17:40 +08:00
fix(chromium): generateDocumentOutline now implies generateTaggedPdf
This commit is contained in:
@@ -300,7 +300,8 @@ type PdfOptions struct {
|
|||||||
PreferCssPageSize bool
|
PreferCssPageSize bool
|
||||||
|
|
||||||
// GenerateDocumentOutline defines whether the document outline should be
|
// GenerateDocumentOutline defines whether the document outline should be
|
||||||
// embedded into the PDF.
|
// embedded into the PDF. Chromium derives the outline from the tagged-PDF
|
||||||
|
// structure tree, so enabling this implies GenerateTaggedPdf.
|
||||||
GenerateDocumentOutline bool
|
GenerateDocumentOutline bool
|
||||||
|
|
||||||
// GenerateTaggedPdf defines whether to generate tagged (accessible)
|
// GenerateTaggedPdf defines whether to generate tagged (accessible)
|
||||||
|
|||||||
@@ -21,8 +21,29 @@ import (
|
|||||||
"github.com/gotenberg/gotenberg/v8/pkg/gotenberg"
|
"github.com/gotenberg/gotenberg/v8/pkg/gotenberg"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// resolvePdfOptions applies the cross-option constraints Chromium imposes
|
||||||
|
// before printing.
|
||||||
|
//
|
||||||
|
// Chromium derives the PDF document outline from the tagged-PDF structure
|
||||||
|
// tree, so [PdfOptions.GenerateDocumentOutline] produces no outline unless
|
||||||
|
// tagged PDF is also generated. Requesting an outline therefore implies
|
||||||
|
// tagged PDF. See https://github.com/gotenberg/gotenberg/issues/1579.
|
||||||
|
func resolvePdfOptions(options PdfOptions) PdfOptions {
|
||||||
|
if options.GenerateDocumentOutline {
|
||||||
|
options.GenerateTaggedPdf = true
|
||||||
|
}
|
||||||
|
|
||||||
|
return options
|
||||||
|
}
|
||||||
|
|
||||||
func printToPdfActionFunc(reqCtx context.Context, logger *slog.Logger, outputPath string, options PdfOptions) chromedp.ActionFunc {
|
func printToPdfActionFunc(reqCtx context.Context, logger *slog.Logger, outputPath string, options PdfOptions) chromedp.ActionFunc {
|
||||||
return func(ctx context.Context) error {
|
return func(ctx context.Context) error {
|
||||||
|
if options.GenerateDocumentOutline && !options.GenerateTaggedPdf {
|
||||||
|
logger.DebugContext(ctx, "document outline requested, enabling tagged PDF because Chromium derives the outline from the structure tree")
|
||||||
|
}
|
||||||
|
|
||||||
|
options = resolvePdfOptions(options)
|
||||||
|
|
||||||
// ctx is the chromedp task context, derived from context.Background(),
|
// ctx is the chromedp task context, derived from context.Background(),
|
||||||
// so the span is started under reqCtx to keep print_to_pdf in the
|
// so the span is started under reqCtx to keep print_to_pdf in the
|
||||||
// conversion trace instead of orphaning it into a new one.
|
// conversion trace instead of orphaning it into a new one.
|
||||||
|
|||||||
@@ -0,0 +1,52 @@
|
|||||||
|
package chromium
|
||||||
|
|
||||||
|
import "testing"
|
||||||
|
|
||||||
|
func TestResolvePdfOptions(t *testing.T) {
|
||||||
|
for _, tc := range []struct {
|
||||||
|
scenario string
|
||||||
|
generateOutline bool
|
||||||
|
generateTaggedIn bool
|
||||||
|
generateTaggedWant bool
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
scenario: "outline requested forces tagged PDF",
|
||||||
|
generateOutline: true,
|
||||||
|
generateTaggedIn: false,
|
||||||
|
generateTaggedWant: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
scenario: "outline requested keeps tagged PDF on",
|
||||||
|
generateOutline: true,
|
||||||
|
generateTaggedIn: true,
|
||||||
|
generateTaggedWant: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
scenario: "no outline leaves tagged PDF off",
|
||||||
|
generateOutline: false,
|
||||||
|
generateTaggedIn: false,
|
||||||
|
generateTaggedWant: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
scenario: "no outline keeps tagged PDF on",
|
||||||
|
generateOutline: false,
|
||||||
|
generateTaggedIn: true,
|
||||||
|
generateTaggedWant: true,
|
||||||
|
},
|
||||||
|
} {
|
||||||
|
t.Run(tc.scenario, func(t *testing.T) {
|
||||||
|
options := DefaultPdfOptions()
|
||||||
|
options.GenerateDocumentOutline = tc.generateOutline
|
||||||
|
options.GenerateTaggedPdf = tc.generateTaggedIn
|
||||||
|
|
||||||
|
got := resolvePdfOptions(options)
|
||||||
|
|
||||||
|
if got.GenerateTaggedPdf != tc.generateTaggedWant {
|
||||||
|
t.Errorf("expected GenerateTaggedPdf=%t, got %t", tc.generateTaggedWant, got.GenerateTaggedPdf)
|
||||||
|
}
|
||||||
|
if got.GenerateDocumentOutline != tc.generateOutline {
|
||||||
|
t.Errorf("expected GenerateDocumentOutline=%t, got %t", tc.generateOutline, got.GenerateDocumentOutline)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -548,6 +548,25 @@ Feature: /forms/chromium/convert/html
|
|||||||
Error: Exception 2
|
Error: Exception 2
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
Scenario: POST /forms/chromium/convert/html (Document Outline)
|
||||||
|
Given I have a default Gotenberg container
|
||||||
|
When I make a "POST" request to Gotenberg at the "/forms/chromium/convert/html" endpoint with the following form data and header(s):
|
||||||
|
| files | testdata/page-outline-html/index.html | file |
|
||||||
|
| generateDocumentOutline | true | field |
|
||||||
|
| Gotenberg-Output-Filename | foo | header |
|
||||||
|
Then the response status code should be 200
|
||||||
|
Then the response header "Content-Type" should be "application/pdf"
|
||||||
|
Then there should be 1 PDF(s) in the response
|
||||||
|
# generateTaggedPdf is left unset, yet the outline must still be embedded:
|
||||||
|
# Gotenberg enables tagged PDF automatically because Chromium derives the
|
||||||
|
# outline from the structure tree. See issue #1579.
|
||||||
|
Then the "foo.pdf" PDF should have a document outline
|
||||||
|
When I make a "POST" request to Gotenberg at the "/forms/chromium/convert/html" endpoint with the following form data and header(s):
|
||||||
|
| files | testdata/page-outline-html/index.html | file |
|
||||||
|
| Gotenberg-Output-Filename | bar | header |
|
||||||
|
Then the response status code should be 200
|
||||||
|
Then the "bar.pdf" PDF should NOT have a document outline
|
||||||
|
|
||||||
Scenario: POST /forms/chromium/convert/html (Bad Request)
|
Scenario: POST /forms/chromium/convert/html (Bad Request)
|
||||||
Given I have a default Gotenberg container
|
Given I have a default Gotenberg container
|
||||||
When I make a "POST" request to Gotenberg at the "/forms/chromium/convert/html" endpoint with the following form data and header(s):
|
When I make a "POST" request to Gotenberg at the "/forms/chromium/convert/html" endpoint with the following form data and header(s):
|
||||||
|
|||||||
@@ -1077,6 +1077,60 @@ func (s *scenario) thePdfShouldBeSetToLandscapeOrientation(ctx context.Context,
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (s *scenario) thePdfShouldHaveADocumentOutline(ctx context.Context, name, kind string) error {
|
||||||
|
var path string
|
||||||
|
if !strings.HasPrefix(name, "*_") {
|
||||||
|
path = fmt.Sprintf("%s/%s/%s", s.workdir, s.resp.Header().Get("Gotenberg-Trace"), name)
|
||||||
|
|
||||||
|
_, err := os.Stat(path)
|
||||||
|
if os.IsNotExist(err) {
|
||||||
|
return fmt.Errorf("PDF %q does not exist", path)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
substr := strings.ReplaceAll(name, "*_", "")
|
||||||
|
err := filepath.Walk(s.teststoreDir, func(currentPath string, info os.FileInfo, pathErr error) error {
|
||||||
|
if pathErr != nil {
|
||||||
|
return pathErr
|
||||||
|
}
|
||||||
|
if strings.Contains(info.Name(), substr) {
|
||||||
|
path = currentPath
|
||||||
|
return filepath.SkipDir
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("walk %q: %w", s.workdir, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
cmd := []string{
|
||||||
|
"verapdf",
|
||||||
|
"-off",
|
||||||
|
"--extract",
|
||||||
|
"outlines",
|
||||||
|
filepath.Base(path),
|
||||||
|
}
|
||||||
|
|
||||||
|
output, err := execCommandInIntegrationToolsContainer(ctx, cmd, path)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("exec %q: %w", cmd, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// veraPDF emits an empty features report when the catalog holds no outline.
|
||||||
|
hasOutline := !strings.Contains(output, "<featuresReport></featuresReport>")
|
||||||
|
invert := kind == "should NOT"
|
||||||
|
|
||||||
|
if !invert && !hasOutline {
|
||||||
|
return fmt.Errorf("PDF %q has no document outline", path)
|
||||||
|
}
|
||||||
|
|
||||||
|
if invert && hasOutline {
|
||||||
|
return fmt.Errorf("PDF %q has a document outline", path)
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
// pdfPageText extracts the text of a single page from a produced PDF using
|
// pdfPageText extracts the text of a single page from a produced PDF using
|
||||||
// pdftotext. name is either a literal filename or a "*_" glob resolved against
|
// pdftotext. name is either a literal filename or a "*_" glob resolved against
|
||||||
// the test store.
|
// the test store.
|
||||||
@@ -1541,6 +1595,7 @@ func InitializeScenario(ctx *godog.ScenarioContext) {
|
|||||||
ctx.Then(`^the (response|webhook request) PDF\(s\) should declare Factur-X XMP with conformance level "([^"]*)"$`, s.thePdfsShouldDeclareFacturXConformanceLevel)
|
ctx.Then(`^the (response|webhook request) PDF\(s\) should declare Factur-X XMP with conformance level "([^"]*)"$`, s.thePdfsShouldDeclareFacturXConformanceLevel)
|
||||||
ctx.Then(`^the "([^"]*)" PDF should have (\d+) page\(s\)$`, s.thePdfShouldHavePages)
|
ctx.Then(`^the "([^"]*)" PDF should have (\d+) page\(s\)$`, s.thePdfShouldHavePages)
|
||||||
ctx.Then(`^the "([^"]*)" PDF (should|should NOT) be set to landscape orientation$`, s.thePdfShouldBeSetToLandscapeOrientation)
|
ctx.Then(`^the "([^"]*)" PDF (should|should NOT) be set to landscape orientation$`, s.thePdfShouldBeSetToLandscapeOrientation)
|
||||||
|
ctx.Then(`^the "([^"]*)" PDF (should|should NOT) have a document outline$`, s.thePdfShouldHaveADocumentOutline)
|
||||||
ctx.Then(`^the "([^"]*)" PDF (should|should NOT) have the following content at page (\d+):$`, s.thePdfShouldHaveTheFollowingContentAtPage)
|
ctx.Then(`^the "([^"]*)" PDF (should|should NOT) have the following content at page (\d+):$`, s.thePdfShouldHaveTheFollowingContentAtPage)
|
||||||
ctx.Then(`^the "([^"]*)" PDF (should|should NOT) have content matching "([^"]*)" at page (\d+)$`, s.thePdfShouldHaveContentMatchingAtPage)
|
ctx.Then(`^the "([^"]*)" PDF (should|should NOT) have content matching "([^"]*)" at page (\d+)$`, s.thePdfShouldHaveContentMatchingAtPage)
|
||||||
ctx.Then(`^the "([^"]*)" PDF should have (\d+) image\(s\)$`, s.thePdfShouldHaveImages)
|
ctx.Then(`^the "([^"]*)" PDF should have (\d+) image\(s\)$`, s.thePdfShouldHaveImages)
|
||||||
|
|||||||
@@ -0,0 +1,16 @@
|
|||||||
|
<!doctype html>
|
||||||
|
<html lang="en">
|
||||||
|
<head>
|
||||||
|
<title>Outline</title>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<h1>Chapter 1</h1>
|
||||||
|
<p>Intro.</p>
|
||||||
|
<h2>Section 1.1</h2>
|
||||||
|
<p>Body.</p>
|
||||||
|
<h1>Chapter 2</h1>
|
||||||
|
<p>Intro.</p>
|
||||||
|
<h2>Section 2.1</h2>
|
||||||
|
<p>Body.</p>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
Reference in New Issue
Block a user