fix(libreoffice): suppress auto-generated page header for CSV conversions

This commit is contained in:
Julien Neuhart
2026-06-05 17:19:29 +02:00
parent 5558e43821
commit 9ab39b6fca
6 changed files with 76 additions and 5 deletions
+1 -1
View File
@@ -88,7 +88,7 @@ RUN apt-get update -qq \
WORKDIR /downloads
RUN curl -Ls https://raw.githubusercontent.com/gotenberg/unoconverter/v0.2.0/unoconv -o unoconverter \
RUN curl -Ls https://raw.githubusercontent.com/gotenberg/unoconverter/v0.3.0/unoconv -o unoconverter \
&& chmod +x unoconverter
RUN curl -o pdftk-all.jar "https://gitlab.com/api/v4/projects/5024297/packages/generic/pdftk-java/$PDFTK_VERSION/pdftk-all.jar" \
@@ -7,6 +7,7 @@ import (
"log/slog"
"net"
"os"
"path/filepath"
"strings"
"sync"
"sync/atomic"
@@ -319,6 +320,16 @@ func (p *libreOfficeProcess) pdf(ctx context.Context, logger *slog.Logger, input
args = append(args, "--disable-update-indexes")
}
// A CSV becomes a single Calc sheet named after the input file, and Calc's
// default page style prints that sheet name as a centered header. Uploads
// are stored under a UUID-based filename, so the UUID would otherwise leak
// into the rendered PDF. Suppress the header for CSV inputs; spreadsheets
// that carry their own page styles (XLSX, ODS) are left untouched.
// See https://github.com/gotenberg/gotenberg/issues/1568.
if strings.EqualFold(filepath.Ext(inputPath), ".csv") {
args = append(args, "--disable-calc-header")
}
args = append(args, "--export", fmt.Sprintf("ExportFormFields=%t", options.ExportFormFields))
args = append(args, "--export", fmt.Sprintf("AllowDuplicateFieldNames=%t", options.AllowDuplicateFieldNames))
args = append(args, "--export", fmt.Sprintf("ExportBookmarks=%t", options.ExportBookmarks))
+1
View File
@@ -68,6 +68,7 @@ Available tags:
- `the "<name>" PDF should have <N> page(s)`
- `the "<name>" PDF (should|should NOT) be set to landscape orientation`
- `the "<name>" PDF (should|should NOT) have the following content at page <N>:` (docstring)
- `the "<name>" PDF (should|should NOT) have content matching "<regexp>" at page <N>`
- `the (response|webhook request) PDF(s) should be valid "<standard>" with a tolerance of <N> failed rule(s)` (standards: `PDF/A-1b`, `PDF/A-2b`, `PDF/A-3b`, `PDF/UA-1`, `PDF/UA-2`)
- `the (response|webhook request) PDF(s) (should|should NOT) be flatten`
- `the (response|webhook request) PDF(s) (should|should NOT) be encrypted`
@@ -18,6 +18,25 @@ Feature: /forms/libreoffice/convert
Page 1
"""
# A CSV becomes a single Calc sheet named after the input file, and Calc's
# default page style prints that sheet name as a centered header. Uploads are
# stored under a UUID-based filename, so the UUID must not leak into the PDF.
# See https://github.com/gotenberg/gotenberg/issues/1568.
Scenario: POST /forms/libreoffice/convert (CSV Without Sheet Name Header)
Given I have a default Gotenberg container
When I make a "POST" request to Gotenberg at the "/forms/libreoffice/convert" endpoint with the following form data and header(s):
| files | testdata/sheet.csv | file |
| Gotenberg-Output-Filename | foo | header |
Then the response status code should be 200
Then the response header "Content-Type" should be "application/pdf"
Then there should be 1 PDF(s) in the response
Then the "foo.pdf" PDF should have 1 page(s)
Then the "foo.pdf" PDF should have the following content at page 1:
"""
Alice
"""
Then the "foo.pdf" PDF should NOT have content matching "[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}" at page 1
Scenario: POST /forms/libreoffice/convert (Many Documents)
Given I have a default Gotenberg container
When I make a "POST" request to Gotenberg at the "/forms/libreoffice/convert" endpoint with the following form data and header(s):
+41 -4
View File
@@ -1077,14 +1077,17 @@ func (s *scenario) thePdfShouldBeSetToLandscapeOrientation(ctx context.Context,
return nil
}
func (s *scenario) thePdfShouldHaveTheFollowingContentAtPage(ctx context.Context, name, kind string, page int, expected *godog.DocString) error {
// pdfPageText extracts the text of a single page from a produced PDF using
// pdftotext. name is either a literal filename or a "*_" glob resolved against
// the test store.
func (s *scenario) pdfPageText(ctx context.Context, name string, page int) (string, error) {
var path string
if !strings.HasPrefix(name, "*_") {
path = fmt.Sprintf("%s/%s/%s", s.workdir, s.resp.Header().Get("Gotenberg-Trace"), name)
_, err := os.Stat(path)
if os.IsNotExist(err) {
return fmt.Errorf("PDF %q does not exist", path)
return "", fmt.Errorf("PDF %q does not exist", path)
}
} else {
substr := strings.ReplaceAll(name, "*_", "")
@@ -1099,7 +1102,7 @@ func (s *scenario) thePdfShouldHaveTheFollowingContentAtPage(ctx context.Context
return nil
})
if err != nil {
return fmt.Errorf("walk %q: %w", s.workdir, err)
return "", fmt.Errorf("walk %q: %w", s.workdir, err)
}
}
@@ -1115,7 +1118,16 @@ func (s *scenario) thePdfShouldHaveTheFollowingContentAtPage(ctx context.Context
output, err := execCommandInIntegrationToolsContainer(ctx, cmd, path)
if err != nil {
return fmt.Errorf("exec %q: %w", cmd, err)
return "", fmt.Errorf("exec %q: %w", cmd, err)
}
return output, nil
}
func (s *scenario) thePdfShouldHaveTheFollowingContentAtPage(ctx context.Context, name, kind string, page int, expected *godog.DocString) error {
output, err := s.pdfPageText(ctx, name, page)
if err != nil {
return err
}
invert := kind == "should NOT"
@@ -1131,6 +1143,30 @@ func (s *scenario) thePdfShouldHaveTheFollowingContentAtPage(ctx context.Context
return nil
}
// thePdfShouldHaveContentMatchingAtPage checks the text of one page of the
// named PDF against a regular expression.
//
// When kind is "should NOT" the step fails if the pattern matches the page
// text; for any other kind it fails if the pattern does not match. An invalid
// pattern is reported as a compile error.
func (s *scenario) thePdfShouldHaveContentMatchingAtPage(ctx context.Context, name, kind, pattern string, page int) error {
	text, err := s.pdfPageText(ctx, name, page)
	if err != nil {
		return err
	}

	matcher, err := regexp.Compile(pattern)
	if err != nil {
		return fmt.Errorf("compile pattern %q: %w", pattern, err)
	}

	// Evaluate the match once and decide the outcome from the two booleans.
	found := matcher.MatchString(text)
	mustBeAbsent := kind == "should NOT"

	switch {
	case found && mustBeAbsent:
		return fmt.Errorf("pattern %q found in %q", pattern, text)
	case !found && !mustBeAbsent:
		return fmt.Errorf("pattern %q not found in %q", pattern, text)
	default:
		return nil
	}
}
func (s *scenario) thePdfsShouldBeFlatten(ctx context.Context, kind, should string) error {
dirPath := s.teststoreDir
@@ -1444,6 +1480,7 @@ func InitializeScenario(ctx *godog.ScenarioContext) {
ctx.Then(`^the "([^"]*)" PDF should have (\d+) page\(s\)$`, s.thePdfShouldHavePages)
ctx.Then(`^the "([^"]*)" PDF (should|should NOT) be set to landscape orientation$`, s.thePdfShouldBeSetToLandscapeOrientation)
ctx.Then(`^the "([^"]*)" PDF (should|should NOT) have the following content at page (\d+):$`, s.thePdfShouldHaveTheFollowingContentAtPage)
ctx.Then(`^the "([^"]*)" PDF (should|should NOT) have content matching "([^"]*)" at page (\d+)$`, s.thePdfShouldHaveContentMatchingAtPage)
ctx.Then(`^the "([^"]*)" PDF should have (\d+) image\(s\)$`, s.thePdfShouldHaveImages)
ctx.After(func(ctx context.Context, sc *godog.Scenario, err error) (context.Context, error) {
if s.gotenbergContainer != nil {
+3
View File
@@ -0,0 +1,3 @@
Name,Amount
Alice,100
Bob,200
1 Name Amount
2 Alice 100
3 Bob 200