mirror of
https://github.com/gotenberg/gotenberg.git
synced 2026-07-02 00:17:40 +08:00
fix(libreoffice): suppress auto-generated page header for CSV conversions
This commit is contained in:
+1
-1
@@ -88,7 +88,7 @@ RUN apt-get update -qq \
|
||||
|
||||
WORKDIR /downloads
|
||||
|
||||
RUN curl -Ls https://raw.githubusercontent.com/gotenberg/unoconverter/v0.2.0/unoconv -o unoconverter \
|
||||
RUN curl -Ls https://raw.githubusercontent.com/gotenberg/unoconverter/v0.3.0/unoconv -o unoconverter \
|
||||
&& chmod +x unoconverter
|
||||
|
||||
RUN curl -o pdftk-all.jar "https://gitlab.com/api/v4/projects/5024297/packages/generic/pdftk-java/$PDFTK_VERSION/pdftk-all.jar" \
|
||||
|
||||
@@ -7,6 +7,7 @@ import (
|
||||
"log/slog"
|
||||
"net"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
@@ -319,6 +320,16 @@ func (p *libreOfficeProcess) pdf(ctx context.Context, logger *slog.Logger, input
|
||||
args = append(args, "--disable-update-indexes")
|
||||
}
|
||||
|
||||
// A CSV becomes a single Calc sheet named after the input file, and Calc's
|
||||
// default page style prints that sheet name as a centered header. Uploads
|
||||
// are stored under a UUID-based filename, so the UUID would otherwise leak
|
||||
// into the rendered PDF. Suppress the header for CSV inputs; spreadsheets
|
||||
// that carry their own page styles (XLSX, ODS) are left untouched.
|
||||
// See https://github.com/gotenberg/gotenberg/issues/1568.
|
||||
if strings.EqualFold(filepath.Ext(inputPath), ".csv") {
|
||||
args = append(args, "--disable-calc-header")
|
||||
}
|
||||
|
||||
args = append(args, "--export", fmt.Sprintf("ExportFormFields=%t", options.ExportFormFields))
|
||||
args = append(args, "--export", fmt.Sprintf("AllowDuplicateFieldNames=%t", options.AllowDuplicateFieldNames))
|
||||
args = append(args, "--export", fmt.Sprintf("ExportBookmarks=%t", options.ExportBookmarks))
|
||||
|
||||
@@ -68,6 +68,7 @@ Available tags:
|
||||
- `the "<name>" PDF should have <N> page(s)`
|
||||
- `the "<name>" PDF (should|should NOT) be set to landscape orientation`
|
||||
- `the "<name>" PDF (should|should NOT) have the following content at page <N>:` (docstring)
|
||||
- `the "<name>" PDF (should|should NOT) have content matching "<regexp>" at page <N>`
|
||||
- `the (response|webhook request) PDF(s) should be valid "<standard>" with a tolerance of <N> failed rule(s)` (standards: `PDF/A-1b`, `PDF/A-2b`, `PDF/A-3b`, `PDF/UA-1`, `PDF/UA-2`)
|
||||
- `the (response|webhook request) PDF(s) (should|should NOT) be flatten`
|
||||
- `the (response|webhook request) PDF(s) (should|should NOT) be encrypted`
|
||||
|
||||
@@ -18,6 +18,25 @@ Feature: /forms/libreoffice/convert
|
||||
Page 1
|
||||
"""
|
||||
|
||||
# A CSV becomes a single Calc sheet named after the input file, and Calc's
|
||||
# default page style prints that sheet name as a centered header. Uploads are
|
||||
# stored under a UUID-based filename, so the UUID must not leak into the PDF.
|
||||
# See https://github.com/gotenberg/gotenberg/issues/1568.
|
||||
Scenario: POST /forms/libreoffice/convert (CSV Without Sheet Name Header)
|
||||
Given I have a default Gotenberg container
|
||||
When I make a "POST" request to Gotenberg at the "/forms/libreoffice/convert" endpoint with the following form data and header(s):
|
||||
| files | testdata/sheet.csv | file |
|
||||
| Gotenberg-Output-Filename | foo | header |
|
||||
Then the response status code should be 200
|
||||
Then the response header "Content-Type" should be "application/pdf"
|
||||
Then there should be 1 PDF(s) in the response
|
||||
Then the "foo.pdf" PDF should have 1 page(s)
|
||||
Then the "foo.pdf" PDF should have the following content at page 1:
|
||||
"""
|
||||
Alice
|
||||
"""
|
||||
Then the "foo.pdf" PDF should NOT have content matching "[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}" at page 1
|
||||
|
||||
Scenario: POST /forms/libreoffice/convert (Many Documents)
|
||||
Given I have a default Gotenberg container
|
||||
When I make a "POST" request to Gotenberg at the "/forms/libreoffice/convert" endpoint with the following form data and header(s):
|
||||
|
||||
@@ -1077,14 +1077,17 @@ func (s *scenario) thePdfShouldBeSetToLandscapeOrientation(ctx context.Context,
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *scenario) thePdfShouldHaveTheFollowingContentAtPage(ctx context.Context, name, kind string, page int, expected *godog.DocString) error {
|
||||
// pdfPageText extracts the text of a single page from a produced PDF using
|
||||
// pdftotext. name is either a literal filename or a "*_" glob resolved against
|
||||
// the test store.
|
||||
func (s *scenario) pdfPageText(ctx context.Context, name string, page int) (string, error) {
|
||||
var path string
|
||||
if !strings.HasPrefix(name, "*_") {
|
||||
path = fmt.Sprintf("%s/%s/%s", s.workdir, s.resp.Header().Get("Gotenberg-Trace"), name)
|
||||
|
||||
_, err := os.Stat(path)
|
||||
if os.IsNotExist(err) {
|
||||
return fmt.Errorf("PDF %q does not exist", path)
|
||||
return "", fmt.Errorf("PDF %q does not exist", path)
|
||||
}
|
||||
} else {
|
||||
substr := strings.ReplaceAll(name, "*_", "")
|
||||
@@ -1099,7 +1102,7 @@ func (s *scenario) thePdfShouldHaveTheFollowingContentAtPage(ctx context.Context
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
return fmt.Errorf("walk %q: %w", s.workdir, err)
|
||||
return "", fmt.Errorf("walk %q: %w", s.workdir, err)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1115,7 +1118,16 @@ func (s *scenario) thePdfShouldHaveTheFollowingContentAtPage(ctx context.Context
|
||||
|
||||
output, err := execCommandInIntegrationToolsContainer(ctx, cmd, path)
|
||||
if err != nil {
|
||||
return fmt.Errorf("exec %q: %w", cmd, err)
|
||||
return "", fmt.Errorf("exec %q: %w", cmd, err)
|
||||
}
|
||||
|
||||
return output, nil
|
||||
}
|
||||
|
||||
func (s *scenario) thePdfShouldHaveTheFollowingContentAtPage(ctx context.Context, name, kind string, page int, expected *godog.DocString) error {
|
||||
output, err := s.pdfPageText(ctx, name, page)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
invert := kind == "should NOT"
|
||||
@@ -1131,6 +1143,30 @@ func (s *scenario) thePdfShouldHaveTheFollowingContentAtPage(ctx context.Context
|
||||
return nil
|
||||
}
|
||||
|
||||
// thePdfShouldHaveContentMatchingAtPage is a step handler asserting that the
// text of the given page of the PDF identified by name does, or does not,
// match the regular expression pattern.
//
// The page text is obtained from s.pdfPageText(ctx, name, page). kind selects
// the expectation: the exact string "should NOT" requires that the pattern
// does not match; any other value requires that it does.
//
// It returns the error from pdfPageText unchanged, a wrapped error when
// pattern does not compile, or an error quoting both the pattern and the
// extracted text when the expectation is not met. It returns nil otherwise.
func (s *scenario) thePdfShouldHaveContentMatchingAtPage(ctx context.Context, name, kind, pattern string, page int) error {
|
||||
output, err := s.pdfPageText(ctx, name, page)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// The pattern comes straight from the step text, so it is compiled on each
// call and a malformed expression is reported as a step failure.
re, err := regexp.Compile(pattern)
|
||||
if err != nil {
|
||||
return fmt.Errorf("compile pattern %q: %w", pattern, err)
|
||||
}
|
||||
|
||||
invert := kind == "should NOT"
|
||||
|
||||
// MatchString is unanchored: a match anywhere in the page text counts.
if !invert && !re.MatchString(output) {
|
||||
return fmt.Errorf("pattern %q not found in %q", pattern, output)
|
||||
}
|
||||
|
||||
if invert && re.MatchString(output) {
|
||||
return fmt.Errorf("pattern %q found in %q", pattern, output)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *scenario) thePdfsShouldBeFlatten(ctx context.Context, kind, should string) error {
|
||||
dirPath := s.teststoreDir
|
||||
|
||||
@@ -1444,6 +1480,7 @@ func InitializeScenario(ctx *godog.ScenarioContext) {
|
||||
ctx.Then(`^the "([^"]*)" PDF should have (\d+) page\(s\)$`, s.thePdfShouldHavePages)
|
||||
ctx.Then(`^the "([^"]*)" PDF (should|should NOT) be set to landscape orientation$`, s.thePdfShouldBeSetToLandscapeOrientation)
|
||||
ctx.Then(`^the "([^"]*)" PDF (should|should NOT) have the following content at page (\d+):$`, s.thePdfShouldHaveTheFollowingContentAtPage)
|
||||
ctx.Then(`^the "([^"]*)" PDF (should|should NOT) have content matching "([^"]*)" at page (\d+)$`, s.thePdfShouldHaveContentMatchingAtPage)
|
||||
ctx.Then(`^the "([^"]*)" PDF should have (\d+) image\(s\)$`, s.thePdfShouldHaveImages)
|
||||
ctx.After(func(ctx context.Context, sc *godog.Scenario, err error) (context.Context, error) {
|
||||
if s.gotenbergContainer != nil {
|
||||
|
||||
Vendored
+3
@@ -0,0 +1,3 @@
|
||||
Name,Amount
|
||||
Alice,100
|
||||
Bob,200
|
||||
|
Reference in New Issue
Block a user