diff --git a/build/Dockerfile b/build/Dockerfile index baa878c..cfe82a8 100644 --- a/build/Dockerfile +++ b/build/Dockerfile @@ -88,7 +88,7 @@ RUN apt-get update -qq \ WORKDIR /downloads -RUN curl -Ls https://raw.githubusercontent.com/gotenberg/unoconverter/v0.2.0/unoconv -o unoconverter \ +RUN curl -Ls https://raw.githubusercontent.com/gotenberg/unoconverter/v0.3.0/unoconv -o unoconverter \ && chmod +x unoconverter RUN curl -o pdftk-all.jar "https://gitlab.com/api/v4/projects/5024297/packages/generic/pdftk-java/$PDFTK_VERSION/pdftk-all.jar" \ diff --git a/pkg/modules/libreoffice/api/libreoffice.go b/pkg/modules/libreoffice/api/libreoffice.go index bf67937..099fa9c 100644 --- a/pkg/modules/libreoffice/api/libreoffice.go +++ b/pkg/modules/libreoffice/api/libreoffice.go @@ -7,6 +7,7 @@ import ( "log/slog" "net" "os" + "path/filepath" "strings" "sync" "sync/atomic" @@ -319,6 +320,16 @@ func (p *libreOfficeProcess) pdf(ctx context.Context, logger *slog.Logger, input args = append(args, "--disable-update-indexes") } + // A CSV becomes a single Calc sheet named after the input file, and Calc's + // default page style prints that sheet name as a centered header. Uploads + // are stored under a UUID-based filename, so the UUID would otherwise leak + // into the rendered PDF. Suppress the header for CSV inputs; spreadsheets + // that carry their own page styles (XLSX, ODS) are left untouched. + // See https://github.com/gotenberg/gotenberg/issues/1568. 
+ if strings.EqualFold(filepath.Ext(inputPath), ".csv") { + args = append(args, "--disable-calc-header") + } + args = append(args, "--export", fmt.Sprintf("ExportFormFields=%t", options.ExportFormFields)) args = append(args, "--export", fmt.Sprintf("AllowDuplicateFieldNames=%t", options.AllowDuplicateFieldNames)) args = append(args, "--export", fmt.Sprintf("ExportBookmarks=%t", options.ExportBookmarks)) diff --git a/test/integration/README.md b/test/integration/README.md index 8e307d3..0132c2f 100644 --- a/test/integration/README.md +++ b/test/integration/README.md @@ -68,6 +68,7 @@ Available tags: - `the "" PDF should have page(s)` - `the "" PDF (should|should NOT) be set to landscape orientation` - `the "" PDF (should|should NOT) have the following content at page :` (docstring) +- `the "" PDF (should|should NOT) have content matching "" at page ` - `the (response|webhook request) PDF(s) should be valid "" with a tolerance of failed rule(s)` (standards: `PDF/A-1b`, `PDF/A-2b`, `PDF/A-3b`, `PDF/UA-1`, `PDF/UA-2`) - `the (response|webhook request) PDF(s) (should|should NOT) be flatten` - `the (response|webhook request) PDF(s) (should|should NOT) be encrypted` diff --git a/test/integration/features/libreoffice_convert.feature b/test/integration/features/libreoffice_convert.feature index a3f5296..9411275 100644 --- a/test/integration/features/libreoffice_convert.feature +++ b/test/integration/features/libreoffice_convert.feature @@ -18,6 +18,25 @@ Feature: /forms/libreoffice/convert Page 1 """ + # A CSV becomes a single Calc sheet named after the input file, and Calc's + # default page style prints that sheet name as a centered header. Uploads are + # stored under a UUID-based filename, so the UUID must not leak into the PDF. + # See https://github.com/gotenberg/gotenberg/issues/1568. 
+ Scenario: POST /forms/libreoffice/convert (CSV Without Sheet Name Header) + Given I have a default Gotenberg container + When I make a "POST" request to Gotenberg at the "/forms/libreoffice/convert" endpoint with the following form data and header(s): + | files | testdata/sheet.csv | file | + | Gotenberg-Output-Filename | foo | header | + Then the response status code should be 200 + Then the response header "Content-Type" should be "application/pdf" + Then there should be 1 PDF(s) in the response + Then the "foo.pdf" PDF should have 1 page(s) + Then the "foo.pdf" PDF should have the following content at page 1: + """ + Alice + """ + Then the "foo.pdf" PDF should NOT have content matching "[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}" at page 1 + Scenario: POST /forms/libreoffice/convert (Many Documents) Given I have a default Gotenberg container When I make a "POST" request to Gotenberg at the "/forms/libreoffice/convert" endpoint with the following form data and header(s): diff --git a/test/integration/scenario/scenario.go b/test/integration/scenario/scenario.go index 904fdb5..967e4e4 100644 --- a/test/integration/scenario/scenario.go +++ b/test/integration/scenario/scenario.go @@ -1077,14 +1077,17 @@ func (s *scenario) thePdfShouldBeSetToLandscapeOrientation(ctx context.Context, return nil } -func (s *scenario) thePdfShouldHaveTheFollowingContentAtPage(ctx context.Context, name, kind string, page int, expected *godog.DocString) error { +// pdfPageText extracts the text of a single page from a produced PDF using +// pdftotext. name is either a literal filename or a "*_" glob resolved against +// the test store. 
+func (s *scenario) pdfPageText(ctx context.Context, name string, page int) (string, error) { var path string if !strings.HasPrefix(name, "*_") { path = fmt.Sprintf("%s/%s/%s", s.workdir, s.resp.Header().Get("Gotenberg-Trace"), name) _, err := os.Stat(path) if os.IsNotExist(err) { - return fmt.Errorf("PDF %q does not exist", path) + return "", fmt.Errorf("PDF %q does not exist", path) } } else { substr := strings.ReplaceAll(name, "*_", "") @@ -1099,7 +1102,7 @@ func (s *scenario) thePdfShouldHaveTheFollowingContentAtPage(ctx context.Context return nil }) if err != nil { - return fmt.Errorf("walk %q: %w", s.workdir, err) + return "", fmt.Errorf("walk %q: %w", s.workdir, err) } } @@ -1115,7 +1118,16 @@ func (s *scenario) thePdfShouldHaveTheFollowingContentAtPage(ctx context.Context output, err := execCommandInIntegrationToolsContainer(ctx, cmd, path) if err != nil { - return fmt.Errorf("exec %q: %w", cmd, err) + return "", fmt.Errorf("exec %q: %w", cmd, err) + } + + return output, nil +} + +func (s *scenario) thePdfShouldHaveTheFollowingContentAtPage(ctx context.Context, name, kind string, page int, expected *godog.DocString) error { + output, err := s.pdfPageText(ctx, name, page) + if err != nil { + return err } invert := kind == "should NOT" @@ -1131,6 +1143,30 @@ func (s *scenario) thePdfShouldHaveTheFollowingContentAtPage(ctx context.Context return nil } +// thePdfShouldHaveContentMatchingAtPage asserts that a PDF page's text matches pattern (or does +// not, when kind is "should NOT"). The pattern is compiled before the page text is extracted. +func (s *scenario) thePdfShouldHaveContentMatchingAtPage(ctx context.Context, name, kind, pattern string, page int) error { + re, err := regexp.Compile(pattern) + if err != nil { + return fmt.Errorf("compile pattern %q: %w", pattern, err) + } + + output, err := s.pdfPageText(ctx, name, page) + if err != nil { + return err + } + + matched := re.MatchString(output) + switch { + case kind != "should NOT" && !matched: + return fmt.Errorf("pattern %q not found in %q", pattern, output) + case kind == "should NOT" && matched: + return fmt.Errorf("pattern %q found in %q", pattern, output) + } + + return nil +} + func (s
*scenario) thePdfsShouldBeFlatten(ctx context.Context, kind, should string) error { dirPath := s.teststoreDir @@ -1444,6 +1480,7 @@ func InitializeScenario(ctx *godog.ScenarioContext) { ctx.Then(`^the "([^"]*)" PDF should have (\d+) page\(s\)$`, s.thePdfShouldHavePages) ctx.Then(`^the "([^"]*)" PDF (should|should NOT) be set to landscape orientation$`, s.thePdfShouldBeSetToLandscapeOrientation) ctx.Then(`^the "([^"]*)" PDF (should|should NOT) have the following content at page (\d+):$`, s.thePdfShouldHaveTheFollowingContentAtPage) + ctx.Then(`^the "([^"]*)" PDF (should|should NOT) have content matching "([^"]*)" at page (\d+)$`, s.thePdfShouldHaveContentMatchingAtPage) ctx.Then(`^the "([^"]*)" PDF should have (\d+) image\(s\)$`, s.thePdfShouldHaveImages) ctx.After(func(ctx context.Context, sc *godog.Scenario, err error) (context.Context, error) { if s.gotenbergContainer != nil { diff --git a/test/integration/testdata/sheet.csv b/test/integration/testdata/sheet.csv new file mode 100644 index 0000000..d6590b8 --- /dev/null +++ b/test/integration/testdata/sheet.csv @@ -0,0 +1,3 @@ +Name,Amount +Alice,100 +Bob,200