chore(libreoffice): switch to env var for handling non-basic latin characters in filenames

This commit is contained in:
Julien Neuhart
2025-05-13 16:09:51 +02:00
parent 79425af848
commit be3145aae9
4 changed files with 26 additions and 67 deletions
+7
View File
@@ -214,6 +214,13 @@ RUN \
# Cleanup.
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
# Set the default character set encoding to UTF-8 through the LANG and LC_ALL
# locale environment variables, so that filenames containing non-basic Latin
# characters are handled.
# See:
# https://github.com/gotenberg/gotenberg/issues/104
# https://github.com/gotenberg/gotenberg/issues/730
ENV LANG=C.UTF-8
ENV LC_ALL=C.UTF-8
RUN \
# Install LibreOffice & unoconverter.
echo "deb http://deb.debian.org/debian bookworm-backports main" >> /etc/apt/sources.list &&\
@@ -4,16 +4,13 @@ import (
"context"
"errors"
"fmt"
"io"
"net"
"os"
"path/filepath"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/google/uuid"
"go.uber.org/zap"
"github.com/gotenberg/gotenberg/v8/pkg/gotenberg"
@@ -333,11 +330,6 @@ func (p *libreOfficeProcess) pdf(ctx context.Context, logger *zap.Logger, inputP
)
}
inputPath, err := nonBasicLatinCharactersGuard(logger, inputPath)
if err != nil {
return fmt.Errorf("non-basic latin characters guard: %w", err)
}
args = append(args, "--output", outputPath, inputPath)
cmd, err := gotenberg.CommandContext(ctx, logger, p.arguments.unoBinPath, args...)
@@ -374,65 +366,6 @@ func (p *libreOfficeProcess) pdf(ctx context.Context, logger *zap.Logger, inputP
return fmt.Errorf("convert to PDF: %w", err)
}
// nonBasicLatinCharactersGuard returns a path whose file name only contains
// printable basic Latin characters.
//
// LibreOffice cannot convert a file with a name containing non-basic Latin
// characters. If the base name of inputPath only holds characters in the
// range ' ' through '~', inputPath is returned unchanged. Otherwise the file
// is copied into the same directory under a UUID-based name that keeps the
// original extension, and the path of that copy is returned.
//
// On failure, an empty path and a wrapped error are returned, and any
// partially written copy is removed.
//
// See:
// https://github.com/gotenberg/gotenberg/issues/104
// https://github.com/gotenberg/gotenberg/issues/730
func nonBasicLatinCharactersGuard(logger *zap.Logger, inputPath string) (string, error) {
	hasNonBasicLatinChars := func(str string) bool {
		for _, r := range str {
			// Anything outside the printable ASCII range is considered
			// non-basic Latin. '.' lies inside that range, so it needs no
			// special case.
			if r < ' ' || r > '~' {
				return true
			}
		}
		return false
	}

	filename := filepath.Base(inputPath)
	if !hasNonBasicLatinChars(filename) {
		logger.Debug("no non-basic latin characters in filename, skip copy")
		return inputPath, nil
	}

	logger.Warn("non-basic latin characters in filename, copy to a file with a valid filename")
	basePath := filepath.Dir(inputPath)
	// NOTE(review): the extension is reused verbatim, so an extension that
	// itself contains non-basic Latin characters ends up in the new name.
	ext := filepath.Ext(inputPath)
	newInputPath := filepath.Join(basePath, fmt.Sprintf("%s%s", uuid.NewString(), ext))

	in, err := os.Open(inputPath)
	if err != nil {
		return "", fmt.Errorf("open file: %w", err)
	}
	defer func() {
		// Read-only handle: a failed close cannot lose data, so only log it.
		err := in.Close()
		if err != nil {
			logger.Error(fmt.Sprintf("close file: %s", err))
		}
	}()

	out, err := os.Create(newInputPath)
	if err != nil {
		return "", fmt.Errorf("create new file: %w", err)
	}

	// Close the destination explicitly rather than in a defer: a close error
	// on a written file may mean the data never reached the disk, so it must
	// be reported to the caller instead of being logged and ignored.
	_, copyErr := io.Copy(out, in)
	closeErr := out.Close()
	if copyErr != nil || closeErr != nil {
		// Do not leave a truncated copy behind.
		if rmErr := os.Remove(newInputPath); rmErr != nil {
			logger.Error(fmt.Sprintf("remove new file: %s", rmErr))
		}
		if copyErr != nil {
			return "", fmt.Errorf("copy file to new file: %w", copyErr)
		}
		return "", fmt.Errorf("close new file: %w", closeErr)
	}

	return newInputPath, nil
}
// Interface guards.
var (
_ gotenberg.Process = (*libreOfficeProcess)(nil)
@@ -40,6 +40,25 @@ Feature: /forms/libreoffice/convert
Page 2
"""
# Regression scenario: uploads a document whose file name contains a
# non-basic Latin character ("ß") and expects a successful one-page PDF.
# See:
# https://github.com/gotenberg/gotenberg/issues/104
# https://github.com/gotenberg/gotenberg/issues/730
Scenario: POST /forms/libreoffice/convert (Non-basic Latin Characters)
Given I have a default Gotenberg container
When I make a "POST" request to Gotenberg at the "/forms/libreoffice/convert" endpoint with the following form data and header(s):
| files | testdata/Special_Chars_ß.docx | file |
| Gotenberg-Output-Filename | foo | header |
Then the response status code should be 200
Then the response header "Content-Type" should be "application/pdf"
Then there should be 1 PDF(s) in the response
Then there should be the following file(s) in the response:
| foo.pdf |
Then the "foo.pdf" PDF should have 1 page(s)
Then the "foo.pdf" PDF should have the following content at page 1:
"""
Page 1
"""
Scenario: POST /forms/libreoffice/convert (Protected)
Given I have a default Gotenberg container
When I make a "POST" request to Gotenberg at the "/forms/libreoffice/convert" endpoint with the following form data and header(s):
Binary file not shown.