mirror of
https://github.com/gotenberg/gotenberg.git
synced 2026-07-02 08:27:41 +08:00
chore(libreoffice): switch to env var for handling non-basic latin characters in filenames
This commit is contained in:
@@ -214,6 +214,13 @@ RUN \
|
||||
# Cleanup.
|
||||
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
|
||||
|
||||
# Set the default character set encoding to UTF-8.
|
||||
# See:
|
||||
# https://github.com/gotenberg/gotenberg/issues/104
|
||||
# https://github.com/gotenberg/gotenberg/issues/730
|
||||
ENV LANG=C.UTF-8
|
||||
ENV LC_ALL=C.UTF-8
|
||||
|
||||
RUN \
|
||||
# Install LibreOffice & unoconverter.
|
||||
echo "deb http://deb.debian.org/debian bookworm-backports main" >> /etc/apt/sources.list &&\
|
||||
|
||||
@@ -4,16 +4,13 @@ import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"net"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/google/uuid"
|
||||
"go.uber.org/zap"
|
||||
|
||||
"github.com/gotenberg/gotenberg/v8/pkg/gotenberg"
|
||||
@@ -333,11 +330,6 @@ func (p *libreOfficeProcess) pdf(ctx context.Context, logger *zap.Logger, inputP
|
||||
)
|
||||
}
|
||||
|
||||
inputPath, err := nonBasicLatinCharactersGuard(logger, inputPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("non-basic latin characters guard: %w", err)
|
||||
}
|
||||
|
||||
args = append(args, "--output", outputPath, inputPath)
|
||||
|
||||
cmd, err := gotenberg.CommandContext(ctx, logger, p.arguments.unoBinPath, args...)
|
||||
@@ -374,65 +366,6 @@ func (p *libreOfficeProcess) pdf(ctx context.Context, logger *zap.Logger, inputP
|
||||
return fmt.Errorf("convert to PDF: %w", err)
|
||||
}
|
||||
|
||||
// LibreOffice cannot convert a file with a name containing non-basic Latin
|
||||
// characters.
|
||||
// See:
|
||||
// https://github.com/gotenberg/gotenberg/issues/104
|
||||
// https://github.com/gotenberg/gotenberg/issues/730
|
||||
func nonBasicLatinCharactersGuard(logger *zap.Logger, inputPath string) (string, error) {
|
||||
hasNonBasicLatinChars := func(str string) bool {
|
||||
for _, r := range str {
|
||||
// Check if the character is outside basic Latin.
|
||||
if r != '.' && (r < ' ' || r > '~') {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
filename := filepath.Base(inputPath)
|
||||
if !hasNonBasicLatinChars(filename) {
|
||||
logger.Debug("no non-basic latin characters in filename, skip copy")
|
||||
return inputPath, nil
|
||||
}
|
||||
|
||||
logger.Warn("non-basic latin characters in filename, copy to a file with a valid filename")
|
||||
basePath := filepath.Dir(inputPath)
|
||||
ext := filepath.Ext(inputPath)
|
||||
newInputPath := filepath.Join(basePath, fmt.Sprintf("%s%s", uuid.NewString(), ext))
|
||||
|
||||
in, err := os.Open(inputPath)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("open file: %w", err)
|
||||
}
|
||||
|
||||
defer func() {
|
||||
err := in.Close()
|
||||
if err != nil {
|
||||
logger.Error(fmt.Sprintf("close file: %s", err))
|
||||
}
|
||||
}()
|
||||
|
||||
out, err := os.Create(newInputPath)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("create new file: %w", err)
|
||||
}
|
||||
|
||||
defer func() {
|
||||
err := out.Close()
|
||||
if err != nil {
|
||||
logger.Error(fmt.Sprintf("close new file: %s", err))
|
||||
}
|
||||
}()
|
||||
|
||||
_, err = io.Copy(out, in)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("copy file to new file: %w", err)
|
||||
}
|
||||
|
||||
return newInputPath, nil
|
||||
}
|
||||
|
||||
// Interface guards.
|
||||
var (
|
||||
_ gotenberg.Process = (*libreOfficeProcess)(nil)
|
||||
|
||||
@@ -40,6 +40,25 @@ Feature: /forms/libreoffice/convert
|
||||
Page 2
|
||||
"""
|
||||
|
||||
# See:
|
||||
# https://github.com/gotenberg/gotenberg/issues/104
|
||||
# https://github.com/gotenberg/gotenberg/issues/730
|
||||
Scenario: POST /forms/libreoffice/convert (Non-basic Latin Characters)
|
||||
Given I have a default Gotenberg container
|
||||
When I make a "POST" request to Gotenberg at the "/forms/libreoffice/convert" endpoint with the following form data and header(s):
|
||||
| files | testdata/Special_Chars_ß.docx | file |
|
||||
| Gotenberg-Output-Filename | foo | header |
|
||||
Then the response status code should be 200
|
||||
Then the response header "Content-Type" should be "application/pdf"
|
||||
Then there should be 1 PDF(s) in the response
|
||||
Then there should be the following file(s) in the response:
|
||||
| foo.pdf |
|
||||
Then the "foo.pdf" PDF should have 1 page(s)
|
||||
Then the "foo.pdf" PDF should have the following content at page 1:
|
||||
"""
|
||||
Page 1
|
||||
"""
|
||||
|
||||
Scenario: POST /forms/libreoffice/convert (Protected)
|
||||
Given I have a default Gotenberg container
|
||||
When I make a "POST" request to Gotenberg at the "/forms/libreoffice/convert" endpoint with the following form data and header(s):
|
||||
|
||||
BIN
Binary file not shown.
Reference in New Issue
Block a user