fix(exiftool): prevent line breaks

This commit is contained in:
Julien Neuhart
2026-04-11 13:11:47 +02:00
parent 924576d3d4
commit 55d19522a8
2 changed files with 60 additions and 8 deletions
+43 -8
View File
@@ -30,6 +30,24 @@ func init() {
// line-based protocol.
var safeKeyPattern = regexp.MustCompile(`^[a-zA-Z0-9\-_.:]+$`)
// validateMetadataValue rejects metadata values containing characters that
// could inject ExifTool stdin arguments. go-exiftool writes each key/value
// pair as a single line via fmt.Fprintln(stdin, "-"+k+"="+str), so a
// newline in the value splits into a second stdin line that ExifTool
// interprets as an additional argument. An attacker who controls a
// metadata value but not the key could otherwise inject pseudo-tags like
// -FileName=, -Directory=, -SymLink=, or -HardLink= and trigger arbitrary
// filesystem side effects. The same applies to carriage returns and NUL.
//
// The returned error wraps [gotenberg.ErrPdfEngineMetadataValueNotSupported]
// so the API layer surfaces it as HTTP 400.
func validateMetadataValue(key, value string) error {
if strings.ContainsAny(value, "\n\r\x00") {
return fmt.Errorf("write PDF metadata with ExifTool: invalid metadata value for key %q (contains control character): %w", key, gotenberg.ErrPdfEngineMetadataValueNotSupported)
}
return nil
}
// systemTags lists ExifTool tags that reflect internal filesystem state
// rather than actual PDF metadata. These are stripped from both read and
// write operations.
@@ -267,7 +285,7 @@ func (engine *ExifTool) WriteMetadata(ctx context.Context, logger *slog.Logger,
// period, and underscore — sufficient for all legitimate tag names.
for key := range metadata {
if !safeKeyPattern.MatchString(key) {
err = fmt.Errorf("write PDF metadata with ExifTool: invalid metadata key %q", key)
err = fmt.Errorf("write PDF metadata with ExifTool: invalid metadata key %q: %w", key, gotenberg.ErrPdfEngineMetadataValueNotSupported)
span.RecordError(err)
span.SetStatus(codes.Error, err.Error())
return err
@@ -285,21 +303,38 @@ func (engine *ExifTool) WriteMetadata(ctx context.Context, logger *slog.Logger,
for key, value := range metadata {
switch val := value.(type) {
case string:
if err = validateMetadataValue(key, val); err != nil {
span.RecordError(err)
span.SetStatus(codes.Error, err.Error())
return err
}
fileMetadata[0].SetString(key, val)
case []string:
for _, s := range val {
if err = validateMetadataValue(key, s); err != nil {
span.RecordError(err)
span.SetStatus(codes.Error, err.Error())
return err
}
}
fileMetadata[0].SetStrings(key, val)
case []any:
// See https://github.com/gotenberg/gotenberg/issues/1048.
strs := make([]string, len(val))
for i, entry := range val {
if str, ok := entry.(string); ok {
strs[i] = str
continue
str, ok := entry.(string)
if !ok {
err = fmt.Errorf("write PDF metadata with ExifTool: %s %+v %s %w", key, val, reflect.TypeFor[[]any](), gotenberg.ErrPdfEngineMetadataValueNotSupported)
span.RecordError(err)
span.SetStatus(codes.Error, err.Error())
return err
}
err = fmt.Errorf("write PDF metadata with ExifTool: %s %+v %s %w", key, val, reflect.TypeFor[[]any](), gotenberg.ErrPdfEngineMetadataValueNotSupported)
span.RecordError(err)
span.SetStatus(codes.Error, err.Error())
return err
if err = validateMetadataValue(key, str); err != nil {
span.RecordError(err)
span.SetStatus(codes.Error, err.Error())
return err
}
strs[i] = str
}
fileMetadata[0].SetStrings(key, strs)
case bool:
@@ -106,6 +106,23 @@ Feature: /forms/pdfengines/metadata/{write|read}
Invalid form data: form field 'metadata' is invalid (got 'foo', resulting to unmarshal metadata: invalid character 'o' in literal false (expecting 'a'))
"""
Scenario: POST /forms/pdfengines/metadata/write (Reject Newline-Injected Pseudo-Tag)
# Regression: a newline in a metadata value would split go-exiftool's
# stdin line and inject an arbitrary ExifTool pseudo-tag such as
# -FileName=, -SymLink=, or -HardLink=, allowing arbitrary filesystem
# writes as the container user. WriteMetadata now rejects values
# containing control characters with HTTP 400.
Given I have a default Gotenberg container
When I make a "POST" request to Gotenberg at the "/forms/pdfengines/metadata/write" endpoint with the following form data and header(s):
| files | testdata/page_1.pdf | file |
| metadata | {"Title":"test\\n-FileName=/tmp/inject_proof"} | field |
Then the response status code should be 400
Then the response header "Content-Type" should be "text/plain; charset=UTF-8"
Then the response body should contain string:
"""
At least one PDF engine cannot process the requested metadata
"""
Scenario: POST /forms/pdfengines/metadata/read (Bad Request)
Given I have a default Gotenberg container
When I make a "POST" request to Gotenberg at the "/forms/pdfengines/metadata/read" endpoint with the following form data and header(s):