fix(chromium): better default security and DX for allow / deny lists

This commit is contained in:
Julien Neuhart
2026-03-27 09:47:21 +01:00
parent bd6d92be9b
commit 06b2b2e10c
13 changed files with 358 additions and 56 deletions
+54 -26
View File
@@ -12,40 +12,68 @@ import (
// ErrFiltered happens if a value is filtered by the [FilterDeadline] function.
var ErrFiltered = errors.New("value filtered")
// FilterDeadline checks if given value is allowed and not denied according to
// regex patterns. It returns a [context.DeadlineExceeded] if it takes too long
// to process.
func FilterDeadline(allowed, denied *regexp2.Regexp, s string, deadline time.Time) error {
// FIXME: not ideal to compile everytime, but is there another way to create a clone?
if allowed.String() != "" {
allow := regexp2.MustCompile(allowed.String(), 0)
allow.MatchTimeout = time.Until(deadline)
// RegexpToSlice wraps a single [regexp2.Regexp] into a slice suitable for
// [FilterDeadline]. If the regexp pattern is empty, it returns nil (meaning no
// filtering).
func RegexpToSlice(r *regexp2.Regexp) []*regexp2.Regexp {
if r == nil || r.String() == "" {
return nil
}
ok, err := allow.MatchString(s)
if err != nil {
if time.Now().After(deadline) {
return context.DeadlineExceeded
return []*regexp2.Regexp{r}
}
// FilterDeadline checks if the given value is allowed and not denied according
// to regex patterns. The allowed list uses OR semantics (value must match at
// least one pattern). The denied list uses OR semantics (value is denied if it
// matches any pattern). It returns a [context.DeadlineExceeded] if it takes
// too long to process.
func FilterDeadline(allowed, denied []*regexp2.Regexp, s string, deadline time.Time) error {
if len(allowed) > 0 {
matched := false
for _, pattern := range allowed {
// FIXME: not ideal to compile everytime, but is there another way to create a clone?
clone := regexp2.MustCompile(pattern.String(), 0)
clone.MatchTimeout = time.Until(deadline)
ok, err := clone.MatchString(s)
if err != nil {
if time.Now().After(deadline) {
return context.DeadlineExceeded
}
return fmt.Errorf("'%s' cannot handle '%s': %w", clone.String(), s, err)
}
if ok {
matched = true
break
}
return fmt.Errorf("'%s' cannot handle '%s': %w", allow.String(), s, err)
}
if !ok {
return fmt.Errorf("'%s' does not match the expression from the allowed list: %w", s, ErrFiltered)
if !matched {
return fmt.Errorf("'%s' does not match any expression from the allowed list: %w", s, ErrFiltered)
}
}
if denied.String() != "" {
deny := regexp2.MustCompile(denied.String(), 0)
deny.MatchTimeout = time.Until(deadline)
if len(denied) > 0 {
for _, pattern := range denied {
clone := regexp2.MustCompile(pattern.String(), 0)
clone.MatchTimeout = time.Until(deadline)
ok, err := deny.MatchString(s)
if err != nil {
if time.Now().After(deadline) {
return context.DeadlineExceeded
ok, err := clone.MatchString(s)
if err != nil {
if time.Now().After(deadline) {
return context.DeadlineExceeded
}
return fmt.Errorf("'%s' cannot handle '%s': %w", clone.String(), s, err)
}
if ok {
return fmt.Errorf("'%s' matches the expression from the denied list: %w", s, ErrFiltered)
}
return fmt.Errorf("'%s' cannot handle '%s': %w", deny.String(), s, err)
}
if ok {
return fmt.Errorf("'%s' matches the expression from the denied list: %w", s, ErrFiltered)
}
}
+90 -14
View File
@@ -12,8 +12,8 @@ import (
func TestFilterDeadline(t *testing.T) {
for _, tc := range []struct {
scenario string
allowed *regexp2.Regexp
denied *regexp2.Regexp
allowed []*regexp2.Regexp
denied []*regexp2.Regexp
s string
deadline time.Time
expectError bool
@@ -21,17 +21,17 @@ func TestFilterDeadline(t *testing.T) {
}{
{
scenario: "DeadlineExceeded (allowed)",
allowed: regexp2.MustCompile("foo", 0),
denied: regexp2.MustCompile("", 0),
allowed: []*regexp2.Regexp{regexp2.MustCompile("foo", 0)},
denied: nil,
s: "foo",
deadline: time.Now().Add(time.Duration(-1) * time.Hour),
expectError: true,
expectedError: context.DeadlineExceeded,
},
{
scenario: "ErrFiltered (allowed)",
allowed: regexp2.MustCompile("foo", 0),
denied: regexp2.MustCompile("", 0),
scenario: "ErrFiltered (allowed, no match)",
allowed: []*regexp2.Regexp{regexp2.MustCompile("foo", 0)},
denied: nil,
s: "bar",
deadline: time.Now().Add(time.Duration(5) * time.Second),
expectError: true,
@@ -39,8 +39,8 @@ func TestFilterDeadline(t *testing.T) {
},
{
scenario: "DeadlineExceeded (denied)",
allowed: regexp2.MustCompile("", 0),
denied: regexp2.MustCompile("foo", 0),
allowed: nil,
denied: []*regexp2.Regexp{regexp2.MustCompile("foo", 0)},
s: "foo",
deadline: time.Now().Add(time.Duration(-1) * time.Hour),
expectError: true,
@@ -48,21 +48,55 @@ func TestFilterDeadline(t *testing.T) {
},
{
scenario: "ErrFiltered (denied)",
allowed: regexp2.MustCompile("", 0),
denied: regexp2.MustCompile("foo", 0),
allowed: nil,
denied: []*regexp2.Regexp{regexp2.MustCompile("foo", 0)},
s: "foo",
deadline: time.Now().Add(time.Duration(5) * time.Second),
expectError: true,
expectedError: ErrFiltered,
},
{
scenario: "success",
allowed: regexp2.MustCompile("", 0),
denied: regexp2.MustCompile("", 0),
scenario: "success (empty lists)",
allowed: nil,
denied: nil,
s: "foo",
deadline: time.Now().Add(time.Duration(5) * time.Second),
expectError: false,
},
{
scenario: "multi-pattern allow list, second matches",
allowed: []*regexp2.Regexp{regexp2.MustCompile("^https://", 0), regexp2.MustCompile("^file:///tmp/", 0)},
denied: nil,
s: "file:///tmp/abc/index.html",
deadline: time.Now().Add(time.Duration(5) * time.Second),
expectError: false,
},
{
scenario: "multi-pattern allow list, none matches",
allowed: []*regexp2.Regexp{regexp2.MustCompile("^https://", 0), regexp2.MustCompile("^ftp://", 0)},
denied: nil,
s: "file:///tmp/abc/index.html",
deadline: time.Now().Add(time.Duration(5) * time.Second),
expectError: true,
expectedError: ErrFiltered,
},
{
scenario: "multi-pattern deny list, second matches",
allowed: nil,
denied: []*regexp2.Regexp{regexp2.MustCompile("^ftp://", 0), regexp2.MustCompile("^file:.*", 0)},
s: "file:///etc/passwd",
deadline: time.Now().Add(time.Duration(5) * time.Second),
expectError: true,
expectedError: ErrFiltered,
},
{
scenario: "https URL passes deny list targeting file://",
allowed: nil,
denied: []*regexp2.Regexp{regexp2.MustCompile("^file:.*", 0)},
s: "https://example.com",
deadline: time.Now().Add(time.Duration(5) * time.Second),
expectError: false,
},
} {
t.Run(tc.scenario, func(t *testing.T) {
err := FilterDeadline(tc.allowed, tc.denied, tc.s, tc.deadline)
@@ -81,3 +115,45 @@ func TestFilterDeadline(t *testing.T) {
})
}
}
func TestRegexpToSlice(t *testing.T) {
for _, tc := range []struct {
scenario string
input *regexp2.Regexp
expectNil bool
expectLen int
}{
{
scenario: "nil regexp",
input: nil,
expectNil: true,
},
{
scenario: "empty regexp",
input: regexp2.MustCompile("", 0),
expectNil: true,
},
{
scenario: "non-empty regexp",
input: regexp2.MustCompile("^file:.*", 0),
expectNil: false,
expectLen: 1,
},
} {
t.Run(tc.scenario, func(t *testing.T) {
result := RegexpToSlice(tc.input)
if tc.expectNil && result != nil {
t.Fatalf("expected nil but got: %v", result)
}
if !tc.expectNil && result == nil {
t.Fatal("expected non-nil but got nil")
}
if !tc.expectNil && len(result) != tc.expectLen {
t.Fatalf("expected length %d but got %d", tc.expectLen, len(result))
}
})
}
}
+29
View File
@@ -222,3 +222,32 @@ func (f *ParsedFlags) MustDeprecatedRegexp(deprecated string, newName string) *r
return f.MustRegexp(newName)
}
// MustRegexpSlice returns a slice of compiled regular expressions from a
// string-slice flag given by name. Empty strings are skipped.
// It panics if an error occurs.
func (f *ParsedFlags) MustRegexpSlice(name string) []*regexp2.Regexp {
vals := f.MustStringSlice(name)
var regexps []*regexp2.Regexp
for _, val := range vals {
if val == "" {
continue
}
regexps = append(regexps, regexp2.MustCompile(val, 0))
}
return regexps
}
// MustDeprecatedRegexpSlice returns the slice of compiled regular expressions
// of a deprecated flag if it was explicitly set or the slice of the new flag.
// It panics if an error occurs.
func (f *ParsedFlags) MustDeprecatedRegexpSlice(deprecated string, newName string) []*regexp2.Regexp {
if f.Changed(deprecated) {
return f.MustRegexpSlice(deprecated)
}
return f.MustRegexpSlice(newName)
}
+118
View File
@@ -6,6 +6,7 @@ import (
"testing"
"time"
"github.com/dlclark/regexp2"
flag "github.com/spf13/pflag"
)
@@ -833,3 +834,120 @@ func TestParsedFlags_MustDeprecatedRegexp(t *testing.T) {
})
}
}
func TestParsedFlags_MustRegexpSlice(t *testing.T) {
fs := flag.NewFlagSet("tests", flag.ContinueOnError)
fs.StringSlice("foo", []string{}, "")
err := fs.Parse([]string{"--foo=^file:.*", "--foo=^ftp://.*"})
if err != nil {
t.Fatalf("expected no error but got: %v", err)
}
parsedFlags := ParsedFlags{FlagSet: fs}
for _, tc := range []struct {
scenario string
name string
expectPanic bool
expectLen int
}{
{
scenario: "success with multiple patterns",
name: "foo",
expectPanic: false,
expectLen: 2,
},
{
scenario: "non-existing flag",
name: "bar",
expectPanic: true,
},
} {
t.Run(tc.scenario, func(t *testing.T) {
if tc.expectPanic {
defer func() {
if r := recover(); r == nil {
t.Fatal("expected panic but got none")
}
}()
}
if !tc.expectPanic {
defer func() {
if r := recover(); r != nil {
t.Fatalf("expected no panic but got: %v", r)
}
}()
}
result := parsedFlags.MustRegexpSlice(tc.name)
if !tc.expectPanic && len(result) != tc.expectLen {
t.Errorf("expected %d regexps but got %d", tc.expectLen, len(result))
}
})
}
// Test empty strings are skipped.
fs2 := flag.NewFlagSet("tests2", flag.ContinueOnError)
fs2.StringSlice("baz", []string{""}, "")
err = fs2.Parse([]string{})
if err != nil {
t.Fatalf("expected no error but got: %v", err)
}
parsedFlags2 := ParsedFlags{FlagSet: fs2}
result := parsedFlags2.MustRegexpSlice("baz")
if len(result) != 0 {
t.Errorf("expected 0 regexps for empty strings but got %d", len(result))
}
}
func TestParsedFlags_MustDeprecatedRegexpSlice(t *testing.T) {
for _, tc := range []struct {
scenario string
rawFlags []string
expectPattern string
}{
{
scenario: "deprecated flag value",
rawFlags: []string{"--foo=^file:.*"},
expectPattern: "^file:.*",
},
{
scenario: "non-deprecated flag value",
rawFlags: []string{"--bar=^ftp://.*"},
expectPattern: "^ftp://.*",
},
{
scenario: "deprecated flag value > non-deprecated flag value",
rawFlags: []string{"--foo=^file:.*", "--bar=^ftp://.*"},
expectPattern: "^file:.*",
},
} {
t.Run(tc.scenario, func(t *testing.T) {
fs := flag.NewFlagSet("tests", flag.ContinueOnError)
fs.StringSlice("foo", []string{}, "")
fs.StringSlice("bar", []string{}, "")
parsedFlags := ParsedFlags{FlagSet: fs}
err := parsedFlags.Parse(tc.rawFlags)
if err != nil {
t.Fatalf("expected no error but got: %v", err)
}
actual := parsedFlags.MustDeprecatedRegexpSlice("foo", "bar")
if len(actual) != 1 {
t.Fatalf("expected 1 regexp but got %d", len(actual))
}
if actual[0].String() != tc.expectPattern {
t.Errorf("expected pattern '%s' but got '%s'", tc.expectPattern, actual[0].String())
}
})
}
_ = regexp2.None // Keep import alive.
}
+11 -1
View File
@@ -225,7 +225,12 @@ func newContext(echoCtx echo.Context, logger *zap.Logger, fs *gotenberg.FileSyst
)
}
err := gotenberg.FilterDeadline(downloadFromCfg.allowList, downloadFromCfg.denyList, dl.Url, deadline)
err := gotenberg.FilterDeadline(
gotenberg.RegexpToSlice(downloadFromCfg.allowList),
gotenberg.RegexpToSlice(downloadFromCfg.denyList),
dl.Url,
deadline,
)
if err != nil {
return fmt.Errorf("filter URL: %w", err)
}
@@ -430,6 +435,11 @@ func (ctx *Context) FormData() *FormData {
}
}
// DirPath returns the path to the request's working directory.
func (ctx *Context) DirPath() string {
return ctx.dirPath
}
// GeneratePath generates a path within the context's working directory.
// It generates a new UUID-based filename. It does not create a file.
func (ctx *Context) GeneratePath(extension string) string {
+6 -5
View File
@@ -42,8 +42,8 @@ type browserArguments struct {
hyphenDataDirPath string
// Tasks specific.
allowList *regexp2.Regexp
denyList *regexp2.Regexp
allowList []*regexp2.Regexp
denyList []*regexp2.Regexp
clearCache bool
clearCookies bool
disableJavaScript bool
@@ -356,9 +356,10 @@ func (b *chromiumBrowser) do(ctx context.Context, logger *zap.Logger, url string
// the extra HTTP headers, if any.
// See https://github.com/gotenberg/gotenberg/issues/1011.
listenForEventRequestPaused(taskCtx, logger, eventRequestPausedOptions{
allowList: b.arguments.allowList,
denyList: b.arguments.denyList,
extraHttpHeaders: options.ExtraHttpHeaders,
allowList: b.arguments.allowList,
denyList: b.arguments.denyList,
allowedFilePrefixes: options.AllowedFilePrefixes,
extraHttpHeaders: options.ExtraHttpHeaders,
})
var (
+10 -4
View File
@@ -172,6 +172,12 @@ type Options struct {
// OmitBackground hides the default white background and allows generating
// PDFs with transparency.
OmitBackground bool
// AllowedFilePrefixes restricts file:// sub-resource access to only these
// directory prefixes. Applied in listenForEventRequestPaused in addition
// to the global allow/deny lists. Set internally by route handlers, not
// via form data.
AllowedFilePrefixes []string
}
// EmulatedMediaFeature gathers the available entries for emulating a media
@@ -421,8 +427,8 @@ func (mod *Chromium) Descriptor() gotenberg.ModuleDescriptor {
fs.Bool("chromium-allow-file-access-from-files", false, "Allow file:// URIs to read other file:// URIs")
fs.String("chromium-host-resolver-rules", "", "Set custom mappings to the host resolver")
fs.String("chromium-proxy-server", "", "Set the outbound proxy server; this switch only affects HTTP and HTTPS requests")
fs.String("chromium-allow-list", "", "Set the allowed URLs for Chromium using a regular expression")
fs.String("chromium-deny-list", `^file:(?!//\/tmp/).*`, "Set the denied URLs for Chromium using a regular expression")
fs.StringSlice("chromium-allow-list", []string{}, "Set the allowed URLs for Chromium using regular expressions - supports multiple values")
fs.StringSlice("chromium-deny-list", []string{`^file:(?!//\/tmp/).*`}, "Set the denied URLs for Chromium using regular expressions - supports multiple values")
fs.Bool("chromium-clear-cache", false, "Clear Chromium cache between each conversion")
fs.Bool("chromium-clear-cookies", false, "Clear Chromium cookies between each conversion")
fs.Bool("chromium-disable-javascript", false, "Disable JavaScript")
@@ -469,8 +475,8 @@ func (mod *Chromium) Provision(ctx *gotenberg.Context) error {
wsUrlReadTimeout: flags.MustDuration("chromium-start-timeout"),
hyphenDataDirPath: hyphenDataDirPath,
allowList: flags.MustRegexp("chromium-allow-list"),
denyList: flags.MustRegexp("chromium-deny-list"),
allowList: flags.MustRegexpSlice("chromium-allow-list"),
denyList: flags.MustRegexpSlice("chromium-deny-list"),
clearCache: flags.MustBool("chromium-clear-cache"),
clearCookies: flags.MustBool("chromium-clear-cookies"),
disableJavaScript: flags.MustBool("chromium-disable-javascript"),
+21 -1
View File
@@ -24,7 +24,8 @@ import (
)
type eventRequestPausedOptions struct {
allowList, denyList *regexp2.Regexp
allowList, denyList []*regexp2.Regexp
allowedFilePrefixes []string
extraHttpHeaders []ExtraHttpHeader
}
@@ -58,6 +59,25 @@ func listenForEventRequestPaused(ctx context.Context, logger *zap.Logger, option
allow = false
}
// Additional restriction: if the sub-resource is a file:// URL
// and we have allowed file prefixes, restrict access to only
// those directories. This prevents cross-request file access
// in /tmp.
if allow && strings.HasPrefix(e.Request.URL, "file://") && len(options.allowedFilePrefixes) > 0 {
prefixMatch := false
for _, prefix := range options.allowedFilePrefixes {
if strings.HasPrefix(e.Request.URL, "file://"+prefix) {
prefixMatch = true
break
}
}
if !prefixMatch {
logger.Warn(fmt.Sprintf("'%s' is not within any allowed file prefix", e.Request.URL))
allow = false
}
}
cctx := chromedp.FromContext(ctx)
executorCtx := cdp.WithExecutor(ctx, cctx.Target)
+4
View File
@@ -512,6 +512,7 @@ func convertHtmlRoute(chromium Api, engine gotenberg.PdfEngine) api.Route {
}
url := fmt.Sprintf("file://%s", inputPath)
options.AllowedFilePrefixes = []string{ctx.DirPath()}
err = convertUrl(ctx, chromium, engine, url, options, mode, pdfFormats, metadata, userPassword, ownerPassword, embedPaths, watermark, stamp, rotateAngle, rotatePages)
if err != nil {
return fmt.Errorf("convert HTML to PDF: %w", err)
@@ -542,6 +543,7 @@ func screenshotHtmlRoute(chromium Api) api.Route {
}
url := fmt.Sprintf("file://%s", inputPath)
options.AllowedFilePrefixes = []string{ctx.DirPath()}
err = screenshotUrl(ctx, chromium, url, options)
if err != nil {
return fmt.Errorf("HTML screenshot: %w", err)
@@ -598,6 +600,7 @@ func convertMarkdownRoute(chromium Api, engine gotenberg.PdfEngine) api.Route {
return fmt.Errorf("transform markdown file(s) to HTML: %w", err)
}
options.AllowedFilePrefixes = []string{ctx.DirPath()}
err = convertUrl(ctx, chromium, engine, url, options, mode, pdfFormats, metadata, userPassword, ownerPassword, embedPaths, watermark, stamp, rotateAngle, rotatePages)
if err != nil {
return fmt.Errorf("convert markdown to PDF: %w", err)
@@ -637,6 +640,7 @@ func screenshotMarkdownRoute(chromium Api) api.Route {
return fmt.Errorf("transform markdown file(s) to HTML: %w", err)
}
options.AllowedFilePrefixes = []string{ctx.DirPath()}
err = screenshotUrl(ctx, chromium, url, options)
if err != nil {
return fmt.Errorf("markdown screenshot: %w", err)
+12 -2
View File
@@ -114,12 +114,22 @@ func webhookMiddleware(w *Webhook) api.Middleware {
// Let's check if the webhook URLs are acceptable according to our
// allowed/denied lists.
err := gotenberg.FilterDeadline(w.allowList, w.denyList, webhookUrl, deadline)
err := gotenberg.FilterDeadline(
gotenberg.RegexpToSlice(w.allowList),
gotenberg.RegexpToSlice(w.denyList),
webhookUrl,
deadline,
)
if err != nil {
return fmt.Errorf("filter webhook URL: %w", err)
}
err = gotenberg.FilterDeadline(w.errorAllowList, w.errorDenyList, webhookErrorUrl, deadline)
err = gotenberg.FilterDeadline(
gotenberg.RegexpToSlice(w.errorAllowList),
gotenberg.RegexpToSlice(w.errorDenyList),
webhookErrorUrl,
deadline,
)
if err != nil {
return fmt.Errorf("filter webhook error URL: %w", err)
}
@@ -401,7 +401,7 @@ Feature: /forms/chromium/convert/html
Then the response header "Content-Type" should be "application/pdf"
Then there should be 1 PDF(s) in the response
Then the Gotenberg container should log the following entries:
| 'file:///etc/passwd' does not match the expression from the allowed list |
| 'file:///etc/passwd' does not match any expression from the allowed list |
Scenario: POST /forms/chromium/convert/html (JavaScript Enabled)
Given I have a default Gotenberg container
@@ -357,7 +357,7 @@ Feature: /forms/chromium/convert/markdown
Then the response header "Content-Type" should be "application/pdf"
Then there should be 1 PDF(s) in the response
Then the Gotenberg container should log the following entries:
| 'file:///etc/passwd' does not match the expression from the allowed list |
| 'file:///etc/passwd' does not match any expression from the allowed list |
Scenario: POST /forms/chromium/convert/markdown (JavaScript Enabled)
Given I have a default Gotenberg container
@@ -476,7 +476,7 @@ Feature: /forms/chromium/convert/url
Then there should be 1 PDF(s) in the response
Then the Gotenberg container should NOT log the following entries:
# Modern browsers block file URIs from being loaded into iframes when the parent page is served over HTTP/HTTPS.
| 'file:///etc/passwd' does not match the expression from the allowed list |
| 'file:///etc/passwd' does not match any expression from the allowed list |
Scenario: POST /forms/chromium/convert/url (JavaScript Enabled)
Given I have a default Gotenberg container