diff --git a/pkg/modules/libreoffice/api/libreoffice.go b/pkg/modules/libreoffice/api/libreoffice.go index 4d2962a..a04a950 100644 --- a/pkg/modules/libreoffice/api/libreoffice.go +++ b/pkg/modules/libreoffice/api/libreoffice.go @@ -70,11 +70,13 @@ func (p *libreOfficeProcess) Start(logger *slog.Logger) error { // LibreOffice fetches external content (OOXML images via // TargetMode=External, RTF INCLUDEPICTURE, ODT linked images) inside - // its own libcurl. Route those fetches through the in-process proxy - // so the chromium/webhook SSRF filters apply. - if err := writeSofficeProxyConfig(userProfileDirPath, proxy.Addr()); err != nil { + // its own libcurl. The profile config routes those fetches through the + // in-process proxy so the chromium/webhook SSRF filters apply, and + // blocks content linked from untrusted locations so absolute-path + // (file://) and direct fetches are dropped at the source. + if err := writeSofficeProfileConfig(userProfileDirPath, proxy.Addr()); err != nil { _ = proxy.Stop(context.Background()) - return fmt.Errorf("write soffice proxy config: %w", err) + return fmt.Errorf("write soffice profile config: %w", err) } sofficeEnv := sofficeProxyEnv(os.Environ(), proxy.Addr()) diff --git a/pkg/modules/libreoffice/api/proxy.go b/pkg/modules/libreoffice/api/proxy.go index e8b369c..100f937 100644 --- a/pkg/modules/libreoffice/api/proxy.go +++ b/pkg/modules/libreoffice/api/proxy.go @@ -245,13 +245,26 @@ var hopByHopHeaders = []string{ "Upgrade", } -// sofficeProxyConfigTmpl is the registrymodifications.xcu fragment that -// tells soffice's UCB layer to route every HTTP and HTTPS fetch through -// proxyHost:proxyPort. The %s placeholders accept the proxy host and -// port respectively (host first, port second, repeated for HTTP and -// HTTPS). -const sofficeProxyConfigTmpl = ` +// sofficeProfileConfigTmpl is the registrymodifications.xcu the soffice +// daemon loads at startup. It does two things: +// +// 1. Routes every HTTP and HTTPS fetch through proxyHost:proxyPort so +// soffice's own libcurl fetches hit the in-process SSRF proxy. +// 2. Sets BlockUntrustedRefererLinks so soffice refuses to load content +// linked from a document that sits in an untrusted location. +// +// The second setting closes the local-read and direct-fetch vectors the +// proxy cannot see. A document that links an absolute path +// (file:///etc/...) or any URL is loaded from the per-request temp dir, +// which is never a trusted location, so soffice drops the linked content +// instead of resolving it. Embedded content (stored inside the document) +// is unaffected. +// +// The %s placeholders accept the proxy host and port respectively (host +// first, port second, repeated for HTTP and HTTPS). +const sofficeProfileConfigTmpl = ` + true 1 %s %s @@ -261,10 +274,11 @@ const sofficeProxyConfigTmpl = ` ` -// writeSofficeProxyConfig drops a registrymodifications.xcu file into +// writeSofficeProfileConfig drops a registrymodifications.xcu file into // userProfileDirPath/user/ that points soffice's UCB layer at proxyAddr -// for both HTTP and HTTPS. proxyAddr must be a host:port pair. -func writeSofficeProxyConfig(userProfileDirPath, proxyAddr string) error { +// for both HTTP and HTTPS and blocks linked content from untrusted +// locations. proxyAddr must be a host:port pair. +func writeSofficeProfileConfig(userProfileDirPath, proxyAddr string) error { host, port, err := net.SplitHostPort(proxyAddr) if err != nil { return fmt.Errorf("split proxy address %q: %w", proxyAddr, err) @@ -276,7 +290,7 @@ func writeSofficeProxyConfig(userProfileDirPath, proxyAddr string) error { return fmt.Errorf("create soffice user profile directory: %w", err) } - body := fmt.Sprintf(sofficeProxyConfigTmpl, host, port, host, port) + body := fmt.Sprintf(sofficeProfileConfigTmpl, host, port, host, port) err = os.WriteFile(userDir+"/registrymodifications.xcu", []byte(body), 0o600) if err != nil { return fmt.Errorf("write registrymodifications.xcu: %w", err) diff --git a/pkg/modules/libreoffice/api/proxy_test.go b/pkg/modules/libreoffice/api/proxy_test.go index 18773e3..efb4dd1 100644 --- a/pkg/modules/libreoffice/api/proxy_test.go +++ b/pkg/modules/libreoffice/api/proxy_test.go @@ -280,11 +280,11 @@ func TestLibreOfficeProxy_StopIsIdempotent(t *testing.T) { } } -func TestWriteSofficeProxyConfig(t *testing.T) { +func TestWriteSofficeProfileConfig(t *testing.T) { dir := t.TempDir() - if err := writeSofficeProxyConfig(dir, "127.0.0.1:9876"); err != nil { - t.Fatalf("writeSofficeProxyConfig: %v", err) + if err := writeSofficeProfileConfig(dir, "127.0.0.1:9876"); err != nil { + t.Fatalf("writeSofficeProfileConfig: %v", err) } body, err := os.ReadFile(filepath.Join(dir, "user", "registrymodifications.xcu")) @@ -297,6 +297,9 @@ func TestWriteSofficeProxyConfig(t *testing.T) { `ooInetHTTPProxyName`, `127.0.0.1`, `ooInetHTTPProxyPort`, `9876`, `ooInetHTTPSProxyName`, `ooInetHTTPSProxyPort`, + // Blocks linked content from untrusted locations, closing the + // file:// local-read and direct-fetch vectors the proxy cannot see. + `BlockUntrustedRefererLinks`, `true`, } { if !strings.Contains(string(body), want) { t.Errorf("xcu missing %q\nfull body:\n%s", want, body) @@ -304,8 +307,8 @@ func TestWriteSofficeProxyConfig(t *testing.T) { } } -func TestWriteSofficeProxyConfig_InvalidAddr(t *testing.T) { - err := writeSofficeProxyConfig(t.TempDir(), "not-a-host-port") +func TestWriteSofficeProfileConfig_InvalidAddr(t *testing.T) { + err := writeSofficeProfileConfig(t.TempDir(), "not-a-host-port") if err == nil { t.Fatal("expected error for malformed proxy address") } diff --git a/test/integration/features/libreoffice_convert.feature b/test/integration/features/libreoffice_convert.feature index ffb8bf3..d659347 100644 --- a/test/integration/features/libreoffice_convert.feature +++ b/test/integration/features/libreoffice_convert.feature @@ -939,3 +939,28 @@ Feature: /forms/libreoffice/convert Then the response header "Content-Type" should be "application/pdf" Then there should be 1 PDF(s) in the response Then the "foo.pdf" PDF should have 1 page(s) + + # An embedded image is stored inside the document, not linked, so blocking + # untrusted linked content leaves it untouched. Guards against over-blocking. + @libreoffice-linked-content + Scenario: POST /forms/libreoffice/convert (Embedded Image Survives) + Given I have a default Gotenberg container + When I make a "POST" request to Gotenberg at the "/forms/libreoffice/convert" endpoint with the following form data and header(s): + | files | testdata/libreoffice-embedded-image.fodt | file | + | Gotenberg-Output-Filename | foo | header | + Then the response status code should be 200 + Then there should be 1 PDF(s) in the response + Then the "foo.pdf" PDF should have 1 image(s) + + # An uploaded document always loads from an untrusted location, so soffice + # refuses to resolve any content it links (absolute file:// path or external + # URL). Closes the SSRF and local-file-read vector. + @libreoffice-linked-content + Scenario: POST /forms/libreoffice/convert (Linked External Resource Blocked) + Given I have a default Gotenberg container + When I make a "POST" request to Gotenberg at the "/forms/libreoffice/convert" endpoint with the following form data and header(s): + | files | testdata/libreoffice-linked-external.fodt | file | + | Gotenberg-Output-Filename | foo | header | + Then the response status code should be 200 + Then there should be 1 PDF(s) in the response + Then the "foo.pdf" PDF should have 0 image(s) diff --git a/test/integration/testdata/libreoffice-embedded-image.fodt b/test/integration/testdata/libreoffice-embedded-image.fodt new file mode 100644 index 0000000..ab7f7dd --- /dev/null +++ b/test/integration/testdata/libreoffice-embedded-image.fodt @@ -0,0 +1,7 @@ + + + + An embedded image is stored in the document and must survive. + iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAIAAACQd1PeAAAADElEQVR4nGP4z8AAAAMBAQDJ/pLvAAAAAElFTkSuQmCC + + diff --git a/test/integration/testdata/libreoffice-linked-external.fodt b/test/integration/testdata/libreoffice-linked-external.fodt new file mode 100644 index 0000000..b44b1e7 --- /dev/null +++ b/test/integration/testdata/libreoffice-linked-external.fodt @@ -0,0 +1,7 @@ + + + + An image linked by absolute path outside the document folder must not load. + + +