mirror of
https://github.com/gotenberg/gotenberg.git
synced 2026-07-02 08:27:41 +08:00
338 lines
11 KiB
Go
338 lines
11 KiB
Go
package api
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"log/slog"
|
|
"net"
|
|
"net/http"
|
|
"net/url"
|
|
"os"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/dlclark/regexp2"
|
|
|
|
"github.com/gotenberg/gotenberg/v8/pkg/gotenberg"
|
|
)
|
|
|
|
// outboundProxyOptions configures a [libreOfficeProxy].
|
|
type outboundProxyOptions struct {
|
|
allowList []*regexp2.Regexp
|
|
denyList []*regexp2.Regexp
|
|
denyPrivateIPs bool
|
|
denyPublicIPs bool
|
|
}
|
|
|
|
// libreOfficeProxy is an HTTP/HTTPS forward proxy that LibreOffice routes
|
|
// outbound requests through. Every proxied request goes through
|
|
// [gotenberg.DecideOutbound] so the same allow/deny lists and IP-class
|
|
// filters that protect chromium and webhook fetches also apply to
|
|
// soffice's own libcurl-driven fetches.
|
|
//
|
|
// soffice triggers an outbound request whenever a document references
|
|
// external content (OOXML images via TargetMode="External", RTF
|
|
// INCLUDEPICTURE, ODT linked images). Without a filtering proxy in the
|
|
// path those fetches bypass every Go-side SSRF guard because they
|
|
// originate inside the soffice subprocess.
|
|
type libreOfficeProxy struct {
|
|
listener net.Listener
|
|
server *http.Server
|
|
client *http.Client
|
|
opts outboundProxyOptions
|
|
logger *slog.Logger
|
|
|
|
stopOnce sync.Once
|
|
}
|
|
|
|
// newLibreOfficeProxy binds a proxy listener to a free local port and
|
|
// applies opts to every proxied request. Callers must call [Start]
|
|
// before pointing soffice at the proxy and [Stop] on shutdown.
|
|
func newLibreOfficeProxy(logger *slog.Logger, opts outboundProxyOptions) (*libreOfficeProxy, error) {
|
|
listener, err := net.Listen("tcp", "127.0.0.1:0")
|
|
if err != nil {
|
|
return nil, fmt.Errorf("bind LibreOffice proxy listener: %w", err)
|
|
}
|
|
|
|
decideOpts := []gotenberg.DecideOption{
|
|
gotenberg.WithDenyPrivateIPs(opts.denyPrivateIPs),
|
|
gotenberg.WithDenyPublicIPs(opts.denyPublicIPs),
|
|
}
|
|
|
|
p := &libreOfficeProxy{
|
|
listener: listener,
|
|
client: gotenberg.NewOutboundHttpClient(0, opts.allowList, opts.denyList, decideOpts...),
|
|
opts: opts,
|
|
logger: logger.With(slog.String("logger", "libreoffice-proxy")),
|
|
}
|
|
p.server = &http.Server{
|
|
Handler: p,
|
|
ReadHeaderTimeout: 10 * time.Second,
|
|
}
|
|
return p, nil
|
|
}
|
|
|
|
// Addr returns the host:port the proxy listens on.
|
|
func (p *libreOfficeProxy) Addr() string {
|
|
return p.listener.Addr().String()
|
|
}
|
|
|
|
// Start serves proxy requests in a background goroutine until [Stop] is
|
|
// called.
|
|
func (p *libreOfficeProxy) Start() {
|
|
go func() {
|
|
err := p.server.Serve(p.listener)
|
|
if err != nil && !errors.Is(err, http.ErrServerClosed) {
|
|
p.logger.ErrorContext(context.Background(), fmt.Sprintf("LibreOffice proxy serve: %s", err))
|
|
}
|
|
}()
|
|
}
|
|
|
|
// Stop gracefully shuts the proxy down. Subsequent calls are no-ops.
|
|
func (p *libreOfficeProxy) Stop(ctx context.Context) error {
|
|
var err error
|
|
p.stopOnce.Do(func() {
|
|
err = p.server.Shutdown(ctx)
|
|
})
|
|
if err != nil {
|
|
return fmt.Errorf("shutdown LibreOffice proxy: %w", err)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// ServeHTTP dispatches between CONNECT (HTTPS tunnels) and the absolute
|
|
// URL form (HTTP forward).
|
|
func (p *libreOfficeProxy) ServeHTTP(w http.ResponseWriter, r *http.Request) {
|
|
if r.Method == http.MethodConnect {
|
|
p.handleConnect(w, r)
|
|
return
|
|
}
|
|
p.handleHttp(w, r)
|
|
}
|
|
|
|
// handleHttp forwards a plain HTTP request whose URL line is absolute
|
|
// (RFC 7230 5.3.2) through the outbound HTTP client, which validates
|
|
// the destination and pins the dial.
|
|
func (p *libreOfficeProxy) handleHttp(w http.ResponseWriter, r *http.Request) {
|
|
if r.URL == nil || !r.URL.IsAbs() {
|
|
http.Error(w, "proxy: expected absolute URI", http.StatusBadRequest)
|
|
return
|
|
}
|
|
|
|
outReq := r.Clone(r.Context())
|
|
outReq.RequestURI = ""
|
|
removeHopByHopHeaders(outReq.Header)
|
|
|
|
// gosec G704: outReq.URL is exactly what the proxy is here to filter; the
|
|
// http.Client returned by NewOutboundHttpClient validates and pins it.
|
|
resp, err := p.client.Do(outReq) //nolint:gosec
|
|
if err != nil {
|
|
p.logger.WarnContext(r.Context(), fmt.Sprintf("LibreOffice proxy rejected forward to '%s': %s", r.URL.String(), err))
|
|
http.Error(w, "proxy: destination rejected", http.StatusForbidden)
|
|
return
|
|
}
|
|
defer func() {
|
|
closeErr := resp.Body.Close()
|
|
if closeErr != nil {
|
|
p.logger.DebugContext(r.Context(), fmt.Sprintf("close upstream response body: %s", closeErr))
|
|
}
|
|
}()
|
|
|
|
removeHopByHopHeaders(resp.Header)
|
|
for key, values := range resp.Header {
|
|
for _, value := range values {
|
|
w.Header().Add(key, value)
|
|
}
|
|
}
|
|
w.WriteHeader(resp.StatusCode)
|
|
_, copyErr := io.Copy(w, resp.Body)
|
|
if copyErr != nil {
|
|
p.logger.DebugContext(r.Context(), fmt.Sprintf("copy proxied response body: %s", copyErr))
|
|
}
|
|
}
|
|
|
|
// handleConnect implements an HTTPS tunnel. It validates the destination
|
|
// host through [gotenberg.DecideOutbound] (synthesizing an https URL),
|
|
// dials the pinned IPs returned by the decision, and splices bytes
|
|
// between client and server.
|
|
func (p *libreOfficeProxy) handleConnect(w http.ResponseWriter, r *http.Request) {
|
|
host, port, err := net.SplitHostPort(r.Host)
|
|
if err != nil {
|
|
http.Error(w, "proxy: invalid CONNECT target", http.StatusBadRequest)
|
|
return
|
|
}
|
|
|
|
deadline, ok := r.Context().Deadline()
|
|
if !ok {
|
|
deadline = time.Now().Add(30 * time.Second)
|
|
}
|
|
|
|
rawURL := (&url.URL{Scheme: "https", Host: net.JoinHostPort(host, port)}).String()
|
|
|
|
decision, err := gotenberg.DecideOutbound(r.Context(), rawURL, p.opts.allowList, p.opts.denyList, deadline,
|
|
gotenberg.WithDenyPrivateIPs(p.opts.denyPrivateIPs),
|
|
gotenberg.WithDenyPublicIPs(p.opts.denyPublicIPs),
|
|
)
|
|
if err != nil {
|
|
p.logger.WarnContext(r.Context(), fmt.Sprintf("LibreOffice proxy rejected CONNECT to '%s': %s", rawURL, err))
|
|
http.Error(w, "proxy: destination rejected", http.StatusForbidden)
|
|
return
|
|
}
|
|
|
|
var dest net.Conn
|
|
switch {
|
|
case len(decision.Pinned) > 0:
|
|
dest, err = gotenberg.DialPinned(r.Context(), "tcp", decision.Pinned, port)
|
|
default:
|
|
// Bypass (allow-list match) or non-http-like scheme: dial directly.
|
|
// gosec G704: host:port has cleared DecideOutbound above.
|
|
dest, err = net.DialTimeout("tcp", net.JoinHostPort(host, port), 10*time.Second) //nolint:gosec
|
|
}
|
|
if err != nil {
|
|
p.logger.WarnContext(r.Context(), fmt.Sprintf("LibreOffice proxy CONNECT dial to '%s' failed: %s", rawURL, err))
|
|
http.Error(w, "proxy: dial failed", http.StatusBadGateway)
|
|
return
|
|
}
|
|
|
|
hijacker, ok := w.(http.Hijacker)
|
|
if !ok {
|
|
_ = dest.Close()
|
|
http.Error(w, "proxy: hijack unsupported", http.StatusInternalServerError)
|
|
return
|
|
}
|
|
client, _, err := hijacker.Hijack()
|
|
if err != nil {
|
|
_ = dest.Close()
|
|
p.logger.WarnContext(r.Context(), fmt.Sprintf("LibreOffice proxy hijack failed: %s", err))
|
|
return
|
|
}
|
|
|
|
_, writeErr := client.Write([]byte("HTTP/1.1 200 Connection Established\r\n\r\n"))
|
|
if writeErr != nil {
|
|
_ = client.Close()
|
|
_ = dest.Close()
|
|
return
|
|
}
|
|
|
|
go pipeAndClose(client, dest)
|
|
go pipeAndClose(dest, client)
|
|
}
|
|
|
|
// pipeAndClose copies bytes from src to dst and closes both ends when
|
|
// the copy finishes.
|
|
func pipeAndClose(dst, src net.Conn) {
|
|
defer func() {
|
|
_ = dst.Close()
|
|
_ = src.Close()
|
|
}()
|
|
_, _ = io.Copy(dst, src)
|
|
}
|
|
|
|
// hopByHopHeaders is the set of hop-by-hop headers from RFC 7230 6.1
|
|
// plus the ones soffice adds when acting as a forward-proxy client.
|
|
var hopByHopHeaders = []string{
|
|
"Connection",
|
|
"Proxy-Connection",
|
|
"Keep-Alive",
|
|
"Proxy-Authenticate",
|
|
"Proxy-Authorization",
|
|
"Te",
|
|
"Trailer",
|
|
"Transfer-Encoding",
|
|
"Upgrade",
|
|
}
|
|
|
|
// sofficeProfileConfigTmpl is the registrymodifications.xcu the soffice
|
|
// daemon loads at startup. It does two things:
|
|
//
|
|
// 1. Routes every HTTP and HTTPS fetch through proxyHost:proxyPort so
|
|
// soffice's own libcurl fetches hit the in-process SSRF proxy.
|
|
// 2. Sets BlockUntrustedRefererLinks so soffice refuses to load content
|
|
// linked from a document that sits in an untrusted location.
|
|
//
|
|
// The second setting closes the local-read and direct-fetch vectors the
|
|
// proxy cannot see. A document that links an absolute path
|
|
// (file:///etc/...) or any URL is loaded from the per-request temp dir,
|
|
// which is never a trusted location, so soffice drops the linked content
|
|
// instead of resolving it. Embedded content (stored inside the document)
|
|
// is unaffected.
|
|
//
|
|
// The %s placeholders accept the proxy host and port respectively (host
|
|
// first, port second, repeated for HTTP and HTTPS).
|
|
const sofficeProfileConfigTmpl = `<?xml version="1.0" encoding="UTF-8"?>
|
|
<oor:items xmlns:oor="http://openoffice.org/2001/registry" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
|
|
<item oor:path="/org.openoffice.Office.Common/Security/Scripting"><prop oor:name="BlockUntrustedRefererLinks" oor:op="fuse"><value>true</value></prop></item>
|
|
<item oor:path="/org.openoffice.Inet/Settings"><prop oor:name="ooInetProxyType" oor:op="fuse"><value>1</value></prop></item>
|
|
<item oor:path="/org.openoffice.Inet/Settings"><prop oor:name="ooInetHTTPProxyName" oor:op="fuse"><value>%s</value></prop></item>
|
|
<item oor:path="/org.openoffice.Inet/Settings"><prop oor:name="ooInetHTTPProxyPort" oor:op="fuse"><value>%s</value></prop></item>
|
|
<item oor:path="/org.openoffice.Inet/Settings"><prop oor:name="ooInetHTTPSProxyName" oor:op="fuse"><value>%s</value></prop></item>
|
|
<item oor:path="/org.openoffice.Inet/Settings"><prop oor:name="ooInetHTTPSProxyPort" oor:op="fuse"><value>%s</value></prop></item>
|
|
<item oor:path="/org.openoffice.Inet/Settings"><prop oor:name="ooInetNoProxy" oor:op="fuse"><value></value></prop></item>
|
|
</oor:items>
|
|
`
|
|
|
|
// writeSofficeProfileConfig drops a registrymodifications.xcu file into
|
|
// userProfileDirPath/user/ that points soffice's UCB layer at proxyAddr
|
|
// for both HTTP and HTTPS and blocks linked content from untrusted
|
|
// locations. proxyAddr must be a host:port pair.
|
|
func writeSofficeProfileConfig(userProfileDirPath, proxyAddr string) error {
|
|
host, port, err := net.SplitHostPort(proxyAddr)
|
|
if err != nil {
|
|
return fmt.Errorf("split proxy address %q: %w", proxyAddr, err)
|
|
}
|
|
|
|
userDir := userProfileDirPath + "/user"
|
|
err = os.MkdirAll(userDir, 0o755)
|
|
if err != nil {
|
|
return fmt.Errorf("create soffice user profile directory: %w", err)
|
|
}
|
|
|
|
body := fmt.Sprintf(sofficeProfileConfigTmpl, host, port, host, port)
|
|
err = os.WriteFile(userDir+"/registrymodifications.xcu", []byte(body), 0o600)
|
|
if err != nil {
|
|
return fmt.Errorf("write registrymodifications.xcu: %w", err)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// sofficeProxyEnv overlays http_proxy/https_proxy on env so soffice's
|
|
// libcurl path also routes through proxyAddr. The environment variables
|
|
// supplement the registrymodifications.xcu config so coverage stays
|
|
// intact if soffice upgrades and one of the two paths regresses.
|
|
func sofficeProxyEnv(env []string, proxyAddr string) []string {
|
|
proxyURL := "http://" + proxyAddr
|
|
|
|
filtered := env[:0:0]
|
|
for _, kv := range env {
|
|
switch strings.ToLower(strings.SplitN(kv, "=", 2)[0]) {
|
|
case "http_proxy", "https_proxy", "no_proxy":
|
|
continue
|
|
}
|
|
filtered = append(filtered, kv)
|
|
}
|
|
|
|
return append(filtered,
|
|
"http_proxy="+proxyURL,
|
|
"https_proxy="+proxyURL,
|
|
"HTTP_PROXY="+proxyURL,
|
|
"HTTPS_PROXY="+proxyURL,
|
|
"no_proxy=",
|
|
"NO_PROXY=",
|
|
)
|
|
}
|
|
|
|
func removeHopByHopHeaders(h http.Header) {
|
|
if connection := h.Get("Connection"); connection != "" {
|
|
for name := range strings.SplitSeq(connection, ",") {
|
|
h.Del(strings.TrimSpace(name))
|
|
}
|
|
}
|
|
for _, name := range hopByHopHeaders {
|
|
h.Del(name)
|
|
}
|
|
}
|