fix(webhook): detach async goroutine from pooled echo.Context

This commit is contained in:
Julien Neuhart
2026-04-21 20:16:04 +02:00
parent c204cadfc5
commit 4b192b1498
5 changed files with 195 additions and 3 deletions
+12 -2
View File
@@ -337,7 +337,10 @@ func basicAuthMiddleware(username, password string) echo.MiddlewareFunc {
func contextMiddleware(fs *gotenberg.FileSystem, timeout time.Duration, bodyLimit int64, downloadFromCfg downloadFromConfig) echo.MiddlewareFunc {
return func(next echo.HandlerFunc) echo.HandlerFunc {
return func(c echo.Context) error {
logger := c.Get("logger").(*slog.Logger)
logger, _ := c.Get("logger").(*slog.Logger)
if logger == nil {
return errors.New("no logger in context (possible pool reuse)")
}
// We create a context with a timeout so that underlying processes are
// able to stop early and correctly handle a timeout scenario.
@@ -395,7 +398,14 @@ func contextMiddleware(fs *gotenberg.FileSystem, timeout time.Duration, bodyLimi
func hardTimeoutMiddleware(hardTimeout time.Duration) echo.MiddlewareFunc {
return func(next echo.HandlerFunc) echo.HandlerFunc {
return func(c echo.Context) error {
logger := c.Get("logger").(*slog.Logger)
// Guard the type assertion so a pooled [echo.Context] whose
// store has been recycled under us does not crash the process.
// See the webhook async handler for the race this protects
// against.
logger, _ := c.Get("logger").(*slog.Logger)
if logger == nil {
return errors.New("no logger in context (possible pool reuse)")
}
// Define a hard timeout if the route handler fails to timeout as
// expected.
+39
View File
@@ -0,0 +1,39 @@
package api
import (
"net/http"
"net/http/httptest"
"strings"
"testing"
"time"
"github.com/labstack/echo/v4"
)
func TestHardTimeoutMiddleware_MissingLoggerReturnsErrorInsteadOfPanicking(t *testing.T) {
mw := hardTimeoutMiddleware(100 * time.Millisecond)
handler := mw(func(c echo.Context) error { return nil })
e := echo.New()
req := httptest.NewRequest(http.MethodGet, "/", nil)
rec := httptest.NewRecorder()
c := e.NewContext(req, rec)
// c has no "logger" key, mimicking a pooled context whose store was
// recycled under a concurrently running webhook goroutine. The
// middleware must surface an error instead of panicking on the
// unchecked type assertion the pre-fix code relied on.
defer func() {
if r := recover(); r != nil {
t.Fatalf("hardTimeoutMiddleware panicked: %v", r)
}
}()
err := handler(c)
if err == nil {
t.Fatal("expected an error for missing logger, got nil")
}
if !strings.Contains(err.Error(), "logger") {
t.Fatalf("error = %q, want a message mentioning logger", err)
}
}
+26 -1
View File
@@ -334,13 +334,38 @@ func webhookMiddleware(w *Webhook) api.Middleware {
// As a webhook URL has been given, we handle the request in a
// goroutine and return immediately.
//
// Echo returns the echo.Context back to its sync.Pool as
// soon as this synchronous handler returns ErrAsyncProcess.
// A concurrent request can then claim the recycled context
// and c.Reset() wipes the shared store, which would cause
// any c.Get("...").(T) assertion downstream of the webhook
// goroutine to panic on a nil value and crash the process.
// Snapshot the keys downstream reads onto a detached
// wrapper before spawning the goroutine so pool reuse
// cannot reach into our async work.
detached := newPoolSafeContext(c, "logger", "context", "correlationId", "correlationIdHeader", "startTime")
w.asyncCount.Add(1)
go func() {
defer cancel()
defer w.asyncCount.Add(-1)
// Defense in depth: any panic that escapes the
// downstream chain (including future regressions of
// the pool-reuse bug) routes through handleError and
// leaves the process running.
defer func() {
r := recover()
if r == nil {
return
}
ctx.Log().Error(fmt.Sprintf("webhook goroutine panic: %v", r))
handleError(fmt.Errorf("internal error: %v", r))
}()
// Call the next middleware in the chain.
err := next(c)
err := next(detached)
if err != nil {
if errors.Is(err, api.ErrNoOutputFile) {
errNoOutputFile := fmt.Errorf("%w - the webhook middleware cannot handle the result of this route", err)
+58
View File
@@ -0,0 +1,58 @@
package webhook
import (
"sync"
"github.com/labstack/echo/v4"
)
// poolSafeContext wraps an [echo.Context] and keeps a private snapshot of
// the values that downstream middleware and route handlers read from the
// store. Echo returns an [echo.Context] to its sync.Pool as soon as the
// synchronous handler returns, including when the webhook middleware
// returns [api.ErrAsyncProcess]. A concurrent request can then claim the
// recycled context and c.Reset() wipes the shared store out from under
// the webhook goroutine, which causes any
// `c.Get("logger").(*slog.Logger)`-style assertion further down the
// chain to panic on a nil value.
//
// Wrapping c before handing it to the goroutine insulates the async work
// from pool reuse: Get/Set read and write the private store while every
// other [echo.Context] method delegates to the embedded context for
// anything the downstream might still need.
type poolSafeContext struct {
echo.Context
mu sync.RWMutex
store map[string]any
}
// newPoolSafeContext snapshots the given keys from c into a detached
// store and returns a wrapper whose Get/Set operate on that store
// exclusively. Keys absent from c are omitted; the wrapper still
// returns nil for them, matching [echo.Context.Get] behavior.
func newPoolSafeContext(c echo.Context, keys ...string) *poolSafeContext {
store := make(map[string]any, len(keys))
for _, key := range keys {
if v := c.Get(key); v != nil {
store[key] = v
}
}
return &poolSafeContext{Context: c, store: store}
}
// Get returns the value stored in the detached store, not the embedded
// context's pooled store.
func (p *poolSafeContext) Get(key string) any {
p.mu.RLock()
defer p.mu.RUnlock()
return p.store[key]
}
// Set writes to the detached store, not the embedded context's pooled
// store. This prevents downstream middleware writes from leaking into a
// later request that claims the same pooled context.
func (p *poolSafeContext) Set(key string, val any) {
p.mu.Lock()
defer p.mu.Unlock()
p.store[key] = val
}
@@ -0,0 +1,60 @@
package webhook
import (
"log/slog"
"net/http"
"net/http/httptest"
"testing"
"github.com/labstack/echo/v4"
)
func TestPoolSafeContext_SurvivesUnderlyingReset(t *testing.T) {
e := echo.New()
req := httptest.NewRequest(http.MethodPost, "/", nil)
rec := httptest.NewRecorder()
c := e.NewContext(req, rec)
logger := slog.Default()
c.Set("logger", logger)
c.Set("correlationId", "abc-123")
detached := newPoolSafeContext(c, "logger", "correlationId", "missing")
// Simulate Echo recycling c for a concurrent request. Reset wipes the
// shared store, which is exactly the crash scenario the wrapper
// guards against.
c.Reset(httptest.NewRequest(http.MethodGet, "/", nil), httptest.NewRecorder())
if got, _ := detached.Get("logger").(*slog.Logger); got != logger {
t.Fatalf("logger = %v, want snapshotted default logger", got)
}
if got, _ := detached.Get("correlationId").(string); got != "abc-123" {
t.Fatalf("correlationId = %q, want %q", got, "abc-123")
}
if got := detached.Get("missing"); got != nil {
t.Fatalf("missing key returned %v, want nil", got)
}
// Underlying c must remain clean.
if c.Get("logger") != nil {
t.Fatalf("underlying c.Get(\"logger\") leaked wrapper state after reset")
}
}
func TestPoolSafeContext_SetDoesNotTouchUnderlying(t *testing.T) {
e := echo.New()
req := httptest.NewRequest(http.MethodPost, "/", nil)
rec := httptest.NewRecorder()
c := e.NewContext(req, rec)
detached := newPoolSafeContext(c)
detached.Set("foo", "bar")
if got, _ := detached.Get("foo").(string); got != "bar" {
t.Fatalf("detached Get = %q, want bar", got)
}
if c.Get("foo") != nil {
t.Fatalf("Set leaked %q to the underlying pooled context", "foo")
}
}