diff --git a/pkg/modules/chromium/browser.go b/pkg/modules/chromium/browser.go index 4eb63c1..0e8bebc 100644 --- a/pkg/modules/chromium/browser.go +++ b/pkg/modules/chromium/browser.go @@ -317,7 +317,11 @@ func (b *chromiumBrowser) pdf(ctx context.Context, logger *slog.Logger, url, out disableJavaScriptActionFunc(logger, b.arguments.disableJavaScript), setCookiesActionFunc(logger, options.Cookies), userAgentOverride(logger, options.UserAgent), - navigateActionFunc(logger, url, options.SkipNetworkIdleEvent, options.SkipNetworkAlmostIdleEvent), + navigateActionFunc(logger, url, navigateOptions{ + SkipNetworkIdleEvent: options.SkipNetworkIdleEvent, + SkipNetworkAlmostIdleEvent: options.SkipNetworkAlmostIdleEvent, + SkipLifecycleEvents: options.WaitForExpression != "" || options.WaitForSelector != "", + }), hideDefaultWhiteBackgroundActionFunc(logger, options.OmitBackground, options.PrintBackground), forceExactColorsActionFunc(logger, options.PrintBackground), emulateMediaTypeActionFunc(logger, options.EmulatedMediaType, options.EmulatedMediaFeatures), @@ -343,7 +347,11 @@ func (b *chromiumBrowser) screenshot(ctx context.Context, logger *slog.Logger, u disableJavaScriptActionFunc(logger, b.arguments.disableJavaScript), setCookiesActionFunc(logger, options.Cookies), userAgentOverride(logger, options.UserAgent), - navigateActionFunc(logger, url, options.SkipNetworkIdleEvent, options.SkipNetworkAlmostIdleEvent), + navigateActionFunc(logger, url, navigateOptions{ + SkipNetworkIdleEvent: options.SkipNetworkIdleEvent, + SkipNetworkAlmostIdleEvent: options.SkipNetworkAlmostIdleEvent, + SkipLifecycleEvents: options.WaitForExpression != "" || options.WaitForSelector != "", + }), hideDefaultWhiteBackgroundActionFunc(logger, options.OmitBackground, true), forceExactColorsActionFunc(logger, true), emulateMediaTypeActionFunc(logger, options.EmulatedMediaType, options.EmulatedMediaFeatures), diff --git a/pkg/modules/chromium/chromium.go b/pkg/modules/chromium/chromium.go index 
ecabb54..1fa2ac6 100644 --- a/pkg/modules/chromium/chromium.go +++ b/pkg/modules/chromium/chromium.go @@ -160,11 +160,22 @@ type Options struct { WaitWindowStatus string // WaitForExpression is the custom JavaScript expression to wait before - // converting an HTML document until it returns true + // converting an HTML document until it returns true. + // + // When set, Gotenberg skips the browser lifecycle events + // (DomContentEventFired, LoadEventFired, LoadingFinished, + // networkIdle, networkAlmostIdle) during navigation and treats the + // expression as the sole readiness signal. Encode any load-event + // dependency into the expression itself (for example + // "document.readyState === 'complete' && window.myAppReady"). WaitForExpression string // WaitForSelector is the element query to wait until visible before // converting an HTML document. + // + // When set, Gotenberg skips the browser lifecycle events during + // navigation and treats the selector visibility as the sole + // readiness signal. WaitForSelector string // Cookies are the cookies to put in the Chromium cookies' jar. diff --git a/pkg/modules/chromium/tasks.go b/pkg/modules/chromium/tasks.go index b37f50c..8111f3e 100644 --- a/pkg/modules/chromium/tasks.go +++ b/pkg/modules/chromium/tasks.go @@ -328,7 +328,32 @@ func userAgentOverride(logger *slog.Logger, userAgent string) chromedp.ActionFun // } // } -func navigateActionFunc(logger *slog.Logger, url string, skipNetworkIdleEvent, skipNetworkAlmostIdleEvent bool) chromedp.ActionFunc { +// navigateOptions carries the lifecycle-gating knobs for +// [navigateActionFunc]. +type navigateOptions struct { + // SkipNetworkIdleEvent, when true, skips the wait for the + // "networkIdle" lifecycle event. + SkipNetworkIdleEvent bool + + // SkipNetworkAlmostIdleEvent, when true, skips the wait for the + // "networkAlmostIdle" lifecycle event. 
+ SkipNetworkAlmostIdleEvent bool + + // SkipLifecycleEvents, when true, makes navigation return as soon as + // the [page.Navigate] RPC is acknowledged. All lifecycle waits + // (DomContentEventFired, LoadEventFired, LoadingFinished, and + // both network-idle events) are bypassed. Callers set this when + // the operator provided an explicit readiness signal + // (waitForExpression or waitForSelector) that will gate the + // downstream print or screenshot action, so Gotenberg does not + // need to also impose its own lifecycle gate. Pages whose load + // lifecycle never fires cleanly (blocking scripts, streaming + // responses, misbehaving iframes) otherwise stall navigate and + // starve the explicit signal. + SkipLifecycleEvents bool +} + +func navigateActionFunc(logger *slog.Logger, url string, opts navigateOptions) chromedp.ActionFunc { return func(ctx context.Context) error { logger.DebugContext(ctx, fmt.Sprintf("navigate to '%s'", url)) @@ -337,19 +362,24 @@ func navigateActionFunc(logger *slog.Logger, url string, skipNetworkIdleEvent, s return fmt.Errorf("navigate to '%s': %w", url, err) } + if opts.SkipLifecycleEvents { + logger.DebugContext(ctx, "skipping lifecycle events; waitForExpression or waitForSelector gates readiness") + return nil + } + waitFunc := []func() error{ waitForEventDomContentEventFired(ctx, logger), waitForEventLoadEventFired(ctx, logger), waitForEventLoadingFinished(ctx, logger), } - if !skipNetworkIdleEvent { + if !opts.SkipNetworkIdleEvent { waitFunc = append(waitFunc, waitForEventNetworkIdle(ctx, logger)) } else { logger.DebugContext(ctx, "skipping network idle event") } - if !skipNetworkAlmostIdleEvent { + if !opts.SkipNetworkAlmostIdleEvent { waitFunc = append(waitFunc, waitForEventNetworkAlmostIdle(ctx, logger)) } else { logger.DebugContext(ctx, "skipping network almost idle event") diff --git a/test/integration/features/chromium_convert_html.feature b/test/integration/features/chromium_convert_html.feature index e8b1414..fe5c338 100644 ---
a/test/integration/features/chromium_convert_html.feature +++ b/test/integration/features/chromium_convert_html.feature @@ -195,6 +195,29 @@ Feature: /forms/chromium/convert/html Wait delay > 2 seconds or expression window globalVar === 'ready' returns true. """ + Scenario: POST /forms/chromium/convert/html (waitForExpression skips lifecycle events) + Given I have a Gotenberg container with the following environment variable(s): + | LOG_LEVEL | debug | + When I make a "POST" request to Gotenberg at the "/forms/chromium/convert/html" endpoint with the following form data and header(s): + | files | testdata/feature-rich-html/index.html | file | + | waitForExpression | window.globalVar === 'ready' | field | + Then the response status code should be 200 + Then the response header "Content-Type" should be "application/pdf" + Then there should be 1 PDF(s) in the response + Then the Gotenberg container should log the following entries: + | skipping lifecycle events; waitForExpression or waitForSelector gates readiness | + + Scenario: POST /forms/chromium/convert/html (no readiness signal keeps lifecycle waits) + Given I have a Gotenberg container with the following environment variable(s): + | LOG_LEVEL | debug | + When I make a "POST" request to Gotenberg at the "/forms/chromium/convert/html" endpoint with the following form data and header(s): + | files | testdata/page-1-html/index.html | file | + Then the response status code should be 200 + Then the response header "Content-Type" should be "application/pdf" + Then there should be 1 PDF(s) in the response + Then the Gotenberg container should NOT log the following entries: + | skipping lifecycle events; waitForExpression or waitForSelector gates readiness | + Scenario: POST /forms/chromium/convert/html (Wait For Selector) Given I have a default Gotenberg container When I make a "POST" request to Gotenberg at the "/forms/chromium/convert/html" endpoint with the following form data and header(s):