Files
gotenberg/pkg/modules/chromium/chromium.go
T

1109 lines
37 KiB
Go

package chromium
import (
"context"
"errors"
"fmt"
"log/slog"
"os"
"os/exec"
"strings"
"sync"
"syscall"
"time"
"github.com/alexliesenfeld/health"
"github.com/chromedp/cdproto/network"
"github.com/dlclark/regexp2"
flag "github.com/spf13/pflag"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/codes"
"go.opentelemetry.io/otel/metric"
semconv "go.opentelemetry.io/otel/semconv/v1.40.0"
"go.opentelemetry.io/otel/trace"
"github.com/gotenberg/gotenberg/v8/pkg/gotenberg"
"github.com/gotenberg/gotenberg/v8/pkg/modules/api"
)
// init registers the Chromium module with gotenberg when the package is
// loaded.
func init() {
	gotenberg.MustRegisterModule(&Chromium{})
}
// Sentinel errors returned by Chromium conversions. Callers are expected to
// match them with [errors.Is].
var (
	// ErrInvalidEmulatedMediaType happens if the emulated media type is
	// neither "screen" nor "print". Empty value is allowed, though.
	ErrInvalidEmulatedMediaType = errors.New("invalid emulated media type")
	// ErrInvalidEvaluationExpression happens if an evaluation expression
	// returns an exception or undefined.
	ErrInvalidEvaluationExpression = errors.New("invalid evaluation expression")
	// ErrInvalidSelectorQuery happens if a selector query returns an exception
	// or undefined.
	ErrInvalidSelectorQuery = errors.New("invalid selector query")
	// ErrRpccMessageTooLarge happens when the messages received by
	// ChromeDevTools are larger than 100 MB.
	ErrRpccMessageTooLarge = errors.New("rpcc message too large")
	// ErrInvalidHttpStatusCode happens when the status code from the main page
	// matches one of the entries in [Options.FailOnHttpStatusCodes].
	ErrInvalidHttpStatusCode = errors.New("invalid HTTP status code")
	// ErrInvalidResourceHttpStatusCode happens when the status code from one
	// or more resources matches one of the entries in
	// [Options.FailOnResourceHttpStatusCodes].
	ErrInvalidResourceHttpStatusCode = errors.New("invalid resource HTTP status code")
	// ErrConsoleExceptions happens when there are exceptions in the Chromium
	// console. It only happens if [Options.FailOnConsoleExceptions] is set to
	// true.
	ErrConsoleExceptions = errors.New("console exceptions")
	// ErrLoadingFailed happens when the main page failed to load.
	ErrLoadingFailed = errors.New("loading failed")
	// ErrResourceLoadingFailed happens when one or more resources failed to load.
	ErrResourceLoadingFailed = errors.New("resource loading failed")
	// The errors below are specific to PDF conversions.

	// ErrOmitBackgroundWithoutPrintBackground happens if
	// PdfOptions.OmitBackground is set to true but not PdfOptions.PrintBackground.
	ErrOmitBackgroundWithoutPrintBackground = errors.New("omit background without print background")
	// ErrPrintingFailed happens if the printing failed for an unknown reason.
	ErrPrintingFailed = errors.New("printing failed")
	// ErrInvalidPrinterSettings happens if the PdfOptions have one or more
	// aberrant values.
	ErrInvalidPrinterSettings = errors.New("invalid printer settings")
	// ErrPageRangesSyntaxError happens if the PdfOptions page
	// range syntax is invalid.
	ErrPageRangesSyntaxError = errors.New("page ranges syntax error")
	// ErrPageRangesExceedsPageCount happens if the PdfOptions have an invalid
	// page range.
	ErrPageRangesExceedsPageCount = errors.New("page ranges exceeds page count")
)
// Chromium is a module that provides both an [Api] and routes for converting
// an HTML document to PDF or capturing a screenshot of it.
type Chromium struct {
	// autoStart mirrors the "chromium-auto-start" flag.
	autoStart bool
	// disableRoutes mirrors the "chromium-disable-routes" flag.
	disableRoutes bool
	// maxConcurrency mirrors the "chromium-max-concurrency" flag.
	maxConcurrency int64
	// args gathers the browser settings built from flags and environment
	// variables during Provision.
	args browserArguments
	// logger is the module logger, tagged with logger=browser.
	logger *slog.Logger
	// browser is the Chromium browser created from args.
	browser browser
	// supervisor runs the conversions against the browser process.
	supervisor gotenberg.ProcessSupervisor
	// engine is the PDF engine handed to the PDF conversion routes.
	engine gotenberg.PdfEngine
	// version caches the result of detectVersion; versionOnce guards its
	// one-time resolution.
	version string
	versionOnce sync.Once
	// reqsCounter is the "chromium.requests.total" counter.
	reqsCounter metric.Int64Counter
	// errsCounter is the "chromium.errors.total" counter.
	errsCounter metric.Int64Counter
	// conversionDurationCounter is the "chromium.conversion.duration"
	// histogram (despite its name, it is not a counter).
	conversionDurationCounter metric.Float64Histogram
	// queueWaitDurationCounter is the "chromium.queue.wait.duration"
	// histogram.
	queueWaitDurationCounter metric.Float64Histogram
	// pdfOutputSizeCounter is the "chromium.pdf.output.size" histogram.
	pdfOutputSizeCounter metric.Int64Histogram
	// imageOutputSizeCounter is the "chromium.image.output.size" histogram.
	imageOutputSizeCounter metric.Int64Histogram
	// networkRequestsCounter is the "chromium.network.requests.total" counter.
	networkRequestsCounter metric.Int64Counter
	// networkBytesCounter is the "chromium.network.bytes" histogram.
	networkBytesCounter metric.Int64Histogram
}
// Options are the common options for all conversions.
type Options struct {
	// SkipNetworkIdleEvent sets if the conversion should skip waiting for the
	// "networkIdle" event, drastically improving the conversion speed. It may
	// not be suitable for all HTML documents, as some may not be fully
	// rendered until this event is fired.
	SkipNetworkIdleEvent bool
	// SkipNetworkAlmostIdleEvent sets if the conversion should skip waiting
	// for the "networkAlmostIdle" event.
	SkipNetworkAlmostIdleEvent bool
	// FailOnHttpStatusCodes sets if the conversion should fail if the status
	// code from the main page matches one of its entries.
	FailOnHttpStatusCodes []int64
	// FailOnResourceHttpStatusCodes sets if the conversion should fail if the
	// status code from at least one resource matches one of its entries.
	FailOnResourceHttpStatusCodes []int64
	// IgnoreResourceHttpStatusDomains excludes resources whose hostname matches
	// one of these domains from the application of
	// [Options.FailOnResourceHttpStatusCodes].
	//
	// A match happens if the hostname equals the domain or is a subdomain of it
	// (e.g., "browser.sentry-cdn.com" matches "sentry-cdn.com").
	//
	// Values are normalized (trimmed, lowercased) and may be provided as:
	//   - "example.com"
	//   - "*.example.com" or ".example.com"
	//   - "example.com:443" (port is ignored)
	//   - "https://example.com/path" (scheme/path are ignored)
	IgnoreResourceHttpStatusDomains []string
	// FailOnResourceLoadingFailed sets if the conversion should fail, as it
	// does for the main page, if Chromium fails to load at least one resource.
	FailOnResourceLoadingFailed bool
	// FailOnConsoleExceptions sets if the conversion should fail if there are
	// exceptions in the Chromium console.
	FailOnConsoleExceptions bool
	// WaitDelay is the duration to wait when loading an HTML document before
	// converting it.
	WaitDelay time.Duration
	// WaitWindowStatus is the window.status value to wait for before
	// converting an HTML document.
	WaitWindowStatus string
	// WaitForExpression is the custom JavaScript expression to wait for
	// before converting an HTML document; the wait ends once it returns true.
	WaitForExpression string
	// WaitForSelector is the element query to wait until visible before
	// converting an HTML document.
	WaitForSelector string
	// Cookies are the cookies to put in the Chromium cookies' jar.
	Cookies []Cookie
	// UserAgent overrides the default 'User-Agent' HTTP header.
	UserAgent string
	// ExtraHttpHeaders are extra HTTP headers to send by Chromium while
	// loading the HTML document.
	ExtraHttpHeaders []ExtraHttpHeader
	// EmulatedMediaType is the media type to emulate, either "screen" or
	// "print".
	EmulatedMediaType string
	// EmulatedMediaFeatures are the media features to emulate, e.g.,
	// [{"name": "prefers-color-scheme", "value": "dark"}].
	EmulatedMediaFeatures []EmulatedMediaFeature
	// OmitBackground hides the default white background and allows generating
	// PDFs with transparency.
	OmitBackground bool
	// AllowedFilePrefixes restricts file:// sub-resource access to only
	// these directory prefixes. Applied in listenForEventRequestPaused in
	// addition to the global allow/deny lists. An empty slice
	// default-denies every file:// sub-resource, so routes that legitimately
	// render local files (HTML, Markdown) must populate this with the
	// request working directory while routes that navigate remote URLs
	// leave it empty. Set internally by route handlers, not via form data.
	AllowedFilePrefixes []string
}
// EmulatedMediaFeature gathers the available entries for emulating a media
// feature. Both fields carry JSON tags ("name" and "value").
type EmulatedMediaFeature struct {
	// Name is the media feature name (e.g., "prefers-color-scheme",
	// "prefers-reduced-motion").
	// Required.
	Name string `json:"name"`
	// Value is the media feature value (e.g., "dark", "reduce").
	// Required.
	Value string `json:"value"`
}
// DefaultOptions returns the default values for Options.
//
// Only the fields whose default differs from the Go zero value are set
// explicitly; every other field (slices, strings, booleans, WaitDelay) keeps
// its zero value.
func DefaultOptions() Options {
	var opts Options

	// Both network idle events are skipped by default.
	opts.SkipNetworkIdleEvent = true
	opts.SkipNetworkAlmostIdleEvent = true

	// NOTE(review): 499 and 599 presumably stand for the whole 4xx and 5xx
	// ranges; confirm against the code consuming FailOnHttpStatusCodes.
	opts.FailOnHttpStatusCodes = []int64{499, 599}

	return opts
}
// PdfOptions are the available options for converting an HTML document to PDF.
// It embeds the common [Options].
type PdfOptions struct {
	Options
	// Landscape sets the paper orientation.
	Landscape bool
	// PrintBackground prints the background graphics.
	PrintBackground bool
	// Scale is the scale of the page rendering.
	Scale float64
	// SinglePage defines whether to print the entire content in one single
	// page.
	SinglePage bool
	// PaperWidth is the paper width, in inches.
	PaperWidth float64
	// PaperHeight is the paper height, in inches.
	PaperHeight float64
	// MarginTop is the top margin, in inches.
	MarginTop float64
	// MarginBottom is the bottom margin, in inches.
	MarginBottom float64
	// MarginLeft is the left margin, in inches.
	MarginLeft float64
	// MarginRight is the right margin, in inches.
	MarginRight float64
	// PageRanges is the page ranges to print, e.g., '1-5, 8, 11-13'. Empty
	// means all pages.
	PageRanges string
	// HeaderTemplate is the HTML template of the header. It should be a valid
	// HTML markup with the following classes used to inject printing values
	// into them:
	//   - date: formatted print date
	//   - title: document title
	//   - url: document location
	//   - pageNumber: current page number
	//   - totalPages: total pages in the document
	//
	// For example, <span class=title></span> would generate a span containing
	// the title.
	HeaderTemplate string
	// FooterTemplate is the HTML template of the footer. It should use the
	// same format as the HeaderTemplate.
	FooterTemplate string
	// PreferCssPageSize defines whether to prefer page size as defined by CSS.
	// If false, the content will be scaled to fit the paper size.
	PreferCssPageSize bool
	// GenerateDocumentOutline defines whether the document outline should be
	// embedded into the PDF.
	GenerateDocumentOutline bool
	// GenerateTaggedPdf defines whether to generate tagged (accessible)
	// PDF.
	GenerateTaggedPdf bool
}
// DefaultPdfOptions returns the default values for PdfOptions.
//
// The common options come from DefaultOptions. Fields that are not assigned
// below (Landscape, PrintBackground, SinglePage, PageRanges,
// PreferCssPageSize, GenerateDocumentOutline, GenerateTaggedPdf) default to
// their zero value.
func DefaultPdfOptions() PdfOptions {
	const (
		// margin is applied to all four sides, in inches.
		margin = 0.39
		// blankTemplate is the empty document used for header and footer.
		blankTemplate = "<html><head></head><body></body></html>"
	)

	opts := PdfOptions{Options: DefaultOptions()}

	opts.Scale = 1.0

	// Paper size, in inches.
	opts.PaperWidth = 8.5
	opts.PaperHeight = 11

	opts.MarginTop = margin
	opts.MarginBottom = margin
	opts.MarginLeft = margin
	opts.MarginRight = margin

	opts.HeaderTemplate = blankTemplate
	opts.FooterTemplate = blankTemplate

	return opts
}
// ScreenshotOptions are the available options for capturing a screenshot from
// an HTML document. It embeds the common [Options].
type ScreenshotOptions struct {
	Options
	// Width is the device screen width in pixels.
	Width int
	// Height is the device screen height in pixels.
	Height int
	// Clip defines whether to clip the screenshot according to the device
	// dimensions.
	Clip bool
	// Format is the image compression format: "png", "jpeg", or "webp".
	Format string
	// Quality is the compression quality from range [0..100] (jpeg only).
	Quality int
	// OptimizeForSpeed defines whether to optimize image encoding for speed,
	// not for resulting size.
	OptimizeForSpeed bool
	// DeviceScaleFactor is the ratio of the resolution in physical pixels to
	// the resolution in CSS pixels for the current display device.
	DeviceScaleFactor float64
}
// DefaultScreenshotOptions returns the default values for ScreenshotOptions.
//
// The common options come from DefaultOptions. Clip and OptimizeForSpeed are
// left at their zero value (false).
func DefaultScreenshotOptions() ScreenshotOptions {
	opts := ScreenshotOptions{Options: DefaultOptions()}

	// Device screen size, in pixels.
	opts.Width, opts.Height = 800, 600

	opts.Format = "png"
	opts.Quality = 100
	opts.DeviceScaleFactor = 1.0

	return opts
}
// Cookie gathers the available entries for setting a cookie in the Chromium
// cookies' jar. Optional fields are omitted from the JSON encoding when they
// hold their zero value.
type Cookie struct {
	// Name is the cookie name.
	// Required.
	Name string `json:"name"`
	// Value is the cookie value.
	// Required.
	Value string `json:"value"`
	// Domain is the cookie domain.
	// Required.
	Domain string `json:"domain"`
	// Path is the cookie path.
	// Optional.
	Path string `json:"path,omitempty"`
	// Secure sets the cookie secure if true.
	// Optional.
	Secure bool `json:"secure,omitempty"`
	// HttpOnly sets the cookie as HTTP-only if true.
	// Optional.
	HttpOnly bool `json:"httpOnly,omitempty"`
	// SameSite is the cookie 'Same-Site' status.
	// Optional.
	SameSite network.CookieSameSite `json:"sameSite,omitempty"`
}
// ExtraHttpHeader is an extra HTTP header to send by Chromium.
type ExtraHttpHeader struct {
	// Name is the header name.
	// Required.
	Name string
	// Value is the header value.
	// Required.
	Value string
	// Scope is the header scope. If nil, the header will be applied to ALL
	// requests from the page.
	// Optional.
	Scope *regexp2.Regexp
}
// Api helps to interact with Chromium for converting HTML documents to PDF
// and for capturing screenshots of them.
type Api interface {
	// Pdf converts the document at url to a PDF written at outputPath.
	Pdf(ctx context.Context, logger *slog.Logger, url, outputPath string, options PdfOptions) error
	// Screenshot captures a screenshot of the document at url and writes it
	// at outputPath.
	Screenshot(ctx context.Context, logger *slog.Logger, url, outputPath string, options ScreenshotOptions) error
}
// Provider is a module interface that exposes a method for creating an [Api]
// for other modules.
//
//	func (m *YourModule) Provision(ctx *gotenberg.Context) error {
//		provider, _ := ctx.Module(new(chromium.Provider))
//		api, _ := provider.(chromium.Provider).Chromium()
//	}
type Provider interface {
	// Chromium returns the [Api] used to drive Chromium conversions.
	Chromium() (Api, error)
}
// Descriptor returns a [Chromium]'s module descriptor: the module ID, the
// flag set declaring every "chromium-*" flag with its default value, and the
// constructor of a fresh module instance.
func (mod *Chromium) Descriptor() gotenberg.ModuleDescriptor {
	fs := flag.NewFlagSet("chromium", flag.ExitOnError)

	// Process lifecycle and capacity.
	fs.Int64("chromium-restart-after", 100, "Number of conversions after which Chromium will automatically restart. Set to 0 to disable this feature")
	fs.Int64("chromium-max-queue-size", 0, "Maximum request queue size for Chromium. Set to 0 to disable this feature")
	fs.Duration("chromium-idle-shutdown-timeout", 0, "Shutdown Chromium after being idle for the given duration. Set to 0 to disable this feature")
	fs.Int64("chromium-max-concurrency", 6, "Maximum number of concurrent conversions. Chromium supports up to 6")
	fs.Bool("chromium-auto-start", false, "Automatically launch Chromium upon initialization if set to true; otherwise, Chromium will start at the time of the first conversion")
	fs.Duration("chromium-start-timeout", 20*time.Second, "Maximum duration to wait for Chromium to start or restart")

	// Browser security and networking switches.
	fs.Bool("chromium-allow-insecure-localhost", false, "Ignore TLS/SSL errors on localhost")
	fs.Bool("chromium-ignore-certificate-errors", false, "Ignore the certificate errors")
	fs.Bool("chromium-disable-web-security", false, "Don't enforce the same-origin policy")
	fs.Bool("chromium-allow-file-access-from-files", false, "Allow file:// URIs to read other file:// URIs")
	fs.String("chromium-host-resolver-rules", "", "Set custom mappings to the host resolver")
	fs.String("chromium-proxy-server", "", "Set the outbound proxy server; this switch only affects HTTP and HTTPS requests")

	// URL filtering.
	fs.StringSlice("chromium-allow-list", []string{}, "Set the allowed URLs for Chromium using regular expressions - supports multiple values")
	fs.StringSlice("chromium-deny-list", []string{`^file:(?!//\/tmp/).*`}, "Set the denied URLs for Chromium using regular expressions - supports multiple values")
	fs.Bool("chromium-deny-private-ips", false, "Reject URLs whose host resolves to a non-public IP address (loopback, RFC1918, link-local, unique-local). Enable on deployments that accept untrusted form input to mitigate SSRF against internal services")
	fs.Bool("chromium-deny-public-ips", false, "Reject URLs whose host resolves to a public IP address. Enable on air-gapped or data-governed deployments to prevent outbound traffic from leaving a private network")

	// Per-conversion behavior and routes.
	fs.Bool("chromium-clear-cache", false, "Clear Chromium cache between each conversion")
	fs.Bool("chromium-clear-cookies", false, "Clear Chromium cookies between each conversion")
	fs.Bool("chromium-disable-javascript", false, "Disable JavaScript")
	fs.Bool("chromium-disable-routes", false, "Disable the routes")

	// Deprecated flags. MarkDeprecated only fails for an unknown flag or an
	// empty message, so an error here is a programming mistake.
	fs.Bool("chromium-incognito", false, "Start Chromium with incognito mode")
	if err := fs.MarkDeprecated("chromium-incognito", "this flag is ignored as it provides no benefits"); err != nil {
		panic(err)
	}

	return gotenberg.ModuleDescriptor{
		ID:      "chromium",
		FlagSet: fs,
		New:     func() gotenberg.Module { return new(Chromium) },
	}
}
// Provision sets the module properties.
//
// It reads the parsed flags and the CHROMIUM_BIN_PATH and
// CHROMIUM_HYPHEN_DATA_DIR_PATH environment variables, builds the browser and
// its process supervisor, resolves the PDF engine from the
// [gotenberg.PdfEngineProvider] module, and registers the metric instruments
// (observable gauges/counter, counters, and histograms).
//
// It returns an error if one of the environment variables is not set, if the
// PDF engine cannot be resolved, or if an instrument cannot be created.
func (mod *Chromium) Provision(ctx *gotenberg.Context) error {
	flags := ctx.ParsedFlags()
	mod.autoStart = flags.MustBool("chromium-auto-start")
	mod.disableRoutes = flags.MustBool("chromium-disable-routes")
	mod.maxConcurrency = flags.MustInt64("chromium-max-concurrency")
	// Both environment variables are mandatory; only their presence is
	// checked here, the paths themselves are checked in Validate.
	binPath, ok := os.LookupEnv("CHROMIUM_BIN_PATH")
	if !ok {
		return errors.New("CHROMIUM_BIN_PATH environment variable is not set; set it to the absolute path of the Chromium or Chrome binary")
	}
	hyphenDataDirPath, ok := os.LookupEnv("CHROMIUM_HYPHEN_DATA_DIR_PATH")
	if !ok {
		return errors.New("CHROMIUM_HYPHEN_DATA_DIR_PATH environment variable is not set; set it to the absolute path of the Chromium hyphenation data directory (it ships in the Gotenberg image)")
	}
	// Browser arguments, mostly a one-to-one mapping of the module flags.
	mod.args = browserArguments{
		binPath: binPath,
		allowInsecureLocalhost: flags.MustBool("chromium-allow-insecure-localhost"),
		ignoreCertificateErrors: flags.MustBool("chromium-ignore-certificate-errors"),
		disableWebSecurity: flags.MustBool("chromium-disable-web-security"),
		allowFileAccessFromFiles: flags.MustBool("chromium-allow-file-access-from-files"),
		hostResolverRules: flags.MustString("chromium-host-resolver-rules"),
		proxyServer: flags.MustString("chromium-proxy-server"),
		wsUrlReadTimeout: flags.MustDuration("chromium-start-timeout"),
		hyphenDataDirPath: hyphenDataDirPath,
		allowList: flags.MustRegexpSlice("chromium-allow-list"),
		denyList: flags.MustRegexpSlice("chromium-deny-list"),
		denyPrivateIPs: flags.MustBool("chromium-deny-private-ips"),
		denyPublicIPs: flags.MustBool("chromium-deny-public-ips"),
		clearCache: flags.MustBool("chromium-clear-cache"),
		clearCookies: flags.MustBool("chromium-clear-cookies"),
		disableJavaScript: flags.MustBool("chromium-disable-javascript"),
	}
	// Logger, tagged so browser logs can be told apart.
	mod.logger = gotenberg.Logger(mod).With(slog.String("logger", "browser"))
	// Process: the browser and the supervisor wrapping it. The supervisor
	// receives the restart-after, max-queue-size, max-concurrency and
	// idle-shutdown-timeout settings, in that order.
	mod.browser = newChromiumBrowser(mod.args)
	mod.supervisor = gotenberg.NewProcessSupervisor(mod.logger, "chromium", mod.browser, flags.MustInt64("chromium-restart-after"), flags.MustInt64("chromium-max-queue-size"), mod.maxConcurrency, flags.MustDuration("chromium-idle-shutdown-timeout"))
	// PDF Engine.
	provider, err := ctx.Module(new(gotenberg.PdfEngineProvider))
	if err != nil {
		return fmt.Errorf("get PDF engine provider: %w", err)
	}
	engine, err := provider.(gotenberg.PdfEngineProvider).PdfEngine()
	if err != nil {
		return fmt.Errorf("get PDF engine: %w", err)
	}
	mod.engine = engine
	// Metrics.
	meter := gotenberg.Meter()
	// Observable instruments: their callbacks read the supervisor state. The
	// returned instruments are discarded, only the registration matters.
	_, err = meter.Int64ObservableGauge(
		"chromium.requests.active",
		metric.WithDescription("Current number of active Chromium requests"),
		metric.WithUnit("{request}"),
		metric.WithInt64Callback(func(_ context.Context, o metric.Int64Observer) error {
			o.Observe(mod.supervisor.ActiveTasksCount())
			return nil
		}),
	)
	if err != nil {
		return fmt.Errorf("create chromium.requests.active gauge: %w", err)
	}
	_, err = meter.Int64ObservableGauge(
		"chromium.requests.queue_size",
		metric.WithDescription("Current number of Chromium conversion requests waiting to be treated"),
		metric.WithUnit("{request}"),
		metric.WithInt64Callback(func(_ context.Context, o metric.Int64Observer) error {
			o.Observe(mod.supervisor.ReqQueueSize())
			return nil
		}),
	)
	if err != nil {
		return fmt.Errorf("create chromium.requests.queue_size gauge: %w", err)
	}
	_, err = meter.Int64ObservableCounter(
		"chromium.process.restarts.total",
		metric.WithDescription("Current number of Chromium restarts"),
		metric.WithUnit("{restart}"),
		metric.WithInt64Callback(func(_ context.Context, o metric.Int64Observer) error {
			o.Observe(mod.supervisor.RestartsCount())
			return nil
		}),
	)
	if err != nil {
		return fmt.Errorf("create chromium.process.restarts.total counter: %w", err)
	}
	// Counters, incremented by Pdf and Screenshot.
	mod.reqsCounter, err = meter.Int64Counter(
		"chromium.requests.total",
		metric.WithDescription("Total number of Chromium conversion requests"),
		metric.WithUnit("{request}"),
	)
	if err != nil {
		return fmt.Errorf("create chromium.requests.total counter: %w", err)
	}
	mod.errsCounter, err = meter.Int64Counter(
		"chromium.errors.total",
		metric.WithDescription("Total number of Chromium conversion errors"),
		metric.WithUnit("{error}"),
	)
	if err != nil {
		return fmt.Errorf("create chromium.errors.total counter: %w", err)
	}
	// Histograms. Both duration histograms share the same explicit bucket
	// boundaries, expressed in seconds.
	durationBuckets := metric.WithExplicitBucketBoundaries(0.5, 1, 2, 5, 10, 30, 60)
	mod.conversionDurationCounter, err = meter.Float64Histogram(
		"chromium.conversion.duration",
		metric.WithDescription("Duration of Chromium conversions"),
		metric.WithUnit("s"),
		durationBuckets,
	)
	if err != nil {
		return fmt.Errorf("create chromium.conversion.duration histogram: %w", err)
	}
	mod.queueWaitDurationCounter, err = meter.Float64Histogram(
		"chromium.queue.wait.duration",
		metric.WithDescription("Duration of waiting in queue for Chromium conversions"),
		metric.WithUnit("s"),
		durationBuckets,
	)
	if err != nil {
		return fmt.Errorf("create chromium.queue.wait.duration histogram: %w", err)
	}
	mod.pdfOutputSizeCounter, err = meter.Int64Histogram(
		"chromium.pdf.output.size",
		metric.WithDescription("Size of PDF output from Chromium conversions"),
		metric.WithUnit("By"),
	)
	if err != nil {
		return fmt.Errorf("create chromium.pdf.output.size histogram: %w", err)
	}
	mod.imageOutputSizeCounter, err = meter.Int64Histogram(
		"chromium.image.output.size",
		metric.WithDescription("Size of image output from Chromium screenshots"),
		metric.WithUnit("By"),
	)
	if err != nil {
		return fmt.Errorf("create chromium.image.output.size histogram: %w", err)
	}
	// Network instruments, fed by recordNetwork after each conversion.
	mod.networkRequestsCounter, err = meter.Int64Counter(
		"chromium.network.requests.total",
		metric.WithDescription("Total number of network requests made during Chromium conversions"),
		metric.WithUnit("{request}"),
	)
	if err != nil {
		return fmt.Errorf("create chromium.network.requests.total counter: %w", err)
	}
	mod.networkBytesCounter, err = meter.Int64Histogram(
		"chromium.network.bytes",
		metric.WithDescription("Bytes fetched over the network during a Chromium conversion"),
		metric.WithUnit("By"),
	)
	if err != nil {
		return fmt.Errorf("create chromium.network.bytes histogram: %w", err)
	}
	return nil
}
// Validate validates the module properties: the maximum concurrency must lie
// in [1, 6], and both the Chromium binary and the hyphenation data directory
// must exist on disk.
//
// Only "does not exist" failures are reported; any other error returned by
// os.Stat is ignored.
func (mod *Chromium) Validate() error {
	if n := mod.maxConcurrency; n < 1 || n > 6 {
		return fmt.Errorf("chromium-max-concurrency must be between 1 and 6, got %d", n)
	}

	if _, err := os.Stat(mod.args.binPath); os.IsNotExist(err) {
		return fmt.Errorf("Chromium binary does not exist at %q; check the CHROMIUM_BIN_PATH environment variable: %w", mod.args.binPath, err)
	}

	if _, err := os.Stat(mod.args.hyphenDataDirPath); os.IsNotExist(err) {
		return fmt.Errorf("Chromium hyphenation data directory does not exist at %q; check the CHROMIUM_HYPHEN_DATA_DIR_PATH environment variable (it ships in the Gotenberg image): %w", mod.args.hyphenDataDirPath, err)
	}

	return nil
}
// Start launches the supervised browser when auto-start is enabled; it is a
// no-op otherwise. A launch failure is returned wrapped.
func (mod *Chromium) Start() error {
	if mod.autoStart {
		if err := mod.supervisor.Launch(); err != nil {
			return fmt.Errorf("launch supervisor: %w", err)
		}
	}

	return nil
}
// StartupMessage returns a custom startup message that depends on whether
// auto-start is enabled.
func (mod *Chromium) StartupMessage() string {
	if mod.autoStart {
		return "Chromium automatically started"
	}

	return "Chromium ready to start"
}
// Stop stops the current browser instance. It first blocks until ctx is
// done, then shuts the supervisor down and returns its error, wrapped, if
// any.
func (mod *Chromium) Stop(ctx context.Context) error {
	// Wait for the context to end so that another module may gracefully stop
	// before the browser goes away.
	mod.logger.DebugContext(ctx, "wait for the end of grace duration")
	<-ctx.Done()

	if err := mod.supervisor.Shutdown(); err != nil {
		return fmt.Errorf("stop Chromium: %w", err)
	}

	return nil
}
// Debug returns additional debug data: the Chromium version under the
// "version" key.
func (mod *Chromium) Debug() map[string]any {
	data := make(map[string]any, 1)
	data["version"] = mod.detectVersion()

	return data
}
// detectVersion resolves the Chromium version exactly once and returns the
// cached value on every call.
//
// The value reported by gotenberg.BuildVersion is preferred so that no
// Chromium process is spawned at runtime. When it is unavailable (e.g. local
// or non-Docker builds), the binary is run with --version instead. If that
// command fails, the error message is stored as the version.
func (mod *Chromium) detectVersion() string {
	resolve := func() {
		if built, found := gotenberg.BuildVersion("chromium"); found {
			mod.version = built
			return
		}

		probe := exec.Command(mod.args.binPath, "--version") //nolint:gosec
		// Setpgid runs the child in its own process group.
		probe.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}

		raw, err := probe.Output()
		if err != nil {
			mod.version = err.Error()
			return
		}

		mod.version = strings.TrimSpace(string(raw))
	}

	mod.versionOnce.Do(resolve)

	return mod.version
}
// spanAttrs builds the client-span attributes of a Chromium invocation. The
// result always starts with the server address (the binary path), followed
// by the Chromium version when it is non-empty, then the extra attributes in
// the order given. Carrying the version on every conversion span lets a trace
// record which Chromium rendered the document.
func (mod *Chromium) spanAttrs(extra ...attribute.KeyValue) []attribute.KeyValue {
	// Room for the server address and the version, plus the extras.
	const fixed = 2

	out := make([]attribute.KeyValue, 0, fixed+len(extra))
	out = append(out, semconv.ServerAddress(mod.args.binPath))

	version := mod.detectVersion()
	if version != "" {
		out = append(out, attribute.String("gotenberg.chromium.version", version))
	}

	out = append(out, extra...)

	return out
}
// Metrics returns the metrics: the request queue size and the number of
// restarts, both read from the supervisor each time the metric is read.
func (mod *Chromium) Metrics() ([]gotenberg.Metric, error) {
	queueSize := gotenberg.Metric{
		Name:        "chromium_requests_queue_size",
		Description: "Current number of Chromium conversion requests waiting to be treated.",
		Read: func() float64 {
			return float64(mod.supervisor.ReqQueueSize())
		},
	}

	restarts := gotenberg.Metric{
		Name:        "chromium_restarts_count",
		Description: "Current number of Chromium restarts.",
		Read: func() float64 {
			return float64(mod.supervisor.RestartsCount())
		},
	}

	return []gotenberg.Metric{queueSize, restarts}, nil
}
// Checks adds a health check named "chromium" that fails whenever the
// supervisor reports the browser as unhealthy.
func (mod *Chromium) Checks() ([]health.CheckerOption, error) {
	probe := func(_ context.Context) error {
		if !mod.supervisor.Healthy() {
			return errors.New("Chromium is unhealthy")
		}

		return nil
	}

	check := health.WithCheck(health.Check{
		Name:  "chromium",
		Check: probe,
	})

	return []health.CheckerOption{check}, nil
}
// Ready returns no error if the module is ready.
//
// Without auto-start the module is ready right away. Otherwise, the browser
// health is polled every 100 milliseconds until it reports healthy or until
// the start timeout (wsUrlReadTimeout) elapses, in which case the context
// error is returned wrapped.
func (mod *Chromium) Ready() error {
	if !mod.autoStart {
		return nil
	}

	deadline, cancel := context.WithTimeout(context.Background(), mod.args.wsUrlReadTimeout)
	defer cancel()

	poll := time.NewTicker(100 * time.Millisecond)
	defer poll.Stop()

	for {
		select {
		case <-deadline.Done():
			return fmt.Errorf("context done while waiting for Chromium to be ready: %w", deadline.Err())
		case <-poll.C:
			if mod.browser.Healthy(mod.logger) {
				return nil
			}
		}
	}
}
// Chromium returns an [Api] for interacting with Chromium for converting HTML
// documents to PDF. The module itself implements the interface, so the
// receiver is returned and the error is always nil.
func (mod *Chromium) Chromium() (Api, error) {
	var impl Api = mod

	return impl, nil
}
// Routes returns the HTTP routes: a PDF conversion route and a screenshot
// route for each of URL, HTML, and Markdown inputs. It returns no routes when
// they are disabled.
func (mod *Chromium) Routes() ([]api.Route, error) {
	if mod.disableRoutes {
		return nil, nil
	}

	pdfEngine := mod.engine

	routes := make([]api.Route, 0, 6)
	routes = append(routes,
		convertUrlRoute(mod, pdfEngine),
		screenshotUrlRoute(mod),
		convertHtmlRoute(mod, pdfEngine),
		screenshotHtmlRoute(mod),
		convertMarkdownRoute(mod, pdfEngine),
		screenshotMarkdownRoute(mod),
	)

	return routes, nil
}
// Pdf converts a URL to PDF.
//
// The conversion runs through the supervisor inside a client span. Whatever
// the outcome, it records the queue wait duration, the request count, and the
// network aggregates; the conversion duration is recorded only if the
// conversion actually started. On success, the output size is recorded as
// well. On failure, the error counter is incremented with a bounded reason
// and the error is returned unchanged.
//
//nolint:dupl
func (mod *Chromium) Pdf(ctx context.Context, logger *slog.Logger, url, outputPath string, options PdfOptions) error {
	// The input attributes must be derived from the caller's context: Start
	// rebinds ctx to the span context, which would shadow the underlying
	// [api.Context].
	inputAttrs := conversionInputAttrs(ctx, url)

	ctx, span := gotenberg.Tracer().Start(ctx, "chromium.Pdf",
		trace.WithSpanKind(trace.SpanKindClient),
		trace.WithAttributes(mod.spanAttrs()...),
	)
	defer span.End()

	span.SetAttributes(inputAttrs...)
	span.SetAttributes(
		attribute.Int64("gotenberg.queue.depth_at_arrival", mod.supervisor.ReqQueueSize()),
		attribute.Int64("gotenberg.conversions_since_last_restart", mod.supervisor.ConversionsSinceRestart()),
	)

	queuedAt := time.Now()
	// startedAt stays zero if the supervisor never runs the task.
	var startedAt time.Time
	netStats := newNetworkAggregate()

	runErr := mod.supervisor.Run(ctx, logger, func() error {
		startedAt = time.Now()
		return mod.browser.pdf(ctx, logger, url, outputPath, options, netStats)
	})
	finishedAt := time.Now()

	status := "success"
	if runErr != nil {
		status = "error"
		if errors.Is(runErr, context.DeadlineExceeded) || errors.Is(runErr, context.Canceled) {
			status = "timeout"
		}

		reason := chromiumErrorType(runErr, "chromium_unavailable")
		mod.errsCounter.Add(ctx, 1, metric.WithAttributes(
			attribute.String("reason", reason),
		))
		gotenberg.SpanErrorType(span, reason)
	}

	byStatus := metric.WithAttributes(attribute.String("status", status))

	// The wait ends when the conversion starts, or when Run returns if the
	// conversion never started.
	started := !startedAt.IsZero()
	waitEnd := finishedAt
	if started {
		waitEnd = startedAt
	}
	mod.queueWaitDurationCounter.Record(ctx, waitEnd.Sub(queuedAt).Seconds(), byStatus)
	if started {
		mod.conversionDurationCounter.Record(ctx, finishedAt.Sub(startedAt).Seconds(), byStatus)
	}

	mod.reqsCounter.Add(ctx, 1, byStatus)
	mod.recordNetwork(ctx, span, netStats)

	if runErr != nil {
		span.RecordError(runErr)
		span.SetStatus(codes.Error, runErr.Error())
		return runErr
	}

	// Best effort: a failing stat only means the size is not reported.
	if info, statErr := os.Stat(outputPath); statErr == nil {
		mod.pdfOutputSizeCounter.Record(ctx, info.Size())
		span.SetAttributes(attribute.Int64("gotenberg.conversion.output.bytes", info.Size()))
	}
	span.SetStatus(codes.Ok, "")

	return nil
}
// Screenshot captures a screenshot from a URL.
//
// The capture runs through the supervisor inside a client span. Whatever the
// outcome, it records the queue wait duration, the request count, and the
// network aggregates; the conversion duration is recorded only if the capture
// actually started. On success, the image size is recorded on both the
// histogram and the span. On failure, the error counter is incremented with a
// bounded reason and the error is returned unchanged.
//
// Like Pdf, the span carries the conversion input attributes and, on success,
// the output size, so both kinds of conversion expose the same trace data.
//
//nolint:dupl
func (mod *Chromium) Screenshot(ctx context.Context, logger *slog.Logger, url, outputPath string, options ScreenshotOptions) error {
	// Read input attributes before Start rebinds ctx to the span context, which
	// would shadow the underlying [api.Context].
	inputAttrs := conversionInputAttrs(ctx, url)

	ctx, span := gotenberg.Tracer().Start(ctx, "chromium.Screenshot",
		trace.WithSpanKind(trace.SpanKindClient),
		trace.WithAttributes(mod.spanAttrs()...),
	)
	defer span.End()

	span.SetAttributes(inputAttrs...)
	span.SetAttributes(
		attribute.Int64("gotenberg.queue.depth_at_arrival", mod.supervisor.ReqQueueSize()),
		attribute.Int64("gotenberg.conversions_since_last_restart", mod.supervisor.ConversionsSinceRestart()),
	)

	start := time.Now()
	// conversionStart stays zero if the supervisor never runs the task.
	var conversionStart time.Time
	aggregate := newNetworkAggregate()

	err := mod.supervisor.Run(ctx, logger, func() error {
		conversionStart = time.Now()
		return mod.browser.screenshot(ctx, logger, url, outputPath, options, aggregate)
	})
	end := time.Now()

	status := "success"
	if err != nil {
		if errors.Is(err, context.DeadlineExceeded) || errors.Is(err, context.Canceled) {
			status = "timeout"
		} else {
			status = "error"
		}

		// Screenshot keeps its historical label for a saturated queue.
		reason := chromiumErrorType(err, "chromium_maximum_queue_size_exceeded")
		mod.errsCounter.Add(ctx, 1, metric.WithAttributes(
			attribute.String("reason", reason),
		))
		gotenberg.SpanErrorType(span, reason)
	}

	if !conversionStart.IsZero() {
		waitDuration := conversionStart.Sub(start).Seconds()
		conversionDuration := end.Sub(conversionStart).Seconds()
		mod.queueWaitDurationCounter.Record(ctx, waitDuration, metric.WithAttributes(
			attribute.String("status", status),
		))
		mod.conversionDurationCounter.Record(ctx, conversionDuration, metric.WithAttributes(
			attribute.String("status", status),
		))
	} else {
		// The capture never started: the whole elapsed time was spent waiting.
		waitDuration := end.Sub(start).Seconds()
		mod.queueWaitDurationCounter.Record(ctx, waitDuration, metric.WithAttributes(
			attribute.String("status", status),
		))
	}

	mod.reqsCounter.Add(ctx, 1, metric.WithAttributes(
		attribute.String("status", status),
	))
	mod.recordNetwork(ctx, span, aggregate)

	if err == nil {
		// Best effort: a failing stat only means the size is not reported.
		if fileInfo, statErr := os.Stat(outputPath); statErr == nil {
			mod.imageOutputSizeCounter.Record(ctx, fileInfo.Size())
			span.SetAttributes(attribute.Int64("gotenberg.conversion.output.bytes", fileInfo.Size()))
		}
		span.SetStatus(codes.Ok, "")
		return nil
	}

	span.RecordError(err)
	span.SetStatus(codes.Error, err.Error())

	return err
}
// recordNetwork lifts per-conversion network aggregates onto the span and the
// network metrics. It does nothing when aggregate is nil.
//
// Request counts are split by outcome ("ok" and "failed") and only added when
// positive, whereas the total bytes always feed the histogram. Everything is
// recorded with the conversion context so the SDK attaches trace exemplars.
// The heaviest resource URL is redacted before it lands on the span event.
func (mod *Chromium) recordNetwork(ctx context.Context, span trace.Span, aggregate *networkAggregate) {
	if aggregate == nil {
		return
	}

	snap := aggregate.snapshot()

	span.SetAttributes(
		attribute.Int64("gotenberg.chromium.resources.count", snap.requestCount),
		attribute.Int64("gotenberg.chromium.resources.bytes_total", snap.bytesTotal),
		attribute.Int64("gotenberg.chromium.resources.failed_count", snap.failedCount),
		attribute.Int64("gotenberg.chromium.resources.unique_origins", snap.uniqueOrigins),
	)

	if snap.heaviestURL != "" {
		heaviest := trace.WithAttributes(
			attribute.String("url", gotenberg.RedactURL(snap.heaviestURL)),
			attribute.Int64("bytes", snap.heaviestBytes),
		)
		span.AddEvent("chromium.heaviest_resource", heaviest)
	}

	// addRequests counts the requests of one outcome, skipping empty ones.
	addRequests := func(outcome string, count int64) {
		if count <= 0 {
			return
		}
		mod.networkRequestsCounter.Add(ctx, count, metric.WithAttributes(attribute.String("outcome", outcome)))
	}
	addRequests("ok", snap.requestCount-snap.failedCount)
	addRequests("failed", snap.failedCount)

	mod.networkBytesCounter.Record(ctx, snap.bytesTotal)
}
// conversionInputAttrs derives low-cardinality input attributes for a
// conversion span:
//   - the number of received files, when ctx is an [api.Context];
//   - the size of the local HTML input, when url is a file:// URL whose path
//     can be stat'ed.
//
// Remote URL conversions yield no html.bytes. The result is nil when neither
// attribute applies.
func conversionInputAttrs(ctx context.Context, url string) []attribute.KeyValue {
	var out []attribute.KeyValue

	apiCtx, isAPICtx := ctx.(*api.Context)
	if isAPICtx {
		out = append(out, attribute.Int("gotenberg.conversion.input.files.count", apiCtx.FileCount()))
	}

	localPath, isFileURL := strings.CutPrefix(url, "file://")
	if !isFileURL {
		return out
	}

	info, err := os.Stat(localPath)
	if err != nil {
		// Best effort: the size is simply not reported.
		return out
	}

	return append(out, attribute.Int64("gotenberg.conversion.input.html.bytes", info.Size()))
}
// chromiumErrorType maps a conversion error to chromium's bounded reason value,
// reused as the span error.type.
//
// The checks run in this order:
//  1. errors caused by the input document map to
//     [gotenberg.ErrorTypeInvalidInput];
//  2. a saturated queue maps to queueReason, which preserves the historical,
//     route-specific label: Pdf collapses it into "chromium_unavailable",
//     whereas Screenshot keeps "chromium_maximum_queue_size_exceeded";
//  3. a process that is already restarting maps to "chromium_unavailable";
//  4. anything else falls back to [gotenberg.ClassifyError].
func chromiumErrorType(err error, queueReason string) string {
	invalidInput := [...]error{
		ErrInvalidHttpStatusCode,
		ErrInvalidResourceHttpStatusCode,
		ErrLoadingFailed,
		ErrResourceLoadingFailed,
		ErrInvalidEvaluationExpression,
		ErrInvalidSelectorQuery,
	}
	for _, target := range invalidInput {
		if errors.Is(err, target) {
			return gotenberg.ErrorTypeInvalidInput
		}
	}

	if errors.Is(err, gotenberg.ErrMaximumQueueSizeExceeded) {
		return queueReason
	}

	if errors.Is(err, gotenberg.ErrProcessAlreadyRestarting) {
		return "chromium_unavailable"
	}

	return gotenberg.ClassifyError(err)
}
// Interface guards: compile-time assertions that *Chromium implements every
// interface listed below.
var (
	_ gotenberg.Module = (*Chromium)(nil)
	_ gotenberg.Provisioner = (*Chromium)(nil)
	_ gotenberg.Validator = (*Chromium)(nil)
	_ gotenberg.App = (*Chromium)(nil)
	_ gotenberg.Debuggable = (*Chromium)(nil)
	_ gotenberg.MetricsProvider = (*Chromium)(nil)
	_ api.HealthChecker = (*Chromium)(nil)
	_ api.Router = (*Chromium)(nil)
	_ Api = (*Chromium)(nil)
	_ Provider = (*Chromium)(nil)
)