mirror of
https://github.com/gotenberg/gotenberg.git
synced 2026-07-02 00:17:40 +08:00
492 lines
16 KiB
Go
492 lines
16 KiB
Go
package chromium
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"log/slog"
|
|
"os"
|
|
"strings"
|
|
"sync"
|
|
"sync/atomic"
|
|
"time"
|
|
|
|
cdprotobrowser "github.com/chromedp/cdproto/browser"
|
|
"github.com/chromedp/cdproto/fetch"
|
|
"github.com/chromedp/cdproto/network"
|
|
"github.com/chromedp/cdproto/page"
|
|
"github.com/chromedp/cdproto/runtime"
|
|
"github.com/chromedp/chromedp"
|
|
"github.com/dlclark/regexp2"
|
|
"github.com/shirou/gopsutil/v4/process"
|
|
|
|
"github.com/gotenberg/gotenberg/v8/pkg/gotenberg"
|
|
)
|
|
|
|
type browser interface {
|
|
gotenberg.Process
|
|
pdf(ctx context.Context, logger *slog.Logger, url, outputPath string, options PdfOptions) error
|
|
screenshot(ctx context.Context, logger *slog.Logger, url, outputPath string, options ScreenshotOptions) error
|
|
}
|
|
|
|
type browserArguments struct {
|
|
// Executor args.
|
|
binPath string
|
|
allowInsecureLocalhost bool
|
|
ignoreCertificateErrors bool
|
|
disableWebSecurity bool
|
|
allowFileAccessFromFiles bool
|
|
hostResolverRules string
|
|
proxyServer string
|
|
wsUrlReadTimeout time.Duration
|
|
hyphenDataDirPath string
|
|
|
|
// Tasks specific.
|
|
allowList []*regexp2.Regexp
|
|
denyList []*regexp2.Regexp
|
|
clearCache bool
|
|
clearCookies bool
|
|
disableJavaScript bool
|
|
}
|
|
|
|
type chromiumBrowser struct {
|
|
initialCtx context.Context
|
|
ctx context.Context
|
|
cancelFunc context.CancelFunc
|
|
userProfileDirPath string
|
|
ctxMu sync.RWMutex
|
|
isStarted atomic.Bool
|
|
|
|
arguments browserArguments
|
|
fs *gotenberg.FileSystem
|
|
}
|
|
|
|
func newChromiumBrowser(arguments browserArguments) browser {
|
|
b := &chromiumBrowser{
|
|
initialCtx: context.Background(),
|
|
arguments: arguments,
|
|
fs: gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)),
|
|
}
|
|
b.isStarted.Store(false)
|
|
|
|
return b
|
|
}
|
|
|
|
func (b *chromiumBrowser) Start(logger *slog.Logger) error {
|
|
if b.isStarted.Load() {
|
|
return errors.New("browser is already started")
|
|
}
|
|
|
|
debug := &debugLogger{logger: logger}
|
|
b.userProfileDirPath = b.fs.NewDirPath()
|
|
|
|
// See https://github.com/gotenberg/gotenberg/issues/1293.
|
|
err := os.MkdirAll(b.userProfileDirPath, 0o755)
|
|
if err != nil {
|
|
return fmt.Errorf("could not create user profile directory: %w", err)
|
|
}
|
|
err = os.Symlink(b.arguments.hyphenDataDirPath, fmt.Sprintf("%s/hyphen-data", b.userProfileDirPath))
|
|
if err != nil {
|
|
return fmt.Errorf("create symlink to hyphen-data directory: %w", err)
|
|
}
|
|
|
|
opts := append(chromedp.DefaultExecAllocatorOptions[:],
|
|
chromedp.CombinedOutput(debug),
|
|
chromedp.ExecPath(b.arguments.binPath),
|
|
chromedp.NoSandbox,
|
|
// See:
|
|
// https://github.com/puppeteer/puppeteer/issues/661
|
|
// https://github.com/puppeteer/puppeteer/issues/2410
|
|
chromedp.Flag("font-render-hinting", "none"),
|
|
chromedp.UserDataDir(b.userProfileDirPath),
|
|
// See https://github.com/gotenberg/gotenberg/issues/831.
|
|
chromedp.Flag("disable-pdf-tagging", true),
|
|
// See https://github.com/gotenberg/gotenberg/issues/1177.
|
|
chromedp.Flag("no-zygote", true),
|
|
chromedp.Flag("disable-dev-shm-usage", true),
|
|
// See https://github.com/gotenberg/gotenberg/issues/1293.
|
|
chromedp.Flag("disable-component-update", false),
|
|
)
|
|
|
|
if b.arguments.allowInsecureLocalhost {
|
|
// See https://github.com/gotenberg/gotenberg/issues/488.
|
|
opts = append(opts, chromedp.Flag("allow-insecure-localhost", true))
|
|
}
|
|
|
|
if b.arguments.ignoreCertificateErrors {
|
|
opts = append(opts, chromedp.IgnoreCertErrors)
|
|
}
|
|
|
|
if b.arguments.disableWebSecurity {
|
|
opts = append(opts, chromedp.Flag("disable-web-security", true))
|
|
}
|
|
|
|
if b.arguments.allowFileAccessFromFiles {
|
|
// See https://github.com/gotenberg/gotenberg/issues/356.
|
|
opts = append(opts, chromedp.Flag("allow-file-access-from-files", true))
|
|
}
|
|
|
|
if b.arguments.hostResolverRules != "" {
|
|
// See https://github.com/gotenberg/gotenberg/issues/488.
|
|
opts = append(opts, chromedp.Flag("host-resolver-rules", b.arguments.hostResolverRules))
|
|
}
|
|
|
|
if b.arguments.proxyServer != "" {
|
|
// See https://github.com/gotenberg/gotenberg/issues/376.
|
|
opts = append(opts, chromedp.ProxyServer(b.arguments.proxyServer))
|
|
}
|
|
|
|
// See https://github.com/gotenberg/gotenberg/issues/524.
|
|
opts = append(opts, chromedp.WSURLReadTimeout(b.arguments.wsUrlReadTimeout))
|
|
|
|
allocatorCtx, allocatorCancel := chromedp.NewExecAllocator(b.initialCtx, opts...)
|
|
ctx, cancel := chromedp.NewContext(allocatorCtx, chromedp.WithDebugf(debug.Printf))
|
|
|
|
err = chromedp.Run(ctx)
|
|
if err != nil {
|
|
cancel()
|
|
allocatorCancel()
|
|
return fmt.Errorf("run exec allocator: %w", err)
|
|
}
|
|
|
|
b.ctxMu.Lock()
|
|
defer b.ctxMu.Unlock()
|
|
|
|
// We have to keep the context around, as we need it to create new tabs
|
|
// later.
|
|
b.ctx = ctx
|
|
b.cancelFunc = func() {
|
|
cancel()
|
|
allocatorCancel()
|
|
}
|
|
b.isStarted.Store(true)
|
|
|
|
return nil
|
|
}
|
|
|
|
func (b *chromiumBrowser) Stop(logger *slog.Logger) error {
|
|
if !b.isStarted.Load() {
|
|
// No big deal? Like calling cancel twice.
|
|
return nil
|
|
}
|
|
|
|
// Always remove the user profile directory created by Chromium.
|
|
copyUserProfileDirPath := b.userProfileDirPath
|
|
expirationTime := time.Now()
|
|
defer func(userProfileDirPath string, expirationTime time.Time) {
|
|
// See:
|
|
// https://github.com/SeleniumHQ/docker-selenium/blob/7216d060d86872afe853ccda62db0dfab5118dc7/NodeChrome/chrome-cleanup.sh
|
|
// https://github.com/SeleniumHQ/docker-selenium/blob/7216d060d86872afe853ccda62db0dfab5118dc7/NodeChromium/chrome-cleanup.sh
|
|
|
|
// Clean up stuck processes.
|
|
ps, err := process.Processes()
|
|
if err != nil {
|
|
logger.ErrorContext(context.Background(), fmt.Sprintf("list processes: %v", err))
|
|
} else {
|
|
for _, p := range ps {
|
|
func() {
|
|
cmdline, err := p.Cmdline()
|
|
if err != nil {
|
|
return
|
|
}
|
|
|
|
if !strings.Contains(cmdline, "chromium/chromium") && !strings.Contains(cmdline, "chrome/chrome") {
|
|
return
|
|
}
|
|
|
|
killCtx, cancel := context.WithTimeout(context.Background(), time.Second*5)
|
|
defer cancel()
|
|
|
|
err = p.KillWithContext(killCtx)
|
|
if err != nil {
|
|
logger.ErrorContext(context.Background(), fmt.Sprintf("kill process: %v", err))
|
|
} else {
|
|
logger.DebugContext(context.Background(), fmt.Sprintf("Chromium process %d killed", p.Pid))
|
|
}
|
|
}()
|
|
}
|
|
}
|
|
|
|
go func() {
|
|
// FIXME: Chromium seems to recreate the user profile directory
|
|
// right after its deletion if we do not wait a certain amount
|
|
// of time before deleting it.
|
|
<-time.After(10 * time.Second)
|
|
|
|
err = os.RemoveAll(userProfileDirPath)
|
|
if err != nil {
|
|
logger.ErrorContext(context.Background(), fmt.Sprintf("remove Chromium's user profile directory: %s", err))
|
|
} else {
|
|
logger.DebugContext(context.Background(), fmt.Sprintf("'%s' Chromium's user profile directory removed", userProfileDirPath))
|
|
}
|
|
|
|
// Also, remove Chromium-specific files in the temporary directory.
|
|
err = gotenberg.GarbageCollect(context.Background(), logger, os.TempDir(), []string{".org.chromium.Chromium", ".com.google.Chrome"}, expirationTime)
|
|
if err != nil {
|
|
logger.ErrorContext(context.Background(), err.Error())
|
|
}
|
|
}()
|
|
}(copyUserProfileDirPath, expirationTime)
|
|
|
|
b.ctxMu.Lock()
|
|
defer b.ctxMu.Unlock()
|
|
|
|
b.cancelFunc()
|
|
b.ctx = nil
|
|
b.userProfileDirPath = ""
|
|
b.isStarted.Store(false)
|
|
|
|
return nil
|
|
}
|
|
|
|
func (b *chromiumBrowser) Healthy(logger *slog.Logger) bool {
|
|
// Good to know: the supervisor does not call this method if no first start
|
|
// or if the process is restarting.
|
|
|
|
if !b.isStarted.Load() {
|
|
// Non-started browser but not restarting?
|
|
return false
|
|
}
|
|
|
|
b.ctxMu.RLock()
|
|
defer b.ctxMu.RUnlock()
|
|
|
|
// Create a timeout based on the existing browser context (b.ctx).
|
|
// IMPORTANT: We do NOT call chromedp.NewContext here.
|
|
// We want to execute this against the main browser connection,
|
|
// avoiding the creation of a new target (tab).
|
|
ctx, cancel := context.WithTimeout(b.ctx, 5*time.Second)
|
|
defer cancel()
|
|
|
|
// Check if the browser is responsive by asking for its version.
|
|
// This involves a simple JSON payload roundtrip over the websocket.
|
|
// See https://github.com/gotenberg/gotenberg/issues/1169.
|
|
err := chromedp.Run(ctx, chromedp.ActionFunc(func(ctx context.Context) error {
|
|
_, _, _, _, _, err := cdprotobrowser.GetVersion().Do(ctx)
|
|
return err
|
|
}))
|
|
if err != nil {
|
|
logger.ErrorContext(context.Background(), fmt.Sprintf("browser health check failed: %s", err))
|
|
return false
|
|
}
|
|
|
|
return true
|
|
}
|
|
|
|
func (b *chromiumBrowser) pdf(ctx context.Context, logger *slog.Logger, url, outputPath string, options PdfOptions) error {
|
|
// Note: no error wrapping because it leaks on errors we want to display to
|
|
// the end user.
|
|
return b.do(ctx, logger, url, options.Options, chromedp.Tasks{
|
|
network.Enable(),
|
|
fetch.Enable(),
|
|
runtime.Enable(),
|
|
clearCacheActionFunc(logger, b.arguments.clearCache),
|
|
clearCookiesActionFunc(logger, b.arguments.clearCookies),
|
|
disableJavaScriptActionFunc(logger, b.arguments.disableJavaScript),
|
|
setCookiesActionFunc(logger, options.Cookies),
|
|
userAgentOverride(logger, options.UserAgent),
|
|
navigateActionFunc(logger, url, options.SkipNetworkIdleEvent, options.SkipNetworkAlmostIdleEvent),
|
|
hideDefaultWhiteBackgroundActionFunc(logger, options.OmitBackground, options.PrintBackground),
|
|
forceExactColorsActionFunc(logger, options.PrintBackground),
|
|
emulateMediaTypeActionFunc(logger, options.EmulatedMediaType, options.EmulatedMediaFeatures),
|
|
waitForExpressionBeforePrintActionFunc(logger, b.arguments.disableJavaScript, options.WaitForExpression),
|
|
waitForSelectorVisibleBeforePrintActionFunc(logger, options.WaitForSelector),
|
|
waitDelayBeforePrintActionFunc(logger, b.arguments.disableJavaScript, options.WaitDelay),
|
|
// PDF specific.
|
|
printToPdfActionFunc(logger, outputPath, options),
|
|
// Teardown.
|
|
page.Close(),
|
|
})
|
|
}
|
|
|
|
func (b *chromiumBrowser) screenshot(ctx context.Context, logger *slog.Logger, url, outputPath string, options ScreenshotOptions) error {
|
|
// Note: no error wrapping because it leaks on errors we want to display to
|
|
// the end user.
|
|
return b.do(ctx, logger, url, options.Options, chromedp.Tasks{
|
|
network.Enable(),
|
|
fetch.Enable(),
|
|
runtime.Enable(),
|
|
clearCacheActionFunc(logger, b.arguments.clearCache),
|
|
clearCookiesActionFunc(logger, b.arguments.clearCookies),
|
|
disableJavaScriptActionFunc(logger, b.arguments.disableJavaScript),
|
|
setCookiesActionFunc(logger, options.Cookies),
|
|
userAgentOverride(logger, options.UserAgent),
|
|
navigateActionFunc(logger, url, options.SkipNetworkIdleEvent, options.SkipNetworkAlmostIdleEvent),
|
|
hideDefaultWhiteBackgroundActionFunc(logger, options.OmitBackground, true),
|
|
forceExactColorsActionFunc(logger, true),
|
|
emulateMediaTypeActionFunc(logger, options.EmulatedMediaType, options.EmulatedMediaFeatures),
|
|
waitForExpressionBeforePrintActionFunc(logger, b.arguments.disableJavaScript, options.WaitForExpression),
|
|
waitForSelectorVisibleBeforePrintActionFunc(logger, options.WaitForSelector),
|
|
waitDelayBeforePrintActionFunc(logger, b.arguments.disableJavaScript, options.WaitDelay),
|
|
// Screenshot specific.
|
|
setDeviceMetricsOverride(logger, options.Width, options.Height),
|
|
captureScreenshotActionFunc(logger, outputPath, options),
|
|
// Teardown.
|
|
page.Close(),
|
|
})
|
|
}
|
|
|
|
func (b *chromiumBrowser) do(ctx context.Context, logger *slog.Logger, url string, options Options, tasks chromedp.Tasks) error {
|
|
if !b.isStarted.Load() {
|
|
return errors.New("browser not started, cannot handle tasks")
|
|
}
|
|
|
|
deadline, ok := ctx.Deadline()
|
|
if !ok {
|
|
return errors.New("context has no deadline")
|
|
}
|
|
|
|
// We validate the "main" URL against our allowed / deny lists.
|
|
err := gotenberg.FilterDeadline(b.arguments.allowList, b.arguments.denyList, url, deadline)
|
|
if err != nil {
|
|
return fmt.Errorf("filter URL: %w", err)
|
|
}
|
|
|
|
b.ctxMu.RLock()
|
|
defer b.ctxMu.RUnlock()
|
|
|
|
timeoutCtx, timeoutCancel := context.WithTimeout(b.ctx, time.Until(deadline))
|
|
defer timeoutCancel()
|
|
|
|
taskCtx, taskCancel := chromedp.NewContext(timeoutCtx)
|
|
defer taskCancel()
|
|
|
|
// We validate all other requests against our allowed / deny lists.
|
|
// If a request does not pass the validation, we make it fail. It also set
|
|
// the extra HTTP headers, if any.
|
|
// See https://github.com/gotenberg/gotenberg/issues/1011.
|
|
listenForEventRequestPaused(taskCtx, logger, eventRequestPausedOptions{
|
|
allowList: b.arguments.allowList,
|
|
denyList: b.arguments.denyList,
|
|
allowedFilePrefixes: options.AllowedFilePrefixes,
|
|
extraHttpHeaders: options.ExtraHttpHeaders,
|
|
})
|
|
|
|
var (
|
|
invalidHttpStatusCode error
|
|
invalidHttpStatusCodeMu sync.RWMutex
|
|
invalidResourceHttpStatusCode error
|
|
invalidResourceHttpStatusCodeMu sync.RWMutex
|
|
)
|
|
|
|
// See:
|
|
// https://github.com/gotenberg/gotenberg/issues/613.
|
|
// https://github.com/gotenberg/gotenberg/issues/1021.
|
|
if len(options.FailOnHttpStatusCodes) != 0 || len(options.FailOnResourceHttpStatusCodes) != 0 {
|
|
listenForEventResponseReceived(taskCtx, logger, eventResponseReceivedOptions{
|
|
mainPageUrl: url,
|
|
failOnHttpStatusCodes: options.FailOnHttpStatusCodes,
|
|
invalidHttpStatusCode: &invalidHttpStatusCode,
|
|
invalidHttpStatusCodeMu: &invalidHttpStatusCodeMu,
|
|
failOnResourceOnHttpStatusCode: options.FailOnResourceHttpStatusCodes,
|
|
ignoreResourceHttpStatusDomains: options.IgnoreResourceHttpStatusDomains,
|
|
invalidResourceHttpStatusCode: &invalidResourceHttpStatusCode,
|
|
invalidResourceHttpStatusCodeMu: &invalidResourceHttpStatusCodeMu,
|
|
})
|
|
}
|
|
|
|
var (
|
|
consoleExceptions error
|
|
consoleExceptionsMu sync.RWMutex
|
|
)
|
|
|
|
// See https://github.com/gotenberg/gotenberg/issues/262.
|
|
if options.FailOnConsoleExceptions && !b.arguments.disableJavaScript {
|
|
listenForEventExceptionThrown(taskCtx, logger, &consoleExceptions, &consoleExceptionsMu)
|
|
}
|
|
|
|
var (
|
|
loadingFailed error
|
|
loadingFailedMu sync.RWMutex
|
|
resourceLoadingFailed error
|
|
resourceLoadingFailedMu sync.RWMutex
|
|
)
|
|
|
|
// See:
|
|
// https://github.com/gotenberg/gotenberg/issues/913.
|
|
// https://github.com/gotenberg/gotenberg/issues/959.
|
|
// https://github.com/gotenberg/gotenberg/issues/1021.
|
|
listenForEventLoadingFailed(taskCtx, logger, eventLoadingFailedOptions{
|
|
loadingFailed: &loadingFailed,
|
|
loadingFailedMu: &loadingFailedMu,
|
|
resourceLoadingFailed: &resourceLoadingFailed,
|
|
resourceLoadingFailedMu: &resourceLoadingFailedMu,
|
|
})
|
|
|
|
err = chromedp.Run(taskCtx, tasks...)
|
|
if err != nil {
|
|
errMessage := err.Error()
|
|
|
|
if strings.Contains(errMessage, "Printing failed (-32000)") {
|
|
return ErrPrintingFailed
|
|
}
|
|
|
|
if strings.Contains(errMessage, "Show invalid printer settings error (-32000)") || strings.Contains(errMessage, "content area is empty (-32602)") {
|
|
return ErrInvalidPrinterSettings
|
|
}
|
|
|
|
if strings.Contains(errMessage, "Page range syntax error") {
|
|
return ErrPageRangesSyntaxError
|
|
}
|
|
|
|
if strings.Contains(errMessage, "Page range exceeds page count (-32000)") {
|
|
return ErrPageRangesExceedsPageCount
|
|
}
|
|
|
|
if strings.Contains(errMessage, "rpcc: message too large") {
|
|
return ErrRpccMessageTooLarge
|
|
}
|
|
|
|
return fmt.Errorf("handle tasks: %w", err)
|
|
}
|
|
|
|
// See https://github.com/gotenberg/gotenberg/issues/613.
|
|
invalidHttpStatusCodeMu.RLock()
|
|
defer invalidHttpStatusCodeMu.RUnlock()
|
|
|
|
if invalidHttpStatusCode != nil {
|
|
return fmt.Errorf("%v: %w", invalidHttpStatusCode, ErrInvalidHttpStatusCode)
|
|
}
|
|
|
|
// See https://github.com/gotenberg/gotenberg/issues/1021.
|
|
invalidResourceHttpStatusCodeMu.RLock()
|
|
defer invalidResourceHttpStatusCodeMu.RUnlock()
|
|
|
|
if invalidResourceHttpStatusCode != nil {
|
|
return fmt.Errorf("%v: %w", invalidResourceHttpStatusCode, ErrInvalidResourceHttpStatusCode)
|
|
}
|
|
|
|
// See https://github.com/gotenberg/gotenberg/issues/262.
|
|
consoleExceptionsMu.RLock()
|
|
defer consoleExceptionsMu.RUnlock()
|
|
|
|
if consoleExceptions != nil {
|
|
return fmt.Errorf("%v: %w", consoleExceptions, ErrConsoleExceptions)
|
|
}
|
|
|
|
// See:
|
|
// https://github.com/gotenberg/gotenberg/issues/913.
|
|
// https://github.com/gotenberg/gotenberg/issues/959.
|
|
loadingFailedMu.RLock()
|
|
defer loadingFailedMu.RUnlock()
|
|
|
|
if loadingFailed != nil {
|
|
return fmt.Errorf("%v: %w", loadingFailed, ErrLoadingFailed)
|
|
}
|
|
|
|
// See https://github.com/gotenberg/gotenberg/issues/1021.
|
|
if options.FailOnResourceLoadingFailed {
|
|
if resourceLoadingFailed != nil {
|
|
return fmt.Errorf("%v: %w", resourceLoadingFailed, ErrResourceLoadingFailed)
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// Interface guards.
|
|
var (
|
|
_ gotenberg.Process = (*chromiumBrowser)(nil)
|
|
_ browser = (*chromiumBrowser)(nil)
|
|
)
|