mirror of
https://github.com/gotenberg/gotenberg.git
synced 2026-07-02 00:17:40 +08:00
1046 lines
32 KiB
Go
1046 lines
32 KiB
Go
package chromium
|
|
|
|
import (
|
|
"bytes"
|
|
"encoding/json"
|
|
"errors"
|
|
"fmt"
|
|
"html/template"
|
|
"net/http"
|
|
"net/url"
|
|
"os"
|
|
"path/filepath"
|
|
"strconv"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/dlclark/regexp2"
|
|
"github.com/gomarkdown/markdown"
|
|
"github.com/labstack/echo/v4"
|
|
"github.com/microcosm-cc/bluemonday"
|
|
|
|
"github.com/gotenberg/gotenberg/v8/pkg/gotenberg"
|
|
"github.com/gotenberg/gotenberg/v8/pkg/modules/api"
|
|
"github.com/gotenberg/gotenberg/v8/pkg/modules/pdfengines"
|
|
)
|
|
|
|
var sameSiteRegexp = regexp2.MustCompile(
|
|
`("sameSite"\s*:\s*")(?i:(lax|strict|none))(")`,
|
|
regexp2.None,
|
|
)
|
|
|
|
// FormDataChromiumOptions creates [Options] from the form data.
|
|
//
|
|
// It falls back to the default value if the considered key is not present.
|
|
//
|
|
// JSON-encoded fields:
|
|
// - failOnHttpStatusCodes: []int
|
|
// - failOnResourceHttpStatusCodes: []int
|
|
// - ignoreResourceHttpStatusDomains: []string
|
|
// - cookies: []Cookie
|
|
// - extraHttpHeaders: map[string]string
|
|
// - emulatedMediaFeatures: map[string]string
|
|
//
|
|
// Domain filtering only applies to resource checks triggered by
|
|
// "failOnResourceHttpStatusCodes".
|
|
func FormDataChromiumOptions(ctx *api.Context) (*api.FormData, Options) {
|
|
defaultOptions := DefaultOptions()
|
|
|
|
var (
|
|
skipNetworkIdleEvent bool
|
|
skipNetworkAlmostIdleEvent bool
|
|
failOnHttpStatusCodes []int64
|
|
failOnResourceHttpStatusCodes []int64
|
|
ignoreResourceHttpStatusDomains []string
|
|
failOnResourceLoadingFailed bool
|
|
failOnConsoleExceptions bool
|
|
waitDelay time.Duration
|
|
waitWindowStatus string
|
|
waitForExpression string
|
|
waitForSelector string
|
|
cookies []Cookie
|
|
userAgent string
|
|
extraHttpHeaders []ExtraHttpHeader
|
|
emulatedMediaType string
|
|
emulatedMediaFeatures []EmulatedMediaFeature
|
|
omitBackground bool
|
|
)
|
|
|
|
form := ctx.FormData().
|
|
Bool("skipNetworkIdleEvent", &skipNetworkIdleEvent, defaultOptions.SkipNetworkIdleEvent).
|
|
Bool("skipNetworkAlmostIdleEvent", &skipNetworkAlmostIdleEvent, defaultOptions.SkipNetworkAlmostIdleEvent).
|
|
Custom("failOnHttpStatusCodes", func(value string) error {
|
|
if value == "" {
|
|
failOnHttpStatusCodes = defaultOptions.FailOnHttpStatusCodes
|
|
return nil
|
|
}
|
|
|
|
err := json.Unmarshal([]byte(value), &failOnHttpStatusCodes)
|
|
if err != nil {
|
|
return fmt.Errorf("unmarshal failOnHttpStatusCodes: %w", err)
|
|
}
|
|
|
|
return nil
|
|
}).
|
|
Custom("failOnResourceHttpStatusCodes", func(value string) error {
|
|
if value == "" {
|
|
failOnResourceHttpStatusCodes = defaultOptions.FailOnResourceHttpStatusCodes
|
|
return nil
|
|
}
|
|
|
|
err := json.Unmarshal([]byte(value), &failOnResourceHttpStatusCodes)
|
|
if err != nil {
|
|
return fmt.Errorf("unmarshal failOnResourceHttpStatusCodes: %w", err)
|
|
}
|
|
|
|
return nil
|
|
}).
|
|
Custom("ignoreResourceHttpStatusDomains", func(value string) error {
|
|
if value == "" {
|
|
ignoreResourceHttpStatusDomains = defaultOptions.IgnoreResourceHttpStatusDomains
|
|
return nil
|
|
}
|
|
|
|
err := json.Unmarshal([]byte(value), &ignoreResourceHttpStatusDomains)
|
|
if err != nil {
|
|
return fmt.Errorf("unmarshal ignoreResourceHttpStatusDomains: %w", err)
|
|
}
|
|
|
|
return nil
|
|
}).
|
|
Bool("failOnResourceLoadingFailed", &failOnResourceLoadingFailed, defaultOptions.FailOnResourceLoadingFailed).
|
|
Bool("failOnConsoleExceptions", &failOnConsoleExceptions, defaultOptions.FailOnConsoleExceptions).
|
|
Duration("waitDelay", &waitDelay, defaultOptions.WaitDelay).
|
|
String("waitWindowStatus", &waitWindowStatus, defaultOptions.WaitWindowStatus).
|
|
String("waitForExpression", &waitForExpression, defaultOptions.WaitForExpression).
|
|
String("waitForSelector", &waitForSelector, defaultOptions.WaitForSelector).
|
|
Custom("cookies", func(value string) error {
|
|
if value == "" {
|
|
cookies = defaultOptions.Cookies
|
|
return nil
|
|
}
|
|
|
|
// sameSite attribute from cookies must accept case-insensitive
|
|
// values.
|
|
// See https://github.com/gotenberg/gotenberg/issues/1331.
|
|
normalized, err := sameSiteRegexp.ReplaceFunc(value, func(m regexp2.Match) string {
|
|
groups := m.Groups()
|
|
provided := groups[2].String()
|
|
var canon string
|
|
switch strings.ToLower(provided) {
|
|
case "lax":
|
|
canon = "Lax"
|
|
case "strict":
|
|
canon = "Strict"
|
|
case "none":
|
|
canon = "None"
|
|
default:
|
|
canon = provided
|
|
}
|
|
return groups[1].String() + canon + groups[3].String()
|
|
}, -1, -1)
|
|
if err != nil {
|
|
return fmt.Errorf("normalize sameSite from cookies: %w", err)
|
|
}
|
|
|
|
err = json.Unmarshal([]byte(normalized), &cookies)
|
|
if err != nil {
|
|
return fmt.Errorf("unmarshal cookies: %w", err)
|
|
}
|
|
|
|
for i, cookie := range cookies {
|
|
if strings.TrimSpace(cookie.Name) == "" || strings.TrimSpace(cookie.Value) == "" || strings.TrimSpace(cookie.Domain) == "" {
|
|
err = errors.Join(err, fmt.Errorf("cookie %d must have its name, value and domain set", i))
|
|
}
|
|
}
|
|
|
|
return err
|
|
}).
|
|
String("userAgent", &userAgent, defaultOptions.UserAgent).
|
|
Custom("extraHttpHeaders", func(value string) error {
|
|
if value == "" {
|
|
extraHttpHeaders = defaultOptions.ExtraHttpHeaders
|
|
return nil
|
|
}
|
|
|
|
var headers map[string]string
|
|
err := json.Unmarshal([]byte(value), &headers)
|
|
if err != nil {
|
|
return fmt.Errorf("unmarshal extraHttpHeaders: %w", err)
|
|
}
|
|
|
|
for k, v := range headers {
|
|
var scope string
|
|
var valueTokens []string
|
|
var invalidScopeToken bool
|
|
|
|
tokens := strings.SplitSeq(v, ";")
|
|
for token := range tokens {
|
|
if strings.HasPrefix(strings.ToLower(strings.TrimSpace(token)), "scope") {
|
|
tokenNoSpaces := strings.Join(strings.Fields(token), "")
|
|
parts := strings.SplitN(tokenNoSpaces, "=", 2)
|
|
|
|
if len(parts) == 2 && strings.ToLower(parts[0]) == "scope" && parts[1] != "" {
|
|
scope = parts[1]
|
|
} else {
|
|
err = errors.Join(err, fmt.Errorf("invalid scope '%s' for header '%s'", scope, k))
|
|
invalidScopeToken = true
|
|
break
|
|
}
|
|
} else if token != "" {
|
|
valueTokens = append(valueTokens, token)
|
|
}
|
|
}
|
|
|
|
if invalidScopeToken {
|
|
continue
|
|
}
|
|
|
|
var scopeRegexp *regexp2.Regexp
|
|
if len(scope) > 0 {
|
|
p, errCompile := regexp2.Compile(scope, regexp2.None)
|
|
if errCompile != nil {
|
|
err = errors.Join(err, fmt.Errorf("invalid scope regex pattern for header '%s': %w", k, errCompile))
|
|
continue
|
|
}
|
|
p.MatchTimeout = 5 * time.Second
|
|
scopeRegexp = p
|
|
}
|
|
|
|
extraHttpHeaders = append(extraHttpHeaders, ExtraHttpHeader{
|
|
Name: k,
|
|
Value: strings.Join(valueTokens, "; "),
|
|
Scope: scopeRegexp,
|
|
})
|
|
}
|
|
|
|
return err
|
|
}).
|
|
Custom("emulatedMediaType", func(value string) error {
|
|
if value == "" {
|
|
emulatedMediaType = defaultOptions.EmulatedMediaType
|
|
return nil
|
|
}
|
|
|
|
if value != "screen" && value != "print" {
|
|
return errors.New("wrong value, expected either 'screen', 'print' or empty")
|
|
}
|
|
|
|
emulatedMediaType = value
|
|
|
|
return nil
|
|
}).
|
|
Custom("emulatedMediaFeatures", func(value string) error {
|
|
if value == "" {
|
|
emulatedMediaFeatures = defaultOptions.EmulatedMediaFeatures
|
|
return nil
|
|
}
|
|
|
|
var features map[string]string
|
|
err := json.Unmarshal([]byte(value), &features)
|
|
if err != nil {
|
|
return fmt.Errorf("unmarshal emulatedMediaFeatures: %w", err)
|
|
}
|
|
|
|
for k, v := range features {
|
|
emulatedMediaFeatures = append(emulatedMediaFeatures, EmulatedMediaFeature{
|
|
Name: k,
|
|
Value: v,
|
|
})
|
|
}
|
|
|
|
return err
|
|
}).
|
|
Bool("omitBackground", &omitBackground, defaultOptions.OmitBackground)
|
|
|
|
options := Options{
|
|
SkipNetworkIdleEvent: skipNetworkIdleEvent,
|
|
SkipNetworkAlmostIdleEvent: skipNetworkAlmostIdleEvent,
|
|
FailOnHttpStatusCodes: failOnHttpStatusCodes,
|
|
FailOnResourceHttpStatusCodes: failOnResourceHttpStatusCodes,
|
|
IgnoreResourceHttpStatusDomains: ignoreResourceHttpStatusDomains,
|
|
FailOnResourceLoadingFailed: failOnResourceLoadingFailed,
|
|
FailOnConsoleExceptions: failOnConsoleExceptions,
|
|
WaitDelay: waitDelay,
|
|
WaitWindowStatus: waitWindowStatus,
|
|
WaitForExpression: waitForExpression,
|
|
WaitForSelector: waitForSelector,
|
|
Cookies: cookies,
|
|
UserAgent: userAgent,
|
|
ExtraHttpHeaders: extraHttpHeaders,
|
|
EmulatedMediaType: emulatedMediaType,
|
|
EmulatedMediaFeatures: emulatedMediaFeatures,
|
|
OmitBackground: omitBackground,
|
|
}
|
|
|
|
return form, options
|
|
}
|
|
|
|
// FormDataChromiumPdfOptions creates [PdfOptions] from the form data. Fallback to
|
|
// the default value if the considered key is not present.
|
|
func FormDataChromiumPdfOptions(ctx *api.Context) (*api.FormData, PdfOptions) {
|
|
form, options := FormDataChromiumOptions(ctx)
|
|
defaultPdfOptions := DefaultPdfOptions()
|
|
|
|
var (
|
|
landscape, printBackground, singlePage bool
|
|
scale, paperWidth, paperHeight float64
|
|
marginTop, marginBottom, marginLeft, marginRight float64
|
|
pageRanges string
|
|
headerTemplate, footerTemplate string
|
|
preferCssPageSize bool
|
|
generateDocumentOutline bool
|
|
generateTaggedPdf bool
|
|
)
|
|
|
|
form.
|
|
Bool("landscape", &landscape, defaultPdfOptions.Landscape).
|
|
Bool("printBackground", &printBackground, defaultPdfOptions.PrintBackground).
|
|
Float64("scale", &scale, defaultPdfOptions.Scale).
|
|
Bool("singlePage", &singlePage, defaultPdfOptions.SinglePage).
|
|
Inches("paperWidth", &paperWidth, defaultPdfOptions.PaperWidth).
|
|
Inches("paperHeight", &paperHeight, defaultPdfOptions.PaperHeight).
|
|
Inches("marginTop", &marginTop, defaultPdfOptions.MarginTop).
|
|
Inches("marginBottom", &marginBottom, defaultPdfOptions.MarginBottom).
|
|
Inches("marginLeft", &marginLeft, defaultPdfOptions.MarginLeft).
|
|
Inches("marginRight", &marginRight, defaultPdfOptions.MarginRight).
|
|
String("nativePageRanges", &pageRanges, defaultPdfOptions.PageRanges).
|
|
Content("header.html", &headerTemplate, defaultPdfOptions.HeaderTemplate).
|
|
Content("footer.html", &footerTemplate, defaultPdfOptions.FooterTemplate).
|
|
Bool("preferCssPageSize", &preferCssPageSize, defaultPdfOptions.PreferCssPageSize).
|
|
Bool("generateDocumentOutline", &generateDocumentOutline, defaultPdfOptions.GenerateDocumentOutline).
|
|
Bool("generateTaggedPdf", &generateTaggedPdf, defaultPdfOptions.GenerateTaggedPdf)
|
|
|
|
pdfOptions := PdfOptions{
|
|
Options: options,
|
|
Landscape: landscape,
|
|
PrintBackground: printBackground,
|
|
Scale: scale,
|
|
SinglePage: singlePage,
|
|
PaperWidth: paperWidth,
|
|
PaperHeight: paperHeight,
|
|
MarginTop: marginTop,
|
|
MarginBottom: marginBottom,
|
|
MarginLeft: marginLeft,
|
|
MarginRight: marginRight,
|
|
PageRanges: pageRanges,
|
|
HeaderTemplate: headerTemplate,
|
|
FooterTemplate: footerTemplate,
|
|
PreferCssPageSize: preferCssPageSize,
|
|
GenerateDocumentOutline: generateDocumentOutline,
|
|
GenerateTaggedPdf: generateTaggedPdf,
|
|
}
|
|
|
|
return form, pdfOptions
|
|
}
|
|
|
|
// FormDataChromiumScreenshotOptions creates [ScreenshotOptions] from the form
|
|
// data. Fallback to the default value if the considered key is not present.
|
|
func FormDataChromiumScreenshotOptions(ctx *api.Context) (*api.FormData, ScreenshotOptions) {
|
|
form, options := FormDataChromiumOptions(ctx)
|
|
defaultScreenshotOptions := DefaultScreenshotOptions()
|
|
|
|
var (
|
|
width, height int
|
|
clip bool
|
|
format string
|
|
quality int
|
|
optimizeForSpeed bool
|
|
deviceScaleFactor float64
|
|
)
|
|
|
|
form.
|
|
Int("width", &width, defaultScreenshotOptions.Width).
|
|
Int("height", &height, defaultScreenshotOptions.Height).
|
|
Bool("clip", &clip, defaultScreenshotOptions.Clip).
|
|
Custom("format", func(value string) error {
|
|
if value == "" {
|
|
format = defaultScreenshotOptions.Format
|
|
return nil
|
|
}
|
|
|
|
if value != "png" && value != "jpeg" && value != "webp" {
|
|
return fmt.Errorf("wrong value, expected either 'png', 'jpeg' or 'webp'")
|
|
}
|
|
|
|
format = value
|
|
|
|
return nil
|
|
}).
|
|
Custom("quality", func(value string) error {
|
|
if value == "" {
|
|
quality = defaultScreenshotOptions.Quality
|
|
return nil
|
|
}
|
|
|
|
intValue, err := strconv.Atoi(value)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if intValue < 0 {
|
|
return errors.New("value is negative")
|
|
}
|
|
|
|
if intValue > 100 {
|
|
return errors.New("value is superior to 100")
|
|
}
|
|
|
|
quality = intValue
|
|
return nil
|
|
}).
|
|
Bool("optimizeForSpeed", &optimizeForSpeed, defaultScreenshotOptions.OptimizeForSpeed).
|
|
Float64("deviceScaleFactor", &deviceScaleFactor, defaultScreenshotOptions.DeviceScaleFactor)
|
|
|
|
screenshotOptions := ScreenshotOptions{
|
|
Options: options,
|
|
Width: width,
|
|
Height: height,
|
|
Clip: clip,
|
|
Format: format,
|
|
Quality: quality,
|
|
OptimizeForSpeed: optimizeForSpeed,
|
|
DeviceScaleFactor: deviceScaleFactor,
|
|
}
|
|
|
|
return form, screenshotOptions
|
|
}
|
|
|
|
// rejectFileScheme returns an HTTP 400 [api] error when rawURL uses the
|
|
// file:// scheme. /forms/chromium/convert/url and
|
|
// /forms/chromium/screenshot/url accept user-supplied URLs and are
|
|
// intended for navigating to remote HTTP(S) resources; allowing file://
|
|
// lets a caller reach Chromium's working directory through the default
|
|
// deny-list's /tmp/ allowance, which exists only to serve main-page
|
|
// HTML/Markdown that the other routes generate. Filter the scheme at the
|
|
// route layer where no request-scoped allowedFilePrefixes exists.
|
|
func rejectFileScheme(rawURL string) error {
|
|
parsed, err := url.Parse(rawURL)
|
|
if err != nil {
|
|
return api.WrapError(
|
|
fmt.Errorf("parse URL: %w", err),
|
|
api.NewSentinelHttpError(http.StatusBadRequest, fmt.Sprintf("Invalid URL: %s", err)),
|
|
)
|
|
}
|
|
if strings.EqualFold(parsed.Scheme, "file") {
|
|
return api.WrapError(
|
|
fmt.Errorf("file:// scheme not allowed on URL route"),
|
|
api.NewSentinelHttpError(
|
|
http.StatusBadRequest,
|
|
"file:// URLs are not accepted on this route. Use the /convert/html or /convert/markdown routes to render local HTML",
|
|
),
|
|
)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// convertUrlRoute returns an [api.Route] which can convert a URL to PDF.
|
|
func convertUrlRoute(chromium Api, engine gotenberg.PdfEngine) api.Route {
|
|
return api.Route{
|
|
Method: http.MethodPost,
|
|
Path: "/forms/chromium/convert/url",
|
|
IsMultipart: true,
|
|
Handler: func(c echo.Context) error {
|
|
ctx := c.Get("context").(*api.Context)
|
|
form, options := FormDataChromiumPdfOptions(ctx)
|
|
mode := pdfengines.FormDataPdfSplitMode(form, false)
|
|
pdfFormats := pdfengines.FormDataPdfFormats(form)
|
|
metadata := pdfengines.FormDataPdfMetadata(form, false)
|
|
encrypt := pdfengines.FormDataPdfEncrypt(form)
|
|
embedPaths := pdfengines.FormDataPdfEmbeds(form)
|
|
watermark := pdfengines.FormDataPdfWatermark(form, false)
|
|
watermarkFile := pdfengines.FormDataPdfWatermarkFile(form)
|
|
stamp := pdfengines.FormDataPdfStamp(form, false)
|
|
stampFile := pdfengines.FormDataPdfStampFile(form)
|
|
rotateAngle, rotatePages := pdfengines.FormDataPdfRotate(form, false)
|
|
embedsMetadata := pdfengines.FormDataPdfEmbedsMetadata(form)
|
|
facturX, facturxXmlPath := pdfengines.FormDataPdfFacturX(form)
|
|
|
|
var url string
|
|
err := form.
|
|
MandatoryString("url", &url).
|
|
Validate()
|
|
if err != nil {
|
|
return fmt.Errorf("validate form data: %w", err)
|
|
}
|
|
|
|
err = rejectFileScheme(url)
|
|
if err != nil {
|
|
return fmt.Errorf("reject URL scheme: %w", err)
|
|
}
|
|
|
|
err = pdfengines.EnsureWatermarkFile(&watermark, watermarkFile)
|
|
if err != nil {
|
|
return fmt.Errorf("validate watermark: %w", err)
|
|
}
|
|
err = pdfengines.EnsureStampFile(&stamp, stampFile)
|
|
if err != nil {
|
|
return fmt.Errorf("validate stamp: %w", err)
|
|
}
|
|
|
|
err = convertUrl(ctx, chromium, engine, url, options, mode, pdfFormats, metadata, encrypt, embedPaths, embedsMetadata, facturX, facturxXmlPath, watermark, stamp, rotateAngle, rotatePages)
|
|
if err != nil {
|
|
return fmt.Errorf("convert URL to PDF: %w", err)
|
|
}
|
|
|
|
return nil
|
|
},
|
|
}
|
|
}
|
|
|
|
// screenshotUrlRoute returns an [api.Route] which can take a screenshot from a
|
|
// URL.
|
|
func screenshotUrlRoute(chromium Api) api.Route {
|
|
return api.Route{
|
|
Method: http.MethodPost,
|
|
Path: "/forms/chromium/screenshot/url",
|
|
IsMultipart: true,
|
|
Handler: func(c echo.Context) error {
|
|
ctx := c.Get("context").(*api.Context)
|
|
form, options := FormDataChromiumScreenshotOptions(ctx)
|
|
|
|
var url string
|
|
err := form.
|
|
MandatoryString("url", &url).
|
|
Validate()
|
|
if err != nil {
|
|
return fmt.Errorf("validate form data: %w", err)
|
|
}
|
|
|
|
err = rejectFileScheme(url)
|
|
if err != nil {
|
|
return fmt.Errorf("reject URL scheme: %w", err)
|
|
}
|
|
|
|
err = screenshotUrl(ctx, chromium, url, options)
|
|
if err != nil {
|
|
return fmt.Errorf("URL screenshot: %w", err)
|
|
}
|
|
|
|
return nil
|
|
},
|
|
}
|
|
}
|
|
|
|
// convertHtmlRoute returns an [api.Route] which can convert an HTML file to
|
|
// PDF.
|
|
func convertHtmlRoute(chromium Api, engine gotenberg.PdfEngine) api.Route {
|
|
return api.Route{
|
|
Method: http.MethodPost,
|
|
Path: "/forms/chromium/convert/html",
|
|
IsMultipart: true,
|
|
Handler: func(c echo.Context) error {
|
|
ctx := c.Get("context").(*api.Context)
|
|
form, options := FormDataChromiumPdfOptions(ctx)
|
|
mode := pdfengines.FormDataPdfSplitMode(form, false)
|
|
pdfFormats := pdfengines.FormDataPdfFormats(form)
|
|
metadata := pdfengines.FormDataPdfMetadata(form, false)
|
|
encrypt := pdfengines.FormDataPdfEncrypt(form)
|
|
embedPaths := pdfengines.FormDataPdfEmbeds(form)
|
|
watermark := pdfengines.FormDataPdfWatermark(form, false)
|
|
watermarkFile := pdfengines.FormDataPdfWatermarkFile(form)
|
|
stamp := pdfengines.FormDataPdfStamp(form, false)
|
|
stampFile := pdfengines.FormDataPdfStampFile(form)
|
|
rotateAngle, rotatePages := pdfengines.FormDataPdfRotate(form, false)
|
|
embedsMetadata := pdfengines.FormDataPdfEmbedsMetadata(form)
|
|
facturX, facturxXmlPath := pdfengines.FormDataPdfFacturX(form)
|
|
|
|
var inputPath string
|
|
err := form.
|
|
MandatoryPath("index.html", &inputPath).
|
|
Validate()
|
|
if err != nil {
|
|
return fmt.Errorf("validate form data: %w", err)
|
|
}
|
|
|
|
err = pdfengines.EnsureWatermarkFile(&watermark, watermarkFile)
|
|
if err != nil {
|
|
return fmt.Errorf("validate watermark: %w", err)
|
|
}
|
|
err = pdfengines.EnsureStampFile(&stamp, stampFile)
|
|
if err != nil {
|
|
return fmt.Errorf("validate stamp: %w", err)
|
|
}
|
|
|
|
url := fmt.Sprintf("file://%s", inputPath)
|
|
options.AllowedFilePrefixes = []string{ctx.DirPath()}
|
|
err = convertUrl(ctx, chromium, engine, url, options, mode, pdfFormats, metadata, encrypt, embedPaths, embedsMetadata, facturX, facturxXmlPath, watermark, stamp, rotateAngle, rotatePages)
|
|
if err != nil {
|
|
return fmt.Errorf("convert HTML to PDF: %w", err)
|
|
}
|
|
|
|
return nil
|
|
},
|
|
}
|
|
}
|
|
|
|
// screenshotHtmlRoute returns an [api.Route] which can take a screenshot from
|
|
// an HTML file.
|
|
func screenshotHtmlRoute(chromium Api) api.Route {
|
|
return api.Route{
|
|
Method: http.MethodPost,
|
|
Path: "/forms/chromium/screenshot/html",
|
|
IsMultipart: true,
|
|
Handler: func(c echo.Context) error {
|
|
ctx := c.Get("context").(*api.Context)
|
|
form, options := FormDataChromiumScreenshotOptions(ctx)
|
|
|
|
var inputPath string
|
|
err := form.
|
|
MandatoryPath("index.html", &inputPath).
|
|
Validate()
|
|
if err != nil {
|
|
return fmt.Errorf("validate form data: %w", err)
|
|
}
|
|
|
|
url := fmt.Sprintf("file://%s", inputPath)
|
|
options.AllowedFilePrefixes = []string{ctx.DirPath()}
|
|
err = screenshotUrl(ctx, chromium, url, options)
|
|
if err != nil {
|
|
return fmt.Errorf("HTML screenshot: %w", err)
|
|
}
|
|
|
|
return nil
|
|
},
|
|
}
|
|
}
|
|
|
|
// convertMarkdownRoute returns an [api.Route] which can convert markdown files
|
|
// to PDF.
|
|
func convertMarkdownRoute(chromium Api, engine gotenberg.PdfEngine) api.Route {
|
|
return api.Route{
|
|
Method: http.MethodPost,
|
|
Path: "/forms/chromium/convert/markdown",
|
|
IsMultipart: true,
|
|
Handler: func(c echo.Context) error {
|
|
ctx := c.Get("context").(*api.Context)
|
|
form, options := FormDataChromiumPdfOptions(ctx)
|
|
mode := pdfengines.FormDataPdfSplitMode(form, false)
|
|
pdfFormats := pdfengines.FormDataPdfFormats(form)
|
|
metadata := pdfengines.FormDataPdfMetadata(form, false)
|
|
encrypt := pdfengines.FormDataPdfEncrypt(form)
|
|
embedPaths := pdfengines.FormDataPdfEmbeds(form)
|
|
watermark := pdfengines.FormDataPdfWatermark(form, false)
|
|
watermarkFile := pdfengines.FormDataPdfWatermarkFile(form)
|
|
stamp := pdfengines.FormDataPdfStamp(form, false)
|
|
stampFile := pdfengines.FormDataPdfStampFile(form)
|
|
rotateAngle, rotatePages := pdfengines.FormDataPdfRotate(form, false)
|
|
embedsMetadata := pdfengines.FormDataPdfEmbedsMetadata(form)
|
|
facturX, facturxXmlPath := pdfengines.FormDataPdfFacturX(form)
|
|
|
|
var (
|
|
inputPath string
|
|
markdownPaths []string
|
|
)
|
|
|
|
err := form.
|
|
MandatoryPath("index.html", &inputPath).
|
|
MandatoryPaths([]string{".md"}, &markdownPaths).
|
|
Validate()
|
|
if err != nil {
|
|
return fmt.Errorf("validate form data: %w", err)
|
|
}
|
|
|
|
err = pdfengines.EnsureWatermarkFile(&watermark, watermarkFile)
|
|
if err != nil {
|
|
return fmt.Errorf("validate watermark: %w", err)
|
|
}
|
|
err = pdfengines.EnsureStampFile(&stamp, stampFile)
|
|
if err != nil {
|
|
return fmt.Errorf("validate stamp: %w", err)
|
|
}
|
|
|
|
url, err := markdownToHtml(ctx, inputPath, markdownPaths)
|
|
if err != nil {
|
|
return fmt.Errorf("transform markdown file(s) to HTML: %w", err)
|
|
}
|
|
|
|
options.AllowedFilePrefixes = []string{ctx.DirPath()}
|
|
err = convertUrl(ctx, chromium, engine, url, options, mode, pdfFormats, metadata, encrypt, embedPaths, embedsMetadata, facturX, facturxXmlPath, watermark, stamp, rotateAngle, rotatePages)
|
|
if err != nil {
|
|
return fmt.Errorf("convert markdown to PDF: %w", err)
|
|
}
|
|
|
|
return nil
|
|
},
|
|
}
|
|
}
|
|
|
|
// screenshotMarkdownRoute returns an [api.Route] which can take a screenshot
|
|
// from Markdown files.
|
|
func screenshotMarkdownRoute(chromium Api) api.Route {
|
|
return api.Route{
|
|
Method: http.MethodPost,
|
|
Path: "/forms/chromium/screenshot/markdown",
|
|
IsMultipart: true,
|
|
Handler: func(c echo.Context) error {
|
|
ctx := c.Get("context").(*api.Context)
|
|
form, options := FormDataChromiumScreenshotOptions(ctx)
|
|
|
|
var (
|
|
inputPath string
|
|
markdownPaths []string
|
|
)
|
|
|
|
err := form.
|
|
MandatoryPath("index.html", &inputPath).
|
|
MandatoryPaths([]string{".md"}, &markdownPaths).
|
|
Validate()
|
|
if err != nil {
|
|
return fmt.Errorf("validate form data: %w", err)
|
|
}
|
|
|
|
url, err := markdownToHtml(ctx, inputPath, markdownPaths)
|
|
if err != nil {
|
|
return fmt.Errorf("transform markdown file(s) to HTML: %w", err)
|
|
}
|
|
|
|
options.AllowedFilePrefixes = []string{ctx.DirPath()}
|
|
err = screenshotUrl(ctx, chromium, url, options)
|
|
if err != nil {
|
|
return fmt.Errorf("markdown screenshot: %w", err)
|
|
}
|
|
|
|
return nil
|
|
},
|
|
}
|
|
}
|
|
|
|
func markdownToHtml(ctx *api.Context, inputPath string, markdownPaths []string) (string, error) {
|
|
// We have to convert each Markdown file referenced in the HTML
|
|
// file to... HTML. Thanks to the "html/template" package, we are
|
|
// able to provide the "toHTML" function which the user may call
|
|
// directly inside the HTML file.
|
|
|
|
var markdownFilesNotFoundErr error
|
|
|
|
tmpl, err := template.
|
|
New(filepath.Base(inputPath)).
|
|
Funcs(template.FuncMap{
|
|
"toHTML": func(filename string) (template.HTML, error) {
|
|
var path string
|
|
|
|
for _, markdownPath := range markdownPaths {
|
|
markdownFilename := ctx.OriginalFilename(markdownPath)
|
|
|
|
if filename == markdownFilename {
|
|
path = markdownPath
|
|
break
|
|
}
|
|
}
|
|
|
|
if path == "" {
|
|
markdownFilesNotFoundErr = errors.Join(
|
|
markdownFilesNotFoundErr,
|
|
fmt.Errorf("'%s'", filename),
|
|
)
|
|
|
|
return "", nil
|
|
}
|
|
|
|
b, err := os.ReadFile(path)
|
|
if err != nil {
|
|
return "", fmt.Errorf("read markdown file '%s': %w", filename, err)
|
|
}
|
|
|
|
unsafe := markdown.ToHTML(b, nil, nil)
|
|
sanitized := bluemonday.UGCPolicy().SanitizeBytes(unsafe)
|
|
|
|
// #nosec
|
|
return template.HTML(sanitized), nil
|
|
},
|
|
}).ParseFiles(inputPath)
|
|
if err != nil {
|
|
return "", fmt.Errorf("parse template file: %w", err)
|
|
}
|
|
|
|
var buffer bytes.Buffer
|
|
|
|
err = tmpl.Execute(&buffer, &struct{}{})
|
|
if err != nil {
|
|
return "", fmt.Errorf("execute template: %w", err)
|
|
}
|
|
|
|
if markdownFilesNotFoundErr != nil {
|
|
return "", api.WrapError(
|
|
fmt.Errorf("markdown files not found: %w", markdownFilesNotFoundErr),
|
|
api.NewSentinelHttpError(
|
|
http.StatusBadRequest,
|
|
fmt.Sprintf("Markdown file(s) not found: %s", markdownFilesNotFoundErr),
|
|
),
|
|
)
|
|
}
|
|
|
|
inputPath = ctx.GeneratePath(".html")
|
|
|
|
err = os.WriteFile(inputPath, buffer.Bytes(), 0o600)
|
|
if err != nil {
|
|
return "", fmt.Errorf("write template result: %w", err)
|
|
}
|
|
|
|
return fmt.Sprintf("file://%s", inputPath), nil
|
|
}
|
|
|
|
func convertUrl(ctx *api.Context, chromium Api, engine gotenberg.PdfEngine, url string, options PdfOptions, mode gotenberg.SplitMode, pdfFormats gotenberg.PdfFormats, metadata map[string]any, encrypt gotenberg.EncryptOptions, embedPaths []string, embedsMetadata map[string]map[string]string, facturX gotenberg.FacturX, facturxXmlPath string, watermark, stamp gotenberg.Stamp, rotateAngle int, rotatePages string) error {
|
|
outputPath := ctx.GeneratePath(".pdf")
|
|
// See https://github.com/gotenberg/gotenberg/issues/1130.
|
|
filename := ctx.OutputFilename(outputPath)
|
|
outputPath = ctx.GeneratePathFromFilename(filename)
|
|
|
|
err := chromium.Pdf(ctx, ctx.Log(), url, outputPath, options)
|
|
err = handleChromiumError(err, options.Options)
|
|
if err != nil {
|
|
if errors.Is(err, ErrOmitBackgroundWithoutPrintBackground) {
|
|
return api.WrapError(
|
|
fmt.Errorf("convert to PDF: %w", err),
|
|
api.NewSentinelHttpError(
|
|
http.StatusBadRequest,
|
|
"omitBackground requires printBackground set to true",
|
|
),
|
|
)
|
|
}
|
|
|
|
if errors.Is(err, ErrPrintingFailed) {
|
|
return api.WrapError(
|
|
fmt.Errorf("convert to PDF: %w", err),
|
|
api.NewSentinelHttpError(
|
|
http.StatusBadRequest,
|
|
"Chromium failed to print the PDF; this usually happens when the page is too large",
|
|
),
|
|
)
|
|
}
|
|
|
|
if errors.Is(err, ErrInvalidPrinterSettings) {
|
|
return api.WrapError(
|
|
fmt.Errorf("convert to PDF: %w", err),
|
|
api.NewSentinelHttpError(
|
|
http.StatusBadRequest,
|
|
"Chromium does not handle the provided settings; please check for aberrant form values",
|
|
),
|
|
)
|
|
}
|
|
|
|
if errors.Is(err, ErrPageRangesExceedsPageCount) {
|
|
return api.WrapError(
|
|
fmt.Errorf("convert to PDF: %w", err),
|
|
api.NewSentinelHttpError(
|
|
http.StatusBadRequest,
|
|
fmt.Sprintf("The page ranges '%s' (nativePageRanges) exceeds the page count", options.PageRanges),
|
|
),
|
|
)
|
|
}
|
|
|
|
if errors.Is(err, ErrPageRangesSyntaxError) {
|
|
return api.WrapError(
|
|
fmt.Errorf("convert to PDF: %w", err),
|
|
api.NewSentinelHttpError(
|
|
http.StatusBadRequest,
|
|
fmt.Sprintf("Chromium does not handle the page ranges '%s' (nativePageRanges) syntax", options.PageRanges),
|
|
),
|
|
)
|
|
}
|
|
|
|
return fmt.Errorf("convert to PDF: %w", err)
|
|
}
|
|
|
|
err = pdfengines.ValidatePdfFormatsCompat(pdfFormats, encrypt.UserPassword, embedPaths)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
err = pdfengines.ValidatePdfEncryptCompat(encrypt)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
err = pdfengines.ValidateFacturXCompat(facturX, facturxXmlPath, pdfFormats)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
outputPaths, err := pdfengines.SplitPdfStub(ctx, engine, mode, []string{outputPath})
|
|
if err != nil {
|
|
return fmt.Errorf("split PDF: %w", err)
|
|
}
|
|
|
|
err = pdfengines.WatermarkStub(ctx, engine, watermark, outputPaths)
|
|
if err != nil {
|
|
return fmt.Errorf("watermark PDFs: %w", err)
|
|
}
|
|
|
|
err = pdfengines.StampStub(ctx, engine, stamp, outputPaths)
|
|
if err != nil {
|
|
return fmt.Errorf("stamp PDFs: %w", err)
|
|
}
|
|
|
|
err = pdfengines.RotateStub(ctx, engine, rotateAngle, rotatePages, outputPaths)
|
|
if err != nil {
|
|
return fmt.Errorf("rotate PDFs: %w", err)
|
|
}
|
|
|
|
pdfFormats = pdfengines.FacturXPdfFormats(ctx, engine, facturX, pdfFormats, true, nil)
|
|
|
|
convertOutputPaths, err := pdfengines.ConvertStub(ctx, engine, pdfFormats, outputPaths)
|
|
if err != nil {
|
|
return fmt.Errorf("convert PDF(s): %w", err)
|
|
}
|
|
|
|
// Metadata, embeds are written after Convert, as LibreOffice
|
|
// strips them during PDF/A conversion.
|
|
err = pdfengines.WriteMetadataStub(ctx, engine, metadata, convertOutputPaths)
|
|
if err != nil {
|
|
return fmt.Errorf("write metadata: %w", err)
|
|
}
|
|
|
|
err = pdfengines.EmbedFilesStub(ctx, engine, embedPaths, convertOutputPaths)
|
|
if err != nil {
|
|
return fmt.Errorf("embed files into PDFs: %w", err)
|
|
}
|
|
|
|
err = pdfengines.EmbedFilesMetadataStub(ctx, engine, embedsMetadata, convertOutputPaths)
|
|
if err != nil {
|
|
return fmt.Errorf("set embeds metadata: %w", err)
|
|
}
|
|
|
|
err = pdfengines.ApplyFacturXStub(ctx, engine, facturX, facturxXmlPath, convertOutputPaths)
|
|
if err != nil {
|
|
return fmt.Errorf("apply Factur-X: %w", err)
|
|
}
|
|
|
|
err = pdfengines.EncryptPdfStub(ctx, engine, encrypt, convertOutputPaths)
|
|
if err != nil {
|
|
return fmt.Errorf("encrypt PDFs: %w", err)
|
|
}
|
|
|
|
zeroValuedSplitMode := gotenberg.SplitMode{}
|
|
zeroValuedPdfFormats := gotenberg.PdfFormats{}
|
|
if mode != zeroValuedSplitMode && pdfFormats != zeroValuedPdfFormats {
|
|
// The PDF has been split and split parts have been converted to a
|
|
// specific format. We want to keep the split naming.
|
|
for i, convertOutputPath := range convertOutputPaths {
|
|
err = ctx.Rename(convertOutputPath, outputPaths[i])
|
|
if err != nil {
|
|
return fmt.Errorf("rename output path: %w", err)
|
|
}
|
|
}
|
|
} else {
|
|
outputPaths = convertOutputPaths
|
|
}
|
|
|
|
err = ctx.AddOutputPaths(outputPaths...)
|
|
if err != nil {
|
|
return fmt.Errorf("add output paths: %w", err)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func screenshotUrl(ctx *api.Context, chromium Api, url string, options ScreenshotOptions) error {
|
|
ext := fmt.Sprintf(".%s", options.Format)
|
|
outputPath := ctx.GeneratePath(ext)
|
|
|
|
err := chromium.Screenshot(ctx, ctx.Log(), url, outputPath, options)
|
|
err = handleChromiumError(err, options.Options)
|
|
if err != nil {
|
|
return fmt.Errorf("screenshot: %w", err)
|
|
}
|
|
|
|
err = ctx.AddOutputPaths(outputPath)
|
|
if err != nil {
|
|
return fmt.Errorf("add output path: %w", err)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func handleChromiumError(err error, options Options) error {
|
|
if err == nil {
|
|
return nil
|
|
}
|
|
|
|
if errors.Is(err, ErrInvalidEvaluationExpression) {
|
|
if options.WaitForExpression == "" {
|
|
// We do not expect the 'waitWindowStatus' form field to return
|
|
// an ErrInvalidEvaluationExpression error. In such a scenario,
|
|
// we return a 500.
|
|
return err
|
|
}
|
|
|
|
return api.WrapError(
|
|
err,
|
|
api.NewSentinelHttpError(
|
|
http.StatusBadRequest,
|
|
fmt.Sprintf("The expression '%s' (waitForExpression) returned an exception or undefined", options.WaitForExpression),
|
|
),
|
|
)
|
|
}
|
|
|
|
if errors.Is(err, ErrInvalidSelectorQuery) {
|
|
if options.WaitForSelector == "" {
|
|
// We only expect to see this error if the user specified a selector.
|
|
// If they didn't and we still generated the error, return a 500.
|
|
return err
|
|
}
|
|
|
|
return api.WrapError(
|
|
err,
|
|
api.NewSentinelHttpError(
|
|
http.StatusBadRequest,
|
|
fmt.Sprintf("The selector '%s' (waitForSelector) returned an exception or undefined", options.WaitForSelector),
|
|
),
|
|
)
|
|
}
|
|
|
|
if errors.Is(err, ErrInvalidHttpStatusCode) {
|
|
return api.WrapError(
|
|
err,
|
|
api.NewSentinelHttpError(
|
|
http.StatusConflict,
|
|
fmt.Sprintf("Invalid HTTP status code from the main page: %s", strings.ReplaceAll(err.Error(), fmt.Sprintf(": %s", ErrInvalidHttpStatusCode.Error()), "")),
|
|
),
|
|
)
|
|
}
|
|
|
|
if errors.Is(err, ErrInvalidResourceHttpStatusCode) {
|
|
return api.WrapError(
|
|
err,
|
|
api.NewSentinelHttpError(
|
|
http.StatusConflict,
|
|
fmt.Sprintf("Invalid HTTP status code from resources:\n%s", strings.ReplaceAll(err.Error(), fmt.Sprintf(": %s", ErrInvalidResourceHttpStatusCode.Error()), "")),
|
|
),
|
|
)
|
|
}
|
|
|
|
if errors.Is(err, ErrConsoleExceptions) {
|
|
return api.WrapError(
|
|
err,
|
|
api.NewSentinelHttpError(
|
|
http.StatusConflict,
|
|
fmt.Sprintf("Chromium console exceptions:\n%s", strings.ReplaceAll(err.Error(), ErrConsoleExceptions.Error(), "")),
|
|
),
|
|
)
|
|
}
|
|
|
|
if errors.Is(err, ErrLoadingFailed) {
|
|
return api.WrapError(
|
|
err,
|
|
api.NewSentinelHttpError(
|
|
http.StatusBadRequest,
|
|
fmt.Sprintf("Chromium returned %v", err),
|
|
),
|
|
)
|
|
}
|
|
|
|
if errors.Is(err, ErrResourceLoadingFailed) {
|
|
return api.WrapError(
|
|
err,
|
|
api.NewSentinelHttpError(
|
|
http.StatusConflict,
|
|
fmt.Sprintf("Chromium failed to load resources: %v", strings.ReplaceAll(err.Error(), fmt.Sprintf(": %s", ErrResourceLoadingFailed.Error()), "")),
|
|
),
|
|
)
|
|
}
|
|
|
|
return err
|
|
}
|