Files
gotenberg/pkg/modules/chromium/routes.go
T

1046 lines
32 KiB
Go

package chromium
import (
"bytes"
"encoding/json"
"errors"
"fmt"
"html/template"
"net/http"
"net/url"
"os"
"path/filepath"
"strconv"
"strings"
"time"
"github.com/dlclark/regexp2"
"github.com/gomarkdown/markdown"
"github.com/labstack/echo/v4"
"github.com/microcosm-cc/bluemonday"
"github.com/gotenberg/gotenberg/v8/pkg/gotenberg"
"github.com/gotenberg/gotenberg/v8/pkg/modules/api"
"github.com/gotenberg/gotenberg/v8/pkg/modules/pdfengines"
)
// sameSiteRegexp matches a `"sameSite": "<value>"` pair in a JSON payload,
// where <value> is lax, strict or none in any letter case. The "sameSite" key
// itself is matched case-sensitively.
//
// Capture groups: 1 is the key up to and including the opening quote of the
// value, 2 is the value itself, and 3 is the closing quote of the value.
var sameSiteRegexp = regexp2.MustCompile(
`("sameSite"\s*:\s*")(?i:(lax|strict|none))(")`,
regexp2.None,
)
// FormDataChromiumOptions creates [Options] from the form data.
//
// It falls back to the default value if the considered key is not present.
//
// JSON-encoded fields:
//   - failOnHttpStatusCodes: []int
//   - failOnResourceHttpStatusCodes: []int
//   - ignoreResourceHttpStatusDomains: []string
//   - cookies: []Cookie
//   - extraHttpHeaders: map[string]string
//   - emulatedMediaFeatures: map[string]string
//
// Domain filtering only applies to resource checks triggered by
// "failOnResourceHttpStatusCodes".
//
// Each "extraHttpHeaders" value is split on ';'. A token starting with
// "scope" must have the form "scope=<regex>"; the regex is compiled and
// attached to the header, and the remaining non-empty tokens are joined back
// with "; " to form the header value.
//
// This function does not call Validate on the returned form; the route
// handlers in this file add their own fields and validate afterwards.
func FormDataChromiumOptions(ctx *api.Context) (*api.FormData, Options) {
	defaultOptions := DefaultOptions()

	var (
		skipNetworkIdleEvent            bool
		skipNetworkAlmostIdleEvent      bool
		failOnHttpStatusCodes           []int64
		failOnResourceHttpStatusCodes   []int64
		ignoreResourceHttpStatusDomains []string
		failOnResourceLoadingFailed     bool
		failOnConsoleExceptions         bool
		waitDelay                       time.Duration
		waitWindowStatus                string
		waitForExpression               string
		waitForSelector                 string
		cookies                         []Cookie
		userAgent                       string
		extraHttpHeaders                []ExtraHttpHeader
		emulatedMediaType               string
		emulatedMediaFeatures           []EmulatedMediaFeature
		omitBackground                  bool
	)

	form := ctx.FormData().
		Bool("skipNetworkIdleEvent", &skipNetworkIdleEvent, defaultOptions.SkipNetworkIdleEvent).
		Bool("skipNetworkAlmostIdleEvent", &skipNetworkAlmostIdleEvent, defaultOptions.SkipNetworkAlmostIdleEvent).
		Custom("failOnHttpStatusCodes", func(value string) error {
			if value == "" {
				failOnHttpStatusCodes = defaultOptions.FailOnHttpStatusCodes
				return nil
			}

			err := json.Unmarshal([]byte(value), &failOnHttpStatusCodes)
			if err != nil {
				return fmt.Errorf("unmarshal failOnHttpStatusCodes: %w", err)
			}

			return nil
		}).
		Custom("failOnResourceHttpStatusCodes", func(value string) error {
			if value == "" {
				failOnResourceHttpStatusCodes = defaultOptions.FailOnResourceHttpStatusCodes
				return nil
			}

			err := json.Unmarshal([]byte(value), &failOnResourceHttpStatusCodes)
			if err != nil {
				return fmt.Errorf("unmarshal failOnResourceHttpStatusCodes: %w", err)
			}

			return nil
		}).
		Custom("ignoreResourceHttpStatusDomains", func(value string) error {
			if value == "" {
				ignoreResourceHttpStatusDomains = defaultOptions.IgnoreResourceHttpStatusDomains
				return nil
			}

			err := json.Unmarshal([]byte(value), &ignoreResourceHttpStatusDomains)
			if err != nil {
				return fmt.Errorf("unmarshal ignoreResourceHttpStatusDomains: %w", err)
			}

			return nil
		}).
		Bool("failOnResourceLoadingFailed", &failOnResourceLoadingFailed, defaultOptions.FailOnResourceLoadingFailed).
		Bool("failOnConsoleExceptions", &failOnConsoleExceptions, defaultOptions.FailOnConsoleExceptions).
		Duration("waitDelay", &waitDelay, defaultOptions.WaitDelay).
		String("waitWindowStatus", &waitWindowStatus, defaultOptions.WaitWindowStatus).
		String("waitForExpression", &waitForExpression, defaultOptions.WaitForExpression).
		String("waitForSelector", &waitForSelector, defaultOptions.WaitForSelector).
		Custom("cookies", func(value string) error {
			if value == "" {
				cookies = defaultOptions.Cookies
				return nil
			}

			// sameSite attribute from cookies must accept case-insensitive
			// values.
			// See https://github.com/gotenberg/gotenberg/issues/1331.
			normalized, err := sameSiteRegexp.ReplaceFunc(value, func(m regexp2.Match) string {
				groups := m.Groups()
				provided := groups[2].String()

				var canon string
				switch strings.ToLower(provided) {
				case "lax":
					canon = "Lax"
				case "strict":
					canon = "Strict"
				case "none":
					canon = "None"
				default:
					canon = provided
				}

				return groups[1].String() + canon + groups[3].String()
			}, -1, -1)
			if err != nil {
				return fmt.Errorf("normalize sameSite from cookies: %w", err)
			}

			err = json.Unmarshal([]byte(normalized), &cookies)
			if err != nil {
				return fmt.Errorf("unmarshal cookies: %w", err)
			}

			// Report every incomplete cookie at once rather than stopping at
			// the first one.
			for i, cookie := range cookies {
				if strings.TrimSpace(cookie.Name) == "" || strings.TrimSpace(cookie.Value) == "" || strings.TrimSpace(cookie.Domain) == "" {
					err = errors.Join(err, fmt.Errorf("cookie %d must have its name, value and domain set", i))
				}
			}

			return err
		}).
		String("userAgent", &userAgent, defaultOptions.UserAgent).
		Custom("extraHttpHeaders", func(value string) error {
			if value == "" {
				extraHttpHeaders = defaultOptions.ExtraHttpHeaders
				return nil
			}

			var headers map[string]string
			err := json.Unmarshal([]byte(value), &headers)
			if err != nil {
				return fmt.Errorf("unmarshal extraHttpHeaders: %w", err)
			}

			for k, v := range headers {
				var scope string
				var valueTokens []string
				var invalidScopeToken bool

				for token := range strings.SplitSeq(v, ";") {
					if strings.HasPrefix(strings.ToLower(strings.TrimSpace(token)), "scope") {
						// Whitespace is dropped so that "scope = x" and
						// "scope=x" are handled alike.
						tokenNoSpaces := strings.Join(strings.Fields(token), "")
						parts := strings.SplitN(tokenNoSpaces, "=", 2)

						if len(parts) == 2 && strings.ToLower(parts[0]) == "scope" && parts[1] != "" {
							scope = parts[1]
						} else {
							// Report the offending token: at this point
							// 'scope' is empty or holds an earlier, valid
							// scope, so it would not describe the problem.
							err = errors.Join(err, fmt.Errorf("invalid scope '%s' for header '%s'", strings.TrimSpace(token), k))
							invalidScopeToken = true
							break
						}
					} else if token != "" {
						valueTokens = append(valueTokens, token)
					}
				}

				if invalidScopeToken {
					continue
				}

				var scopeRegexp *regexp2.Regexp
				if len(scope) > 0 {
					p, errCompile := regexp2.Compile(scope, regexp2.None)
					if errCompile != nil {
						err = errors.Join(err, fmt.Errorf("invalid scope regex pattern for header '%s': %w", k, errCompile))
						continue
					}

					// The pattern is user-supplied: bound its matching time.
					p.MatchTimeout = 5 * time.Second
					scopeRegexp = p
				}

				extraHttpHeaders = append(extraHttpHeaders, ExtraHttpHeader{
					Name:  k,
					Value: strings.Join(valueTokens, "; "),
					Scope: scopeRegexp,
				})
			}

			return err
		}).
		Custom("emulatedMediaType", func(value string) error {
			if value == "" {
				emulatedMediaType = defaultOptions.EmulatedMediaType
				return nil
			}

			if value != "screen" && value != "print" {
				return errors.New("wrong value, expected either 'screen', 'print' or empty")
			}

			emulatedMediaType = value

			return nil
		}).
		Custom("emulatedMediaFeatures", func(value string) error {
			if value == "" {
				emulatedMediaFeatures = defaultOptions.EmulatedMediaFeatures
				return nil
			}

			var features map[string]string
			err := json.Unmarshal([]byte(value), &features)
			if err != nil {
				return fmt.Errorf("unmarshal emulatedMediaFeatures: %w", err)
			}

			for k, v := range features {
				emulatedMediaFeatures = append(emulatedMediaFeatures, EmulatedMediaFeature{
					Name:  k,
					Value: v,
				})
			}

			return nil
		}).
		Bool("omitBackground", &omitBackground, defaultOptions.OmitBackground)

	options := Options{
		SkipNetworkIdleEvent:            skipNetworkIdleEvent,
		SkipNetworkAlmostIdleEvent:      skipNetworkAlmostIdleEvent,
		FailOnHttpStatusCodes:           failOnHttpStatusCodes,
		FailOnResourceHttpStatusCodes:   failOnResourceHttpStatusCodes,
		IgnoreResourceHttpStatusDomains: ignoreResourceHttpStatusDomains,
		FailOnResourceLoadingFailed:     failOnResourceLoadingFailed,
		FailOnConsoleExceptions:         failOnConsoleExceptions,
		WaitDelay:                       waitDelay,
		WaitWindowStatus:                waitWindowStatus,
		WaitForExpression:               waitForExpression,
		WaitForSelector:                 waitForSelector,
		Cookies:                         cookies,
		UserAgent:                       userAgent,
		ExtraHttpHeaders:                extraHttpHeaders,
		EmulatedMediaType:               emulatedMediaType,
		EmulatedMediaFeatures:           emulatedMediaFeatures,
		OmitBackground:                  omitBackground,
	}

	return form, options
}
// FormDataChromiumPdfOptions builds [PdfOptions] from the form data, on top of
// the common options gathered by [FormDataChromiumOptions]. A key that is not
// present in the form takes its value from [DefaultPdfOptions].
func FormDataChromiumPdfOptions(ctx *api.Context) (*api.FormData, PdfOptions) {
	form, baseOptions := FormDataChromiumOptions(ctx)
	fallback := DefaultPdfOptions()

	// Each form field is bound straight to the struct that gets returned.
	pdfOptions := PdfOptions{Options: baseOptions}

	form.
		Bool("landscape", &pdfOptions.Landscape, fallback.Landscape).
		Bool("printBackground", &pdfOptions.PrintBackground, fallback.PrintBackground).
		Float64("scale", &pdfOptions.Scale, fallback.Scale).
		Bool("singlePage", &pdfOptions.SinglePage, fallback.SinglePage).
		Inches("paperWidth", &pdfOptions.PaperWidth, fallback.PaperWidth).
		Inches("paperHeight", &pdfOptions.PaperHeight, fallback.PaperHeight).
		Inches("marginTop", &pdfOptions.MarginTop, fallback.MarginTop).
		Inches("marginBottom", &pdfOptions.MarginBottom, fallback.MarginBottom).
		Inches("marginLeft", &pdfOptions.MarginLeft, fallback.MarginLeft).
		Inches("marginRight", &pdfOptions.MarginRight, fallback.MarginRight).
		String("nativePageRanges", &pdfOptions.PageRanges, fallback.PageRanges).
		Content("header.html", &pdfOptions.HeaderTemplate, fallback.HeaderTemplate).
		Content("footer.html", &pdfOptions.FooterTemplate, fallback.FooterTemplate).
		Bool("preferCssPageSize", &pdfOptions.PreferCssPageSize, fallback.PreferCssPageSize).
		Bool("generateDocumentOutline", &pdfOptions.GenerateDocumentOutline, fallback.GenerateDocumentOutline).
		Bool("generateTaggedPdf", &pdfOptions.GenerateTaggedPdf, fallback.GenerateTaggedPdf)

	return form, pdfOptions
}
// FormDataChromiumScreenshotOptions creates [ScreenshotOptions] from the form
// data, on top of the common options gathered by [FormDataChromiumOptions].
// Fallback to the default value if the considered key is not present.
//
// Validated fields:
//   - format: must be 'png', 'jpeg' or 'webp'.
//   - quality: must be an integer between 0 and 100 (inclusive).
func FormDataChromiumScreenshotOptions(ctx *api.Context) (*api.FormData, ScreenshotOptions) {
	form, options := FormDataChromiumOptions(ctx)
	defaultScreenshotOptions := DefaultScreenshotOptions()

	var (
		width, height     int
		clip              bool
		format            string
		quality           int
		optimizeForSpeed  bool
		deviceScaleFactor float64
	)

	form.
		Int("width", &width, defaultScreenshotOptions.Width).
		Int("height", &height, defaultScreenshotOptions.Height).
		Bool("clip", &clip, defaultScreenshotOptions.Clip).
		Custom("format", func(value string) error {
			if value == "" {
				format = defaultScreenshotOptions.Format
				return nil
			}

			if value != "png" && value != "jpeg" && value != "webp" {
				// Constant message: errors.New, like the other validators.
				return errors.New("wrong value, expected either 'png', 'jpeg' or 'webp'")
			}

			format = value

			return nil
		}).
		Custom("quality", func(value string) error {
			if value == "" {
				quality = defaultScreenshotOptions.Quality
				return nil
			}

			intValue, err := strconv.Atoi(value)
			if err != nil {
				return err
			}

			if intValue < 0 {
				return errors.New("value is negative")
			}

			if intValue > 100 {
				return errors.New("value is superior to 100")
			}

			quality = intValue

			return nil
		}).
		Bool("optimizeForSpeed", &optimizeForSpeed, defaultScreenshotOptions.OptimizeForSpeed).
		Float64("deviceScaleFactor", &deviceScaleFactor, defaultScreenshotOptions.DeviceScaleFactor)

	screenshotOptions := ScreenshotOptions{
		Options:           options,
		Width:             width,
		Height:            height,
		Clip:              clip,
		Format:            format,
		Quality:           quality,
		OptimizeForSpeed:  optimizeForSpeed,
		DeviceScaleFactor: deviceScaleFactor,
	}

	return form, screenshotOptions
}
// rejectFileScheme guards the URL routes (/forms/chromium/convert/url and
// /forms/chromium/screenshot/url) against file:// targets. Those routes take
// user-supplied URLs meant for remote HTTP(S) resources; a file:// URL would
// let a caller reach Chromium's working directory through the default
// deny-list's /tmp/ allowance, which exists only to serve the main-page
// HTML/Markdown generated by the other routes. The check lives at the route
// layer because no request-scoped allowedFilePrefixes exists there.
//
// It returns an HTTP 400 [api] error when rawURL cannot be parsed or when its
// scheme is "file" (compared case-insensitively), and nil otherwise.
func rejectFileScheme(rawURL string) error {
	target, parseErr := url.Parse(rawURL)

	switch {
	case parseErr != nil:
		return api.WrapError(
			fmt.Errorf("parse URL: %w", parseErr),
			api.NewSentinelHttpError(http.StatusBadRequest, fmt.Sprintf("Invalid URL: %s", parseErr)),
		)
	case strings.EqualFold(target.Scheme, "file"):
		return api.WrapError(
			fmt.Errorf("file:// scheme not allowed on URL route"),
			api.NewSentinelHttpError(
				http.StatusBadRequest,
				"file:// URLs are not accepted on this route. Use the /convert/html or /convert/markdown routes to render local HTML",
			),
		)
	default:
		return nil
	}
}
// convertUrlRoute returns the [api.Route] that converts a remote URL to PDF.
//
// The handler gathers the Chromium PDF options and the PDF engine options
// from the multipart form, requires a "url" field, refuses file:// URLs, and
// then hands over to convertUrl.
func convertUrlRoute(chromium Api, engine gotenberg.PdfEngine) api.Route {
	handle := func(c echo.Context) error {
		ctx := c.Get("context").(*api.Context)

		form, options := FormDataChromiumPdfOptions(ctx)
		mode := pdfengines.FormDataPdfSplitMode(form, false)
		pdfFormats := pdfengines.FormDataPdfFormats(form)
		metadata := pdfengines.FormDataPdfMetadata(form, false)
		encrypt := pdfengines.FormDataPdfEncrypt(form)
		embedPaths := pdfengines.FormDataPdfEmbeds(form)
		watermark := pdfengines.FormDataPdfWatermark(form, false)
		watermarkFile := pdfengines.FormDataPdfWatermarkFile(form)
		stamp := pdfengines.FormDataPdfStamp(form, false)
		stampFile := pdfengines.FormDataPdfStampFile(form)
		rotateAngle, rotatePages := pdfengines.FormDataPdfRotate(form, false)
		embedsMetadata := pdfengines.FormDataPdfEmbedsMetadata(form)
		facturX, facturxXmlPath := pdfengines.FormDataPdfFacturX(form)

		var targetURL string
		if err := form.MandatoryString("url", &targetURL).Validate(); err != nil {
			return fmt.Errorf("validate form data: %w", err)
		}

		if err := rejectFileScheme(targetURL); err != nil {
			return fmt.Errorf("reject URL scheme: %w", err)
		}

		if err := pdfengines.EnsureWatermarkFile(&watermark, watermarkFile); err != nil {
			return fmt.Errorf("validate watermark: %w", err)
		}

		if err := pdfengines.EnsureStampFile(&stamp, stampFile); err != nil {
			return fmt.Errorf("validate stamp: %w", err)
		}

		if err := convertUrl(ctx, chromium, engine, targetURL, options, mode, pdfFormats, metadata, encrypt, embedPaths, embedsMetadata, facturX, facturxXmlPath, watermark, stamp, rotateAngle, rotatePages); err != nil {
			return fmt.Errorf("convert URL to PDF: %w", err)
		}

		return nil
	}

	return api.Route{
		Method:      http.MethodPost,
		Path:        "/forms/chromium/convert/url",
		IsMultipart: true,
		Handler:     handle,
	}
}
// screenshotUrlRoute returns the [api.Route] that takes a screenshot of a
// remote URL.
//
// The handler gathers the screenshot options from the multipart form,
// requires a "url" field, refuses file:// URLs, and then hands over to
// screenshotUrl.
func screenshotUrlRoute(chromium Api) api.Route {
	handle := func(c echo.Context) error {
		ctx := c.Get("context").(*api.Context)
		form, options := FormDataChromiumScreenshotOptions(ctx)

		var targetURL string
		if err := form.MandatoryString("url", &targetURL).Validate(); err != nil {
			return fmt.Errorf("validate form data: %w", err)
		}

		if err := rejectFileScheme(targetURL); err != nil {
			return fmt.Errorf("reject URL scheme: %w", err)
		}

		if err := screenshotUrl(ctx, chromium, targetURL, options); err != nil {
			return fmt.Errorf("URL screenshot: %w", err)
		}

		return nil
	}

	return api.Route{
		Method:      http.MethodPost,
		Path:        "/forms/chromium/screenshot/url",
		IsMultipart: true,
		Handler:     handle,
	}
}
// convertHtmlRoute returns the [api.Route] that converts an uploaded HTML
// file to PDF.
//
// The handler requires an "index.html" file in the multipart form, exposes it
// to Chromium as a file:// URL, and limits file access to the request
// directory (ctx.DirPath()) before handing over to convertUrl.
func convertHtmlRoute(chromium Api, engine gotenberg.PdfEngine) api.Route {
	handle := func(c echo.Context) error {
		ctx := c.Get("context").(*api.Context)

		form, options := FormDataChromiumPdfOptions(ctx)
		mode := pdfengines.FormDataPdfSplitMode(form, false)
		pdfFormats := pdfengines.FormDataPdfFormats(form)
		metadata := pdfengines.FormDataPdfMetadata(form, false)
		encrypt := pdfengines.FormDataPdfEncrypt(form)
		embedPaths := pdfengines.FormDataPdfEmbeds(form)
		watermark := pdfengines.FormDataPdfWatermark(form, false)
		watermarkFile := pdfengines.FormDataPdfWatermarkFile(form)
		stamp := pdfengines.FormDataPdfStamp(form, false)
		stampFile := pdfengines.FormDataPdfStampFile(form)
		rotateAngle, rotatePages := pdfengines.FormDataPdfRotate(form, false)
		embedsMetadata := pdfengines.FormDataPdfEmbedsMetadata(form)
		facturX, facturxXmlPath := pdfengines.FormDataPdfFacturX(form)

		var indexPath string
		if err := form.MandatoryPath("index.html", &indexPath).Validate(); err != nil {
			return fmt.Errorf("validate form data: %w", err)
		}

		if err := pdfengines.EnsureWatermarkFile(&watermark, watermarkFile); err != nil {
			return fmt.Errorf("validate watermark: %w", err)
		}

		if err := pdfengines.EnsureStampFile(&stamp, stampFile); err != nil {
			return fmt.Errorf("validate stamp: %w", err)
		}

		pageURL := fmt.Sprintf("file://%s", indexPath)
		options.AllowedFilePrefixes = []string{ctx.DirPath()}

		if err := convertUrl(ctx, chromium, engine, pageURL, options, mode, pdfFormats, metadata, encrypt, embedPaths, embedsMetadata, facturX, facturxXmlPath, watermark, stamp, rotateAngle, rotatePages); err != nil {
			return fmt.Errorf("convert HTML to PDF: %w", err)
		}

		return nil
	}

	return api.Route{
		Method:      http.MethodPost,
		Path:        "/forms/chromium/convert/html",
		IsMultipart: true,
		Handler:     handle,
	}
}
// screenshotHtmlRoute returns the [api.Route] that takes a screenshot of an
// uploaded HTML file.
//
// The handler requires an "index.html" file in the multipart form, exposes it
// to Chromium as a file:// URL, and limits file access to the request
// directory (ctx.DirPath()) before handing over to screenshotUrl.
func screenshotHtmlRoute(chromium Api) api.Route {
	handle := func(c echo.Context) error {
		ctx := c.Get("context").(*api.Context)
		form, options := FormDataChromiumScreenshotOptions(ctx)

		var indexPath string
		if err := form.MandatoryPath("index.html", &indexPath).Validate(); err != nil {
			return fmt.Errorf("validate form data: %w", err)
		}

		pageURL := fmt.Sprintf("file://%s", indexPath)
		options.AllowedFilePrefixes = []string{ctx.DirPath()}

		if err := screenshotUrl(ctx, chromium, pageURL, options); err != nil {
			return fmt.Errorf("HTML screenshot: %w", err)
		}

		return nil
	}

	return api.Route{
		Method:      http.MethodPost,
		Path:        "/forms/chromium/screenshot/html",
		IsMultipart: true,
		Handler:     handle,
	}
}
// convertMarkdownRoute returns the [api.Route] that converts Markdown files
// to PDF.
//
// The handler requires an "index.html" template and at least one ".md" file
// in the multipart form. The template is rendered to HTML by markdownToHtml,
// file access is limited to the request directory (ctx.DirPath()), and the
// result is handed over to convertUrl.
func convertMarkdownRoute(chromium Api, engine gotenberg.PdfEngine) api.Route {
	handle := func(c echo.Context) error {
		ctx := c.Get("context").(*api.Context)

		form, options := FormDataChromiumPdfOptions(ctx)
		mode := pdfengines.FormDataPdfSplitMode(form, false)
		pdfFormats := pdfengines.FormDataPdfFormats(form)
		metadata := pdfengines.FormDataPdfMetadata(form, false)
		encrypt := pdfengines.FormDataPdfEncrypt(form)
		embedPaths := pdfengines.FormDataPdfEmbeds(form)
		watermark := pdfengines.FormDataPdfWatermark(form, false)
		watermarkFile := pdfengines.FormDataPdfWatermarkFile(form)
		stamp := pdfengines.FormDataPdfStamp(form, false)
		stampFile := pdfengines.FormDataPdfStampFile(form)
		rotateAngle, rotatePages := pdfengines.FormDataPdfRotate(form, false)
		embedsMetadata := pdfengines.FormDataPdfEmbedsMetadata(form)
		facturX, facturxXmlPath := pdfengines.FormDataPdfFacturX(form)

		var (
			indexPath string
			mdPaths   []string
		)
		if err := form.
			MandatoryPath("index.html", &indexPath).
			MandatoryPaths([]string{".md"}, &mdPaths).
			Validate(); err != nil {
			return fmt.Errorf("validate form data: %w", err)
		}

		if err := pdfengines.EnsureWatermarkFile(&watermark, watermarkFile); err != nil {
			return fmt.Errorf("validate watermark: %w", err)
		}

		if err := pdfengines.EnsureStampFile(&stamp, stampFile); err != nil {
			return fmt.Errorf("validate stamp: %w", err)
		}

		pageURL, err := markdownToHtml(ctx, indexPath, mdPaths)
		if err != nil {
			return fmt.Errorf("transform markdown file(s) to HTML: %w", err)
		}

		options.AllowedFilePrefixes = []string{ctx.DirPath()}

		if err := convertUrl(ctx, chromium, engine, pageURL, options, mode, pdfFormats, metadata, encrypt, embedPaths, embedsMetadata, facturX, facturxXmlPath, watermark, stamp, rotateAngle, rotatePages); err != nil {
			return fmt.Errorf("convert markdown to PDF: %w", err)
		}

		return nil
	}

	return api.Route{
		Method:      http.MethodPost,
		Path:        "/forms/chromium/convert/markdown",
		IsMultipart: true,
		Handler:     handle,
	}
}
// screenshotMarkdownRoute returns the [api.Route] that takes a screenshot
// from Markdown files.
//
// The handler requires an "index.html" template and at least one ".md" file
// in the multipart form. The template is rendered to HTML by markdownToHtml,
// file access is limited to the request directory (ctx.DirPath()), and the
// result is handed over to screenshotUrl.
func screenshotMarkdownRoute(chromium Api) api.Route {
	handle := func(c echo.Context) error {
		ctx := c.Get("context").(*api.Context)
		form, options := FormDataChromiumScreenshotOptions(ctx)

		var (
			indexPath string
			mdPaths   []string
		)
		if err := form.
			MandatoryPath("index.html", &indexPath).
			MandatoryPaths([]string{".md"}, &mdPaths).
			Validate(); err != nil {
			return fmt.Errorf("validate form data: %w", err)
		}

		pageURL, err := markdownToHtml(ctx, indexPath, mdPaths)
		if err != nil {
			return fmt.Errorf("transform markdown file(s) to HTML: %w", err)
		}

		options.AllowedFilePrefixes = []string{ctx.DirPath()}

		if err := screenshotUrl(ctx, chromium, pageURL, options); err != nil {
			return fmt.Errorf("markdown screenshot: %w", err)
		}

		return nil
	}

	return api.Route{
		Method:      http.MethodPost,
		Path:        "/forms/chromium/screenshot/markdown",
		IsMultipart: true,
		Handler:     handle,
	}
}
// markdownToHtml renders the HTML template at inputPath and returns a file://
// URL pointing to the rendered result.
//
// The template is parsed with "html/template" and may call
// toHTML "<filename>", which converts the uploaded Markdown file with that
// original filename to sanitized HTML. References to Markdown files that were
// not uploaded are collected during execution and reported together as an
// HTTP 400 [api] error. The rendered page is written to a new ".html" path
// generated from ctx.
func markdownToHtml(ctx *api.Context, inputPath string, markdownPaths []string) (string, error) {
	// Accumulates one entry per unknown filename referenced by the template.
	var missingFiles error

	// resolve maps an original filename to its path on disk, or "" when no
	// uploaded Markdown file carries that name.
	resolve := func(filename string) string {
		for _, candidate := range markdownPaths {
			if ctx.OriginalFilename(candidate) == filename {
				return candidate
			}
		}

		return ""
	}

	toHTML := func(filename string) (template.HTML, error) {
		path := resolve(filename)
		if path == "" {
			// Keep executing the template so that every missing file gets
			// reported, not only the first one.
			missingFiles = errors.Join(missingFiles, fmt.Errorf("'%s'", filename))
			return "", nil
		}

		raw, err := os.ReadFile(path)
		if err != nil {
			return "", fmt.Errorf("read markdown file '%s': %w", filename, err)
		}

		sanitized := bluemonday.UGCPolicy().SanitizeBytes(markdown.ToHTML(raw, nil, nil))

		// #nosec
		return template.HTML(sanitized), nil
	}

	tmpl, err := template.
		New(filepath.Base(inputPath)).
		Funcs(template.FuncMap{"toHTML": toHTML}).
		ParseFiles(inputPath)
	if err != nil {
		return "", fmt.Errorf("parse template file: %w", err)
	}

	var rendered bytes.Buffer
	if err = tmpl.Execute(&rendered, &struct{}{}); err != nil {
		return "", fmt.Errorf("execute template: %w", err)
	}

	if missingFiles != nil {
		return "", api.WrapError(
			fmt.Errorf("markdown files not found: %w", missingFiles),
			api.NewSentinelHttpError(
				http.StatusBadRequest,
				fmt.Sprintf("Markdown file(s) not found: %s", missingFiles),
			),
		)
	}

	renderedPath := ctx.GeneratePath(".html")
	if err = os.WriteFile(renderedPath, rendered.Bytes(), 0o600); err != nil {
		return "", fmt.Errorf("write template result: %w", err)
	}

	return fmt.Sprintf("file://%s", renderedPath), nil
}
// convertUrl prints url to PDF with Chromium and then runs the PDF engine
// pipeline on the result, in this order: compatibility checks, split,
// watermark, stamp, rotate, format conversion, metadata, embeds, embeds
// metadata, Factur-X and encryption. The resulting path(s) are registered as
// outputs on ctx.
//
// Known Chromium printing errors are turned into HTTP 400 [api] errors; any
// other error is returned wrapped with context.
func convertUrl(ctx *api.Context, chromium Api, engine gotenberg.PdfEngine, url string, options PdfOptions, mode gotenberg.SplitMode, pdfFormats gotenberg.PdfFormats, metadata map[string]any, encrypt gotenberg.EncryptOptions, embedPaths []string, embedsMetadata map[string]map[string]string, facturX gotenberg.FacturX, facturxXmlPath string, watermark, stamp gotenberg.Stamp, rotateAngle int, rotatePages string) error {
	// See https://github.com/gotenberg/gotenberg/issues/1130.
	printedPath := ctx.GeneratePathFromFilename(ctx.OutputFilename(ctx.GeneratePath(".pdf")))

	printErr := chromium.Pdf(ctx, ctx.Log(), url, printedPath, options)
	printErr = handleChromiumError(printErr, options.Options)
	if printErr != nil {
		wrapped := fmt.Errorf("convert to PDF: %w", printErr)
		badRequest := func(message string) error {
			return api.WrapError(wrapped, api.NewSentinelHttpError(http.StatusBadRequest, message))
		}

		switch {
		case errors.Is(printErr, ErrOmitBackgroundWithoutPrintBackground):
			return badRequest("omitBackground requires printBackground set to true")
		case errors.Is(printErr, ErrPrintingFailed):
			return badRequest("Chromium failed to print the PDF; this usually happens when the page is too large")
		case errors.Is(printErr, ErrInvalidPrinterSettings):
			return badRequest("Chromium does not handle the provided settings; please check for aberrant form values")
		case errors.Is(printErr, ErrPageRangesExceedsPageCount):
			return badRequest(fmt.Sprintf("The page ranges '%s' (nativePageRanges) exceeds the page count", options.PageRanges))
		case errors.Is(printErr, ErrPageRangesSyntaxError):
			return badRequest(fmt.Sprintf("Chromium does not handle the page ranges '%s' (nativePageRanges) syntax", options.PageRanges))
		default:
			return wrapped
		}
	}

	// Compatibility errors are returned as-is, without extra context.
	if err := pdfengines.ValidatePdfFormatsCompat(pdfFormats, encrypt.UserPassword, embedPaths); err != nil {
		return err
	}

	if err := pdfengines.ValidatePdfEncryptCompat(encrypt); err != nil {
		return err
	}

	if err := pdfengines.ValidateFacturXCompat(facturX, facturxXmlPath, pdfFormats); err != nil {
		return err
	}

	outputPaths, err := pdfengines.SplitPdfStub(ctx, engine, mode, []string{printedPath})
	if err != nil {
		return fmt.Errorf("split PDF: %w", err)
	}

	if err = pdfengines.WatermarkStub(ctx, engine, watermark, outputPaths); err != nil {
		return fmt.Errorf("watermark PDFs: %w", err)
	}

	if err = pdfengines.StampStub(ctx, engine, stamp, outputPaths); err != nil {
		return fmt.Errorf("stamp PDFs: %w", err)
	}

	if err = pdfengines.RotateStub(ctx, engine, rotateAngle, rotatePages, outputPaths); err != nil {
		return fmt.Errorf("rotate PDFs: %w", err)
	}

	pdfFormats = pdfengines.FacturXPdfFormats(ctx, engine, facturX, pdfFormats, true, nil)

	convertedPaths, err := pdfengines.ConvertStub(ctx, engine, pdfFormats, outputPaths)
	if err != nil {
		return fmt.Errorf("convert PDF(s): %w", err)
	}

	// Metadata, embeds are written after Convert, as LibreOffice
	// strips them during PDF/A conversion.
	if err = pdfengines.WriteMetadataStub(ctx, engine, metadata, convertedPaths); err != nil {
		return fmt.Errorf("write metadata: %w", err)
	}

	if err = pdfengines.EmbedFilesStub(ctx, engine, embedPaths, convertedPaths); err != nil {
		return fmt.Errorf("embed files into PDFs: %w", err)
	}

	if err = pdfengines.EmbedFilesMetadataStub(ctx, engine, embedsMetadata, convertedPaths); err != nil {
		return fmt.Errorf("set embeds metadata: %w", err)
	}

	if err = pdfengines.ApplyFacturXStub(ctx, engine, facturX, facturxXmlPath, convertedPaths); err != nil {
		return fmt.Errorf("apply Factur-X: %w", err)
	}

	if err = pdfengines.EncryptPdfStub(ctx, engine, encrypt, convertedPaths); err != nil {
		return fmt.Errorf("encrypt PDFs: %w", err)
	}

	keepSplitNaming := mode != (gotenberg.SplitMode{}) && pdfFormats != (gotenberg.PdfFormats{})
	if !keepSplitNaming {
		outputPaths = convertedPaths
	} else {
		// The PDF has been split and split parts have been converted to a
		// specific format. We want to keep the split naming.
		for i, convertedPath := range convertedPaths {
			if err = ctx.Rename(convertedPath, outputPaths[i]); err != nil {
				return fmt.Errorf("rename output path: %w", err)
			}
		}
	}

	if err = ctx.AddOutputPaths(outputPaths...); err != nil {
		return fmt.Errorf("add output paths: %w", err)
	}

	return nil
}
// screenshotUrl captures url with Chromium into a new path whose extension is
// options.Format, and registers that path as an output on ctx. Chromium
// errors go through handleChromiumError before being returned.
func screenshotUrl(ctx *api.Context, chromium Api, url string, options ScreenshotOptions) error {
	screenshotPath := ctx.GeneratePath(fmt.Sprintf(".%s", options.Format))

	captureErr := chromium.Screenshot(ctx, ctx.Log(), url, screenshotPath, options)
	if captureErr = handleChromiumError(captureErr, options.Options); captureErr != nil {
		return fmt.Errorf("screenshot: %w", captureErr)
	}

	if addErr := ctx.AddOutputPaths(screenshotPath); addErr != nil {
		return fmt.Errorf("add output path: %w", addErr)
	}

	return nil
}
// handleChromiumError maps the known Chromium sentinel errors to [api] HTTP
// errors (400 or 409) carrying a user-facing message. A nil err yields nil,
// and an error matching none of the sentinels is returned unchanged.
func handleChromiumError(err error, options Options) error {
	if err == nil {
		return nil
	}

	// httpError attaches an HTTP status and message to err.
	httpError := func(status int, message string) error {
		return api.WrapError(err, api.NewSentinelHttpError(status, message))
	}

	// detail returns err's text with every ": <sentinel text>" occurrence
	// removed.
	detail := func(sentinel error) string {
		return strings.ReplaceAll(err.Error(), fmt.Sprintf(": %s", sentinel.Error()), "")
	}

	switch {
	case errors.Is(err, ErrInvalidEvaluationExpression):
		if options.WaitForExpression == "" {
			// We do not expect the 'waitWindowStatus' form field to return
			// an ErrInvalidEvaluationExpression error. In such a scenario,
			// we return a 500.
			return err
		}

		return httpError(
			http.StatusBadRequest,
			fmt.Sprintf("The expression '%s' (waitForExpression) returned an exception or undefined", options.WaitForExpression),
		)

	case errors.Is(err, ErrInvalidSelectorQuery):
		if options.WaitForSelector == "" {
			// We only expect to see this error if the user specified a selector.
			// If they didn't and we still generated the error, return a 500.
			return err
		}

		return httpError(
			http.StatusBadRequest,
			fmt.Sprintf("The selector '%s' (waitForSelector) returned an exception or undefined", options.WaitForSelector),
		)

	case errors.Is(err, ErrInvalidHttpStatusCode):
		return httpError(
			http.StatusConflict,
			fmt.Sprintf("Invalid HTTP status code from the main page: %s", detail(ErrInvalidHttpStatusCode)),
		)

	case errors.Is(err, ErrInvalidResourceHttpStatusCode):
		return httpError(
			http.StatusConflict,
			fmt.Sprintf("Invalid HTTP status code from resources:\n%s", detail(ErrInvalidResourceHttpStatusCode)),
		)

	case errors.Is(err, ErrConsoleExceptions):
		// Here the sentinel text is removed without a leading ": ".
		return httpError(
			http.StatusConflict,
			fmt.Sprintf("Chromium console exceptions:\n%s", strings.ReplaceAll(err.Error(), ErrConsoleExceptions.Error(), "")),
		)

	case errors.Is(err, ErrLoadingFailed):
		return httpError(
			http.StatusBadRequest,
			fmt.Sprintf("Chromium returned %v", err),
		)

	case errors.Is(err, ErrResourceLoadingFailed):
		return httpError(
			http.StatusConflict,
			fmt.Sprintf("Chromium failed to load resources: %v", detail(ErrResourceLoadingFailed)),
		)
	}

	return err
}