// Mirror of https://github.com/gotenberg/gotenberg.git
// (synced 2026-07-02 00:17:40 +08:00; 704 lines, 22 KiB, Go).

package api
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"log/slog"
|
|
"mime"
|
|
"mime/multipart"
|
|
"net/http"
|
|
"os"
|
|
"path/filepath"
|
|
"strings"
|
|
"sync/atomic"
|
|
"time"
|
|
|
|
"github.com/google/uuid"
|
|
"github.com/hashicorp/go-retryablehttp"
|
|
"github.com/labstack/echo/v4"
|
|
"github.com/mholt/archives"
|
|
"go.opentelemetry.io/otel"
|
|
"go.opentelemetry.io/otel/codes"
|
|
"go.opentelemetry.io/otel/propagation"
|
|
semconv "go.opentelemetry.io/otel/semconv/v1.40.0"
|
|
"go.opentelemetry.io/otel/trace"
|
|
"golang.org/x/sync/errgroup"
|
|
"golang.org/x/text/unicode/norm"
|
|
|
|
"github.com/gotenberg/gotenberg/v8/pkg/gotenberg"
|
|
)
|
|
|
|
// Sentinel errors returned by [Context] methods.
var (
	// ErrContextAlreadyClosed is returned when an operation is attempted on
	// a context that has already been cancelled.
	ErrContextAlreadyClosed = errors.New("context already closed")

	// ErrOutOfBoundsOutputPath is returned when an output path does not
	// belong to the context's working directory. Every file of a request
	// is expected to live in that single directory.
	ErrOutOfBoundsOutputPath = errors.New("output path is not within context's working directory")
)
|
|
|
|
// Context is the request context for a "multipart/form-data" request.
|
|
type Context struct {
|
|
dirPath string
|
|
values map[string][]string
|
|
files map[string]string
|
|
filesByField map[string][]string
|
|
diskToOriginal map[string]string
|
|
outputPaths []string
|
|
cancelled bool
|
|
|
|
logger *slog.Logger
|
|
echoCtx echo.Context
|
|
mkdirAll gotenberg.MkdirAll
|
|
pathRename gotenberg.PathRename
|
|
context.Context
|
|
}
|
|
|
|
// trackingReader wraps an [io.Reader] and reports the size of every
// non-empty read to a callback, which may veto the read by returning an
// error (e.g., when a size limit is exceeded).
type trackingReader struct {
	// R is the reader the data actually comes from.
	R io.Reader
	// AddReadBytes receives the number of bytes of each non-empty read.
	AddReadBytes func(n int64) error
}

// Read implements [io.Reader]. It forwards the call to the wrapped reader
// and, when bytes were read, hands their count to AddReadBytes. A callback
// error takes precedence over the wrapped reader's error and is returned
// wrapped; the wrapped reader's own error is returned untouched.
func (t *trackingReader) Read(p []byte) (int, error) {
	read, readErr := t.R.Read(p)
	if read <= 0 {
		return read, readErr
	}

	if accountErr := t.AddReadBytes(int64(read)); accountErr != nil {
		return read, fmt.Errorf("add read bytes: %w", accountErr)
	}

	// The underlying error (io.EOF in particular) is deliberately returned
	// unwrapped: standard library helpers compare against it directly.
	return read, readErr
}
|
|
|
|
// downloadFrom describes one entry of the JSON array carried by the
// "downloadFrom" form field: a remote file to fetch and how to route it.
type downloadFrom struct {
	// Url is the address the file is downloaded from.
	Url string `json:"url"`

	// ExtraHttpHeaders are additional HTTP headers sent with the download
	// request.
	ExtraHttpHeaders map[string]string `json:"extraHttpHeaders"`

	// Embedded routes the downloaded file as an embed.
	//
	// Deprecated: use Field instead. Kept for backward compatibility.
	Embedded bool `json:"embedded"`

	// Field routes the downloaded file to a specific form field bucket.
	// Supported values: "watermark", "stamp". For embeds, prefer the
	// Embedded flag or set Field to "embedded".
	Field string `json:"field"`
}
|
|
|
|
// newContext returns a [Context] by parsing a "multipart/form-data" request.
|
|
func newContext(echoCtx echo.Context, logger *slog.Logger, fs *gotenberg.FileSystem, timeout time.Duration, bodyLimit int64, downloadFromCfg downloadFromConfig) (*Context, context.CancelFunc, error) {
|
|
processCtx, processCancel := context.WithTimeout(echoCtx.Request().Context(), timeout)
|
|
|
|
// We want to make sure the multipart/form-data does not exceed a given
|
|
// limit. We consider: form fields (keys, values, files) and files
|
|
// downloaded remotely ("download from" feature).
|
|
var totalBytesRead atomic.Int64
|
|
|
|
addReadBytes := func(n int64) error {
|
|
newTotal := totalBytesRead.Add(n)
|
|
if bodyLimit != 0 && newTotal > bodyLimit {
|
|
return WrapError(
|
|
fmt.Errorf("body limit reached (> %d)", bodyLimit),
|
|
NewSentinelHttpError(http.StatusRequestEntityTooLarge, "The request body exceeds the configured size limit. Increase it with --api-body-limit, or send a smaller request."),
|
|
)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
ctx := &Context{
|
|
outputPaths: make([]string, 0),
|
|
cancelled: false,
|
|
logger: logger,
|
|
echoCtx: echoCtx,
|
|
mkdirAll: new(gotenberg.OsMkdirAll),
|
|
pathRename: new(gotenberg.OsPathRename),
|
|
Context: processCtx,
|
|
}
|
|
|
|
// A custom cancel function which removes the context's working directory
|
|
// when called.
|
|
cancel := func() context.CancelFunc {
|
|
return func() {
|
|
if ctx.cancelled {
|
|
return
|
|
}
|
|
|
|
processCancel()
|
|
|
|
if ctx.dirPath == "" {
|
|
return
|
|
}
|
|
|
|
err := os.RemoveAll(ctx.dirPath)
|
|
if err != nil {
|
|
ctx.logger.ErrorContext(context.Background(), fmt.Sprintf("remove context's working directory: %s", err))
|
|
|
|
return
|
|
}
|
|
|
|
ctx.logger.DebugContext(context.Background(), fmt.Sprintf("'%s' context's working directory removed", ctx.dirPath))
|
|
ctx.cancelled = true
|
|
}
|
|
}()
|
|
|
|
form, err := echoCtx.MultipartForm()
|
|
if err != nil {
|
|
if errors.Is(err, http.ErrNotMultipart) {
|
|
return nil, cancel, WrapError(
|
|
fmt.Errorf("get multipart form: %w", err),
|
|
NewSentinelHttpError(http.StatusUnsupportedMediaType, "Invalid 'Content-Type' header value: want 'multipart/form-data'"),
|
|
)
|
|
}
|
|
|
|
if errors.Is(err, http.ErrMissingBoundary) {
|
|
return nil, cancel, WrapError(
|
|
fmt.Errorf("get multipart form: %w", err),
|
|
NewSentinelHttpError(http.StatusUnsupportedMediaType, "Invalid 'Content-Type' header value: no boundary"),
|
|
)
|
|
}
|
|
|
|
if strings.Contains(err.Error(), io.EOF.Error()) {
|
|
return nil, cancel, WrapError(
|
|
fmt.Errorf("get multipart form: %w", err),
|
|
NewSentinelHttpError(http.StatusBadRequest, "Malformed body: it does not match the 'Content-Type' header boundaries"),
|
|
)
|
|
}
|
|
|
|
return nil, cancel, fmt.Errorf("get multipart form: %w", err)
|
|
}
|
|
|
|
// This will ensure we do not exceed the body limit.
|
|
var formValuesSize int64
|
|
for key, valArray := range form.Value {
|
|
formValuesSize += int64(len(key))
|
|
for _, val := range valArray {
|
|
formValuesSize += int64(len(val))
|
|
}
|
|
}
|
|
err = addReadBytes(formValuesSize)
|
|
if err != nil {
|
|
return nil, cancel, fmt.Errorf("add read bytes: %w", err)
|
|
}
|
|
|
|
dirPath, err := fs.MkdirAll()
|
|
if err != nil {
|
|
return nil, cancel, fmt.Errorf("create working directory: %w", err)
|
|
}
|
|
|
|
ctx.dirPath = dirPath
|
|
ctx.values = form.Value
|
|
ctx.files = make(map[string]string)
|
|
ctx.filesByField = make(map[string][]string)
|
|
ctx.diskToOriginal = make(map[string]string)
|
|
|
|
// First, try to download files listed in the "downloadFrom" form field, if
|
|
// any.
|
|
raw, ok := ctx.values["downloadFrom"]
|
|
if !downloadFromCfg.disable && ok {
|
|
var dls []downloadFrom
|
|
err = json.Unmarshal([]byte(raw[0]), &dls)
|
|
if err != nil {
|
|
return nil, cancel, WrapError(
|
|
fmt.Errorf("unmarshal json: %w", err),
|
|
NewSentinelHttpError(http.StatusBadRequest, fmt.Sprintf("Invalid 'downloadFrom' form field value: %s", err)),
|
|
)
|
|
}
|
|
|
|
// Each goroutine writes to its own results slot. The main
|
|
// goroutine merges into ctx.files, ctx.diskToOriginal, and
|
|
// ctx.filesByField after eg.Wait() to avoid concurrent map
|
|
// writes.
|
|
type downloadFromResult struct {
|
|
filename, path, formField string
|
|
}
|
|
results := make([]downloadFromResult, len(dls))
|
|
|
|
eg, _ := errgroup.WithContext(ctx)
|
|
for i, dl := range dls {
|
|
eg.Go(func() error {
|
|
deadline, ok := ctx.Deadline()
|
|
if !ok {
|
|
// Should not happen, as context is created with a timeout.
|
|
return errors.New("context has no deadline")
|
|
}
|
|
|
|
if strings.TrimSpace(dl.Url) == "" {
|
|
return WrapError(
|
|
errors.New("empty download from URL"),
|
|
NewSentinelHttpError(http.StatusBadRequest, fmt.Sprintf("Invalid 'downloadFrom' form field entry %d: URL must be set", i)),
|
|
)
|
|
}
|
|
|
|
ipOpts := []gotenberg.DecideOption{
|
|
gotenberg.WithDenyPrivateIPs(downloadFromCfg.denyPrivateIPs),
|
|
gotenberg.WithDenyPublicIPs(downloadFromCfg.denyPublicIPs),
|
|
}
|
|
err := gotenberg.FilterOutboundURL(ctx, dl.Url, downloadFromCfg.allowList, downloadFromCfg.denyList, deadline, ipOpts...)
|
|
if err != nil {
|
|
return fmt.Errorf("filter URL: %w", err)
|
|
}
|
|
|
|
dlCtx, dlSpan := gotenberg.Tracer().Start(ctx, "GET Download From",
|
|
trace.WithSpanKind(trace.SpanKindClient),
|
|
trace.WithAttributes(semconv.ServerAddress(dl.Url)),
|
|
)
|
|
|
|
logger.DebugContext(dlCtx, fmt.Sprintf("download file from '%s'", dl.Url))
|
|
|
|
req, err := retryablehttp.NewRequest(http.MethodGet, dl.Url, nil)
|
|
if err != nil {
|
|
dlSpan.RecordError(err)
|
|
dlSpan.SetStatus(codes.Error, err.Error())
|
|
dlSpan.End()
|
|
return fmt.Errorf("create request to '%s': %w", dl.Url, err)
|
|
}
|
|
|
|
req.Header.Set("User-Agent", "Gotenberg")
|
|
for key, value := range dl.ExtraHttpHeaders {
|
|
req.Header.Set(key, value)
|
|
}
|
|
|
|
// Inject OTEL trace context into outbound request.
|
|
otel.GetTextMapPropagator().Inject(dlCtx, propagation.HeaderCarrier(req.Header))
|
|
|
|
// Propagate correlation ID header.
|
|
if correlationIdHeader, ok := echoCtx.Get("correlationIdHeader").(string); ok {
|
|
if correlationId, ok := echoCtx.Get("correlationId").(string); ok {
|
|
req.Header.Set(correlationIdHeader, correlationId)
|
|
}
|
|
}
|
|
|
|
client := &retryablehttp.Client{
|
|
HTTPClient: gotenberg.NewOutboundHttpClient(time.Until(deadline), downloadFromCfg.allowList, downloadFromCfg.denyList, ipOpts...),
|
|
RetryMax: downloadFromCfg.maxRetry,
|
|
RetryWaitMin: time.Duration(1) * time.Second,
|
|
RetryWaitMax: time.Until(deadline),
|
|
Logger: gotenberg.NewLeveledLogger(logger),
|
|
CheckRetry: retryablehttp.DefaultRetryPolicy,
|
|
Backoff: retryablehttp.DefaultBackoff,
|
|
}
|
|
|
|
resp, err := client.Do(req)
|
|
if err != nil {
|
|
dlSpan.RecordError(err)
|
|
dlSpan.SetStatus(codes.Error, err.Error())
|
|
dlSpan.End()
|
|
return WrapError(
|
|
fmt.Errorf("download file from to '%s': %w", dl.Url, err),
|
|
NewSentinelHttpError(http.StatusBadRequest, fmt.Sprintf("Unable to download file from '%s': %s", dl.Url, err)),
|
|
)
|
|
}
|
|
defer func() {
|
|
err := resp.Body.Close()
|
|
if err != nil {
|
|
logger.ErrorContext(ctx, fmt.Sprintf("close response body from '%s': %s", dl.Url, err))
|
|
}
|
|
}()
|
|
|
|
if resp.StatusCode != http.StatusOK {
|
|
dlErr := fmt.Errorf("download file from to '%s': got status: '%s'", dl.Url, resp.Status)
|
|
dlSpan.RecordError(dlErr)
|
|
dlSpan.SetStatus(codes.Error, dlErr.Error())
|
|
dlSpan.End()
|
|
return WrapError(
|
|
dlErr,
|
|
NewSentinelHttpError(http.StatusBadRequest, fmt.Sprintf("Unable to download file from '%s': got status: '%s'", dl.Url, resp.Status)),
|
|
)
|
|
}
|
|
|
|
contentDisposition := resp.Header.Get("Content-Disposition")
|
|
if contentDisposition == "" {
|
|
dlErr := fmt.Errorf("no 'Content-Disposition' header from '%s'", dl.Url)
|
|
dlSpan.RecordError(dlErr)
|
|
dlSpan.SetStatus(codes.Error, dlErr.Error())
|
|
dlSpan.End()
|
|
return WrapError(
|
|
dlErr,
|
|
NewSentinelHttpError(http.StatusBadRequest, fmt.Sprintf("No 'Content-Disposition' header from '%s'", dl.Url)),
|
|
)
|
|
}
|
|
|
|
// FIXME: the implementation of this method might not be
|
|
// complete, as it fails to parse an empty mediatype.
|
|
// See: https://github.com/golang/go/issues/69551.
|
|
_, params, err := mime.ParseMediaType(contentDisposition)
|
|
if err != nil {
|
|
dlErr := fmt.Errorf("parse 'Content-Disposition' header '%s' from '%s': %w", contentDisposition, dl.Url, err)
|
|
dlSpan.RecordError(dlErr)
|
|
dlSpan.SetStatus(codes.Error, dlErr.Error())
|
|
dlSpan.End()
|
|
return WrapError(
|
|
dlErr,
|
|
NewSentinelHttpError(http.StatusBadRequest, fmt.Sprintf("Invalid 'Content-Disposition' header '%s' from '%s': %s", contentDisposition, dl.Url, err)),
|
|
)
|
|
}
|
|
|
|
filename, ok := params["filename"]
|
|
if !ok {
|
|
dlErr := fmt.Errorf("get filename from 'Content-Disposition' header '%s' from '%s'", contentDisposition, dl.Url)
|
|
dlSpan.RecordError(dlErr)
|
|
dlSpan.SetStatus(codes.Error, dlErr.Error())
|
|
dlSpan.End()
|
|
return WrapError(
|
|
dlErr,
|
|
NewSentinelHttpError(http.StatusBadRequest, fmt.Sprintf("Invalid 'Content-Disposition' header '%s' from '%s': no filename", contentDisposition, dl.Url)),
|
|
)
|
|
}
|
|
|
|
// Strip path separators (including backslashes) and control
|
|
// characters, then NFC-normalize. Defends against directory
|
|
// traversal in the on-disk name and Windows-side Zip Slip
|
|
// when the original filename is later embedded in an output
|
|
// zip entry.
|
|
// See: https://github.com/gotenberg/gotenberg/issues/662.
|
|
filename = sanitizeFilename(filename)
|
|
|
|
// Use a UUID-based name on disk to avoid filesystem
|
|
// NAME_MAX limits with long filenames.
|
|
// See: https://github.com/gotenberg/gotenberg/issues/1500.
|
|
safeName := uuid.New().String() + filepath.Ext(filename)
|
|
path := fmt.Sprintf("%s/%s", ctx.dirPath, safeName)
|
|
|
|
out, err := os.Create(path)
|
|
if err != nil {
|
|
dlErr := fmt.Errorf("create local file: %w", err)
|
|
dlSpan.RecordError(dlErr)
|
|
dlSpan.SetStatus(codes.Error, dlErr.Error())
|
|
dlSpan.End()
|
|
return dlErr
|
|
}
|
|
defer func() {
|
|
err := out.Close()
|
|
if err != nil {
|
|
logger.ErrorContext(ctx, fmt.Sprintf("close local file: %s", err))
|
|
}
|
|
}()
|
|
|
|
// This will ensure we do not exceed the body limit.
|
|
reader := &trackingReader{R: resp.Body, AddReadBytes: addReadBytes}
|
|
|
|
_, err = io.Copy(out, reader)
|
|
if err != nil {
|
|
dlErr := fmt.Errorf("copy downloaded file from '%s' to local file: %w", dl.Url, err)
|
|
dlSpan.RecordError(dlErr)
|
|
dlSpan.SetStatus(codes.Error, dlErr.Error())
|
|
dlSpan.End()
|
|
return dlErr
|
|
}
|
|
|
|
dlSpan.SetStatus(codes.Ok, "")
|
|
dlSpan.End()
|
|
|
|
var formField string
|
|
switch {
|
|
case dl.Field == "embedded" || dl.Embedded:
|
|
formField = EmbedsFormField
|
|
case dl.Field == "watermark":
|
|
formField = WatermarkFormField
|
|
case dl.Field == "stamp":
|
|
formField = StampFormField
|
|
}
|
|
results[i] = downloadFromResult{filename: filename, path: path, formField: formField}
|
|
|
|
return nil
|
|
})
|
|
}
|
|
|
|
err = eg.Wait()
|
|
if err != nil {
|
|
return ctx, cancel, err
|
|
}
|
|
|
|
for _, r := range results {
|
|
ctx.files[r.filename] = r.path
|
|
ctx.diskToOriginal[r.path] = r.filename
|
|
if r.formField != "" {
|
|
ctx.filesByField[r.formField] = append(ctx.filesByField[r.formField], r.path)
|
|
}
|
|
}
|
|
}
|
|
|
|
copyToDisk := func(fh *multipart.FileHeader) error {
|
|
in, err := fh.Open()
|
|
if err != nil {
|
|
return fmt.Errorf("open multipart file: %w", err)
|
|
}
|
|
|
|
defer func() {
|
|
err := in.Close()
|
|
if err != nil {
|
|
logger.ErrorContext(context.Background(), fmt.Sprintf("close file header: %s", err))
|
|
}
|
|
}()
|
|
|
|
// This will ensure we do not exceed the body limit.
|
|
reader := &trackingReader{R: in, AddReadBytes: addReadBytes}
|
|
|
|
// Strip path separators (including backslashes) and control
|
|
// characters, then NFC-normalize. Defends against directory
|
|
// traversal in the on-disk name and Windows-side Zip Slip when the
|
|
// original filename is later embedded in an output zip entry.
|
|
// See: https://github.com/gotenberg/gotenberg/issues/662.
|
|
filename := sanitizeFilename(fh.Filename)
|
|
|
|
// Use a UUID-based name on disk to avoid filesystem
|
|
// NAME_MAX limits with long filenames.
|
|
// See: https://github.com/gotenberg/gotenberg/issues/1500.
|
|
safeName := uuid.New().String() + filepath.Ext(filename)
|
|
path := fmt.Sprintf("%s/%s", ctx.dirPath, safeName)
|
|
|
|
out, err := os.Create(path)
|
|
if err != nil {
|
|
return fmt.Errorf("create local file: %w", err)
|
|
}
|
|
defer func() {
|
|
err := out.Close()
|
|
if err != nil {
|
|
logger.ErrorContext(context.Background(), fmt.Sprintf("close local file: %s", err))
|
|
}
|
|
}()
|
|
|
|
_, err = io.Copy(out, reader)
|
|
if err != nil {
|
|
return fmt.Errorf("copy multipart file to local file: %w", err)
|
|
}
|
|
|
|
ctx.files[filename] = path
|
|
ctx.diskToOriginal[path] = filename
|
|
|
|
return nil
|
|
}
|
|
|
|
// Then, copy the form files, if any.
|
|
for fieldName, files := range form.File {
|
|
for _, fh := range files {
|
|
err = copyToDisk(fh)
|
|
if err != nil {
|
|
return ctx, cancel, fmt.Errorf("copy to disk: %w", err)
|
|
}
|
|
// Track files by field name
|
|
filename := sanitizeFilename(fh.Filename)
|
|
filePath := ctx.files[filename]
|
|
ctx.filesByField[fieldName] = append(ctx.filesByField[fieldName], filePath)
|
|
}
|
|
}
|
|
|
|
// Create symlinks from original filenames to UUID-based disk names
|
|
// so that relative asset references (e.g., <img src="image.png">)
|
|
// resolve correctly when Chromium navigates to a file:// URL.
|
|
// Symlink creation is best-effort: it may fail for filenames that
|
|
// exceed the filesystem NAME_MAX limit (the reason UUIDs were
|
|
// introduced in the first place).
|
|
for originalName, diskPath := range ctx.files {
|
|
symlinkPath := fmt.Sprintf("%s/%s", ctx.dirPath, originalName)
|
|
if symlinkPath == diskPath {
|
|
continue
|
|
}
|
|
err = os.Symlink(filepath.Base(diskPath), symlinkPath)
|
|
if err != nil {
|
|
logger.DebugContext(context.Background(), fmt.Sprintf("skip symlink for '%s': %s", originalName, err))
|
|
}
|
|
}
|
|
|
|
ctx.Log().DebugContext(ctx, fmt.Sprintf("form fields: %+v", ctx.values))
|
|
ctx.Log().DebugContext(ctx, fmt.Sprintf("form files: %+v", ctx.files))
|
|
ctx.Log().DebugContext(ctx, fmt.Sprintf("form files by field: %+v", ctx.filesByField))
|
|
ctx.Log().DebugContext(ctx, fmt.Sprintf("total bytes: %d", totalBytesRead.Load()))
|
|
|
|
return ctx, cancel, err
|
|
}
|
|
|
|
// Request returns the [http.Request].
|
|
func (ctx *Context) Request() *http.Request {
|
|
return ctx.echoCtx.Request()
|
|
}
|
|
|
|
// FormData return a [FormData].
|
|
func (ctx *Context) FormData() *FormData {
|
|
return &FormData{
|
|
values: ctx.values,
|
|
files: ctx.files,
|
|
filesByField: ctx.filesByField,
|
|
diskToOriginal: ctx.diskToOriginal,
|
|
errors: nil,
|
|
}
|
|
}
|
|
|
|
// FileCount returns the number of files received in the request.
|
|
func (ctx *Context) FileCount() int {
|
|
return len(ctx.files)
|
|
}
|
|
|
|
// OriginalFilename returns the original filename associated with a disk path.
|
|
// If no mapping exists, it falls back to [filepath.Base].
|
|
func (ctx *Context) OriginalFilename(diskPath string) string {
|
|
if original, ok := ctx.diskToOriginal[diskPath]; ok {
|
|
return original
|
|
}
|
|
return filepath.Base(diskPath)
|
|
}
|
|
|
|
// RegisterDiskPath associates a disk path with an original filename so that
|
|
// [Context.OriginalFilename] can resolve it later.
|
|
func (ctx *Context) RegisterDiskPath(diskPath, originalFilename string) {
|
|
ctx.diskToOriginal[diskPath] = originalFilename
|
|
}
|
|
|
|
// DirPath returns the path to the request's working directory.
|
|
func (ctx *Context) DirPath() string {
|
|
return ctx.dirPath
|
|
}
|
|
|
|
// GeneratePath generates a path within the context's working directory.
|
|
// It generates a new UUID-based filename. It does not create a file.
|
|
func (ctx *Context) GeneratePath(extension string) string {
|
|
return fmt.Sprintf("%s/%s%s", ctx.dirPath, uuid.New().String(), extension)
|
|
}
|
|
|
|
// GeneratePathFromFilename generates a path within the context's working
|
|
// directory. It uses a UUID-based name on disk to avoid filesystem NAME_MAX
|
|
// limits but registers the given filename so that [Context.OriginalFilename]
|
|
// can resolve it. It does not create a file.
|
|
func (ctx *Context) GeneratePathFromFilename(filename string) string {
|
|
safeName := uuid.New().String() + filepath.Ext(filename)
|
|
path := fmt.Sprintf("%s/%s", ctx.dirPath, safeName)
|
|
ctx.diskToOriginal[path] = filename
|
|
return path
|
|
}
|
|
|
|
// CreateSubDirectory creates a subdirectory within the context's working
|
|
// directory.
|
|
func (ctx *Context) CreateSubDirectory(dirName string) (string, error) {
|
|
path := fmt.Sprintf("%s/%s", ctx.dirPath, dirName)
|
|
err := ctx.mkdirAll.MkdirAll(path, 0o755)
|
|
if err != nil {
|
|
return "", fmt.Errorf("create sub-directory %s: %w", path, err)
|
|
}
|
|
return path, nil
|
|
}
|
|
|
|
// Rename is just a wrapper around [os.Rename], as we need to mock this
|
|
// behavior in our tests.
|
|
func (ctx *Context) Rename(oldpath, newpath string) error {
|
|
ctx.Log().DebugContext(ctx, fmt.Sprintf("rename %s to %s", oldpath, newpath))
|
|
err := ctx.pathRename.Rename(oldpath, newpath)
|
|
if err != nil {
|
|
return fmt.Errorf("rename path: %w", err)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// AddOutputPaths adds the given paths. Those paths will be used later to build
|
|
// the output file.
|
|
func (ctx *Context) AddOutputPaths(paths ...string) error {
|
|
if ctx.cancelled {
|
|
return ErrContextAlreadyClosed
|
|
}
|
|
|
|
for _, path := range paths {
|
|
if !strings.HasPrefix(path, ctx.dirPath) {
|
|
return ErrOutOfBoundsOutputPath
|
|
}
|
|
|
|
ctx.outputPaths = append(ctx.outputPaths, path)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// Log returns the context [slog.Logger].
|
|
func (ctx *Context) Log() *slog.Logger {
|
|
return ctx.logger
|
|
}
|
|
|
|
// BuildOutputFile builds the output file according to the output paths
|
|
// registered in the context. If many output paths, an archive is created.
|
|
func (ctx *Context) BuildOutputFile() (string, error) {
|
|
if ctx.cancelled {
|
|
return "", ErrContextAlreadyClosed
|
|
}
|
|
|
|
if len(ctx.outputPaths) == 0 {
|
|
return "", errors.New("no output path")
|
|
}
|
|
|
|
if len(ctx.outputPaths) == 1 {
|
|
ctx.logger.DebugContext(ctx, fmt.Sprintf("only one output file '%s', skip archive creation", ctx.outputPaths[0]))
|
|
return ctx.outputPaths[0], nil
|
|
}
|
|
|
|
filesInfo, err := archives.FilesFromDisk(ctx.Context, nil, func() map[string]string {
|
|
f := make(map[string]string)
|
|
for _, outputPath := range ctx.outputPaths {
|
|
f[outputPath] = ctx.OriginalFilename(outputPath)
|
|
}
|
|
return f
|
|
}())
|
|
if err != nil {
|
|
return "", fmt.Errorf("create files info: %w", err)
|
|
}
|
|
|
|
archivePath := ctx.GeneratePath(".zip")
|
|
out, err := os.Create(archivePath)
|
|
if err != nil {
|
|
return "", fmt.Errorf("create zip file: %w", err)
|
|
}
|
|
defer func(out *os.File) {
|
|
err := out.Close()
|
|
if err != nil {
|
|
ctx.logger.ErrorContext(ctx, fmt.Sprintf("close zip file: %s", err))
|
|
}
|
|
}(out)
|
|
|
|
err = archives.Zip{}.Archive(ctx.Context, out, filesInfo)
|
|
if err != nil {
|
|
return "", fmt.Errorf("archive output files: %w", err)
|
|
}
|
|
|
|
ctx.logger.DebugContext(ctx, fmt.Sprintf("archive '%s' created", archivePath))
|
|
|
|
return archivePath, nil
|
|
}
|
|
|
|
// OutputFilename returns the filename based on the given output path or the
|
|
// "Gotenberg-Output-Filename" header's value.
|
|
func (ctx *Context) OutputFilename(outputPath string) string {
|
|
filename := ctx.echoCtx.Get("outputFilename").(string)
|
|
|
|
if filename == "" {
|
|
return ctx.OriginalFilename(outputPath)
|
|
}
|
|
|
|
return fmt.Sprintf("%s%s", filename, filepath.Ext(outputPath))
|
|
}
|
|
|
|
// sanitizeFilename strips path separators (including backslashes, which
|
|
// [filepath.Base] ignores on Linux) and control characters from a
|
|
// caller-supplied filename, then NFC-normalizes the result. This prevents a
|
|
// Windows-side Zip Slip when an output zip is extracted by a permissive
|
|
// extractor that interprets '\' as a path separator.
|
|
func sanitizeFilename(name string) string {
|
|
if i := strings.LastIndexAny(name, `/\`); i >= 0 {
|
|
name = name[i+1:]
|
|
}
|
|
name = strings.Map(func(r rune) rune {
|
|
if r < 0x20 || r == 0x7f {
|
|
return -1
|
|
}
|
|
return r
|
|
}, name)
|
|
return norm.NFC.String(name)
|
|
}
|