Files
gotenberg/pkg/modules/exiftool/exiftool.go
T
2026-03-18 04:46:12 +01:00

275 lines
9.2 KiB
Go

package exiftool
import (
"context"
"errors"
"fmt"
"os"
"os/exec"
"reflect"
"strings"
"syscall"
"github.com/barasher/go-exiftool"
"go.uber.org/zap"
"github.com/gotenberg/gotenberg/v8/pkg/gotenberg"
)
// init registers the ExifTool module with Gotenberg when the package is
// loaded.
func init() {
	gotenberg.MustRegisterModule(&ExifTool{})
}
// ExifTool abstracts the CLI tool ExifTool and implements the
// [gotenberg.PdfEngine] interface.
//
// Only metadata reading, metadata writing, and page counting are implemented;
// the remaining [gotenberg.PdfEngine] methods return a "not supported" error.
type ExifTool struct {
// binPath is the path to the ExifTool binary. Provision fills it from the
// EXIFTOOL_BIN_PATH environment variable.
binPath string
}
// Descriptor returns [ExifTool]'s module descriptor: the module ID "exiftool"
// and a constructor producing a fresh, unprovisioned [ExifTool].
func (engine *ExifTool) Descriptor() gotenberg.ModuleDescriptor {
	newModule := func() gotenberg.Module { return &ExifTool{} }

	return gotenberg.ModuleDescriptor{
		ID:  "exiftool",
		New: newModule,
	}
}
// Provision sets the module properties.
//
// The ExifTool binary path is taken from the EXIFTOOL_BIN_PATH environment
// variable; an error is returned when that variable is not set.
func (engine *ExifTool) Provision(ctx *gotenberg.Context) error {
	path, found := os.LookupEnv("EXIFTOOL_BIN_PATH")
	if found {
		engine.binPath = path
		return nil
	}

	return errors.New("EXIFTOOL_BIN_PATH environment variable is not set")
}
// Validate validates the module properties.
//
// It checks that the configured ExifTool binary path can be stat'ed. A missing
// path is reported as such; any other stat failure (for instance a permission
// error) is reported too instead of being silently accepted.
func (engine *ExifTool) Validate() error {
	_, err := os.Stat(engine.binPath)
	if err == nil {
		return nil
	}

	if errors.Is(err, os.ErrNotExist) {
		return fmt.Errorf("ExifTool binary path does not exist: %w", err)
	}

	// The path may exist, but it could not be inspected, so it cannot be
	// considered valid.
	return fmt.Errorf("stat ExifTool binary path: %w", err)
}
// Debug returns additional debug data.
//
// The returned map has a single "version" entry: the trimmed output of running
// the ExifTool binary with "-ver", or the error text if that command fails.
func (engine *ExifTool) Debug() map[string]any {
	versionCmd := exec.Command(engine.binPath, "-ver") //nolint:gosec
	versionCmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}

	var version string
	if out, err := versionCmd.Output(); err != nil {
		version = err.Error()
	} else {
		version = strings.TrimSpace(string(out))
	}

	return map[string]any{"version": version}
}
// Merge is not available in this implementation. It always returns an error
// wrapping [gotenberg.ErrPdfEngineMethodNotSupported].
func (engine *ExifTool) Merge(ctx context.Context, logger *zap.Logger, inputPaths []string, outputPath string) error {
	unsupported := gotenberg.ErrPdfEngineMethodNotSupported
	return fmt.Errorf("merge PDFs with ExifTool: %w", unsupported)
}
// Split is not available in this implementation. It always returns a nil
// slice and an error wrapping [gotenberg.ErrPdfEngineMethodNotSupported].
func (engine *ExifTool) Split(ctx context.Context, logger *zap.Logger, mode gotenberg.SplitMode, inputPath, outputDirPath string) ([]string, error) {
	err := fmt.Errorf("split PDF with ExifTool: %w", gotenberg.ErrPdfEngineMethodNotSupported)
	return nil, err
}
// Flatten is not available in this implementation. It always returns an error
// wrapping [gotenberg.ErrPdfEngineMethodNotSupported].
func (engine *ExifTool) Flatten(ctx context.Context, logger *zap.Logger, inputPath string) error {
	unsupported := gotenberg.ErrPdfEngineMethodNotSupported
	return fmt.Errorf("flatten PDF with ExifTool: %w", unsupported)
}
// Convert is not available in this implementation. It always returns an error
// that mentions the requested formats and wraps
// [gotenberg.ErrPdfEngineMethodNotSupported].
func (engine *ExifTool) Convert(ctx context.Context, logger *zap.Logger, formats gotenberg.PdfFormats, inputPath, outputPath string) error {
	unsupported := gotenberg.ErrPdfEngineMethodNotSupported
	return fmt.Errorf("convert PDF to '%+v' with ExifTool: %w", formats, unsupported)
}
// ReadMetadata extracts the metadata of a given PDF file.
//
// It creates an ExifTool instance bound to the configured binary path,
// extracts the metadata of inputPath, and returns the extracted fields. The
// instance is closed before returning; a failure to close it is only logged
// and does not affect the returned values.
func (engine *ExifTool) ReadMetadata(ctx context.Context, logger *zap.Logger, inputPath string) (map[string]any, error) {
	exifTool, err := exiftool.NewExiftool(exiftool.SetExiftoolBinaryPath(engine.binPath))
	if err != nil {
		return nil, fmt.Errorf("new ExifTool: %w", err)
	}
	defer func() {
		if err := exifTool.Close(); err != nil {
			logger.Error(fmt.Sprintf("close ExifTool: %v", err))
		}
	}()

	// A single path is passed, so only the first result is inspected.
	fileMetadata := exifTool.ExtractMetadata(inputPath)
	if fileMetadata[0].Err != nil {
		return nil, fmt.Errorf("read metadata with ExifTool: %w", fileMetadata[0].Err)
	}

	return fileMetadata[0].Fields, nil
}
// WriteMetadata writes the metadata into a given PDF file.
//
// The existing metadata of inputPath is read first, derived and system tags
// are dropped from it, the entries of metadata are applied on top, and the
// result is written back to inputPath.
//
// Supported value types are string, []string, []any containing only strings,
// bool, int, int64, float32, and float64. Any other value yields an error
// wrapping [gotenberg.ErrPdfEngineMetadataValueNotSupported], returned before
// anything is written.
func (engine *ExifTool) WriteMetadata(ctx context.Context, logger *zap.Logger, metadata map[string]any, inputPath string) error {
	exifTool, err := exiftool.NewExiftool(exiftool.SetExiftoolBinaryPath(engine.binPath))
	if err != nil {
		return fmt.Errorf("new ExifTool: %w", err)
	}
	defer func() {
		// A close failure is only logged; it does not change the result.
		if err := exifTool.Close(); err != nil {
			logger.Error(fmt.Sprintf("close ExifTool: %v", err))
		}
	}()

	// A single path is passed, so only the first result is used below.
	fileMetadata := exifTool.ExtractMetadata(inputPath)
	if fileMetadata[0].Err != nil {
		return fmt.Errorf("read metadata with ExifTool: %w", fileMetadata[0].Err)
	}

	// Define a list of derived, system, or computed tags that ExifTool
	// extracts but should never be written back. Writing these can break PDF/A
	// compliance (e.g., PageCount -> prism:pageCount) or cause side effects
	// (e.g., FileModifyDate).
	derivedTags := []string{
		"PageCount",           // Causes prism:pageCount injection
		"Linearized",          // Computed status; writing it may invalidate structure
		"PDFVersion",          // Header version; should not be manually forced via metadata
		"MIMEType",            // Read-only derived
		"FileType",            // Read-only derived
		"FileTypeExtension",   // Read-only derived
		"FileSize",            // System attribute
		"FileModifyDate",      // System attribute
		"FileAccessDate",      // System attribute
		"FileInodeChangeDate", // System attribute
		"FilePermissions",     // System attribute
		"FileName",            // Writing this triggers a file rename in ExifTool
		"Directory",           // System attribute
		"ExifToolVersion",     // Tool metadata
		"Error",               // Extraction error messages
		"Warning",             // Extraction warning messages
	}
	for _, tag := range derivedTags {
		delete(fileMetadata[0].Fields, tag)
	}

	// NOTE(review): caller-supplied keys are applied after the filter above,
	// so a key such as "FileName" in metadata is still handed to ExifTool.
	// Confirm that callers sanitize the keys they pass in.
	for key, value := range metadata {
		switch val := value.(type) {
		case string:
			fileMetadata[0].SetString(key, val)
		case []string:
			fileMetadata[0].SetStrings(key, val)
		case []any:
			// See https://github.com/gotenberg/gotenberg/issues/1048.
			strs := make([]string, len(val))
			for i, entry := range val {
				if str, ok := entry.(string); ok {
					strs[i] = str
					continue
				}

				// Only slices made exclusively of strings are supported.
				return fmt.Errorf("write PDF metadata with ExifTool: %s %+v %s %w", key, val, reflect.TypeFor[[]any](), gotenberg.ErrPdfEngineMetadataValueNotSupported)
			}
			fileMetadata[0].SetStrings(key, strs)
		case bool:
			fileMetadata[0].SetString(key, fmt.Sprintf("%t", val))
		case int:
			fileMetadata[0].SetInt(key, int64(val))
		case int64:
			fileMetadata[0].SetInt(key, val)
		case float32:
			fileMetadata[0].SetFloat(key, float64(val))
		case float64:
			fileMetadata[0].SetFloat(key, val)
		// TODO: support more complex cases, e.g., arrays and nested objects
		// (limitations in underlying library).
		default:
			return fmt.Errorf("write PDF metadata with ExifTool: %s %+v %s %w", key, val, reflect.TypeOf(val), gotenberg.ErrPdfEngineMetadataValueNotSupported)
		}
	}

	// The write outcome is read back from the Err field of the entry.
	exifTool.WriteMetadata(fileMetadata)
	if fileMetadata[0].Err != nil {
		return fmt.Errorf("write PDF metadata with ExifTool: %w", fileMetadata[0].Err)
	}

	return nil
}
// PageCount returns the number of pages in a PDF file using ExifTool.
//
// The count is taken from the "PageCount" metadata field. Values of type int,
// int64, and float64 are converted to int; a string value is parsed as a
// decimal integer. A missing field or a value of any other type yields an
// error.
func (engine *ExifTool) PageCount(ctx context.Context, logger *zap.Logger, inputPath string) (int, error) {
	fields, err := engine.ReadMetadata(ctx, logger, inputPath)
	if err != nil {
		return 0, fmt.Errorf("read metadata with ExifTool: %w", err)
	}

	raw, found := fields["PageCount"]
	if !found {
		return 0, errors.New("PageCount not found in metadata")
	}

	switch count := raw.(type) {
	case int:
		return count, nil
	case int64:
		return int(count), nil
	case float64:
		return int(count), nil
	case string:
		var parsed int
		if _, scanErr := fmt.Sscanf(count, "%d", &parsed); scanErr != nil {
			return 0, fmt.Errorf("parse PageCount string '%s': %w", count, scanErr)
		}
		return parsed, nil
	}

	return 0, fmt.Errorf("unexpected PageCount type '%T'", raw)
}
// WriteBookmarks is not available in this implementation. It always returns
// an error wrapping [gotenberg.ErrPdfEngineMethodNotSupported].
func (engine *ExifTool) WriteBookmarks(ctx context.Context, logger *zap.Logger, inputPath string, bookmarks []gotenberg.Bookmark) error {
	unsupported := gotenberg.ErrPdfEngineMethodNotSupported
	return fmt.Errorf("write PDF bookmarks with ExifTool: %w", unsupported)
}
// ReadBookmarks is not available in this implementation. It always returns a
// nil slice and an error wrapping [gotenberg.ErrPdfEngineMethodNotSupported].
func (engine *ExifTool) ReadBookmarks(ctx context.Context, logger *zap.Logger, inputPath string) ([]gotenberg.Bookmark, error) {
	err := fmt.Errorf("read PDF bookmarks with ExifTool: %w", gotenberg.ErrPdfEngineMethodNotSupported)
	return nil, err
}
// Encrypt is not available in this implementation. It always returns an error
// wrapping [gotenberg.ErrPdfEncryptionNotSupported] (note: not
// ErrPdfEngineMethodNotSupported, which the other unsupported methods of this
// engine wrap).
func (engine *ExifTool) Encrypt(ctx context.Context, logger *zap.Logger, inputPath, userPassword, ownerPassword string) error {
	unsupported := gotenberg.ErrPdfEncryptionNotSupported
	return fmt.Errorf("encrypt PDF using ExifTool: %w", unsupported)
}
// EmbedFiles is not available in this implementation. It always returns an
// error wrapping [gotenberg.ErrPdfEngineMethodNotSupported].
func (engine *ExifTool) EmbedFiles(ctx context.Context, logger *zap.Logger, filePaths []string, inputPath string) error {
	unsupported := gotenberg.ErrPdfEngineMethodNotSupported
	return fmt.Errorf("embed files with ExifTool: %w", unsupported)
}
// Watermark is not available in this implementation. It always returns an
// error wrapping [gotenberg.ErrPdfEngineMethodNotSupported].
func (engine *ExifTool) Watermark(ctx context.Context, logger *zap.Logger, inputPath string, stamp gotenberg.Stamp) error {
	unsupported := gotenberg.ErrPdfEngineMethodNotSupported
	return fmt.Errorf("watermark PDF with ExifTool: %w", unsupported)
}
// Stamp is not available in this implementation. It always returns an error
// wrapping [gotenberg.ErrPdfEngineMethodNotSupported].
func (engine *ExifTool) Stamp(ctx context.Context, logger *zap.Logger, inputPath string, stamp gotenberg.Stamp) error {
	unsupported := gotenberg.ErrPdfEngineMethodNotSupported
	return fmt.Errorf("stamp PDF with ExifTool: %w", unsupported)
}
// Interface guards: compile-time assertions that *ExifTool satisfies each of
// the gotenberg interfaces listed below. The blank identifier means no value
// is kept at runtime.
var (
_ gotenberg.Module = (*ExifTool)(nil)
_ gotenberg.Provisioner = (*ExifTool)(nil)
_ gotenberg.Validator = (*ExifTool)(nil)
_ gotenberg.Debuggable = (*ExifTool)(nil)
_ gotenberg.PdfEngine = (*ExifTool)(nil)
)