mirror of
https://codeberg.org/listyantidewi/your-everyday-tools.git
synced 2026-07-01 23:17:37 +08:00
1165 lines
44 KiB
Python
1165 lines
44 KiB
Python
import io
|
|
from flask import Blueprint, render_template, request, send_file, jsonify
|
|
from utils.file_utils import make_zip
|
|
from utils.pymupdf import import_pymupdf
|
|
from routes._helpers import safe_int, safe_float, log_error, NO_FILE_SINGLE, NO_FILE_MULTIPLE
|
|
|
|
# PyMuPDF module handle, obtained through the project's import helper
# rather than a direct ``import``; every ``fitz.*`` call below uses it.
fitz = import_pymupdf()

# Flask blueprint that every route in this module is registered on.
bp = Blueprint("pdf", __name__)
|
|
|
|
|
|
def _open_pdf(data: bytes):
    """Open a PDF held in memory, translating any failure into a ValueError.

    Args:
        data: Raw bytes of the uploaded file.

    Returns:
        The document object produced by ``fitz.open``.

    Raises:
        ValueError: If ``fitz.open`` raises for any reason. The message is
            written for end users; the underlying exception is logged via
            ``log_error`` and chained as ``__cause__``.
    """
    try:
        return fitz.open(stream=data, filetype="pdf")
    except Exception as e:
        log_error(e, "fitz.open")
        # Chain explicitly so tracebacks show this as a deliberate translation
        # rather than an error raised while handling another error.
        raise ValueError(
            "Could not open PDF (the file may be corrupted, encrypted, or not a PDF)."
        ) from e
|
|
|
|
|
|
# ── Page Routes ──────────────────────────────────
|
|
|
|
@bp.route("/merge")
def merge_page():
    """Render the shared upload template configured for the Merge PDFs tool."""
    context = {
        "title": "Merge PDFs",
        "description": "Combine multiple PDF files into one document",
        "endpoint": "/pdf/merge",
        "accept": ".pdf",
        "multiple": True,  # merging needs several uploads
        "options": [],
    }
    return render_template("upload_tool.html", **context)
|
|
|
|
|
|
@bp.route("/split")
def split_page():
    """Render the shared upload template configured for the Split PDF tool."""
    pages_field = {
        "type": "text",
        "name": "pages",
        "label": "Page ranges (leave empty for all pages)",
        "placeholder": "e.g. 1-3, 5, 7-10",
    }
    context = {
        "title": "Split PDF",
        "description": "Split a PDF into individual pages or custom ranges",
        "endpoint": "/pdf/split",
        "accept": ".pdf",
        "multiple": False,
        "options": [pages_field],
    }
    return render_template("upload_tool.html", **context)
|
|
|
|
|
|
@bp.route("/compress")
def compress_page():
    """Render the shared upload template configured for the Compress PDF tool."""
    # Explanatory HTML shown with the form; adjacent literals concatenate.
    notes = (
        '<p><strong>How compression works:</strong> embedded images are re-encoded as JPEG '
        'at a lower quality, downscaled if larger than a per-level cap (1200/1800/2400 px), '
        'and the PDF\'s internal cross-reference table is cleaned up.</p>'
        '<p><strong>Best results on:</strong> photo-heavy PDFs (scanned reports, brochures, '
        'photo books). <strong>Minimal savings on:</strong> text-only PDFs — they\'re already '
        'tiny because text compresses well in PDF natively.</p>'
        '<ul style="margin:.4rem 0 .6rem 1.2rem">'
        '<li><strong>Maximum compression</strong> — JPEG quality 40, max image edge 1200px. '
        'Best size, visible image quality loss.</li>'
        '<li><strong>Medium</strong> — JPEG quality 65, max 1800px. Good balance for most use.</li>'
        '<li><strong>Minimal</strong> — JPEG quality 85, max 2400px. Slightly smaller, hardly any loss.</li>'
        '</ul>'
        '<p style="font-size:.9em;color:var(--muted)">Image positions, sizes, and rotation '
        'are preserved exactly — we replace each image in-place rather than re-flowing the page.</p>'
    )

    # (value, label) pairs, in the order they appear in the drop-down.
    levels = (
        ("medium", "Medium (good balance)"),
        ("low", "Maximum compression"),
        ("high", "Minimal compression"),
    )
    quality_field = {
        "type": "select",
        "name": "quality",
        "label": "Compression Level",
        "choices": [{"value": value, "label": label} for value, label in levels],
    }

    context = {
        "title": "Compress PDF",
        "description": "Reduce PDF file size by compressing images and cleaning up",
        "notes": notes,
        "endpoint": "/pdf/compress",
        "accept": ".pdf",
        "multiple": False,
        "options": [quality_field],
    }
    return render_template("upload_tool.html", **context)
|
|
|
|
|
|
@bp.route("/rotate")
def rotate_page():
    """Render the shared upload template configured for the Rotate PDF tool."""
    # (value, label) pairs, in the order they appear in the drop-down.
    angles = (
        ("90", "90° Clockwise"),
        ("180", "180°"),
        ("270", "90° Counter-clockwise"),
    )
    angle_field = {
        "type": "select",
        "name": "angle",
        "label": "Rotation Angle",
        "choices": [{"value": value, "label": label} for value, label in angles],
    }
    pages_field = {
        "type": "text",
        "name": "pages",
        "label": "Pages to rotate (leave empty for all)",
        "placeholder": "e.g. 1, 3, 5-7",
    }

    context = {
        "title": "Rotate PDF",
        "description": "Rotate all or specific pages of a PDF",
        "endpoint": "/pdf/rotate",
        "accept": ".pdf",
        "multiple": False,
        "options": [angle_field, pages_field],
    }
    return render_template("upload_tool.html", **context)
|
|
|
|
|
|
@bp.route("/resize")
def resize_page():
    """Render the shared upload template configured for the Resize PDF tool."""
    mode_field = {
        "type": "select",
        "name": "mode",
        "label": "Resize Mode",
        "choices": [
            {"value": "scale", "label": "Scale by percentage"},
            {"value": "paper", "label": "Standard paper size"},
        ],
    }
    # Tied to mode == "scale" through "depends_on".
    scale_field = {
        "type": "number",
        "name": "scale",
        "label": "Scale (%)",
        "default": 100,
        "min": 10,
        "max": 500,
        "depends_on": {"mode": "scale"},
    }
    # (value, label) pairs, in the order they appear in the drop-down.
    papers = (
        ("a4", "A4 (210 x 297 mm)"),
        ("letter", "Letter (8.5 x 11 in)"),
        ("a3", "A3 (297 x 420 mm)"),
        ("a5", "A5 (148 x 210 mm)"),
        ("legal", "Legal (8.5 x 14 in)"),
    )
    # Tied to mode == "paper" through "depends_on".
    paper_field = {
        "type": "select",
        "name": "paper",
        "label": "Paper Size",
        "choices": [{"value": value, "label": label} for value, label in papers],
        "depends_on": {"mode": "paper"},
    }

    context = {
        "title": "Resize PDF",
        "description": "Change the page dimensions of a PDF",
        "endpoint": "/pdf/resize",
        "accept": ".pdf",
        "multiple": False,
        "options": [mode_field, scale_field, paper_field],
    }
    return render_template("upload_tool.html", **context)
|
|
|
|
|
|
@bp.route("/page-numbers")
def page_numbers_page():
    """Render the shared upload template configured for the Add Page Numbers tool."""
    # (value, label) pairs, in the order they appear in the drop-down.
    positions = (
        ("bottom-center", "Bottom Center"),
        ("bottom-right", "Bottom Right"),
        ("bottom-left", "Bottom Left"),
        ("top-center", "Top Center"),
        ("top-right", "Top Right"),
        ("top-left", "Top Left"),
    )
    position_field = {
        "type": "select",
        "name": "position",
        "label": "Position",
        "choices": [{"value": value, "label": label} for value, label in positions],
    }
    start_field = {
        "type": "number",
        "name": "start",
        "label": "Start number",
        "default": 1,
        "min": 0,
    }
    fontsize_field = {
        "type": "number",
        "name": "fontsize",
        "label": "Font size",
        "default": 11,
        "min": 6,
        "max": 30,
    }

    context = {
        "title": "Add Page Numbers",
        "description": "Add page numbers to each page of a PDF",
        "endpoint": "/pdf/page-numbers",
        "accept": ".pdf",
        "multiple": False,
        "options": [position_field, start_field, fontsize_field],
    }
    return render_template("upload_tool.html", **context)
|
|
|
|
|
|
@bp.route("/extract-images")
def extract_images_page():
    """Render the shared upload template configured for the Extract Images tool."""
    # Explanatory HTML shown with the form; adjacent literals concatenate.
    notes = (
        '<p><strong>What you get:</strong> every embedded raster image (PNG / JPEG / TIFF) '
        'found in the PDF, downloaded as a ZIP. Vector graphics (lines, paths, drawn shapes) '
        'are <strong>not</strong> exported as images — they\'re part of the page itself, not '
        'separate image objects.</p>'
        '<p style="font-size:.9em;color:var(--muted)">For scanned PDFs you usually get one '
        'large image per page. For rendered text PDFs with figures, you get just the figures. '
        'If you need a screenshot of the whole page, use <a href="/convert/pdf-to-images">PDF to Images</a>.</p>'
    )

    context = {
        "title": "Extract Images",
        "description": "Extract all images embedded in a PDF file",
        "notes": notes,
        "endpoint": "/pdf/extract-images",
        "accept": ".pdf",
        "multiple": False,
        "options": [],
    }
    return render_template("upload_tool.html", **context)
|
|
|
|
|
|
@bp.route("/protect")
def protect_page():
    """Render the shared upload template configured for the Protect PDF tool."""
    # Explanatory HTML shown with the form; adjacent literals concatenate.
    notes = (
        '<p><strong>Encryption:</strong> AES-256, the strongest standard PDF encryption. '
        'Required to open and to print/copy.</p>'
        '<p><strong>User vs Owner password:</strong></p>'
        '<ul style="margin:.4rem 0 .6rem 1.2rem">'
        '<li><strong>User password</strong> — required to open the PDF. Without it, the PDF cannot be viewed.</li>'
        '<li><strong>Owner password</strong> — controls editing/printing/copying restrictions. Leave blank to use the same as the user password.</li>'
        '</ul>'
        '<p style="font-size:.9em;color:var(--muted)"><strong>Important:</strong> there is '
        'no recovery — if you forget the password, the PDF stays locked. We allow printing '
        'and copying for password-holders by default.</p>'
    )

    user_field = {
        "type": "password",
        "name": "user_password",
        "label": "User Password (to open)",
        "placeholder": "Enter password",
    }
    owner_field = {
        "type": "password",
        "name": "owner_password",
        "label": "Owner Password (optional, for editing)",
        "placeholder": "Leave empty to use same password",
    }

    context = {
        "title": "Protect PDF",
        "description": "Add password protection to a PDF file",
        "notes": notes,
        "endpoint": "/pdf/protect",
        "accept": ".pdf",
        "multiple": False,
        "options": [user_field, owner_field],
    }
    return render_template("upload_tool.html", **context)
|
|
|
|
|
|
@bp.route("/sign")
def sign_page():
    """Render the shared upload template configured for the Sign PDF tool."""
    # Explanatory HTML shown with the form; adjacent literals concatenate.
    notes = (
        "<p><strong>Tip:</strong> upload a transparent PNG of your signature for best results. "
        "A white-background JPG will look like a sticker on the page.</p>"
        "<p>This tool stamps a visible signature — it does <em>not</em> apply a cryptographic digital signature.</p>"
    )

    signature_field = {
        "type": "file",
        "name": "signature",
        "label": "Signature image (PNG / JPG)",
        "accept": "image/png,image/jpeg",
        "required": True,
    }
    pages_field = {
        "type": "text",
        "name": "pages",
        "label": "Pages to sign (leave empty for all)",
        "placeholder": "e.g. 1, 3, 5-7",
    }
    # (value, label) pairs, in the order they appear in the drop-down.
    positions = (
        ("bottom-right", "Bottom Right"),
        ("bottom-center", "Bottom Center"),
        ("bottom-left", "Bottom Left"),
        ("top-right", "Top Right"),
        ("top-center", "Top Center"),
        ("top-left", "Top Left"),
    )
    position_field = {
        "type": "select",
        "name": "position",
        "label": "Position",
        "default": "bottom-right",
        "choices": [{"value": value, "label": label} for value, label in positions],
    }
    width_field = {
        "type": "number",
        "name": "width",
        "label": "Signature width (points)",
        "default": 140,
        "min": 30,
        "max": 400,
    }
    margin_field = {
        "type": "number",
        "name": "margin",
        "label": "Margin from edge (points)",
        "default": 36,
        "min": 0,
        "max": 200,
    }
    opacity_field = {
        "type": "number",
        "name": "opacity",
        "label": "Opacity (%)",
        "default": 100,
        "min": 10,
        "max": 100,
    }

    context = {
        "title": "Sign PDF",
        "description": "Stamp a signature image onto one or more pages of a PDF",
        "notes": notes,
        "endpoint": "/pdf/sign",
        "accept": ".pdf",
        "multiple": False,
        "options": [
            signature_field,
            pages_field,
            position_field,
            width_field,
            margin_field,
            opacity_field,
        ],
    }
    return render_template("upload_tool.html", **context)
|
|
|
|
|
|
@bp.route("/unlock")
def unlock_page():
    """Render the shared upload template configured for the Unlock PDF tool."""
    # Explanatory HTML shown with the form; adjacent literals concatenate.
    notes = (
        '<p><strong>You need to know the password.</strong> This tool removes password '
        'protection from a PDF you can already open — it is not a password cracker. '
        'You\'ll get a clear "incorrect password" error if you enter the wrong one.</p>'
        '<p style="font-size:.9em;color:var(--muted)">Use case: you have a PDF protected '
        'by a password you know, and want to share an unprotected copy or pass it through '
        'tools that don\'t handle encrypted PDFs.</p>'
    )

    password_field = {
        "type": "password",
        "name": "password",
        "label": "PDF Password",
        "placeholder": "Enter the current password",
    }

    context = {
        "title": "Unlock PDF",
        "description": "Remove password protection from a PDF",
        "notes": notes,
        "endpoint": "/pdf/unlock",
        "accept": ".pdf",
        "multiple": False,
        "options": [password_field],
    }
    return render_template("upload_tool.html", **context)
|
|
|
|
|
|
@bp.route("/form-fill")
def form_fill_page():
    """Render the form-filler page, which has its own dedicated template."""
    template_name = "tools/form_fill.html"
    return render_template(template_name)
|
|
|
|
|
|
@bp.route("/redact")
def redact_page():
    """Render the shared upload template configured for the Redact PDF tool."""
    # Explanatory HTML shown with the form; adjacent literals concatenate.
    notes = (
        '<p><strong>How it works:</strong> enter one or more search terms or regex patterns '
        '(one per line). Every occurrence on every page is found, then permanently '
        'overlaid with a solid black rectangle. The underlying text is also stripped '
        'from the PDF\'s content stream so it cannot be recovered with copy-paste.</p>'
        '<p style="font-size:.9em;color:var(--muted)"><strong>Common patterns:</strong> '
        '<code>\\b\\d{16}\\b</code> (credit-card numbers), '
        '<code>[\\w.-]+@[\\w.-]+\\.[\\w]+</code> (emails), '
        '<code>\\b\\d{3}-\\d{2}-\\d{4}\\b</code> (US SSN-like). '
        'Plain text is matched literally unless you tick “Treat as regex”.</p>'
    )

    patterns_field = {
        "type": "text",
        "name": "patterns",
        "label": "Patterns (one per line)",
        "placeholder": "e.g. john@example.com / 4111-?\\d{4}-?\\d{4}-?\\d{4}",
    }
    regex_field = {
        "type": "checkbox",
        "name": "is_regex",
        "label": "Pattern type",
        "check_label": "Treat each line as a regular expression",
        "default": False,
    }
    case_field = {
        "type": "checkbox",
        "name": "case_sensitive",
        "label": "Case sensitivity",
        "check_label": "Match case exactly",
        "default": False,
    }
    pages_field = {
        "type": "text",
        "name": "pages",
        "label": "Pages (blank = all)",
        "placeholder": "e.g. 1-3, 5",
    }

    context = {
        "title": "Redact PDF",
        "description": "Permanently black-out sensitive text in a PDF",
        "notes": notes,
        "endpoint": "/pdf/redact",
        "accept": ".pdf",
        "multiple": False,
        "options": [patterns_field, regex_field, case_field, pages_field],
    }
    return render_template("upload_tool.html", **context)
|
|
|
|
|
|
# ── Processing Routes ────────────────────────────
|
|
|
|
def parse_page_ranges(spec: str, total: int) -> list[int]:
    """Parse a page spec such as ``'1-3, 5, 7-10'`` into 0-based page indices.

    Args:
        spec: Comma-separated 1-based page numbers and inclusive ``a-b``
            ranges. A blank spec selects every page.
        total: Number of pages in the document.

    Returns:
        Sorted, de-duplicated 0-based indices. Ranges are clamped to
        ``1..total``; single pages outside that window are dropped, so the
        result may be empty.

    Raises:
        ValueError: If a non-empty segment is not an integer or an
            ``int-int`` range.
    """
    if not spec.strip():
        return list(range(total))

    pages: set[int] = set()
    for part in spec.split(","):
        part = part.strip()
        if not part:
            # A stray or trailing comma ("1, 3,") is harmless; skip it rather
            # than letting int("") reject the whole spec.
            continue
        if "-" in part:
            start, end = part.split("-", 1)
            first = max(1, int(start))
            last = min(total, int(end))
            pages.update(range(first - 1, last))
        else:
            index = int(part) - 1
            if 0 <= index < total:
                pages.add(index)
    return sorted(pages)
|
|
|
|
|
|
# Target page sizes for the "paper" resize mode, keyed by the form's
# ``paper`` value. Each entry is (width, height) in portrait orientation;
# the numbers are PDF points (1/72 inch), e.g. Letter 8.5 x 11 in -> 612 x 792.
PAPER_SIZES = {
    "a4": (595.28, 841.89),
    "letter": (612, 792),
    "a3": (841.89, 1190.55),
    "a5": (419.53, 595.28),
    "legal": (612, 1008),
}
|
|
|
|
|
|
@bp.route("/merge", methods=["POST"])
def merge():
    """Concatenate the uploaded PDFs, in upload order, into ``merged.pdf``.

    Responds with a 400 JSON error when fewer than two files are uploaded or
    when any upload cannot be read as a PDF.
    """
    uploads = request.files.getlist("files")
    if len(uploads) < 2:
        return jsonify(error="Please upload at least 2 PDF files."), 400

    # The context manager closes the accumulating document on every exit path.
    with fitz.open() as merged:
        for upload in uploads:
            try:
                with fitz.open(stream=upload.read(), filetype="pdf") as source:
                    merged.insert_pdf(source)
            except Exception as err:
                log_error(err, f"merge: {upload.filename}")
                return jsonify(error=f"Could not read '{upload.filename}' (corrupted or not a PDF)."), 400

        output = io.BytesIO()
        merged.save(output)
        output.seek(0)

    return send_file(
        output,
        mimetype="application/pdf",
        as_attachment=True,
        download_name="merged.pdf",
    )
|
|
|
|
|
|
@bp.route("/split", methods=["POST"])
def split():
    """Split the uploaded PDF into one single-page PDF per selected page.

    A single selected page is returned directly as ``page_<n>.pdf``; several
    pages are returned together as ``split_pages.zip``.
    """
    uploads = request.files.getlist("files")
    if not uploads or not uploads[0].filename:
        return jsonify(error=NO_FILE_SINGLE), 400

    page_spec = request.form.get("pages", "").strip()
    try:
        source = _open_pdf(uploads[0].read())
    except ValueError as err:
        return jsonify(error=str(err)), 400

    def page_as_pdf(index):
        """Return the bytes of a new PDF holding only page *index* of the source."""
        with fitz.open() as single:
            single.insert_pdf(source, from_page=index, to_page=index)
            buffer = io.BytesIO()
            single.save(buffer)
        return buffer.getvalue()

    try:
        try:
            selected = parse_page_ranges(page_spec, len(source))
        except (ValueError, IndexError):
            return jsonify(error="Invalid page range. Use e.g. '1-3, 5, 7-10'."), 400

        if not selected:
            return jsonify(error="No valid pages selected."), 400

        if len(selected) == 1:
            only = selected[0]
            return send_file(
                io.BytesIO(page_as_pdf(only)),
                mimetype="application/pdf",
                as_attachment=True,
                download_name=f"page_{only+1}.pdf",
            )

        parts = [(f"page_{index + 1}.pdf", page_as_pdf(index)) for index in selected]
    finally:
        source.close()

    archive = make_zip(parts)
    return send_file(
        archive,
        mimetype="application/zip",
        as_attachment=True,
        download_name="split_pages.zip",
    )
|
|
|
|
|
|
@bp.route("/compress", methods=["POST"])
def compress():
    """Shrink the uploaded PDF by re-encoding its embedded images as JPEG.

    The ``quality`` form value (``low`` / ``medium`` / ``high``) selects the
    JPEG quality and the largest allowed image edge; unknown values fall back
    to the medium settings. An image that fails to convert is logged and left
    as it was.
    """
    uploads = request.files.getlist("files")
    if not uploads or not uploads[0].filename:
        return jsonify(error=NO_FILE_SINGLE), 400

    # level -> (JPEG quality, maximum edge length in pixels)
    presets = {"low": (40, 1200), "medium": (65, 1800), "high": (85, 2400)}
    level = request.form.get("quality", "medium")
    image_quality, max_dim = presets.get(level, presets["medium"])

    from PIL import Image

    try:
        doc = _open_pdf(uploads[0].read())
    except ValueError as err:
        return jsonify(error=str(err)), 400

    def reencode(raw):
        """Return *raw* image bytes converted to RGB, capped in size, as JPEG."""
        with Image.open(io.BytesIO(raw)) as opened:
            picture = opened if opened.mode == "RGB" else opened.convert("RGB")
            if max(picture.size) > max_dim:
                picture.thumbnail((max_dim, max_dim), Image.LANCZOS)
            encoded = io.BytesIO()
            picture.save(encoded, format="JPEG", quality=image_quality, optimize=True)
        return encoded.getvalue()

    try:
        seen_xrefs = set()
        for page in doc:
            for info in page.get_images(full=True):
                xref = info[0]
                # An image shared by several pages is only converted once.
                if xref in seen_xrefs:
                    continue
                seen_xrefs.add(xref)
                try:
                    extracted = doc.extract_image(xref)
                    if not extracted:
                        continue
                    # Swap the image stream in place so placement and size on
                    # the page stay as they were.
                    page.replace_image(xref, stream=reencode(extracted["image"]))
                except Exception as err:
                    log_error(err, f"compress xref={xref}")
                    continue

        output = io.BytesIO()
        doc.save(output, garbage=4, deflate=True, clean=True)
        output.seek(0)
    finally:
        doc.close()

    stem = uploads[0].filename.rsplit(".", 1)[0]
    return send_file(
        output,
        mimetype="application/pdf",
        as_attachment=True,
        download_name=stem + "_compressed.pdf",
    )
|
|
|
|
|
|
@bp.route("/rotate", methods=["POST"])
def rotate():
    """Rotate all or selected pages of the uploaded PDF.

    Form fields:
        angle: 90, 180 or 270; added to each page's existing rotation.
        pages: Optional page spec (see ``parse_page_ranges``); blank = all.

    Returns the rotated PDF as ``<name>_rotated.pdf``, or a 400 JSON error for
    a missing file, an unsupported angle, an unreadable PDF, a malformed page
    spec, or a spec that selects no existing page.
    """
    files = request.files.getlist("files")
    if not files or not files[0].filename:
        return jsonify(error=NO_FILE_SINGLE), 400

    angle = safe_int(request.form.get("angle"), 90)
    if angle not in (90, 180, 270):
        return jsonify(error="Rotation must be 90, 180, or 270."), 400
    page_spec = request.form.get("pages", "").strip()

    try:
        doc = _open_pdf(files[0].read())
    except ValueError as e:
        return jsonify(error=str(e)), 400

    try:
        try:
            pages = parse_page_ranges(page_spec, len(doc))
        except (ValueError, IndexError):
            return jsonify(error="Invalid page range. Use e.g. '1-3, 5, 7-10'."), 400

        # Same guard as split/sign/redact: a spec that matches no page is a
        # user error, not a reason to hand back an untouched copy.
        if not pages:
            return jsonify(error="No valid pages selected."), 400

        for p in pages:
            page = doc[p]  # load once instead of once per attribute access
            page.set_rotation((page.rotation + angle) % 360)

        output = io.BytesIO()
        doc.save(output)
        output.seek(0)
    finally:
        doc.close()

    name = files[0].filename.rsplit(".", 1)[0] + "_rotated.pdf"
    return send_file(output, mimetype="application/pdf",
                     as_attachment=True, download_name=name)
|
|
|
|
|
|
@bp.route("/resize", methods=["POST"])
def resize():
    """Re-draw every page of the uploaded PDF onto pages of a new size.

    ``mode=scale`` multiplies each page's dimensions by ``scale`` percent
    (limited to 10-500). ``mode=paper`` places each page, centred and with
    its aspect ratio kept, on the chosen entry of ``PAPER_SIZES``, turned to
    match the source page's orientation. Any other mode is a 400 error.
    """
    uploads = request.files.getlist("files")
    if not uploads or not uploads[0].filename:
        return jsonify(error=NO_FILE_SINGLE), 400

    mode = request.form.get("mode", "scale")
    try:
        doc = _open_pdf(uploads[0].read())
    except ValueError as err:
        return jsonify(error=str(err)), 400

    resized = fitz.open()
    try:
        if mode == "scale":
            percent = safe_float(request.form.get("scale"), 100.0,
                                 min_val=10.0, max_val=500.0)
            factor = percent / 100.0

            for page in doc:
                bounds = page.rect
                canvas = resized.new_page(width=bounds.width * factor,
                                          height=bounds.height * factor)
                canvas.show_pdf_page(canvas.rect, doc, page.number,
                                     rotate=page.rotation)

        elif mode == "paper":
            paper = request.form.get("paper", "a4")
            target_w, target_h = PAPER_SIZES.get(paper, PAPER_SIZES["a4"])
            target_is_landscape = target_w > target_h

            for page in doc:
                bounds = page.rect
                src_w, src_h = bounds.width, bounds.height

                # Turn the paper so its orientation matches the source page.
                if (src_w > src_h) == target_is_landscape:
                    page_w, page_h = target_w, target_h
                else:
                    page_w, page_h = target_h, target_w

                # Largest uniform scale that still fits, then centre it.
                fit = min(page_w / src_w, page_h / src_h)
                content_w = src_w * fit
                content_h = src_h * fit
                x0 = (page_w - content_w) / 2
                y0 = (page_h - content_h) / 2
                placement = fitz.Rect(x0, y0, x0 + content_w, y0 + content_h)

                canvas = resized.new_page(width=page_w, height=page_h)
                canvas.show_pdf_page(placement, doc, page.number,
                                     rotate=page.rotation)
        else:
            return jsonify(error="Unknown resize mode."), 400

        output = io.BytesIO()
        resized.save(output, garbage=4, deflate=True)
        output.seek(0)
    finally:
        resized.close()
        doc.close()

    stem = uploads[0].filename.rsplit(".", 1)[0]
    return send_file(
        output,
        mimetype="application/pdf",
        as_attachment=True,
        download_name=stem + "_resized.pdf",
    )
|
|
|
|
|
|
@bp.route("/page-numbers", methods=["POST"])
def page_numbers():
    """Stamp a running page number on every page of the uploaded PDF.

    Form fields:
        position: ``<top|bottom>-<left|center|right>``; anything else falls
            back to ``bottom-center``.
        start: Number printed on the first page (0-100000, default 1).
        fontsize: Font size in points (6-72, default 11).

    Returns the numbered PDF as ``<name>_numbered.pdf``, or a 400 JSON error
    when no file is uploaded or the PDF cannot be opened.
    """
    files = request.files.getlist("files")
    if not files or not files[0].filename:
        return jsonify(error=NO_FILE_SINGLE), 400

    position = request.form.get("position", "bottom-center")
    start = safe_int(request.form.get("start"), 1, min_val=0, max_val=100000)
    fontsize = safe_int(request.form.get("fontsize"), 11, min_val=6, max_val=72)

    vertical, _, horizontal = position.partition("-")
    if vertical not in ("top", "bottom") or horizontal not in ("left", "center", "right"):
        vertical, horizontal = "bottom", "center"

    try:
        doc = _open_pdf(files[0].read())
    except ValueError as e:
        return jsonify(error=str(e)), 400

    margin = 36  # 0.5 inch

    try:
        for i, page in enumerate(doc):
            label = str(start + i)
            r = page.rect

            # insert_text starts drawing at the given point, so centred and
            # right-aligned numbers must be shifted left by the text width;
            # otherwise they sit off-centre or run into the right margin.
            text_width = fitz.get_text_length(label, fontname="helv",
                                              fontsize=fontsize)
            if horizontal == "left":
                x = margin
            elif horizontal == "right":
                x = r.width - margin - text_width
            else:
                x = (r.width - text_width) / 2

            # The point is used as the text baseline, so a top position is
            # pushed down by the font size.
            y = margin + fontsize if vertical == "top" else r.height - margin

            page.insert_text(fitz.Point(x, y), label, fontsize=fontsize,
                             fontname="helv", color=(0.3, 0.3, 0.3))

        output = io.BytesIO()
        doc.save(output)
        output.seek(0)
    finally:
        doc.close()

    name = files[0].filename.rsplit(".", 1)[0] + "_numbered.pdf"
    return send_file(output, mimetype="application/pdf",
                     as_attachment=True, download_name=name)
|
|
|
|
|
|
@bp.route("/extract-images", methods=["POST"])
def extract_images():
    """Return the images embedded in the uploaded PDF.

    A single image is sent as-is; several are bundled into
    ``<name>_images.zip``. Images are named ``page<P>_img<I>.<ext>`` with
    1-based counters. An image that cannot be extracted is logged and skipped;
    finding none at all is a 400 error.
    """
    uploads = request.files.getlist("files")
    if not uploads or not uploads[0].filename:
        return jsonify(error=NO_FILE_SINGLE), 400

    try:
        doc = _open_pdf(uploads[0].read())
    except ValueError as err:
        return jsonify(error=str(err)), 400

    found = []
    try:
        for page_no, page in enumerate(doc, start=1):
            for image_no, info in enumerate(page.get_images(full=True), start=1):
                xref = info[0]
                try:
                    extracted = doc.extract_image(xref)
                    if not extracted:
                        continue
                    ext = extracted.get("ext", "png")
                    found.append((f"page{page_no}_img{image_no}.{ext}",
                                  extracted["image"]))
                except Exception as err:
                    log_error(err, f"extract_images xref={xref}")
                    continue
    finally:
        doc.close()

    if not found:
        return jsonify(error="No images found in the PDF."), 400

    if len(found) == 1:
        filename, payload = found[0]
        ext = filename.rsplit(".", 1)[1]
        subtype = "jpeg" if ext in ("jpg", "jpeg") else ext
        return send_file(
            io.BytesIO(payload),
            mimetype=f"image/{subtype}",
            as_attachment=True,
            download_name=filename,
        )

    archive = make_zip(found)
    stem = uploads[0].filename.rsplit(".", 1)[0]
    return send_file(
        archive,
        mimetype="application/zip",
        as_attachment=True,
        download_name=stem + "_images.zip",
    )
|
|
|
|
|
|
@bp.route("/protect", methods=["POST"])
def protect():
    """Save the uploaded PDF with AES-256 encryption and the given passwords.

    ``user_password`` is required; ``owner_password`` falls back to the user
    password when left blank. The print and copy permission flags are set.
    """
    uploads = request.files.getlist("files")
    if not uploads or not uploads[0].filename:
        return jsonify(error=NO_FILE_SINGLE), 400

    form = request.form
    user_pw = form.get("user_password", "")
    owner_pw = form.get("owner_password", "") or user_pw

    if not user_pw:
        return jsonify(error="Please enter a password."), 400

    try:
        doc = _open_pdf(uploads[0].read())
    except ValueError as err:
        return jsonify(error=str(err)), 400

    try:
        save_options = {
            "encryption": fitz.PDF_ENCRYPT_AES_256,
            "user_pw": user_pw,
            "owner_pw": owner_pw,
            "permissions": fitz.PDF_PERM_PRINT | fitz.PDF_PERM_COPY,
        }
        output = io.BytesIO()
        doc.save(output, **save_options)
        output.seek(0)
    finally:
        doc.close()

    stem = uploads[0].filename.rsplit(".", 1)[0]
    return send_file(
        output,
        mimetype="application/pdf",
        as_attachment=True,
        download_name=stem + "_protected.pdf",
    )
|
|
|
|
|
|
@bp.route("/sign", methods=["POST"])
def sign():
    """Stamp an uploaded signature image onto selected pages of a PDF.

    Form fields: ``signature`` (image file), ``pages`` (optional page spec),
    ``position`` (corner or centre keyword), ``width`` and ``margin`` in
    points, and ``opacity`` in percent. The image is normalised to an RGBA
    PNG, its alpha channel is scaled by the opacity, and it is placed at the
    same relative spot on every target page.
    """
    from PIL import Image

    uploads = request.files.getlist("files")
    if not uploads or not uploads[0].filename:
        return jsonify(error="Please upload a PDF."), 400

    signature_upload = request.files.get("signature")
    if not signature_upload or not signature_upload.filename:
        return jsonify(error="Please upload a signature image (PNG or JPG)."), 400

    form = request.form
    position = form.get("position", "bottom-right")
    sig_width = safe_float(form.get("width"), 140.0, min_val=30.0, max_val=600.0)
    margin = safe_float(form.get("margin"), 36.0, min_val=0.0, max_val=300.0)
    opacity = safe_int(form.get("opacity"), 100, min_val=10, max_val=100) / 100.0
    page_spec = form.get("pages", "").strip()

    try:
        with Image.open(signature_upload) as raw:
            stamp = raw.convert("RGBA")
    except Exception as err:
        log_error(err, "sign: signature image")
        return jsonify(error="Could not read signature image (file may be corrupted or not an image)."), 400

    if opacity < 1.0:
        # Fade the stamp by scaling only its alpha channel.
        red, green, blue, alpha = stamp.split()
        alpha = alpha.point(lambda v: int(v * opacity))
        stamp = Image.merge("RGBA", (red, green, blue, alpha))

    png = io.BytesIO()
    stamp.save(png, format="PNG")
    sig_bytes = png.getvalue()

    # Height follows from the requested width and the image's aspect ratio.
    aspect = stamp.height / stamp.width if stamp.width else 1.0
    sig_h = sig_width * aspect
    stamp.close()

    try:
        doc = _open_pdf(uploads[0].read())
    except ValueError as err:
        return jsonify(error=str(err)), 400

    def top_left_corner(bounds):
        """Return (x0, y0) of the stamp for a page with the given rect."""
        if "right" in position:
            left = bounds.width - margin - sig_width
        elif "center" in position:
            left = (bounds.width - sig_width) / 2
        else:
            left = margin
        top = bounds.height - margin - sig_h if "bottom" in position else margin
        return left, top

    try:
        try:
            target = parse_page_ranges(page_spec, len(doc))
        except (ValueError, IndexError):
            return jsonify(error="Invalid page range. Use e.g. '1, 3, 5-7'."), 400
        if not target:
            return jsonify(error="No valid pages selected."), 400

        for pno in target:
            page = doc[pno]
            x0, y0 = top_left_corner(page.rect)
            page.insert_image(
                fitz.Rect(x0, y0, x0 + sig_width, y0 + sig_h),
                stream=sig_bytes, keep_proportion=True, overlay=True,
            )

        output = io.BytesIO()
        doc.save(output, garbage=4, deflate=True)
        output.seek(0)
    finally:
        doc.close()

    stem = uploads[0].filename.rsplit(".", 1)[0]
    return send_file(
        output,
        mimetype="application/pdf",
        as_attachment=True,
        download_name=stem + "_signed.pdf",
    )
|
|
|
|
|
|
def _filter_case_exact(page, hits, needle):
    """Drop search hits whose on-page text differs from *needle* only by case.

    ``Page.search_for`` matches case-insensitively, so a "match case exactly"
    request has to be enforced afterwards. A hit is discarded only when the
    text under its rectangle clearly contains a different-case variant of the
    needle. Ambiguous hits (for example fragments of a match that wraps across
    lines) are kept, because over-redacting is safer than leaving text exposed.
    """
    needle = needle.strip()
    folded = needle.casefold()
    kept = []
    for rect in hits:
        under = page.get_textbox(rect)
        if needle in under or folded not in under.casefold():
            kept.append(rect)
    return kept


@bp.route("/redact", methods=["POST"])
def redact():
    """Permanently black out matching text in the uploaded PDF.

    Form fields:
        patterns: One search term or regular expression per line.
        is_regex: ``"on"`` to treat each line as a regular expression.
        case_sensitive: ``"on"`` to keep only hits whose case matches.
        pages: Optional page spec (see ``parse_page_ranges``); blank = all.

    Returns the redacted PDF as ``<name>_redacted.pdf`` with an
    ``X-Redactions-Applied`` header holding the number of rectangles drawn,
    or a 400 JSON error for missing input, an invalid regex or page spec, an
    unreadable PDF, or no matches.

    Note: regex matches are located on the page by searching for the matched
    text, so every other occurrence of that same text on the page is redacted
    too, even where the regex itself (e.g. its ``\\b`` anchors) would not
    have matched.
    """
    import re

    files = request.files.getlist("files")
    if not files or not files[0].filename:
        return jsonify(error=NO_FILE_SINGLE), 400

    patterns_raw = request.form.get("patterns", "").strip()
    if not patterns_raw:
        return jsonify(error="Enter at least one search term or pattern."), 400

    is_regex = request.form.get("is_regex") == "on"
    case_sensitive = request.form.get("case_sensitive") == "on"
    page_spec = request.form.get("pages", "").strip()

    patterns = [p for p in patterns_raw.splitlines() if p.strip()]
    if not patterns:
        return jsonify(error="Enter at least one search term or pattern."), 400

    # Validate regex patterns up-front so the user gets a clean error message.
    compiled: list[re.Pattern] = []
    if is_regex:
        flags = 0 if case_sensitive else re.IGNORECASE
        for p in patterns:
            try:
                compiled.append(re.compile(p, flags))
            except re.error as e:
                return jsonify(error=f"Invalid regex {p!r}: {e}"), 400

    try:
        doc = _open_pdf(files[0].read())
    except ValueError as e:
        return jsonify(error=str(e)), 400

    try:
        try:
            target = parse_page_ranges(page_spec, len(doc))
        except (ValueError, IndexError):
            return jsonify(error="Invalid page range. Use e.g. '1-3, 5, 7-10'."), 400
        if not target:
            return jsonify(error="No valid pages selected."), 400

        total_redactions = 0
        for pno in target:
            page = doc[pno]

            if is_regex:
                # Run each regex over the page text and collect the distinct
                # matched strings. search_for already returns every occurrence
                # of a string, so searching duplicates again is wasted work.
                page_text = page.get_text()
                needles = list(dict.fromkeys(
                    m.group(0)
                    for pat in compiled
                    for m in pat.finditer(page_text)
                    if m.group(0).strip()
                ))
            else:
                needles = patterns

            rects: list[fitz.Rect] = []
            for needle in needles:
                # search_for handles word-wrap and returns one rect per
                # visual hit on the page; it ignores case.
                hits = page.search_for(needle)
                if case_sensitive:
                    hits = _filter_case_exact(page, hits, needle)
                rects.extend(hits)

            # De-duplicate near-identical rectangles
            uniq: list[fitz.Rect] = []
            for r in rects:
                if not any(abs(r.x0 - u.x0) < 0.5 and abs(r.y0 - u.y0) < 0.5
                           and abs(r.x1 - u.x1) < 0.5 and abs(r.y1 - u.y1) < 0.5
                           for u in uniq):
                    uniq.append(r)

            for r in uniq:
                page.add_redact_annot(r, fill=(0, 0, 0))
            total_redactions += len(uniq)

            # apply_redactions removes the text under the annotations;
            # PDF_REDACT_IMAGE_NONE leaves images on the page untouched.
            page.apply_redactions(images=fitz.PDF_REDACT_IMAGE_NONE)

        if total_redactions == 0:
            return jsonify(error=(
                "No matches found. Check spelling, toggle case-sensitivity, "
                "or try a different pattern."
            )), 400

        output = io.BytesIO()
        doc.save(output, garbage=4, deflate=True, clean=True)
        output.seek(0)
    finally:
        doc.close()

    name = files[0].filename.rsplit(".", 1)[0] + "_redacted.pdf"
    resp = send_file(output, mimetype="application/pdf",
                     as_attachment=True, download_name=name)
    resp.headers["X-Redactions-Applied"] = str(total_redactions)
    return resp
|
|
|
|
|
|
@bp.route("/unlock", methods=["POST"])
def unlock():
    """Re-save the uploaded PDF after authenticating with the given password.

    If the document asks for a password and the submitted one is rejected,
    a 400 "Incorrect password." error is returned. Otherwise the document is
    saved with default save options and returned as ``<name>_unlocked.pdf``.
    """
    uploads = request.files.getlist("files")
    if not uploads or not uploads[0].filename:
        return jsonify(error=NO_FILE_SINGLE), 400

    password = request.form.get("password", "")
    try:
        doc = _open_pdf(uploads[0].read())
    except ValueError as err:
        return jsonify(error=str(err)), 400

    try:
        # authenticate() is only attempted when the document needs a password.
        if doc.needs_pass and not doc.authenticate(password):
            return jsonify(error="Incorrect password."), 400

        output = io.BytesIO()
        doc.save(output)
        output.seek(0)
    finally:
        doc.close()

    stem = uploads[0].filename.rsplit(".", 1)[0]
    return send_file(
        output,
        mimetype="application/pdf",
        as_attachment=True,
        download_name=stem + "_unlocked.pdf",
    )
|
|
|
|
|
|
# ── PDF Form Filler (AcroForm) ─────────────────────────────
|
|
|
|
# PyMuPDF widget type constants → string labels we expose to the UI.
# _serialize_widgets looks a widget's ``field_type`` up here and reports
# "unknown" for any type that is not listed.
_WIDGET_TYPE_NAMES = {
    fitz.PDF_WIDGET_TYPE_TEXT: "text",
    fitz.PDF_WIDGET_TYPE_CHECKBOX: "checkbox",
    fitz.PDF_WIDGET_TYPE_RADIOBUTTON: "radio",
    fitz.PDF_WIDGET_TYPE_LISTBOX: "listbox",
    fitz.PDF_WIDGET_TYPE_COMBOBOX: "combobox",
    fitz.PDF_WIDGET_TYPE_BUTTON: "button",
    fitz.PDF_WIDGET_TYPE_SIGNATURE: "signature",
}
|
|
|
|
|
|
def _label_near_widget(page, rect: fitz.Rect, max_dist: float = 250) -> str:
    """Guess the caption printed next to a form widget.

    Captions of radio buttons and checkboxes (e.g. "Male", "Female") are
    ordinary page text rather than widget properties, so they are recovered
    from the page's words. Words whose vertical centre lies on the widget's
    line are collected outward from the widget, and the run stops at the first
    horizontal gap wider than 25 points. That keeps a row such as
    "[ ] Male [ ] Female" from bleeding one caption into the next.

    The right-hand side is tried first; the left-hand side is the fallback.

    Args:
        page: Page exposing ``get_text("words")``.
        rect: Widget rectangle; an empty/falsy rect yields "".
        max_dist: Furthest horizontal distance from the widget edge at which
            a word may still be considered.

    Returns:
        The caption with trailing ``:;,.`` removed, cut to 80 characters, or
        "" when nothing suitable is found.
    """
    if not rect:
        return ""

    line_height = max(rect.y1 - rect.y0, 8)
    center_y = (rect.y0 + rect.y1) / 2

    # Each word is (x0, y0, x1, y1, "text", block, line, word).
    words = page.get_text("words")
    if not words:
        return ""

    max_gap = 25.0  # max horizontal gap between adjacent label words, in points

    def on_widget_line(word) -> bool:
        """True when the word's vertical centre is close to the widget's."""
        return abs((word[1] + word[3]) / 2 - center_y) <= line_height * 0.7

    def as_label(tokens) -> str:
        """Join tokens and apply the trailing-punctuation and length rules."""
        return " ".join(tokens).strip().rstrip(":;,.")[:80]

    # ── Right-side run: nearest word first, walking away from the widget ──
    after = sorted(
        (w for w in words
         if on_widget_line(w)
         and w[0] >= rect.x1 - 1
         and w[0] - rect.x1 < max_dist),
        key=lambda w: w[0],
    )
    if after:
        tokens = []
        previous_end = None
        for w in after:
            if previous_end is not None and w[0] - previous_end > max_gap:
                break
            tokens.append(w[4])
            previous_end = w[2]
        label = as_label(tokens)
        if label:
            return label

    # ── Left-side fallback: rightmost word first (closest to the widget) ──
    before = sorted(
        (w for w in words
         if on_widget_line(w)
         and w[2] <= rect.x0 + 1
         and rect.x0 - w[2] < max_dist),
        key=lambda w: -w[2],
    )
    if before:
        tokens = []
        previous_start = None
        for w in before:
            if previous_start is not None and previous_start - w[2] > max_gap:
                break
            tokens.append(w[4])
            previous_start = w[0]
        # Words were gathered right-to-left; restore reading order.
        label = as_label(reversed(tokens))
        if label:
            return label

    return ""
|
|
|
|
|
|
def _serialize_widgets(doc) -> list[dict]:
    """Describe every form widget in *doc* as a list of JSON-friendly dicts.

    Pages are visited in order (numbered from 1) and each widget contributes
    one dict with its name, label, type label, current value, page number,
    rounded rectangle, flag-derived booleans, choice list, on-state names and
    maximum text length. For checkboxes and radio buttons the entry also
    carries a caption sniffed from the surrounding page text.
    """
    # Masks applied to the widget's field_flags bit field.
    mask_readonly = 1            # bit 1
    mask_required = 2            # bit 2
    mask_multiline = 1 << 12     # bit 13, meaningful for text fields
    mask_combo_edit = 1 << 18    # bit 19, combo box accepts typed values

    described: list[dict] = []
    for page_num, page in enumerate(doc, start=1):
        for widget in page.widgets() or []:
            kind = _WIDGET_TYPE_NAMES.get(widget.field_type, "unknown")
            is_toggle = kind in ("checkbox", "radio")
            bits = getattr(widget, "field_flags", 0) or 0

            # Choice fields expose `choice_values`; absent or None -> [].
            choices = list(getattr(widget, "choice_values", None) or [])

            # The "on" state name differs between PDFs ("Yes", "On", "1",
            # or identifiers such as "Male"), so collect every distinct
            # state other than "Off", keeping first-seen order.
            on_states = []
            if is_toggle:
                for names in (widget.button_states() or {}).values():
                    for state in names or ():
                        if state and state != "Off" and state not in on_states:
                            on_states.append(state)

            # Captions for toggles are static page text, not widget data.
            caption = _label_near_widget(page, widget.rect) if is_toggle else ""

            # Identifier this particular radio button stands for when on.
            if kind == "radio" and on_states:
                radio_value = on_states[0]
            else:
                radio_value = ""

            current = widget.field_value
            described.append({
                "name": widget.field_name or "",
                "label": widget.field_label or widget.field_name or "",
                "type": kind,
                "value": "" if current is None else current,
                "page": page_num,
                "rect": [round(c, 2) for c in (widget.rect or fitz.Rect())],
                "option_label": caption,
                "option_value": radio_value,
                "editable": kind == "combobox" and bool(bits & mask_combo_edit),
                "required": bool(bits & mask_required),
                "readonly": bool(bits & mask_readonly),
                "multiline": bool(bits & mask_multiline),
                "choices": choices,
                "on_states": on_states,
                "max_length": getattr(widget, "text_maxlen", 0),
            })
    return described
|
|
|
|
|
|
@bp.route("/form-inspect", methods=["POST"])
def form_inspect():
    """Report the form fields found in the first uploaded PDF as JSON.

    Responds with 400 and an ``error`` message when no file was uploaded or
    the upload cannot be opened as a PDF. Otherwise returns the filename,
    page count, field count, the serialized field list and a ``has_form``
    flag that is true when at least one field exists.
    """
    uploads = request.files.getlist("files")
    upload = uploads[0] if uploads else None
    if upload is None or not upload.filename:
        return jsonify(error=NO_FILE_SINGLE), 400

    try:
        pdf = _open_pdf(upload.read())
    except ValueError as exc:
        return jsonify(error=str(exc)), 400

    # Gather everything that needs the open document, then always close it.
    try:
        fields = _serialize_widgets(pdf)
        page_count = len(pdf)
    finally:
        pdf.close()

    return jsonify(
        filename=upload.filename,
        page_count=page_count,
        field_count=len(fields),
        fields=fields,
        has_form=bool(fields),
    )
|
|
|
|
|
|
# Strings (compared case-insensitively) that mean "tick this checkbox".
_FORM_FILL_CHECKED_WORDS = frozenset({"true", "on", "1", "yes"})


def _form_fill_on_states(widget) -> list:
    """Return the distinct non-"Off" state names of a checkbox/radio widget."""
    found = []
    for names in (widget.button_states() or {}).values():
        for state in names or ():
            if state and state != "Off" and state not in found:
                found.append(state)
    return found


def _form_fill_wants_checked(value, on_states) -> bool:
    """Decide whether a submitted checkbox value means "checked".

    Accepts ``True``, the number 1, any of the widget's own on-state names,
    and the words true/on/1/yes in any letter case. Everything else is
    treated as unchecked.
    """
    if isinstance(value, bool):
        return value
    if isinstance(value, (int, float)):
        return value == 1
    if isinstance(value, str):
        return value in on_states or value.strip().lower() in _FORM_FILL_CHECKED_WORDS
    return False


@bp.route("/form-fill", methods=["POST"])
def form_fill():
    """Apply field values to the uploaded PDF and return the filled file.

    Form values are passed as JSON in the `values` field of the multipart body:
    `{"<field_name>": "<value>", ...}`. Values are matched against
    `widget.field_name`. Unknown names are silently ignored.

    Per widget type:
      * checkbox - checked when the value is true, 1, one of the widget's
        on-state names, or true/on/1/yes in any case; otherwise unchecked.
      * radio - every button of a group shares the field name, so only the
        button whose on-state equals the value is updated. An empty value or
        "Off" deselects. A value matching no button of the group is reported
        as skipped.
      * everything else - the value is written as a string (None -> "").

    When the form field `flatten` is "on" and the installed PyMuPDF offers
    `Document.bake`, widgets are converted to static page content before
    saving; otherwise the result stays an editable form.

    Response headers: `X-Fields-Applied` (widgets updated),
    `X-Fields-Skipped` (read-only, failed, or unmatched fields) and
    `X-Flattened` ("1" when flattening was performed, else "0").
    Returns 400 with an `error` message for a missing file, invalid JSON,
    or an unreadable PDF.
    """
    import json

    files = request.files.getlist("files")
    if not files or not files[0].filename:
        return jsonify(error=NO_FILE_SINGLE), 400

    raw_values = request.form.get("values", "{}")
    try:
        values_map = json.loads(raw_values)
        if not isinstance(values_map, dict):
            raise ValueError("values must be an object")
    except (ValueError, TypeError) as e:
        return jsonify(error=f"Invalid form values JSON: {e}"), 400

    flatten = request.form.get("flatten") == "on"

    try:
        doc = _open_pdf(files[0].read())
    except ValueError as e:
        return jsonify(error=str(e)), 400

    applied = 0
    flattened = False
    skipped: list[str] = []
    radio_matched: set[str] = set()    # groups where a button was selected
    radio_unmatched: set[str] = set()  # groups where some button did not match
    try:
        for page in doc:
            for w in page.widgets() or []:
                name = w.field_name
                if not name or name not in values_map:
                    continue
                if w.field_flags and (w.field_flags & 1):  # read-only
                    skipped.append(name)
                    continue

                new_val = values_map[name]
                ftype = w.field_type

                try:
                    if ftype == fitz.PDF_WIDGET_TYPE_CHECKBOX:
                        on_states = _form_fill_on_states(w)
                        if _form_fill_wants_checked(new_val, on_states):
                            w.field_value = on_states[0] if on_states else "Yes"
                        else:
                            w.field_value = "Off"
                    elif ftype == fitz.PDF_WIDGET_TYPE_RADIOBUTTON:
                        if not new_val or new_val == "Off":
                            # Use a real boolean to deselect: a non-empty
                            # string (even "Off") may be read as "select".
                            # NOTE(review): confirm against the installed
                            # PyMuPDF version.
                            w.field_value = False
                        else:
                            choice = str(new_val)
                            on_states = _form_fill_on_states(w)
                            if on_states and choice not in on_states:
                                # A sibling button of the group represents
                                # this value; leave this one untouched.
                                radio_unmatched.add(name)
                                continue
                            radio_matched.add(name)
                            w.field_value = choice
                    else:
                        # list box, combo box, text and other text-like fields
                        w.field_value = str(new_val) if new_val is not None else ""

                    w.update()
                    applied += 1
                except Exception as e:
                    # Best effort: one bad widget must not abort the whole fill.
                    log_error(e, f"form-fill: {name}")
                    skipped.append(name)

        # Radio groups that received a value no button could represent.
        skipped.extend(sorted(radio_unmatched - radio_matched))

        # Optional: bake widgets into static page content. Document.bake is
        # only present in newer PyMuPDF releases, so fall back to leaving the
        # form editable when it is unavailable or fails.
        if flatten:
            bake = getattr(doc, "bake", None)
            if callable(bake):
                try:
                    bake(annots=False, widgets=True)
                    flattened = True
                except Exception as e:
                    log_error(e, "form-fill: flatten")

        output = io.BytesIO()
        doc.save(output, garbage=4, deflate=True, clean=True)
        output.seek(0)
    finally:
        doc.close()

    base = files[0].filename.rsplit(".", 1)[0]
    resp = send_file(output, mimetype="application/pdf",
                     as_attachment=True, download_name=f"{base}_filled.pdf")
    resp.headers["X-Fields-Applied"] = str(applied)
    resp.headers["X-Fields-Skipped"] = str(len(skipped))
    resp.headers["X-Flattened"] = "1" if flattened else "0"
    return resp
|