mirror of
https://codeberg.org/listyantidewi/your-everyday-tools.git
synced 2026-07-01 23:17:37 +08:00
fixed form filler for radio button
This commit is contained in:
@@ -7,6 +7,9 @@ All notable changes to **Your Everyday Tools** are documented here. The format i
|
||||
### Added
|
||||
- **Fill PDF Form** *(PDF Tools)* — upload a PDF that has AcroForm fields (the kind in tax forms, gov applications, and most fillable PDFs), inspect the fields in your browser, fill them, and download the filled PDF. Supports text, multi-line text, checkbox, radio, listbox, and combobox field types. Two-step UI: `/pdf/form-inspect` returns the field schema as JSON, then `/pdf/form-fill` applies values. PDFs without form fields surface a clear "this PDF doesn't have an AcroForm" message rather than silently doing nothing. XFA-only forms (some Adobe-only forms) are not supported — limitation of PyMuPDF, not the project.
|
||||
|
||||
### Improved
|
||||
- **Fill PDF Form: human radio/checkbox labels.** PDF radio buttons store opaque on-state values (often `0`/`1`/`Yes`/arbitrary identifiers) but the human label like "Male" / "Female" is painted on the page as static text *next to* the widget — not part of the field. Form Filler now sniffs that nearby text and shows the human label in the UI, while keeping the PDF on-state value as the actual submitted value (and as a tooltip for power users). Same for checkbox labels. The sniffer correctly handles vertical lists, horizontal rows ("○ Male ○ Female"), and multi-word labels ("I agree to the terms and conditions"), stopping at gaps > 25pt to avoid grabbing the next widget's label.
|
||||
|
||||
## [0.6.0] — 2026-04-29
|
||||
|
||||
### Added — 8 new tools across 6 categories (total now 99)
|
||||
|
||||
+81
-1
@@ -867,6 +867,73 @@ _WIDGET_TYPE_NAMES = {
|
||||
}
|
||||
|
||||
|
||||
def _label_near_widget(page, rect: fitz.Rect, max_dist: float = 250,
                       gap: float = 25.0) -> str:
    """Find the text label that visually sits next to a widget on the page.

    Radio button / checkbox labels (e.g. "Male", "Female") are painted on the
    page as static text, NOT stored on the widget. We sniff them by walking
    the page words and picking the contiguous run of words on the same line,
    starting from the side adjacent to the widget. A horizontal gap larger
    than ``gap`` between two consecutive words ends the run, which prevents
    grabbing the next radio's label in a horizontal row layout like
    "[ ] Male [ ] Female".

    The right side is searched first (the conventional layout); the left side
    is only used when the right side yields no usable text.

    Args:
        page: Object exposing ``get_text("words")``, which yields tuples of
            ``(x0, y0, x1, y1, "text", block, line, word)``.
        rect: Widget rectangle (``x0``, ``y0``, ``x1``, ``y1``).
        max_dist: Maximum horizontal distance between the widget edge and a
            candidate word, in the same units as ``rect`` (points).
        gap: Maximum horizontal gap between adjacent label words, in points.
            Defaults to the previously hard-coded 25.

    Returns:
        The label with surrounding whitespace and trailing ``:;,.`` removed,
        truncated to 80 characters, or ``""`` when ``rect`` is falsy, the page
        has no words, or neither side produces any text.
    """
    if not rect:
        return ""

    # Clamp very flat widgets to a height of 8 so the vertical tolerance
    # below never collapses to (almost) nothing.
    height = max(rect.y1 - rect.y0, 8)
    cy = (rect.y0 + rect.y1) / 2

    # get_text("words") -> list of (x0, y0, x1, y1, "text", block, line, word)
    words = page.get_text("words")
    if not words:
        return ""

    def clean(run: list) -> str:
        # Join the run, drop trailing label punctuation, cap at 80 characters.
        return " ".join(run).strip().rstrip(":;,.")[:80]

    # A word is on the widget's line when its vertical centre lies within
    # 70% of the widget height from the widget's own vertical centre.
    # Computed once and shared by both the right and the left search.
    line_words = [w for w in words
                  if abs((w[1] + w[3]) / 2 - cy) <= height * 0.7]

    # ── Right-side run ──
    # Candidates start at (or within 1 unit before) the widget's right edge
    # and no further than max_dist away; walk them left-to-right.
    right = sorted(
        (w for w in line_words
         if w[0] >= rect.x1 - 1 and w[0] - rect.x1 < max_dist),
        key=lambda w: w[0],
    )
    if right:
        run = [right[0][4]]
        prev_x1 = right[0][2]
        for w in right[1:]:
            if w[0] - prev_x1 > gap:
                break
            run.append(w[4])
            prev_x1 = w[2]
        text = clean(run)
        if text:
            return text

    # ── Left-side fallback ──
    # Candidates end at (or within 1 unit after) the widget's left edge;
    # walk them right-to-left, i.e. closest to the widget first.
    left = sorted(
        (w for w in line_words
         if w[2] <= rect.x0 + 1 and rect.x0 - w[2] < max_dist),
        key=lambda w: -w[2],
    )
    if left:
        run = [left[0][4]]
        prev_x0 = left[0][0]
        for w in left[1:]:
            if prev_x0 - w[2] > gap:
                break
            # Prepend so the label keeps its natural reading order.
            run.insert(0, w[4])
            prev_x0 = w[0]
        text = clean(run)
        if text:
            return text

    return ""
|
||||
|
||||
|
||||
def _serialize_widgets(doc) -> list[dict]:
|
||||
"""Walk every page's widgets and return a JSON-friendly list of fields."""
|
||||
fields: list[dict] = []
|
||||
@@ -883,7 +950,8 @@ def _serialize_widgets(doc) -> list[dict]:
|
||||
# Choice fields expose `choice_values`; treat None as empty list
|
||||
choices = list(w.choice_values or []) if hasattr(w, "choice_values") else []
|
||||
|
||||
# For checkboxes the "on" state name varies per PDF
|
||||
# For checkboxes / radios the "on" state name varies per PDF
|
||||
# (often "Yes", "On", "1", or arbitrary identifiers like "Male").
|
||||
on_states = []
|
||||
if ftype in ("checkbox", "radio"):
|
||||
states = w.button_states() or {}
|
||||
@@ -894,6 +962,16 @@ def _serialize_widgets(doc) -> list[dict]:
|
||||
if v and v != "Off" and v not in on_states:
|
||||
on_states.append(v)
|
||||
|
||||
# For radios + checkboxes, sniff a human label from the page text
|
||||
# adjacent to this widget. PDFs paint these as static text rather
|
||||
# than storing them on the widget, so we have to read the page.
|
||||
option_label = ""
|
||||
if ftype in ("radio", "checkbox"):
|
||||
option_label = _label_near_widget(page, w.rect)
|
||||
|
||||
# The "value" identifier this specific radio represents when "on".
|
||||
option_value = on_states[0] if (ftype == "radio" and on_states) else ""
|
||||
|
||||
fields.append({
|
||||
"name": w.field_name or "",
|
||||
"label": w.field_label or w.field_name or "",
|
||||
@@ -901,6 +979,8 @@ def _serialize_widgets(doc) -> list[dict]:
|
||||
"value": w.field_value if w.field_value is not None else "",
|
||||
"page": page_num,
|
||||
"rect": [round(c, 2) for c in (w.rect or fitz.Rect())],
|
||||
"option_label": option_label,
|
||||
"option_value": option_value,
|
||||
"required": required,
|
||||
"readonly": readonly,
|
||||
"multiline": multiline,
|
||||
|
||||
@@ -101,18 +101,24 @@ function renderForm(data) {
|
||||
`across ${data.page_count} page${data.page_count === 1 ? "" : "s"} of ` +
|
||||
`<code>${escapeHtml(data.filename)}</code>`;
|
||||
|
||||
// Group radios by field_name (PDF standard: multiple widgets share a name)
|
||||
// Group radios by field_name (PDF standard: multiple widgets share a name).
|
||||
// Each radio carries its own option_value (PDF-internal id, e.g. "0"/"1")
|
||||
// and option_label (human label sniffed from nearby page text, e.g. "Male").
|
||||
const radioGroups = {};
|
||||
const renderQueue = [];
|
||||
for (const f of data.fields) {
|
||||
if (f.type === "radio") {
|
||||
if (!radioGroups[f.name]) {
|
||||
radioGroups[f.name] = { ...f, on_states_combined: [] };
|
||||
radioGroups[f.name] = { ...f, options: [] };
|
||||
renderQueue.push({ kind: "radio_group", name: f.name });
|
||||
}
|
||||
for (const s of f.on_states || []) {
|
||||
if (!radioGroups[f.name].on_states_combined.includes(s)) {
|
||||
radioGroups[f.name].on_states_combined.push(s);
|
||||
if (f.option_value) {
|
||||
const exists = radioGroups[f.name].options.some(o => o.value === f.option_value);
|
||||
if (!exists) {
|
||||
radioGroups[f.name].options.push({
|
||||
value: f.option_value,
|
||||
label: f.option_label || f.option_value,
|
||||
});
|
||||
}
|
||||
}
|
||||
} else if (f.type !== "button" && f.type !== "signature") {
|
||||
@@ -170,21 +176,28 @@ function buildField(f, isRadioGroup) {
|
||||
wrap.appendChild(lbl);
|
||||
return wrap;
|
||||
} else if (isRadioGroup) {
|
||||
// Render one radio per on-state value
|
||||
const states = f.on_states_combined || ["On"];
|
||||
for (const s of states) {
|
||||
// Render one radio per option (value + label pair sniffed from PDF)
|
||||
const opts = (f.options && f.options.length)
|
||||
? f.options
|
||||
: [{ value: "On", label: "On" }];
|
||||
for (const opt of opts) {
|
||||
const lbl = document.createElement("label");
|
||||
lbl.style.cssText = "display:inline-flex;align-items:center;gap:.4rem;margin-right:1rem";
|
||||
const r = document.createElement("input");
|
||||
r.type = "radio";
|
||||
r.name = `radio-${f.name}`;
|
||||
r.value = s;
|
||||
r.value = opt.value;
|
||||
r.dataset.fieldName = f.name;
|
||||
r.dataset.fieldType = "radio";
|
||||
if (f.value === s) r.checked = true;
|
||||
if (f.value === opt.value) r.checked = true;
|
||||
const span = document.createElement("span");
|
||||
span.textContent = s;
|
||||
span.textContent = opt.label;
|
||||
span.style.fontSize = ".9rem";
|
||||
// Show the PDF-internal value as a tooltip so power users can
|
||||
// map back to the underlying widget if they need to.
|
||||
if (opt.label !== opt.value) {
|
||||
lbl.title = `PDF on-state: "${opt.value}"`;
|
||||
}
|
||||
lbl.appendChild(r);
|
||||
lbl.appendChild(span);
|
||||
wrap.appendChild(lbl);
|
||||
|
||||
Reference in New Issue
Block a user