fixed form filler for radio button

This commit is contained in:
listyantidewi1
2026-05-01 17:42:30 +07:00
parent 73df374969
commit 705c6b4fd0
3 changed files with 108 additions and 12 deletions
+3
View File
@@ -7,6 +7,9 @@ All notable changes to **Your Everyday Tools** are documented here. The format i
### Added
- **Fill PDF Form** *(PDF Tools)* — upload a PDF that has AcroForm fields (the kind in tax forms, gov applications, and most fillable PDFs), inspect the fields in your browser, fill them, and download the filled PDF. Supports text, multi-line text, checkbox, radio, listbox, and combobox field types. Two-step UI: `/pdf/form-inspect` returns the field schema as JSON, then `/pdf/form-fill` applies values. PDFs without form fields surface a clear "this PDF doesn't have an AcroForm" message rather than silently doing nothing. XFA-only forms (some Adobe-only forms) are not supported — limitation of PyMuPDF, not the project.
### Improved
- **Fill PDF Form: human radio/checkbox labels.** PDF radio buttons store opaque on-state values (often `0`/`1`/`Yes`/arbitrary identifiers) but the human label like "Male" / "Female" is painted on the page as static text *next to* the widget — not part of the field. Form Filler now sniffs that nearby text and shows the human label in the UI, while keeping the PDF on-state value as the actual submitted value (and as a tooltip for power users). Same for checkbox labels. The sniffer correctly handles vertical lists, horizontal rows ("○ Male ○ Female"), and multi-word labels ("I agree to the terms and conditions"), stopping at gaps > 25pt to avoid grabbing the next widget's label.
## [0.6.0] — 2026-04-29
### Added — 8 new tools across 6 categories (total now 99)
+81 -1
View File
@@ -867,6 +867,73 @@ _WIDGET_TYPE_NAMES = {
}
def _label_near_widget(page, rect: fitz.Rect, max_dist: float = 250,
                       max_gap: float = 25.0) -> str:
    """Find the text label that visually sits next to a widget on the page.

    Radio button / checkbox labels (e.g. "Male", "Female") are painted on the
    page as static text, NOT stored on the widget. We sniff them by walking
    the page words and picking the contiguous run of words on the same line,
    starting from the side adjacent to the widget. A horizontal gap larger
    than ``max_gap`` stops the run, which prevents grabbing the next radio's
    label in a horizontal row layout like "[ ] Male [ ] Female".

    Right side is searched first (the conventional layout); left is fallback.

    Args:
        page: Object exposing ``get_text("words")``, which must return an
            iterable of ``(x0, y0, x1, y1, text, ...)`` tuples.
        rect: Widget rectangle; only ``x0``, ``y0``, ``x1``, ``y1`` are read.
        max_dist: Maximum horizontal distance between the widget edge and the
            near edge of a candidate word, in the same units as ``rect``.
        max_gap: Maximum horizontal gap between two adjacent words of the
            same label, in the same units as ``rect`` (points, per the
            original constant's comment). Defaults to the previously
            hard-coded 25.0.

    Returns:
        The label with trailing ``:;,.`` removed and truncated to 80
        characters, or ``""`` when ``rect`` is falsy, the page has no words,
        or no suitable word is found on either side.
    """
    if not rect:
        return ""

    # Very thin widgets still get a usable vertical tolerance (minimum 8).
    height = max(rect.y1 - rect.y0, 8)
    cy = (rect.y0 + rect.y1) / 2

    # get_text("words") -> list of (x0, y0, x1, y1, "text", block, line, word)
    words = page.get_text("words")
    if not words:
        return ""

    def same_line(wy0: float, wy1: float) -> bool:
        # A word is "on the widget's line" when its vertical centre lies
        # within 70% of the (clamped) widget height of the widget's centre.
        wcy = (wy0 + wy1) / 2
        return abs(wcy - cy) <= height * 0.7

    def clean(parts) -> str:
        # Join the run, drop surrounding whitespace and trailing punctuation,
        # then cap the length so a runaway line cannot flood the UI.
        return " ".join(parts).strip().rstrip(":;,.")[:80]

    # ── Right-side run ──
    # The 1-unit slack tolerates text that starts marginally inside the
    # widget's right edge.
    right = [w for w in words
             if same_line(w[1], w[3])
             and w[0] >= rect.x1 - 1
             and w[0] - rect.x1 < max_dist]
    if right:
        right.sort(key=lambda w: w[0])  # leftmost first (closest to widget)
        result = [right[0][4]]
        prev_x1 = right[0][2]
        for w in right[1:]:
            if w[0] - prev_x1 > max_gap:
                break
            result.append(w[4])
            prev_x1 = w[2]
        text = clean(result)
        if text:
            return text

    # ── Left-side fallback ──
    left = [w for w in words
            if same_line(w[1], w[3])
            and w[2] <= rect.x0 + 1
            and rect.x0 - w[2] < max_dist]
    if left:
        left.sort(key=lambda w: -w[2])  # rightmost first (closest to widget)
        result = [left[0][4]]
        prev_x0 = left[0][0]
        for w in left[1:]:
            if prev_x0 - w[2] > max_gap:
                break
            # Walking away from the widget, so prepend to keep reading order.
            result.insert(0, w[4])
            prev_x0 = w[0]
        text = clean(result)
        if text:
            return text

    return ""
def _serialize_widgets(doc) -> list[dict]:
"""Walk every page's widgets and return a JSON-friendly list of fields."""
fields: list[dict] = []
@@ -883,7 +950,8 @@ def _serialize_widgets(doc) -> list[dict]:
# Choice fields expose `choice_values`; treat None as empty list
choices = list(w.choice_values or []) if hasattr(w, "choice_values") else []
# For checkboxes the "on" state name varies per PDF
# For checkboxes / radios the "on" state name varies per PDF
# (often "Yes", "On", "1", or arbitrary identifiers like "Male").
on_states = []
if ftype in ("checkbox", "radio"):
states = w.button_states() or {}
@@ -894,6 +962,16 @@ def _serialize_widgets(doc) -> list[dict]:
if v and v != "Off" and v not in on_states:
on_states.append(v)
# For radios + checkboxes, sniff a human label from the page text
# adjacent to this widget. PDFs paint these as static text rather
# than storing them on the widget, so we have to read the page.
option_label = ""
if ftype in ("radio", "checkbox"):
option_label = _label_near_widget(page, w.rect)
# The "value" identifier this specific radio represents when "on".
option_value = on_states[0] if (ftype == "radio" and on_states) else ""
fields.append({
"name": w.field_name or "",
"label": w.field_label or w.field_name or "",
@@ -901,6 +979,8 @@ def _serialize_widgets(doc) -> list[dict]:
"value": w.field_value if w.field_value is not None else "",
"page": page_num,
"rect": [round(c, 2) for c in (w.rect or fitz.Rect())],
"option_label": option_label,
"option_value": option_value,
"required": required,
"readonly": readonly,
"multiline": multiline,
+24 -11
View File
@@ -101,18 +101,24 @@ function renderForm(data) {
`across ${data.page_count} page${data.page_count === 1 ? "" : "s"} of ` +
`<code>${escapeHtml(data.filename)}</code>`;
// Group radios by field_name (PDF standard: multiple widgets share a name)
// Group radios by field_name (PDF standard: multiple widgets share a name).
// Each radio carries its own option_value (PDF-internal id, e.g. "0"/"1")
// and option_label (human label sniffed from nearby page text, e.g. "Male").
const radioGroups = {};
const renderQueue = [];
for (const f of data.fields) {
if (f.type === "radio") {
if (!radioGroups[f.name]) {
radioGroups[f.name] = { ...f, on_states_combined: [] };
radioGroups[f.name] = { ...f, options: [] };
renderQueue.push({ kind: "radio_group", name: f.name });
}
for (const s of f.on_states || []) {
if (!radioGroups[f.name].on_states_combined.includes(s)) {
radioGroups[f.name].on_states_combined.push(s);
if (f.option_value) {
const exists = radioGroups[f.name].options.some(o => o.value === f.option_value);
if (!exists) {
radioGroups[f.name].options.push({
value: f.option_value,
label: f.option_label || f.option_value,
});
}
}
} else if (f.type !== "button" && f.type !== "signature") {
@@ -170,21 +176,28 @@ function buildField(f, isRadioGroup) {
wrap.appendChild(lbl);
return wrap;
} else if (isRadioGroup) {
// Render one radio per on-state value
const states = f.on_states_combined || ["On"];
for (const s of states) {
// Render one radio per option (value + label pair sniffed from PDF)
const opts = (f.options && f.options.length)
? f.options
: [{ value: "On", label: "On" }];
for (const opt of opts) {
const lbl = document.createElement("label");
lbl.style.cssText = "display:inline-flex;align-items:center;gap:.4rem;margin-right:1rem";
const r = document.createElement("input");
r.type = "radio";
r.name = `radio-${f.name}`;
r.value = s;
r.value = opt.value;
r.dataset.fieldName = f.name;
r.dataset.fieldType = "radio";
if (f.value === s) r.checked = true;
if (f.value === opt.value) r.checked = true;
const span = document.createElement("span");
span.textContent = s;
span.textContent = opt.label;
span.style.fontSize = ".9rem";
// Show the PDF-internal value as a tooltip so power users can
// map back to the underlying widget if they need to.
if (opt.label !== opt.value) {
lbl.title = `PDF on-state: "${opt.value}"`;
}
lbl.appendChild(r);
lbl.appendChild(span);
wrap.appendChild(lbl);