diff --git a/routes/__pycache__/convert_tools.cpython-313.pyc b/routes/__pycache__/convert_tools.cpython-313.pyc
index f4e2002..8a7fbb8 100644
Binary files a/routes/__pycache__/convert_tools.cpython-313.pyc and b/routes/__pycache__/convert_tools.cpython-313.pyc differ
diff --git a/routes/convert_tools.py b/routes/convert_tools.py
index 2ff12c3..17c34af 100644
--- a/routes/convert_tools.py
+++ b/routes/convert_tools.py
@@ -3,6 +3,13 @@ import fitz # PyMuPDF
 from flask import Blueprint, render_template, request, send_file, jsonify
 from PIL import Image
 import img2pdf
+from docx import Document as DocxDocument
+from reportlab.lib.pagesizes import A4
+from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
+from reportlab.lib.units import inch
+from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle
+from reportlab.lib import colors
+from reportlab.lib.enums import TA_LEFT, TA_CENTER, TA_RIGHT
 
 try:
     from pdf2docx import Converter as Pdf2DocxConverter
@@ -19,9 +26,9 @@ bp = Blueprint("convert", __name__)
 def to_pdf_page():
     return render_template("upload_tool.html",
                            title="Files to PDF",
-                           description="Convert images and text files to PDF",
+                           description="Convert images, Word documents, and text files to PDF",
                            endpoint="/convert/to-pdf",
-                           accept=".jpg,.jpeg,.png,.bmp,.tiff,.webp,.txt",
+                           accept=".jpg,.jpeg,.png,.bmp,.tiff,.webp,.txt,.docx",
                            multiple=True,
                            options=[])
 
@@ -66,6 +73,111 @@ def pdf_to_text_page():
                            options=[])
 
 
+# ── Helpers ──────────────────────────────────────
+
+def _escape_markup(text: str) -> str:
+    """Escape &, < and > so reportlab's Paragraph parser reads them as text."""
+    return text.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
+
+
+def _docx_to_pdf(data: bytes) -> bytes:
+    """Convert a .docx file (as bytes) to PDF bytes using python-docx + reportlab.
+
+    Body paragraphs are rendered first ("Heading 1"-"Heading 3" styles get
+    larger bold fonts); all tables are then appended after the text.
+    """
+    doc = DocxDocument(io.BytesIO(data))
+    buf = io.BytesIO()
+
+    styles = getSampleStyleSheet()
+    normal = styles["Normal"]
+    normal.fontName = "Helvetica"
+    normal.fontSize = 11
+    normal.leading = 14
+
+    heading_styles = {}
+    for level in range(1, 4):
+        size = {1: 18, 2: 15, 3: 13}[level]
+        heading_styles[level] = ParagraphStyle(
+            f"Heading{level}", parent=normal,
+            fontName="Helvetica-Bold", fontSize=size, leading=size + 4,
+            spaceBefore=12, spaceAfter=6,
+        )
+
+    pdf = SimpleDocTemplate(buf, pagesize=A4,
+                            leftMargin=inch, rightMargin=inch,
+                            topMargin=inch, bottomMargin=inch)
+    story = []
+
+    for para in doc.paragraphs:
+        text = para.text.strip()
+        if not text:
+            story.append(Spacer(1, 6))
+            continue
+
+        style_name = para.style.name.lower() if para.style else ""
+
+        # Heading text is plain, so it must be escaped before reportlab parses it.
+        if "heading 1" in style_name:
+            story.append(Paragraph(_escape_markup(text), heading_styles[1]))
+        elif "heading 2" in style_name:
+            story.append(Paragraph(_escape_markup(text), heading_styles[2]))
+        elif "heading 3" in style_name:
+            story.append(Paragraph(_escape_markup(text), heading_styles[3]))
+        else:
+            # Preserve basic inline formatting
+            rich = _build_rich_text(para)
+            story.append(Paragraph(rich, normal))
+
+    # Handle tables
+    # NOTE: doc.tables is walked after doc.paragraphs, so tables land at the
+    # end of the PDF rather than at their original position in the document.
+    for table in doc.tables:
+        tdata = [[cell.text for cell in row.cells] for row in table.rows]
+        if tdata:
+            t = Table(tdata, repeatRows=1)
+            t.setStyle(TableStyle([
+                ("GRID", (0, 0), (-1, -1), 0.5, colors.grey),
+                ("BACKGROUND", (0, 0), (-1, 0), colors.Color(0.9, 0.9, 0.95)),
+                ("FONTNAME", (0, 0), (-1, 0), "Helvetica-Bold"),
+                ("FONTSIZE", (0, 0), (-1, -1), 10),
+                ("TOPPADDING", (0, 0), (-1, -1), 4),
+                ("BOTTOMPADDING", (0, 0), (-1, -1), 4),
+                ("LEFTPADDING", (0, 0), (-1, -1), 6),
+                ("RIGHTPADDING", (0, 0), (-1, -1), 6),
+            ]))
+            story.append(Spacer(1, 8))
+            story.append(t)
+            story.append(Spacer(1, 8))
+
+    if not story:
+        story.append(Paragraph("(empty document)", normal))
+
+    pdf.build(story)
+    return buf.getvalue()
+
+
+def _build_rich_text(para) -> str:
+    """Convert a python-docx paragraph's runs into reportlab-compatible rich text.
+
+    Run text is escaped, then wrapped in <u>/<i>/<b> for each style flag set
+    on the run; falls back to the escaped paragraph text when no run has text.
+    """
+    parts = []
+    for run in para.runs:
+        if not run.text:
+            continue
+        text = _escape_markup(run.text)
+        if run.underline:
+            text = f"<u>{text}</u>"
+        if run.italic:
+            text = f"<i>{text}</i>"
+        if run.bold:
+            text = f"<b>{text}</b>"
+        parts.append(text)
+    return "".join(parts) or _escape_markup(para.text)
+
+
 # ── Processing Routes ────────────────────────────
 
 @bp.route("/to-pdf", methods=["POST"])
@@ -80,14 +192,19 @@ def to_pdf():
         name = f.filename.lower()
         data = f.read()
 
-        if name.endswith(".txt"):
+        if name.endswith(".docx"):
+            # Word document → PDF pages
+            try:
+                docx_pdf_bytes = _docx_to_pdf(data)
+                # Context manager closes the temporary document even if insert_pdf raises.
+                with fitz.open(stream=docx_pdf_bytes, filetype="pdf") as docx_pdf:
+                    pdf_doc.insert_pdf(docx_pdf)
+            except Exception as e:
+                return jsonify(error=f"Error converting {f.filename}: {str(e)}"), 400
+        elif name.endswith(".txt"):
             # Text file → PDF page
             text = data.decode("utf-8", errors="replace")
             page = pdf_doc.new_page(width=595, height=842) # A4
-            tw = fitz.TextWriter(page.rect)
-            font = fitz.Font("helv")
-            # Insert text with wrapping
-            where = fitz.Point(50, 50)
             rect = fitz.Rect(50, 50, 545, 792)
             page.insert_textbox(rect, text, fontsize=11, fontname="helv")
         else: