Add PDF/HEIC/image upload support and improve receipt extraction

- Add backend image conversion endpoint (POST /api/convert-image) supporting PDF, HEIC, PNG, and WebP via Pillow, PyMuPDF, and pillow-heif
- Add a separate "Upload file" button in the UI while keeping camera-first behavior for the photo area and + button
- Improve the Haiku extraction prompt for hotel receipts (parenthesized total)
- Increase max image resolution from 1024px to 2048px for better OCR accuracy
- Add libheif-dev system dependency in the Dockerfile

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-01 01:20:23 -06:00
parent fde6fcb724
commit a5c6996219
4 changed files with 113 additions and 4 deletions
--- a/server.py
+++ b/server.py
@@ -17,6 +17,11 @@ from urllib.parse import quote as urlquote, urlparse, parse_qs
 import openpyxl
 from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
 import requests
+from PIL import Image
+import fitz  # PyMuPDF
+import pillow_heif
+
+pillow_heif.register_heif_opener()

 # --- Nextcloud configuration (filled at startup) ---
 NC_BASE = "https://nextcloud.sdanywhere.com"
@@ -228,7 +233,7 @@ def extract_receipt_info(jpeg_bytes):
                        },
                        {
                            "type": "text",
-                            "text": 'Extract the total amount and transaction date from this receipt. Reply with JSON only: {"amount": number_or_null, "date": "YYYY-MM-DD"_or_null}',
+                            "text": 'Extract the total amount charged and the transaction date from this receipt. For hotel receipts, the total is the amount in parentheses (the charge to the guest), NOT the balance due (which is typically 0 meaning it has been paid). Reply with JSON only: {"amount": number_or_null, "date": "YYYY-MM-DD"_or_null}',
                        },
                    ],
                }],
@@ -251,6 +256,39 @@ def extract_receipt_info(jpeg_bytes):
    return {"amount": None, "date": None}


+MAX_UPLOAD_SIZE = 20 * 1024 * 1024  # 20 MB; /api/convert-image answers 413 when Content-Length exceeds this
+
+
+def convert_to_jpeg(raw_bytes, content_type=""):
+    """Convert PDF (first page), HEIC, PNG, WebP, etc. to JPEG bytes.
+
+    The result is RGB, at most 2048px on its longest side, saved at quality 80.
+    A PDF is detected by MIME type (parameters ignored) or "%PDF-" magic bytes.
+    """
+    ct = (content_type or "").split(";", 1)[0].strip().lower()
+    if ct == "application/pdf" or raw_bytes[:5] == b"%PDF-":
+        # Render the first page at 2x zoom; `with` closes the document on error too.
+        with fitz.open(stream=raw_bytes, filetype="pdf") as doc:
+            pix = doc[0].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
+            img = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
+    else:
+        img = Image.open(io.BytesIO(raw_bytes))
+
+    # JPEG has no alpha or palette support, so normalise every mode to RGB.
+    if img.mode != "RGB":
+        img = img.convert("RGB")
+
+    # Cap the longest side at 2048px; clamp to >= 1px for extreme aspect ratios.
+    max_dim = 2048
+    w, h = img.size
+    if max(w, h) > max_dim:
+        scale = max_dim / max(w, h)
+        img = img.resize((max(1, int(w * scale)), max(1, int(h * scale))), Image.LANCZOS)
+
+    buf = io.BytesIO()
+    img.save(buf, format="JPEG", quality=80)
+    return buf.getvalue()
+

 # --- HTTP handler -------------------------------------------------------------

@@ -557,6 +595,28 @@ class Handler(BaseHTTPRequestHandler):
                self._send_json({"amount": None, "date": None})
            return

+        # POST /api/convert-image — convert PDF/HEIC/PNG/WebP to JPEG
+        if self.path == "/api/convert-image":
+            if not self._check_session():
+                return
+            try:
+                length = int(self.headers.get("Content-Length", 0))
+                if length > MAX_UPLOAD_SIZE:
+                    self._send_error(413, "File too large (max 20 MB)")
+                    return
+                body = self._read_body()
+                content_type = self.headers.get("Content-Type", "application/octet-stream")
+                jpeg_bytes = convert_to_jpeg(body, content_type)
+                self.send_response(200)
+                self.send_header("Content-Type", "image/jpeg")
+                self.send_header("Content-Length", str(len(jpeg_bytes)))
+                self.end_headers()
+                self.wfile.write(jpeg_bytes)
+            except Exception as e:
+                print(f"[convert-image] Error: {e}")
+                self._send_error(500, f"Conversion failed: {e}")
+            return
+
        # POST /api/photos/<id> — upload photo
        m = re.fullmatch(r"/api/photos/([A-Za-z0-9_-]+)", self.path)
        if m: