Add PDF/HEIC/image upload support and improve receipt extraction

- Add backend image conversion endpoint (POST /api/convert-image) supporting
  PDF, HEIC, PNG, WebP via Pillow, PyMuPDF, and pillow-heif
- Add separate "Upload file" button in UI while keeping camera-first behavior
  for the photo area and + button
- Improve Haiku extraction prompt for hotel receipts (parenthesized total)
- Increase max image resolution from 1024px to 2048px for better OCR accuracy
- Add libheif-dev system dependency in Dockerfile

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-02-01 01:20:23 -06:00
parent fde6fcb724
commit a5c6996219
4 changed files with 113 additions and 4 deletions

View File

@@ -3,6 +3,7 @@ FROM python:3.11-slim
WORKDIR /app WORKDIR /app
COPY requirements.txt . COPY requirements.txt .
RUN apt-get update && apt-get install -y --no-install-recommends libheif-dev && rm -rf /var/lib/apt/lists/*
RUN pip install --no-cache-dir -r requirements.txt RUN pip install --no-cache-dir -r requirements.txt
COPY server.py receipts.html api-key ./ COPY server.py receipts.html api-key ./

View File

@@ -542,6 +542,8 @@
<span>Tap to take photo</span> <span>Tap to take photo</span>
</div> </div>
<input type="file" id="photo-input" accept="image/*" capture="environment" hidden> <input type="file" id="photo-input" accept="image/*" capture="environment" hidden>
<input type="file" id="file-input" accept="image/*,.pdf,.heic,.heif,application/pdf" hidden>
<button type="button" id="upload-file-btn" style="margin-top:8px;width:100%;padding:10px;border:1.5px dashed #aaa;border-radius:8px;background:#fafafa;color:#666;font-size:0.9rem;cursor:pointer;">Upload file (PDF, HEIC, image)</button>
<div class="extract-status" id="extract-status"></div> <div class="extract-status" id="extract-status"></div>
</div> </div>
@@ -695,6 +697,8 @@
const modalTitle = document.getElementById("modal-title"); const modalTitle = document.getElementById("modal-title");
const photoArea = document.getElementById("photo-area"); const photoArea = document.getElementById("photo-area");
const photoInput = document.getElementById("photo-input"); const photoInput = document.getElementById("photo-input");
const fileInput = document.getElementById("file-input");
const uploadFileBtn = document.getElementById("upload-file-btn");
const dateInput = document.getElementById("date-input"); const dateInput = document.getElementById("date-input");
const amountInput = document.getElementById("amount-input"); const amountInput = document.getElementById("amount-input");
const categoryInput = document.getElementById("category-input"); const categoryInput = document.getElementById("category-input");
@@ -866,6 +870,7 @@
function closeModal() { function closeModal() {
overlay.classList.remove("open"); overlay.classList.remove("open");
photoInput.value = ""; photoInput.value = "";
fileInput.value = "";
} }
function updatePhotoArea() { function updatePhotoArea() {
@@ -876,7 +881,7 @@
photoArea.classList.remove("has-photo"); photoArea.classList.remove("has-photo");
photoArea.innerHTML = ` photoArea.innerHTML = `
<svg width="32" height="32" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.5"><path d="M23 19a2 2 0 01-2 2H3a2 2 0 01-2-2V8a2 2 0 012-2h4l2-3h6l2 3h4a2 2 0 012 2z"/><circle cx="12" cy="13" r="4"/></svg> <svg width="32" height="32" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.5"><path d="M23 19a2 2 0 01-2 2H3a2 2 0 01-2-2V8a2 2 0 012-2h4l2-3h6l2 3h4a2 2 0 012 2z"/><circle cx="12" cy="13" r="4"/></svg>
<span>Tap to take photo</span>`; <span>Tap to add photo or file</span>`;
} }
} }
@@ -1055,6 +1060,7 @@
}); });
photoArea.addEventListener("click", () => photoInput.click()); photoArea.addEventListener("click", () => photoInput.click());
uploadFileBtn.addEventListener("click", () => fileInput.click());
// --- Receipt extraction via Claude --- // --- Receipt extraction via Claude ---
const extractStatus = document.getElementById("extract-status"); const extractStatus = document.getElementById("extract-status");
@@ -1290,11 +1296,50 @@
}, "image/jpeg", 0.85); }, "image/jpeg", 0.85);
}); });
/**
 * Upload the raw bytes of `file` to POST /api/convert-image and resolve with
 * the response body as a Blob.
 *
 * The request's Content-Type is the file's own MIME type, falling back to
 * "application/octet-stream" when the browser reports none.
 *
 * @param {File} file - the file picked by the user.
 * @returns {Promise<Blob>} the body returned by the conversion endpoint.
 * @throws {Error} "Conversion failed" when the response status is not ok.
 */
async function convertViaBackend(file) {
  const payload = await file.arrayBuffer();
  const contentType = file.type || "application/octet-stream";
  const response = await fetch("/api/convert-image", {
    method: "POST",
    headers: { "Content-Type": contentType },
    body: payload
  });
  if (response.ok) {
    const converted = await response.blob();
    return converted;
  }
  throw new Error("Conversion failed");
}
/**
 * Tell whether `file` has to be converted by the backend before use.
 *
 * A file qualifies when either its MIME type or its file-name extension
 * (both compared case-insensitively) identifies it as PDF, HEIC or HEIF.
 *
 * @param {{type?: string, name?: string}} file - the picked file.
 * @returns {boolean} true for PDF/HEIC/HEIF files, false otherwise.
 */
function needsBackendConversion(file) {
  const mime = (file.type || "").toLowerCase();
  const filename = (file.name || "").toLowerCase();
  const convertibleMimes = ["application/pdf", "image/heic", "image/heif"];
  const convertibleSuffixes = [".pdf", ".heic", ".heif"];
  return (
    convertibleMimes.includes(mime) ||
    convertibleSuffixes.some(suffix => filename.endsWith(suffix))
  );
}
photoInput.addEventListener("change", async e => { photoInput.addEventListener("change", async e => {
const file = e.target.files[0]; const file = e.target.files[0];
if (!file) return; if (!file) return;
const resized = await resizeImage(file, 1024); const blob = await resizeImage(file, 2048);
openCropOverlay(resized); openCropOverlay(blob);
});
// Handle a file chosen through the separate "file-input" element.
// Files flagged by needsBackendConversion() go through the backend
// conversion endpoint; everything else goes through resizeImage(). The
// resulting blob is then handed to the crop overlay.
fileInput.addEventListener("change", async e => {
const file = e.target.files[0];
// No file in the selection: nothing to do.
if (!file) return;
let blob;
if (needsBackendConversion(file)) {
try {
extractStatus.textContent = "Converting file...";
blob = await convertViaBackend(file);
} catch (err) {
// Report the failure and stop here; the crop overlay is not opened.
extractStatus.textContent = "Conversion failed";
return;
} finally {
// Clear the progress text only while it is still showing, so the
// "Conversion failed" message set in the catch branch is not wiped.
if (extractStatus.textContent === "Converting file...") extractStatus.textContent = "";
}
} else {
// Same 2048 limit as the camera path; presumably the maximum dimension
// in pixels (resizeImage is defined outside this view, so confirm there).
blob = await resizeImage(file, 2048);
}
openCropOverlay(blob);
}); });
btnSave.addEventListener("click", async () => { btnSave.addEventListener("click", async () => {

View File

@@ -1,2 +1,5 @@
requests requests
openpyxl openpyxl
Pillow
PyMuPDF
pillow-heif

View File

@@ -17,6 +17,11 @@ from urllib.parse import quote as urlquote, urlparse, parse_qs
import openpyxl import openpyxl
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
import requests import requests
from PIL import Image
import fitz # PyMuPDF
import pillow_heif
pillow_heif.register_heif_opener()
# --- Nextcloud configuration (filled at startup) --- # --- Nextcloud configuration (filled at startup) ---
NC_BASE = "https://nextcloud.sdanywhere.com" NC_BASE = "https://nextcloud.sdanywhere.com"
@@ -228,7 +233,7 @@ def extract_receipt_info(jpeg_bytes):
}, },
{ {
"type": "text", "type": "text",
"text": 'Extract the total amount and transaction date from this receipt. Reply with JSON only: {"amount": number_or_null, "date": "YYYY-MM-DD"_or_null}', "text": 'Extract the total amount charged and the transaction date from this receipt. For hotel receipts, the total is the amount in parentheses (the charge to the guest), NOT the balance due (which is typically 0 meaning it has been paid). Reply with JSON only: {"amount": number_or_null, "date": "YYYY-MM-DD"_or_null}',
}, },
], ],
}], }],
@@ -251,6 +256,39 @@ def extract_receipt_info(jpeg_bytes):
return {"amount": None, "date": None} return {"amount": None, "date": None}
MAX_UPLOAD_SIZE = 20 * 1024 * 1024 # 20 MB
def convert_to_jpeg(raw_bytes, content_type="", max_dim=2048, quality=80):
    """Convert PDF, HEIC, PNG, WebP, etc. to JPEG bytes.

    PDFs are detected by content type or by the ``%PDF-`` magic bytes and
    only their first page is rendered. Any other input is opened with Pillow
    (HEIC/HEIF via the opener registered at import time).

    Args:
        raw_bytes: The uploaded file contents.
        content_type: The request's Content-Type header value; parameters
            such as ``; charset=...`` are ignored. May be empty or None.
        max_dim: Maximum length in pixels of the longest side of the output.
        quality: JPEG quality passed to Pillow when saving.

    Returns:
        The encoded JPEG image as bytes.

    Raises:
        ValueError: If the PDF contains no pages.
        Exception: Whatever PyMuPDF or Pillow raise for unreadable input.
    """
    # Keep only the media type so "application/pdf; charset=binary" matches.
    ct = (content_type or "").split(";", 1)[0].strip().lower()

    if ct == "application/pdf" or raw_bytes[:5] == b"%PDF-":
        doc = fitz.open(stream=raw_bytes, filetype="pdf")
        try:
            if len(doc) == 0:
                raise ValueError("PDF has no pages")
            # Render the first page at 2x zoom for sharper text.
            # alpha=False guarantees 3-channel samples for the "RGB" decode.
            pix = doc[0].get_pixmap(matrix=fitz.Matrix(2.0, 2.0), alpha=False)
            img = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
        finally:
            # Release the document even when rendering fails.
            doc.close()
    else:
        img = Image.open(io.BytesIO(raw_bytes))

    # JPEG has no alpha/palette support; normalise every mode to RGB.
    if img.mode != "RGB":
        img = img.convert("RGB")

    # Downscale so the longest side is at most max_dim, keeping aspect ratio.
    w, h = img.size
    longest = max(w, h)
    if longest > max_dim:
        scale = max_dim / longest
        # Clamp to 1px so extreme aspect ratios never produce a zero-sized
        # dimension, which Pillow rejects.
        new_size = (max(1, int(w * scale)), max(1, int(h * scale)))
        img = img.resize(new_size, Image.LANCZOS)

    buf = io.BytesIO()
    img.save(buf, format="JPEG", quality=quality)
    return buf.getvalue()
# --- HTTP handler ------------------------------------------------------------- # --- HTTP handler -------------------------------------------------------------
@@ -557,6 +595,28 @@ class Handler(BaseHTTPRequestHandler):
self._send_json({"amount": None, "date": None}) self._send_json({"amount": None, "date": None})
return return
# POST /api/convert-image — convert PDF/HEIC/PNG/WebP to JPEG
if self.path == "/api/convert-image":
if not self._check_session():
return
try:
length = int(self.headers.get("Content-Length", 0))
if length > MAX_UPLOAD_SIZE:
self._send_error(413, "File too large (max 20 MB)")
return
body = self._read_body()
content_type = self.headers.get("Content-Type", "application/octet-stream")
jpeg_bytes = convert_to_jpeg(body, content_type)
self.send_response(200)
self.send_header("Content-Type", "image/jpeg")
self.send_header("Content-Length", str(len(jpeg_bytes)))
self.end_headers()
self.wfile.write(jpeg_bytes)
except Exception as e:
print(f"[convert-image] Error: {e}")
self._send_error(500, f"Conversion failed: {e}")
return
# POST /api/photos/<id> — upload photo # POST /api/photos/<id> — upload photo
m = re.fullmatch(r"/api/photos/([A-Za-z0-9_-]+)", self.path) m = re.fullmatch(r"/api/photos/([A-Za-z0-9_-]+)", self.path)
if m: if m: