Expense reports are manual friction that remote workers deal with monthly. The typical flow — photograph receipts, open a spreadsheet, copy in amounts, export to PDF, attach to email — takes 30-60 minutes per report. Automating it with scripts reduces that to minutes.
This guide builds an expense automation system: a folder watcher that extracts amounts from receipt images using OCR, generates a categorized CSV, and produces a PDF report ready to submit.
What Gets Automated
- Drop receipt images into
~/receipts/incoming/ - OCR script extracts amount, vendor, and date from each image
- Categorize expenses (meals, software, hardware, travel)
- Generate monthly CSV and PDF report
- Email report to your finance contact or manager
Install Dependencies
# macOS
brew install tesseract imagemagick python3
# Ubuntu/Debian
sudo apt install tesseract-ocr imagemagick python3-pip -y
# Python packages
pip3 install pytesseract Pillow pandas fpdf2 python-dotenv
# Verify Tesseract
tesseract --version
Folder Structure
~/expenses/
├── receipts/
│ ├── incoming/ # drop new receipts here
│ └── processed/ # moved here after extraction
├── ledger.csv # master expense log
├── reports/ # generated PDFs
├── extract.py # OCR extraction script
├── report.py # PDF report generator
└── .env # SMTP credentials
Receipt Extraction Script
# extract.py
import os
import re
import shutil
import csv
from datetime import datetime
from pathlib import Path
import pytesseract
from PIL import Image
INCOMING = Path.home() / "expenses/receipts/incoming"
PROCESSED = Path.home() / "expenses/receipts/processed"
LEDGER = Path.home() / "expenses/ledger.csv"
CATEGORIES = {
"amazon": "software",
"aws": "software",
"github": "software",
"figma": "software",
"notion": "software",
"zoom": "software",
"slack": "software",
"uber eats": "meals",
"doordash": "meals",
"grubhub": "meals",
"starbucks": "meals",
"delta": "travel",
"united": "travel",
"american airlines": "travel",
"marriott": "travel",
"hilton": "travel",
"best buy": "hardware",
"b&h": "hardware",
"apple": "hardware",
"logitech": "hardware",
}
def extract_amount(text):
"""Extract dollar amount from OCR text."""
patterns = [
r"total[:\s]+\$?([\d,]+\.\d{2})",
r"amount[:\s]+\$?([\d,]+\.\d{2})",
r"\$\s*([\d,]+\.\d{2})",
r"([\d,]+\.\d{2})",
]
for pattern in patterns:
match = re.search(pattern, text, re.IGNORECASE)
if match:
amount = match.group(1).replace(",", "")
return float(amount)
return None
def extract_vendor(text):
"""Match known vendors from OCR text."""
text_lower = text.lower()
for vendor in CATEGORIES:
if vendor in text_lower:
return vendor.title(), CATEGORIES[vendor]
# Fallback: first non-empty line as vendor
lines = [l.strip() for l in text.split("\n") if l.strip()]
return lines[0] if lines else "Unknown", "other"
def extract_date(text):
"""Extract date from OCR text, default to today."""
patterns = [
r"(\d{1,2}/\d{1,2}/\d{2,4})",
r"(\d{4}-\d{2}-\d{2})",
r"(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\w*\s+\d{1,2},?\s+\d{4}",
]
for pattern in patterns:
match = re.search(pattern, text, re.IGNORECASE)
if match:
return match.group(0)
return datetime.today().strftime("%Y-%m-%d")
def process_receipts():
INCOMING.mkdir(parents=True, exist_ok=True)
PROCESSED.mkdir(parents=True, exist_ok=True)
images = list(INCOMING.glob("*.{jpg,jpeg,png,pdf}"))
if not images:
images = [p for ext in ["jpg","jpeg","png","pdf"] for p in INCOMING.glob(f"*.{ext}")]
new_rows = []
for img_path in images:
print(f"Processing: {img_path.name}")
img = Image.open(img_path)
text = pytesseract.image_to_string(img)
amount = extract_amount(text)
vendor, category = extract_vendor(text)
date = extract_date(text)
if amount:
row = {
"date": date,
"vendor": vendor,
"amount": amount,
"category": category,
"notes": "",
"receipt": img_path.name,
}
new_rows.append(row)
print(f" → {vendor}: ${amount} ({category})")
else:
print(f" → Could not extract amount — check manually")
# Move to processed
shutil.move(str(img_path), str(PROCESSED / img_path.name))
if new_rows:
write_header = not LEDGER.exists()
with open(LEDGER, "a", newline="") as f:
writer = csv.DictWriter(f, fieldnames=new_rows[0].keys())
if write_header:
writer.writeheader()
writer.writerows(new_rows)
print(f"\nAdded {len(new_rows)} expenses to ledger.")
if __name__ == "__main__":
process_receipts()
PDF Report Generator
# report.py
import csv
import os
from datetime import datetime
from pathlib import Path
from collections import defaultdict
from fpdf import FPDF
LEDGER = Path.home() / "expenses/ledger.csv"
REPORTS_DIR = Path.home() / "expenses/reports"
def generate_report(month: str = None):
"""month format: YYYY-MM, defaults to current month"""
if not month:
month = datetime.today().strftime("%Y-%m")
rows = []
with open(LEDGER, newline="") as f:
for row in csv.DictReader(f):
if row["date"].startswith(month):
rows.append(row)
if not rows:
print(f"No expenses found for {month}")
return
total = sum(float(r["amount"]) for r in rows)
by_category = defaultdict(float)
for r in rows:
by_category[r["category"]] += float(r["amount"])
REPORTS_DIR.mkdir(parents=True, exist_ok=True)
pdf_path = REPORTS_DIR / f"expense-report-{month}.pdf"
pdf = FPDF()
pdf.add_page()
pdf.set_font("Helvetica", "B", 16)
pdf.cell(0, 10, f"Expense Report — {month}", ln=True)
pdf.set_font("Helvetica", "", 10)
pdf.cell(0, 6, f"Generated: {datetime.today().strftime('%Y-%m-%d')}", ln=True)
pdf.ln(4)
# Category summary
pdf.set_font("Helvetica", "B", 12)
pdf.cell(0, 8, "Summary by Category", ln=True)
pdf.set_font("Helvetica", "", 10)
for cat, amt in sorted(by_category.items()):
pdf.cell(60, 6, cat.title())
pdf.cell(0, 6, f"${amt:.2f}", ln=True)
pdf.set_font("Helvetica", "B", 10)
pdf.cell(60, 6, "TOTAL")
pdf.cell(0, 6, f"${total:.2f}", ln=True)
pdf.ln(6)
# Line items
pdf.set_font("Helvetica", "B", 12)
pdf.cell(0, 8, "Line Items", ln=True)
pdf.set_font("Helvetica", "B", 9)
pdf.cell(30, 6, "Date")
pdf.cell(70, 6, "Vendor")
pdf.cell(30, 6, "Category")
pdf.cell(0, 6, "Amount", ln=True)
pdf.line(10, pdf.get_y(), 200, pdf.get_y())
pdf.set_font("Helvetica", "", 9)
for r in sorted(rows, key=lambda x: x["date"]):
pdf.cell(30, 6, r["date"])
pdf.cell(70, 6, r["vendor"][:30])
pdf.cell(30, 6, r["category"])
pdf.cell(0, 6, f"${float(r['amount']):.2f}", ln=True)
pdf.output(str(pdf_path))
print(f"Report saved: {pdf_path}")
return pdf_path
if __name__ == "__main__":
import sys
month = sys.argv[1] if len(sys.argv) > 1 else None
generate_report(month)
Email the Report
# .env
SMTP_HOST=smtp.gmail.com
SMTP_USER=you@gmail.com
SMTP_PASS=your-app-password
MANAGER_EMAIL=manager@company.com
YOUR_NAME=Your Name
# send-report.py
import smtplib
import os
import sys
from email.mime.multipart import MIMEMultipart
from email.mime.base import MIMEBase
from email.mime.text import MIMEText
from email import encoders
from datetime import datetime
from dotenv import load_dotenv
from report import generate_report
load_dotenv(Path.home() / "expenses/.env")
def send_report(month=None):
if not month:
month = datetime.today().strftime("%Y-%m")
pdf_path = generate_report(month)
if not pdf_path:
return
msg = MIMEMultipart()
msg["From"] = os.getenv("SMTP_USER")
msg["To"] = os.getenv("MANAGER_EMAIL")
msg["Subject"] = f"Expense Report {month} — {os.getenv('YOUR_NAME')}"
msg.attach(MIMEText(f"Hi,\n\nAttached is my expense report for {month}.\n\nBest,\n{os.getenv('YOUR_NAME')}"))
with open(pdf_path, "rb") as f:
part = MIMEBase("application", "octet-stream")
part.set_payload(f.read())
encoders.encode_base64(part)
part.add_header("Content-Disposition", f'attachment; filename="{pdf_path.name}"')
msg.attach(part)
with smtplib.SMTP(os.getenv("SMTP_HOST"), 587) as server:
server.starttls()
server.login(os.getenv("SMTP_USER"), os.getenv("SMTP_PASS"))
server.sendmail(msg["From"], msg["To"], msg.msg.as_string())
print(f"Report sent to {os.getenv('MANAGER_EMAIL')}")
if __name__ == "__main__":
send_report(sys.argv[1] if len(sys.argv) > 1 else None)
Run the Full Workflow
# 1. Drop receipt images into ~/expenses/receipts/incoming/
# 2. Extract amounts from all new receipts
python3 ~/expenses/extract.py
# 3. Review and fix the ledger if OCR missed anything
cat ~/expenses/ledger.csv
# 4. Generate report for current month
python3 ~/expenses/report.py
# 5. Send report
python3 ~/expenses/send-report.py
# Or send for a specific month
python3 ~/expenses/send-report.py 2026-02
Add a Manual Entry
For receipts that OCR cannot parse (printed text too faint, scanned PDFs):
# Append manually
echo "2026-03-15,Ikea,89.99,hardware,Standing desk riser,desk-riser-receipt.jpg" \
>> ~/expenses/ledger.csv
Related Reading
- Best Expense Management Platform for Remote Teams with Receipt Tracking
- Best Accounting Software for Freelancers 2026
- Automate Invoice Generation for Freelancers
Built by theluckystrike — More at zovo.one