Privacy Tools Guide

Secure File Upload Handling for Developers

File upload endpoints are one of the most exploited attack surfaces in web applications. A misconfigured uploader lets attackers drop web shells, trigger SSRF via SVG/XML, bypass malware scanners, or cause denial-of-service through zip bombs. This guide covers the full stack of defenses.

Threat Model

| Attack | Risk |
|---|---|
| Web shell upload | RCE if file is served from a web-accessible path |
| Malicious SVG/HTML | XSS, SSRF, or credential theft |
| Path traversal in filename | Overwrite arbitrary files |
| Zip bomb | DoS via memory exhaustion during extraction |
| MIME type bypass | Bypass extension checks by spoofing Content-Type |
| Polyglot files | Valid GIF + valid PHP simultaneously |

1. Validate File Type by Content, Not Extension

Both the file extension and the Content-Type header are attacker-controlled. The only reliable check is to inspect the file’s actual magic bytes:

import magic   # python-magic wraps libmagic
import imghdr
from pathlib import Path

ALLOWED_MIME_TYPES = {
    "image/jpeg",
    "image/png",
    "image/gif",
    "image/webp",
    "application/pdf",
}

MAX_FILE_SIZE = 10 * 1024 * 1024   # 10 MB

# Leading byte signatures for the allowed image types. They back the
# secondary check in validate_upload(), independently of libmagic.
_IMAGE_SIGNATURES = {
    "image/jpeg": (b"\xff\xd8\xff",),
    "image/png":  (b"\x89PNG\r\n\x1a\n",),
    "image/gif":  (b"GIF87a", b"GIF89a"),
}


def _has_image_signature(file_bytes: bytes, mime: str) -> bool:
    """Return True if file_bytes starts with the signature expected for mime."""
    if mime == "image/webp":
        # RIFF container layout: "RIFF" <4-byte length> "WEBP"
        return file_bytes[:4] == b"RIFF" and file_bytes[8:12] == b"WEBP"
    # An empty tuple never matches, so unknown image types are rejected.
    return file_bytes.startswith(_IMAGE_SIGNATURES.get(mime, ()))


def validate_upload(file_bytes: bytes, claimed_filename: str) -> str:
    """
    Returns the detected MIME type if valid, raises ValueError otherwise.

    Args:
        file_bytes: Complete content of the uploaded file.
        claimed_filename: Name supplied by the client. It is attacker-controlled
            and deliberately not used for any decision here.

    Raises:
        ValueError: If the file exceeds MAX_FILE_SIZE, its detected type is not
            in ALLOWED_MIME_TYPES, or an image fails the signature check.
    """
    # 1. Size check
    if len(file_bytes) > MAX_FILE_SIZE:
        raise ValueError(f"File too large: {len(file_bytes)} bytes (max {MAX_FILE_SIZE})")

    # 2. Detect MIME from content (not from extension or header)
    detected_mime = magic.from_buffer(file_bytes, mime=True)

    # 3. Allowlist check
    if detected_mime not in ALLOWED_MIME_TYPES:
        raise ValueError(f"Disallowed file type: {detected_mime}")

    # 4. Secondary check for images (double validation).
    #    Done with explicit signatures rather than imghdr: that module was
    #    removed in Python 3.13, and on older versions its JPEG test rejects
    #    valid files that lack a JFIF/Exif marker. Comparing against the
    #    detected type also confirms the two checks agree with each other.
    if detected_mime.startswith("image/") and not _has_image_signature(file_bytes, detected_mime):
        raise ValueError("File claims to be an image but is not")

    return detected_mime
pip install python-magic
sudo apt install -y libmagic1

2. Sanitize Filenames

Never trust the original filename. Generate the stored name yourself, and sanitize the original only if you need to show it to users:

import re
import uuid
from pathlib import PurePosixPath

# Maps each accepted MIME type to the extension used for the stored file.
ALLOWED_EXTENSIONS = {
    "image/jpeg": ".jpg",
    "image/png": ".png",
    "image/gif": ".gif",
    "image/webp": ".webp",
    "application/pdf": ".pdf",
}


def safe_filename(mime_type: str) -> str:
    """
    Build a storage filename from a random UUID plus the extension for mime_type.

    Nothing from the user's original filename is used. MIME types missing
    from ALLOWED_EXTENSIONS get the generic ".bin" extension.
    """
    if mime_type in ALLOWED_EXTENSIONS:
        suffix = ALLOWED_EXTENSIONS[mime_type]
    else:
        suffix = ".bin"
    random_stem = uuid.uuid4().hex
    return random_stem + suffix

# If you must preserve a user-provided name (e.g., for display):
def sanitize_display_name(name: str) -> str:
    """
    Reduce a user-supplied filename to a short, harmless display string.

    The result contains only ASCII letters, digits, '.', '_' and '-', never
    starts with a dot, and is at most 100 characters long. If nothing usable
    is left, "upload" is returned. Use it for display only, never as a
    storage path.
    """
    # Strip path components. Windows clients may send backslash-separated
    # paths, so treat '\' as a separator as well as '/'.
    name = PurePosixPath(name.replace("\\", "/")).name
    # Allow only safe characters
    name = re.sub(r'[^a-zA-Z0-9._-]', '_', name)
    # Drop leading dots so the result is never "..", ".", or a hidden file
    name = name.lstrip(".")
    # Truncate
    name = name[:100]
    return name or "upload"

3. Store Files Outside Web Root

import os

# WRONG — anything stored here is reachable directly, e.g. /uploads/shell.php
UPLOAD_DIR = "/var/www/html/uploads/"

# CORRECT — outside web root entirely.
# This assignment replaces the value above; the two are shown side by side
# only for comparison.
UPLOAD_DIR = "/var/app-data/uploads/"

# Or use object storage (S3, GCS) — files are never served through your web server
import boto3

s3 = boto3.client("s3")

def store_file(file_bytes: bytes, safe_name: str) -> str:
    """
    Upload file_bytes to the "myapp-uploads" bucket and return the object key.

    The object is written under "uploads/<safe_name>" with a generic content
    type and an attachment disposition, and with server-side encryption.
    """
    object_key = f"uploads/{safe_name}"
    put_args = {
        "Bucket": "myapp-uploads",
        "Key": object_key,
        "Body": file_bytes,
        # prevents inline rendering
        "ContentDisposition": "attachment",
        # force download, not execution
        "ContentType": "application/octet-stream",
        "ServerSideEncryption": "AES256",
    }
    s3.put_object(**put_args)
    return object_key

When serving files, always generate pre-signed URLs with a short TTL rather than making the bucket public.


4. Scan with ClamAV

# Install ClamAV
sudo apt install -y clamav clamav-daemon
sudo freshclam   # update virus database
sudo systemctl enable --now clamav-daemon
import subprocess
import tempfile

def scan_with_clamav(file_bytes: bytes) -> bool:
    """
    Returns True if file is clean, raises exception if malware found.

    Writes file_bytes to a temporary file, scans it with clamdscan, and
    always removes the temporary file afterwards.

    Raises:
        ValueError: clamdscan reported an infected file (exit code 1).
        RuntimeError: clamdscan exited with any other non-zero code.
        subprocess.TimeoutExpired: The scan took longer than 30 seconds.
    """
    # delete=False: the file has to outlive this handle so that clamdscan can
    # open it by path; the finally block below removes it.
    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".upload")
    tmp_path = tmp.name

    try:
        # Write inside the try so a failed write does not leak the temp file.
        with tmp:
            tmp.write(file_bytes)

        result = subprocess.run(
            # --fdpass hands clamd an open descriptor. Temp files are created
            # owner-only, so a clamd daemon running as a different user could
            # not open the path itself and would report an error.
            ["clamdscan", "--fdpass", "--no-summary", tmp_path],
            capture_output=True, text=True, timeout=30
        )
        if result.returncode == 1:
            raise ValueError(f"Malware detected: {result.stdout.strip()}")
        if result.returncode != 0:
            raise RuntimeError(f"ClamAV error: {result.stderr.strip()}")
        return True
    finally:
        os.unlink(tmp_path)

5. Prevent Zip Bomb Extraction

If your app extracts archives, always validate before extracting:

import zipfile
import io

MAX_UNCOMPRESSED_SIZE = 50 * 1024 * 1024   # 50 MB
MAX_FILE_COUNT        = 1000
MAX_COMPRESSION_RATIO = 100   # max allowed uncompressed:compressed ratio

def safe_extract_zip(zip_bytes: bytes, dest_dir: str):
    """
    Extract a ZIP archive into dest_dir after checking it for abuse.

    The archive is rejected before anything is written if it has too many
    entries, declares too large an unpacked size, has a suspicious overall
    compression ratio, or contains an entry that would land outside dest_dir.

    Raises:
        ValueError: If any of the checks above fails.
        zipfile.BadZipFile: If zip_bytes is not a valid ZIP archive.
    """
    with zipfile.ZipFile(io.BytesIO(zip_bytes)) as zf:
        members = zf.infolist()

        # Check file count
        if len(members) > MAX_FILE_COUNT:
            raise ValueError(f"Too many files in archive: {len(members)}")

        # Check uncompressed size
        total_uncompressed = sum(m.file_size for m in members)
        if total_uncompressed > MAX_UNCOMPRESSED_SIZE:
            raise ValueError(f"Archive unpacks too large: {total_uncompressed} bytes")

        # Check compression ratio
        total_compressed = sum(m.compress_size for m in members)
        if total_compressed > 0:
            ratio = total_uncompressed / total_compressed
            if ratio > MAX_COMPRESSION_RATIO:
                raise ValueError(f"Suspicious compression ratio: {ratio:.0f}:1")

        # Check for path traversal in member names.
        # Resolve where each entry would land and require it to stay inside
        # dest_dir. A plain startswith("..") test on the name also rejects
        # harmless files such as "..notes.txt".
        dest_root = os.path.realpath(dest_dir)
        for member in members:
            # Treat backslashes as separators too, whatever the host OS is.
            relative = member.filename.replace("\\", "/")
            target = os.path.realpath(os.path.join(dest_root, relative))
            if os.path.commonpath([dest_root, target]) != dest_root:
                raise ValueError(f"Path traversal in archive: {member.filename}")

        zf.extractall(dest_dir)

6. Prevent Image-Based XSS (SVG/HTML)

SVG files can contain JavaScript. Never serve user-uploaded SVG or HTML inline with a renderable MIME type (image/svg+xml, text/html); force a download instead:

from fastapi.responses import Response

@app.get("/files/{file_id}")
async def serve_file(file_id: str):
    """
    Return a stored file as a forced download.

    The response uses a generic binary content type, an attachment
    disposition, nosniff and a deny-all CSP, so the browser does not render
    SVG/HTML content inline.
    """
    record = db.get_file(file_id)
    payload = storage.get(record.storage_key)

    download_headers = {
        "Content-Disposition": f'attachment; filename="{record.safe_name}"',
        "X-Content-Type-Options": "nosniff",
        "Content-Security-Policy": "default-src 'none'",
    }
    return Response(
        content=payload,
        media_type="application/octet-stream",
        headers=download_headers,
    )

For serving images inline (profile pictures, etc.), re-encode them through an image processing library:

from PIL import Image
import io

def re_encode_image(file_bytes: bytes, max_width=1920, max_height=1080) -> bytes:
    """
    Re-encode image through Pillow — strips embedded scripts, metadata, and ICC profiles.
    Any polyglot file that's valid PHP + valid JPEG will lose the PHP content here.

    Args:
        file_bytes: Raw bytes of the uploaded image.
        max_width: Maximum width of the result in pixels.
        max_height: Maximum height of the result in pixels.

    Returns:
        JPEG bytes of the image, downscaled to fit within the given bounds
        with its aspect ratio preserved.

    Raises:
        Whatever Pillow raises when the data cannot be opened or verified.
    """
    from PIL import ImageOps   # local import: only this function needs it

    img = Image.open(io.BytesIO(file_bytes))
    img.verify()   # detect truncated/corrupt images early

    img = Image.open(io.BytesIO(file_bytes))   # reopen after verify
    # Apply the EXIF orientation before the metadata is dropped; otherwise
    # photos taken with a rotated camera come out sideways.
    img = ImageOps.exif_transpose(img)
    img = img.convert("RGB")   # strip alpha channel / profiles
    img.thumbnail((max_width, max_height), Image.LANCZOS)

    # Copy only the pixels onto a fresh canvas. The decoded image still
    # carries the source file's `info` dict, and some Pillow releases write
    # entries from it (e.g. JPEG comments) back into the saved file.
    clean = Image.new("RGB", img.size)
    clean.paste(img)

    out = io.BytesIO()
    clean.save(out, format="JPEG", quality=85, optimize=True)
    return out.getvalue()

7. Rate Limit Upload Endpoints

from slowapi import Limiter
from slowapi.util import get_remote_address

limiter = Limiter(key_func=get_remote_address)

@app.post("/upload")
@limiter.limit("10/minute")
async def upload_file(request: Request, file: UploadFile = File(...)):
    """
    Upload endpoint limited to 10 requests per minute.

    The limiter is keyed by get_remote_address, i.e. per client address.
    `request` is not used in the body — presumably the limiter decorator
    needs it in the signature; confirm against the slowapi documentation.
    """
    # NOTE(review): this reads the entire upload into memory before any size
    # check has run.
    file_bytes = await file.read()
    # ... validate and store

Complete Validation Pipeline

async def handle_upload(file: UploadFile) -> dict:
    """
    Validate, sanitize, scan and store one uploaded file.

    Returns:
        A dict with the storage "key", the stored "size" in bytes and the
        MIME "type" of what was actually stored.

    Raises:
        HTTPException: 400 if the file is too large, has a disallowed type,
            cannot be decoded as an image, or is flagged by the malware scan.
        RuntimeError: If the malware scanner itself fails.
    """
    # 1. Size check — read at most one byte past the limit, so an oversized
    #    upload is never loaded into memory in full.
    file_bytes = await file.read(MAX_FILE_SIZE + 1)
    if len(file_bytes) > MAX_FILE_SIZE:
        raise HTTPException(400, "File too large")

    # 2. MIME detection from content
    try:
        mime = validate_upload(file_bytes, file.filename)
    except ValueError as exc:
        # A rejected file is a client error, not a server fault.
        raise HTTPException(400, str(exc)) from exc

    # 3. Image re-encoding (strips metadata and polyglots)
    if mime.startswith("image/"):
        try:
            file_bytes = re_encode_image(file_bytes)
        except (OSError, SyntaxError, ValueError) as exc:
            raise HTTPException(400, "Image could not be decoded") from exc
        # re_encode_image always emits JPEG, so the extension and the
        # reported type must follow the new content, not the original.
        mime = "image/jpeg"

    # 4. Malware scan
    #    NOTE: this call and the re-encoding above are blocking; consider
    #    running them in a worker thread on a busy server.
    try:
        scan_with_clamav(file_bytes)
    except ValueError as exc:
        # Keep scanner output (which includes a temp path) out of the response.
        raise HTTPException(400, "File rejected by malware scan") from exc

    # 5. Generate safe filename
    safe_name = safe_filename(mime)

    # 6. Store outside web root / in object storage
    key = store_file(file_bytes, safe_name)

    return {"key": key, "size": len(file_bytes), "type": mime}


Built by theluckystrike — More at zovo.one