PDFy

PDFy Production MVP Implementation Plan

For agentic workers: REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (- [ ]) syntax for tracking.

Goal: Build a production-leaning anonymous PDF malware scanning MVP with a Next.js website, a Python analyzer, fast synchronous scanning, queued advanced analysis, and immediate-delete-by-default retention.

Architecture: Use a pnpm monorepo with apps/web for the public product, services/analyzer for FastAPI-based PDF analysis, and workers/jobs for Redis-backed advanced scan and cleanup workers. Persist scan state in Postgres, store temporary files in S3-compatible storage, and keep schemas shared through a TypeScript contracts package.

Tech Stack: Next.js, TypeScript, Vitest, Prisma, Postgres, Redis, MinIO, FastAPI, Python 3.12, pypdf, pytest, RQ, pnpm, uv, Docker Compose


Planned File Structure

Task 1: Bootstrap The Monorepo And Local Services

Files:

{
  "name": "pdfy",
  "private": true,
  "packageManager": "pnpm@10.0.0",
  "scripts": {
    "dev:web": "pnpm --filter web dev",
    "test:web": "pnpm --filter web test",
    "lint:web": "pnpm --filter web lint",
    "typecheck:web": "pnpm --filter web typecheck"
  },
  "devDependencies": {
    "turbo": "^2.0.0"
  }
}
# pnpm workspace definition: every directory matching these globs is a workspace package.
packages:
  - apps/*
  - packages/*
{
  "$schema": "https://turbo.build/schema.json",
  "tasks": {
    "build": {
      "dependsOn": ["^build"],
      "outputs": [".next/**", "dist/**"]
    },
    "lint": {},
    "test": {},
    "typecheck": {}
  }
}
import { describe, expect, it } from "vitest";

import { GET } from "@/app/api/health/route";

// Smoke test for the web health route: the handler is imported and called
// directly as a function, so no HTTP server is involved.
describe("GET /api/health", () => {
  it("returns an ok status payload", async () => {
    const response = await GET();
    const body = await response.json();

    // Both the status code and the exact JSON body are pinned.
    expect(response.status).toBe(200);
    expect(body).toEqual({ status: "ok", service: "web" });
  });
});
from fastapi.testclient import TestClient

from app.main import app

client = TestClient(app)


def test_health_endpoint_returns_ok() -> None:
    """GET /health answers 200 with the fixed analyzer status payload."""
    response = client.get("/health")

    assert response.status_code == 200
    assert response.json() == {"status": "ok", "service": "analyzer"}

Run: pnpm --dir apps/web test Expected: FAIL with module or route resolution errors because the web health route is not implemented yet

Run: uv run --project services/analyzer pytest services/analyzer/tests/test_health.py -q Expected: FAIL because the FastAPI app entrypoint does not exist yet

/** Fixed body returned by the web health endpoint. */
const HEALTH_PAYLOAD = { status: "ok", service: "web" };

/**
 * Health probe for the web app.
 *
 * @returns A JSON response whose body is `{ status: "ok", service: "web" }`.
 */
export async function GET() {
  const healthResponse = Response.json(HEALTH_PAYLOAD);
  return healthResponse;
}
from fastapi import FastAPI

app = FastAPI(title="PDFy Analyzer")


@app.get("/health")
def health() -> dict[str, str]:
    """Return a fixed status payload identifying this service as the analyzer."""
    return {"status": "ok", "service": "analyzer"}
# Project metadata and dependencies for the analyzer service.
[project]
name = "pdfy-analyzer"
version = "0.1.0"
requires-python = ">=3.12"
dependencies = [
  "fastapi>=0.115.0",
  "httpx>=0.28.0",
  "pydantic>=2.9.0",
  "pytest>=8.3.0",
  # FastAPI needs python-multipart to parse multipart/form-data bodies; the
  # POST /analyze/fast route takes an UploadFile and cannot be registered
  # without it.
  "python-multipart>=0.0.9",
  "uvicorn>=0.34.0"
]

# pytest resolves imports relative to the project directory and collects from tests/.
[tool.pytest.ini_options]
pythonpath = ["."]
testpaths = ["tests"]
# Local infrastructure: Postgres, Redis and MinIO, all published on their default host ports.
# The credentials below are plain-text development values.
services:
  postgres:
    image: postgres:16
    environment:
      POSTGRES_USER: pdfy
      POSTGRES_PASSWORD: pdfy
      POSTGRES_DB: pdfy
    ports: ["5432:5432"]
  redis:
    image: redis:7
    ports: ["6379:6379"]
  # NOTE(review): the worker snippets read and delete objects in a bucket named
  # "pdfy"; nothing in this file creates that bucket - confirm it is provisioned.
  minio:
    image: minio/minio
    # 9000 serves the S3 API, 9001 the web console.
    command: server /data --console-address ":9001"
    environment:
      MINIO_ROOT_USER: minio
      MINIO_ROOT_PASSWORD: minio123
    ports: ["9000:9000", "9001:9001"]

Run: pnpm --dir apps/web test Expected: PASS for health.test.ts

Run: uv run --project services/analyzer pytest services/analyzer/tests/test_health.py -q Expected: PASS with 1 passed

git add package.json pnpm-workspace.yaml turbo.json .gitignore docker-compose.yml apps/web services/analyzer
git commit -m "chore: bootstrap workspace and service health checks"

Task 2: Add Shared Contracts And Scan Persistence

Files:

import { describe, expect, it } from "vitest";

import { createScanInputSchema, retentionModeSchema } from "@pdfy/contracts";

// Contract tests for the shared zod schemas exported by @pdfy/contracts.
describe("scan contracts", () => {
  it("defaults upload requests to immediate deletion", () => {
    // Parsing an empty object must fill in the retention default.
    const parsed = createScanInputSchema.parse({});

    expect(parsed.retentionMode).toBe("delete_immediately");
  });

  it("allows only supported retention modes", () => {
    // safeParse reports success/failure instead of throwing.
    expect(retentionModeSchema.safeParse("temporary_cache").success).toBe(true);
    expect(retentionModeSchema.safeParse("forever").success).toBe(false);
  });
});

Run: pnpm --dir apps/web vitest run tests/contracts/scan-contracts.test.ts Expected: FAIL with package import errors because @pdfy/contracts is not implemented yet

import { z } from "zod";

/** Retention choices a caller may request for an uploaded file. */
export const retentionModeSchema = z.enum([
  "delete_immediately",
  "temporary_cache"
]);

export type RetentionMode = z.infer<typeof retentionModeSchema>;

/**
 * Options accepted when a scan is created. Both fields are optional on input:
 * retention defaults to immediate deletion and enrichment defaults to enabled.
 */
export const createScanInputSchema = z.object({
  retentionMode: retentionModeSchema.default("delete_immediately"),
  enableEnrichment: z.boolean().default(true)
});

export type CreateScanInput = z.infer<typeof createScanInputSchema>;

/** Lifecycle states a scan record can be in. */
export const scanStatusSchema = z.enum([
  "queued_fast_scan",
  "running_fast_scan",
  "completed_fast_scan",
  "queued_advanced_scan",
  "running_advanced_scan",
  "completed",
  "failed",
  "expired"
]);

export type ScanStatus = z.infer<typeof scanStatusSchema>;

/**
 * Shape of the JSON returned by the analyzer's POST /analyze/fast endpoint.
 * Field names are snake_case because they mirror the analyzer's pydantic
 * `FastScanResult` model (file_name, sha256, keyword_hits, iocs, summary).
 */
export const fastScanResultSchema = z.object({
  file_name: z.string(),
  sha256: z.string(),
  keyword_hits: z.array(z.string()),
  iocs: z.object({
    urls: z.array(z.string()),
    ips: z.array(z.string())
  }),
  summary: z.object({
    verdict: z.string(),
    score: z.number().int(),
    confidence: z.string()
  })
});

export type FastScanResult = z.infer<typeof fastScanResultSchema>;
export * from "./scan";
/// One uploaded PDF and the state of its analysis.
model Scan {
  id               String    @id @default(cuid())
  sha256           String
  fileName         String
  /// Object-storage key of the uploaded file; optional.
  storageKey       String?
  retentionMode    String
  status           String
  verdict          String?
  score            Int?
  enableEnrichment Boolean   @default(true)
  expiresAt        DateTime?
  createdAt        DateTime  @default(now())
  updatedAt        DateTime  @updatedAt
  findings         Finding[]
}

/// A single analysis finding attached to a scan; removed with its scan (cascade).
model Finding {
  id        String   @id @default(cuid())
  scanId    String
  category  String
  severity  String
  title     String
  detail    Json
  createdAt DateTime @default(now())
  scan      Scan     @relation(fields: [scanId], references: [id], onDelete: Cascade)

  // PostgreSQL does not index foreign-key columns on its own; findings are
  // looked up and cascade-deleted by scanId.
  @@index([scanId])
}
import { PrismaClient } from "@prisma/client";

declare global {
  // Slot on the global object used to hold the shared Prisma client.
  var prisma: PrismaClient | undefined;
}

/** Builds a Prisma client that logs only warnings and errors. */
function createPrismaClient(): PrismaClient {
  return new PrismaClient({
    log: ["warn", "error"]
  });
}

/**
 * Shared Prisma client: reuses the instance stored on the global object when
 * one exists, otherwise constructs a new one.
 */
export const db = globalThis.prisma ?? createPrismaClient();

// Outside production the client is stored on the global object so later
// evaluations of this module pick up the same instance.
if (process.env.NODE_ENV !== "production") {
  globalThis.prisma = db;
}
import { z } from "zod";

/**
 * Validation schema for the web app's environment: each listed variable must
 * be present and parse as a URL.
 *
 * NOTE(review): the example env file also lists S3_BUCKET, VT_API_KEY and
 * ENABLE_ENRICHMENT, which this schema does not cover - confirm whether they
 * should be validated here as well.
 */
export const envSchema = z.object({
  DATABASE_URL: z.string().url(),
  ANALYZER_BASE_URL: z.string().url(),
  REDIS_URL: z.string().url(),
  S3_ENDPOINT: z.string().url()
});

Run: pnpm --dir apps/web vitest run tests/contracts/scan-contracts.test.ts Expected: PASS

Run: pnpm --dir apps/web prisma validate Expected: The schema at prisma/schema.prisma is valid

git add packages/contracts apps/web/prisma apps/web/src/lib/db.ts apps/web/src/lib/env.ts apps/web/tests/contracts
git commit -m "feat: add shared scan contracts and persistence schema"

Task 3: Implement The Fast Analyzer Engine

Files:

from app.services.fast_scan import run_fast_scan


def test_fast_scan_detects_javascript_and_url() -> None:
    """A PDF whose OpenAction runs JavaScript containing a URL is flagged with both indicators."""
    # Hand-written fixture: the catalog's /OpenAction points at a /JavaScript action
    # whose script embeds a single-quoted URL.
    pdf_bytes = b"""%PDF-1.4
1 0 obj
<< /Type /Catalog /OpenAction 2 0 R >>
endobj
2 0 obj
<< /S /JavaScript /JS (app.alert('x'); var u='http://bad.test') >>
endobj
trailer << /Root 1 0 R >>
%%EOF
"""

    result = run_fast_scan(pdf_bytes, "malicious.pdf")

    assert result.summary.verdict in {"suspicious", "malicious"}
    assert "/JavaScript" in result.keyword_hits
    # The URL must be reported without the surrounding quote characters.
    assert "http://bad.test" in result.iocs.urls

Run: uv run --project services/analyzer pytest services/analyzer/tests/test_fast_scan.py -q Expected: FAIL because run_fast_scan and the scan models do not exist yet

from pydantic import BaseModel, Field


class IocSet(BaseModel):
    """Network indicators extracted from a file; both lists default to empty."""

    urls: list[str] = Field(default_factory=list)
    ips: list[str] = Field(default_factory=list)


class ScanSummary(BaseModel):
    """Overall outcome of a scan: verdict label, numeric score and confidence label."""

    verdict: str
    score: int
    confidence: str


class FastScanResult(BaseModel):
    """Full result of a fast scan for one file."""

    file_name: str
    sha256: str
    # Suspicious keyword tokens that were found in the file.
    keyword_hits: list[str]
    iocs: IocSet
    summary: ScanSummary
import hashlib


def sha256_bytes(payload: bytes) -> str:
    """Return the SHA-256 digest of ``payload`` as a lowercase hexadecimal string."""
    digest = hashlib.sha256()
    digest.update(payload)
    return digest.hexdigest()
import re

# Raw PDF name tokens whose presence in the file bytes is treated as suspicious.
SUSPICIOUS_KEYWORDS = [
    b"/JavaScript",
    b"/JS",
    b"/OpenAction",
    b"/EmbeddedFile"
]

# URL candidates in raw bytes. A match stops at whitespace, angle brackets,
# parentheses and quote characters, so a URL inside a quoted script string such
# as 'http://bad.test' is captured without the closing quote. Control and
# non-ASCII bytes also end a match, which keeps every match decodable as text.
URL_RE = re.compile(rb"https?://[^\s<>()'\"\x00-\x1f\x7f-\xff]+")

# Dotted-quad candidates: four groups of one to three digits. Octet ranges are
# not checked by the pattern.
IP_RE = re.compile(rb"\b(?:\d{1,3}\.){3}\d{1,3}\b")
from app.models import FastScanResult, IocSet, ScanSummary
from app.services.hash_utils import sha256_bytes
from app.services.pdf_extract import IP_RE, SUSPICIOUS_KEYWORDS, URL_RE


def run_fast_scan(pdf_bytes: bytes, file_name: str) -> FastScanResult:
    """Run the keyword and IOC heuristics over raw PDF bytes and score the result.

    Args:
        pdf_bytes: Raw content of the uploaded file.
        file_name: Name to record on the result.

    Returns:
        A FastScanResult with the SHA-256 of the input, the suspicious keywords
        found, the de-duplicated and sorted URL/IP indicators, and a summary.
        The score is 20 per keyword, 15 per URL and 10 per IP, capped at 100;
        80 or more is "malicious", 40 or more is "suspicious", otherwise "clean".
    """

    def _octets_in_range(candidate: str) -> bool:
        # IP_RE accepts any one-to-three digit groups, so matches such as
        # 999.1.1.1 are dropped here instead of being counted as indicators.
        return all(int(octet) <= 255 for octet in candidate.split("."))

    keyword_hits = [
        keyword.decode("utf-8") for keyword in SUSPICIOUS_KEYWORDS if keyword in pdf_bytes
    ]
    # errors="replace": a URL match taken from binary content may contain bytes
    # that are not valid UTF-8; a strict decode would abort the whole scan.
    urls = sorted(
        {match.decode("utf-8", errors="replace") for match in URL_RE.findall(pdf_bytes)}
    )
    # IP_RE matches only digits and dots, so an ASCII decode cannot fail.
    ip_candidates = {match.decode("ascii") for match in IP_RE.findall(pdf_bytes)}
    ips = sorted(candidate for candidate in ip_candidates if _octets_in_range(candidate))

    score = min(100, len(keyword_hits) * 20 + len(urls) * 15 + len(ips) * 10)
    verdict = "malicious" if score >= 80 else "suspicious" if score >= 40 else "clean"

    return FastScanResult(
        file_name=file_name,
        sha256=sha256_bytes(pdf_bytes),
        keyword_hits=keyword_hits,
        iocs=IocSet(urls=urls, ips=ips),
        summary=ScanSummary(verdict=verdict, score=score, confidence="medium")
    )
from fastapi import APIRouter, File, HTTPException, UploadFile

from app.services.fast_scan import run_fast_scan

router = APIRouter(prefix="/analyze")

# Largest upload the fast scan will load into memory. The value is a
# placeholder - TODO confirm the limit the product should enforce.
MAX_UPLOAD_BYTES = 25 * 1024 * 1024


@router.post("/fast")
async def analyze_fast(file: UploadFile = File(...)):
    """Run the fast scan on an uploaded file and return its result.

    The upload comes from anonymous users, so the amount read into memory is
    bounded: one byte more than the limit is requested, and a longer payload is
    rejected with 413 instead of being scanned.

    Raises:
        HTTPException: 413 when the upload exceeds MAX_UPLOAD_BYTES.
    """
    payload = await file.read(MAX_UPLOAD_BYTES + 1)
    if len(payload) > MAX_UPLOAD_BYTES:
        raise HTTPException(status_code=413, detail="Uploaded file is too large")
    # Uploads without a filename are recorded under a fixed fallback name.
    return run_fast_scan(payload, file.filename or "upload.pdf")
from fastapi import FastAPI

from app.routes.analyze import router as analyze_router

app = FastAPI(title="PDFy Analyzer")
app.include_router(analyze_router)


# The health route from the first version of this module is kept: the plan
# still expects the health test to pass after the analyze router is added.
@app.get("/health")
def health() -> dict[str, str]:
    """Return a fixed status payload identifying this service as the analyzer."""
    return {"status": "ok", "service": "analyzer"}

Run: uv run --project services/analyzer pytest services/analyzer/tests/test_fast_scan.py -q Expected: PASS

Run: uv run --project services/analyzer pytest -q Expected: PASS for health and fast scan tests

git add services/analyzer/app services/analyzer/tests/test_fast_scan.py
git commit -m "feat: add fast PDF analysis engine"

Task 4: Build Upload Creation And Fast Scan Persistence

Files:

import { describe, expect, it } from "vitest";

import { createScan } from "@/src/lib/scans/create-scan";

// Behavior tests for createScan: retention defaulting and upload-type rejection.
describe("createScan", () => {
  it("defaults to immediate deletion when no retention mode is provided", async () => {
    const result = await createScan({
      fileName: "sample.pdf",
      contentType: "application/pdf",
      // ASCII bytes for "%PDF".
      bytes: new Uint8Array([37, 80, 68, 70])
    });

    expect(result.retentionMode).toBe("delete_immediately");
  });

  it("rejects non-pdf uploads", async () => {
    await expect(
      createScan({
        fileName: "sample.exe",
        contentType: "application/octet-stream",
        bytes: new Uint8Array([1, 2, 3])
      })
    ).rejects.toThrow("Only PDF files are allowed");
  });
});

Run: pnpm --dir apps/web vitest run tests/api/create-scan.test.ts Expected: FAIL because createScan is not implemented yet

/**
 * Sends a file to the analyzer's fast-scan endpoint and returns the validated result.
 *
 * @param file - The upload to analyze; sent as the multipart field `file`.
 * @returns The analyzer response after validation by `fastScanResultSchema`.
 * @throws Error when `ANALYZER_BASE_URL` is not set, or when the analyzer
 *   answers with a non-2xx status. Schema validation errors propagate as thrown
 *   by `parse`.
 */
export async function runFastScan(file: File): Promise<FastScanResult> {
  const baseUrl = process.env.ANALYZER_BASE_URL;
  if (!baseUrl) {
    // Without this guard the request would go to the literal URL "undefined/analyze/fast".
    throw new Error("ANALYZER_BASE_URL is not configured");
  }

  const form = new FormData();
  form.set("file", file);

  // Trailing slashes on the base URL are dropped so the path is not doubled up.
  const endpoint = `${baseUrl.replace(/\/+$/, "")}/analyze/fast`;
  const response = await fetch(endpoint, {
    method: "POST",
    body: form
  });

  if (!response.ok) {
    throw new Error(`Fast scan request failed with status ${response.status}`);
  }

  return fastScanResultSchema.parse(await response.json());
}
/** Location of an object in temporary storage. */
export type StoredObject = {
  storageKey: string;
};

/**
 * Reduces a user-supplied file name to a single safe key segment: keeps only
 * the part after the last path separator, replaces every run of characters
 * outside `[A-Za-z0-9._-]` with `_`, strips leading dots, and caps the length.
 * Falls back to "upload.pdf" when nothing usable remains.
 */
function toSafeKeySegment(fileName: string): string {
  const baseName = fileName.split(/[\\/]/).pop() ?? "";
  const cleaned = baseName
    .replace(/[^A-Za-z0-9._-]+/g, "_")
    .replace(/^\.+/, "")
    .slice(0, 128);
  return cleaned === "" ? "upload.pdf" : cleaned;
}

/**
 * Builds the storage key for a temporary upload.
 *
 * The key is `uploads/<random UUID>-<sanitized file name>`. The file name comes
 * from an anonymous upload, so it is sanitized before being embedded in the key.
 *
 * NOTE(review): `input.bytes` is not written anywhere in this function, while
 * the advanced-scan worker later reads this key from object storage - the
 * actual upload still has to be wired in.
 *
 * @param input - Uploaded bytes and the original file name.
 * @returns The key under which the object is expected to live.
 */
export async function putTemporaryObject(input: {
  bytes: Uint8Array;
  fileName: string;
}): Promise<StoredObject> {
  return {
    storageKey: `uploads/${crypto.randomUUID()}-${toSafeKeySegment(input.fileName)}`
  };
}
import { createScanInputSchema } from "@pdfy/contracts";

/**
 * How long a `temporary_cache` upload may remain before it is eligible for
 * cleanup. Placeholder value (24 hours) - TODO confirm the intended window.
 */
const TEMPORARY_CACHE_TTL_MS = 24 * 60 * 60 * 1000;

/** ASCII bytes of the "%PDF" header marker. */
const PDF_MAGIC = [0x25, 0x50, 0x44, 0x46];

/**
 * Reports whether the "%PDF" marker occurs within the first 1024 bytes, so
 * files with some leading junk before the header are still accepted.
 */
function looksLikePdf(bytes: Uint8Array): boolean {
  const lastStart = Math.min(bytes.length, 1024) - PDF_MAGIC.length;
  for (let start = 0; start <= lastStart; start += 1) {
    if (PDF_MAGIC.every((byte, offset) => bytes[start + offset] === byte)) {
      return true;
    }
  }
  return false;
}

/**
 * Validates an upload, runs the fast scan, and persists the scan record.
 *
 * @param input - File name, declared content type, raw bytes, and optional
 *   retention/enrichment choices (defaults come from `createScanInputSchema`).
 * @returns The created scan row.
 * @throws Error "Only PDF files are allowed" when the declared content type is
 *   not `application/pdf` or the bytes carry no PDF header marker.
 */
export async function createScan(input: {
  fileName: string;
  contentType: string;
  bytes: Uint8Array;
  retentionMode?: "delete_immediately" | "temporary_cache";
  enableEnrichment?: boolean;
}) {
  // The content type is supplied by the client, so the bytes are checked too.
  if (input.contentType !== "application/pdf" || !looksLikePdf(input.bytes)) {
    throw new Error("Only PDF files are allowed");
  }

  const parsed = createScanInputSchema.parse({
    retentionMode: input.retentionMode,
    enableEnrichment: input.enableEnrichment
  });

  const file = new File([input.bytes], input.fileName, { type: input.contentType });
  const fastScan = await runFastScan(file);
  const stored = await putTemporaryObject({ bytes: input.bytes, fileName: input.fileName });

  // Cached uploads get an expiry so retention cleanup can remove them; a null
  // expiry on a cached upload would never be considered due.
  const expiresAt =
    parsed.retentionMode === "temporary_cache"
      ? new Date(Date.now() + TEMPORARY_CACHE_TTL_MS)
      : null;

  return db.scan.create({
    data: {
      fileName: input.fileName,
      sha256: fastScan.sha256,
      storageKey: stored.storageKey,
      retentionMode: parsed.retentionMode,
      status: "completed_fast_scan",
      verdict: fastScan.summary.verdict,
      score: fastScan.summary.score,
      enableEnrichment: parsed.enableEnrichment,
      expiresAt
    }
  });
}
/**
 * Upload endpoint: accepts a multipart form with a `file` field and an optional
 * `retention` field, creates the scan, and returns its id, status and result URL.
 *
 * Responses:
 * - 400 when `file` is missing or `retention` is present but not a supported mode.
 * - 415 when the upload is rejected as not being a PDF.
 * - 200 with `{ scanId, status, retentionMode, resultUrl }` on success.
 * Any other failure is rethrown.
 */
export async function POST(request: Request) {
  const form = await request.formData();
  const file = form.get("file");

  if (!(file instanceof File)) {
    return Response.json({ error: "Missing file" }, { status: 400 });
  }

  // "retention" is the field name used by the upload form's radio buttons.
  const requestedRetention = form.get("retention");
  let retentionMode: "delete_immediately" | "temporary_cache" | undefined;
  if (requestedRetention === "delete_immediately" || requestedRetention === "temporary_cache") {
    retentionMode = requestedRetention;
  } else if (requestedRetention !== null) {
    return Response.json({ error: "Unsupported retention mode" }, { status: 400 });
  }

  try {
    const scan = await createScan({
      fileName: file.name,
      contentType: file.type,
      bytes: new Uint8Array(await file.arrayBuffer()),
      retentionMode
    });

    return Response.json({
      scanId: scan.id,
      status: scan.status,
      retentionMode: scan.retentionMode,
      resultUrl: `/scans/${scan.id}`
    });
  } catch (error: unknown) {
    // createScan signals a rejected upload type with this exact message.
    if (error instanceof Error && error.message === "Only PDF files are allowed") {
      return Response.json({ error: error.message }, { status: 415 });
    }
    throw error;
  }
}

Run: pnpm --dir apps/web vitest run tests/api/create-scan.test.ts Expected: PASS

Run: pnpm --dir apps/web prisma migrate dev --name init_scans Expected: migration created successfully

git add apps/web/app/api/scans apps/web/src/lib/storage.ts apps/web/src/lib/analyzer-client.ts apps/web/src/lib/scans apps/web/tests/api/create-scan.test.ts apps/web/prisma
git commit -m "feat: add upload creation and fast scan persistence"

Task 5: Build The Website Scanner And Result Pages

Files:

import { render, screen } from "@testing-library/react";
import { describe, expect, it } from "vitest";

import { UploadForm } from "@/src/components/upload-form";

// Rendering test for the upload form's default retention choice.
describe("UploadForm", () => {
  it("defaults the retention choice to immediate deletion", () => {
    render(<UploadForm />);

    // The radio is located through its label text, matched case-insensitively.
    expect(screen.getByLabelText(/delete immediately after analysis/i)).toBeChecked();
  });
});

Run: pnpm --dir apps/web vitest run tests/ui/upload-form.test.tsx Expected: FAIL because the upload components are not implemented yet

"use client";

import { useState } from "react";

/** Retention choices rendered as radio buttons, in display order. */
const RETENTION_CHOICES = [
  { value: "delete_immediately", label: "Delete immediately after analysis" },
  { value: "temporary_cache", label: "Keep a temporary cached result" }
];

/**
 * Upload form with a retention radio group, a PDF file picker and a submit
 * button. The selected retention mode is held in component state and starts
 * at "delete_immediately".
 */
export function UploadForm() {
  const [retentionMode, setRetentionMode] = useState("delete_immediately");

  return (
    <form className="scanner-form">
      {RETENTION_CHOICES.map((choice) => (
        <label key={choice.value}>
          <input
            type="radio"
            name="retention"
            value={choice.value}
            checked={retentionMode === choice.value}
            onChange={() => setRetentionMode(choice.value)}
          />
          {choice.label}
        </label>
      ))}
      <input type="file" name="file" accept="application/pdf" />
      <button type="submit">Scan PDF</button>
    </form>
  );
}
import { UploadForm } from "@/src/components/upload-form";

/** Landing page: product heading, a one-line description, and the upload form. */
export default function HomePage() {
  return (
    <main>
      <h1>PDFy</h1>
      <p>Analyze uploaded PDFs for suspicious structure, JavaScript, and network indicators.</p>
      <UploadForm />
    </main>
  );
}
/**
 * Summary panel for a scan: the verdict in upper case as the heading, followed
 * by the risk score and the retention mode.
 */
export function ScanSummary(props: {
  verdict: string;
  score: number;
  retentionMode: string;
}) {
  const { verdict, score, retentionMode } = props;
  const heading = verdict.toUpperCase();

  return (
    <section>
      <h2>{heading}</h2>
      <p>Risk score: {score}</p>
      <p>Retention: {retentionMode}</p>
    </section>
  );
}
import { getScan } from "@/src/lib/scans/get-scan";
import { ScanSummary } from "@/src/components/scan-summary";

/**
 * Result page for one scan: loads the scan by the `scanId` route parameter and
 * renders its summary.
 *
 * `verdict` and `score` are optional on the scan record. When either is absent
 * a placeholder is rendered instead of the summary, so the page neither throws
 * nor shows a made-up score.
 */
export default async function ScanPage({
  params
}: {
  params: Promise<{ scanId: string }>;
}) {
  const { scanId } = await params;
  const scan = await getScan(scanId);

  // `== null` covers both null and undefined.
  if (scan.verdict == null || scan.score == null) {
    return <p>Scan results are not available yet.</p>;
  }

  return (
    <ScanSummary
      verdict={scan.verdict}
      score={scan.score}
      retentionMode={scan.retentionMode}
    />
  );
}

Run: pnpm --dir apps/web vitest run tests/ui/upload-form.test.tsx Expected: PASS

Run: pnpm --dir apps/web test Expected: PASS for contract, API, UI, and health tests

git add apps/web/app/page.tsx apps/web/app/scans apps/web/src/components apps/web/tests/ui/upload-form.test.tsx
git commit -m "feat: add scanner homepage and result pages"

Task 6: Add Advanced Analysis Scheduling And Worker Processing

Files:

from app.services.advanced_scan import summarize_advanced_findings


def test_advanced_scan_flags_embedded_file_keyword() -> None:
    """Bytes containing /EmbeddedFile are reported under the embedded_file category."""
    pdf_bytes = b"%PDF-1.4 /EmbeddedFile /OpenAction /JS"

    result = summarize_advanced_findings(pdf_bytes)

    # summarize_advanced_findings returns a plain dict keyed by "categories",
    # so the list is read by key rather than as an attribute.
    assert "embedded_file" in result["categories"]
import { describe, expect, it, vi } from "vitest";

import { createScan } from "@/src/lib/scans/create-scan";

// Extends the createScan tests: after the fast scan, the returned record must
// already be marked as queued for advanced analysis.
describe("createScan advanced queueing", () => {
  it("marks advanced analysis as queued after fast scan succeeds", async () => {
    const scan = await createScan({
      fileName: "sample.pdf",
      contentType: "application/pdf",
      // ASCII bytes for "%PDF".
      bytes: new Uint8Array([37, 80, 68, 70])
    });

    expect(scan.status).toBe("queued_advanced_scan");
  });
});

Run: uv run --project services/analyzer pytest services/analyzer/tests/test_advanced_scan.py -q Expected: FAIL because advanced scan helpers do not exist yet

Run: pnpm --dir apps/web vitest run tests/api/create-scan.test.ts Expected: FAIL because the scan status still stops at completed_fast_scan

/// Scan record extended with advanced-analysis tracking.
model Scan {
  id               String   @id @default(cuid())
  sha256           String
  fileName         String
  storageKey       String?
  retentionMode    String
  status           String
  verdict          String?
  score            Int?
  /// State of the advanced analysis; new rows start as "queued".
  advancedStatus   String   @default("queued")
  /// Optional timestamp; the advanced-scan worker sets it when it finishes.
  reportGeneratedAt DateTime?
  enableEnrichment Boolean  @default(true)
  expiresAt        DateTime?
  createdAt        DateTime @default(now())
  updatedAt        DateTime @updatedAt
  findings         Finding[]
}
def summarize_advanced_findings(pdf_bytes: bytes) -> dict[str, list[str]]:
    """Classify raw PDF bytes into advanced finding categories.

    Returns a dict with a single key, "categories", listing in order:
    "embedded_file" when /EmbeddedFile occurs in the bytes, and
    "open_action_javascript" when both /OpenAction and /JS occur.
    """
    has_embedded_file = b"/EmbeddedFile" in pdf_bytes
    has_scripted_open_action = b"/OpenAction" in pdf_bytes and b"/JS" in pdf_bytes

    checks = (
        ("embedded_file", has_embedded_file),
        ("open_action_javascript", has_scripted_open_action),
    )
    return {"categories": [name for name, matched in checks if matched]}
import os

from redis import Redis
from rq import Queue

# The connection URL comes from REDIS_URL, the setting listed in the example
# environment file; the local development URL is the fallback.
redis = Redis.from_url(os.environ.get("REDIS_URL", "redis://localhost:6379/0"))
queue = Queue("advanced-scans", connection=redis)


def enqueue_advanced_scan(scan_id: str, storage_key: str) -> str:
    """Queue an advanced scan job for a stored upload and return the job id.

    The job is referenced by its dotted path, so the worker process must be
    able to import ``workers.jobs.worker``.
    """
    job = queue.enqueue("workers.jobs.worker.run_advanced_scan_job", scan_id, storage_key)
    return job.id
import os
from urllib.parse import urlsplit

from minio import Minio
from psycopg import connect


def _storage_client() -> Minio:
    """Build the object-storage client from the environment, defaulting to local MinIO.

    S3_ENDPOINT may be a full URL (as in the example environment file) or a
    bare host:port. The Minio client wants host:port plus a TLS flag, so a URL
    is split into those parts.
    """
    raw_endpoint = os.environ.get("S3_ENDPOINT", "http://localhost:9000")
    if "://" in raw_endpoint:
        parts = urlsplit(raw_endpoint)
        host, secure = parts.netloc, parts.scheme == "https"
    else:
        host, secure = raw_endpoint, False

    # NOTE(review): S3_ACCESS_KEY / S3_SECRET_KEY are names chosen here; align
    # them with the deployment's secret names. The defaults are the local
    # development credentials.
    return Minio(
        host,
        access_key=os.environ.get("S3_ACCESS_KEY", "minio"),
        secret_key=os.environ.get("S3_SECRET_KEY", "minio123"),
        secure=secure
    )


def run_advanced_scan_job(scan_id: str, storage_key: str) -> None:
    """Run the advanced analysis for one stored upload and record the outcome.

    Downloads the object, derives finding categories from its bytes, inserts one
    "Finding" row per category, and marks the scan as completed.

    Args:
        scan_id: Id of the "Scan" row to attach findings to and update.
        storage_key: Object key of the uploaded file in the storage bucket.
    """
    storage = _storage_client()
    response = storage.get_object(os.environ.get("S3_BUCKET", "pdfy"), storage_key)
    try:
        payload = response.read()
    finally:
        # Release the HTTP connection even when the read fails.
        response.close()
        response.release_conn()

    # NOTE(review): summarize_advanced_findings is not imported in this module
    # and is defined in the analyzer service - confirm how the worker is meant
    # to obtain it.
    findings = summarize_advanced_findings(payload)

    # NOTE(review): assumes DATABASE_URL is a plain libpq-compatible URL - confirm.
    database_url = os.environ.get("DATABASE_URL", "postgresql://pdfy:pdfy@localhost:5432/pdfy")
    with connect(database_url) as conn:
        with conn.cursor() as cur:
            for category in findings["categories"]:
                # The id is generated in SQL because this insert supplies every
                # column value itself.
                cur.execute(
                    """
                    insert into "Finding" ("id", "scanId", "category", "severity", "title", "detail", "createdAt")
                    values (gen_random_uuid()::text, %s, %s, %s, %s, %s::jsonb, now())
                    """,
                    (
                        scan_id,
                        category,
                        "medium",
                        f"Advanced finding: {category}",
                        '{"source":"advanced_scan"}'
                    )
                )

            cur.execute(
                """
                update "Scan"
                set "advancedStatus" = %s,
                    "status" = %s,
                    "reportGeneratedAt" = now(),
                    "updatedAt" = now()
                where "id" = %s
                """,
                ("completed", "completed", scan_id)
            )
        conn.commit()
# Project metadata and dependencies for the background jobs (advanced scan and cleanup workers).
[project]
name = "pdfy-jobs"
version = "0.1.0"
requires-python = ">=3.12"
dependencies = [
  "minio>=7.2.0",
  "psycopg[binary]>=3.2.0",
  "pytest>=8.3.0",
  "redis>=5.0.0",
  "rq>=1.16.0"
]

# pytest resolves imports relative to the project directory and collects from tests/.
[tool.pytest.ini_options]
pythonpath = ["."]
testpaths = ["tests"]
// Replacement for the tail of createScan: the record is created already marked
// as queued for advanced analysis, and the job is scheduled afterwards.
const scan = await db.scan.create({
  data: {
    fileName: input.fileName,
    sha256: fastScan.sha256,
    storageKey: stored.storageKey,
    retentionMode: parsed.retentionMode,
    status: "queued_advanced_scan",
    advancedStatus: "queued",
    verdict: fastScan.summary.verdict,
    score: fastScan.summary.score,
    enableEnrichment: parsed.enableEnrichment
  }
});

// NOTE(review): if scheduling throws, the row above stays in
// "queued_advanced_scan" with no job behind it - confirm how that is recovered.
await scheduleAdvancedScan({
  scanId: scan.id,
  storageKey: stored.storageKey
});

return scan;

Run: pnpm --dir apps/web prisma migrate dev --name add_advanced_status Expected: migration created successfully

Run: uv run --project services/analyzer pytest -q Expected: PASS for health, fast scan, and advanced scan tests

Run: pnpm --dir apps/web test Expected: PASS with scan creation asserting queued_advanced_scan

git add services/analyzer workers/jobs apps/web/src/lib/scans/create-scan.ts apps/web/prisma/schema.prisma apps/web/tests/api/create-scan.test.ts
git commit -m "feat: add advanced scan scheduling and worker processing"

Task 7: Implement Report Retrieval, Cleanup, And Retention Enforcement

Files:

from workers.jobs.cleanup import should_delete_scan_file


def test_delete_immediately_scans_are_removed_after_processing() -> None:
    """delete_immediately uploads are always eligible for deletion, even without an expiry."""
    assert should_delete_scan_file("delete_immediately", expires_at=None) is True
import { describe, expect, it } from "vitest";

import { getScanReport } from "@/src/lib/scans/get-scan-report";

// Shape test for the report builder: only the presence of the top-level
// sections is asserted, not their contents.
describe("getScanReport", () => {
  it("returns a structured report with summary and IOC sections", async () => {
    const report = await getScanReport("scan_123");

    expect(report).toHaveProperty("summary");
    expect(report).toHaveProperty("iocs");
  });
});

Run: uv run --project workers/jobs pytest workers/jobs/tests/test_retention_cleanup.py -q Expected: FAIL because cleanup helpers do not exist yet

Run: pnpm --dir apps/web vitest run tests/report/get-scan-report.test.ts Expected: FAIL because getScanReport does not exist yet

from datetime import datetime, timezone
from minio import Minio
from psycopg import connect


def should_delete_scan_file(retention_mode: str, expires_at: datetime | None) -> bool:
    if retention_mode == "delete_immediately":
        return True
    if expires_at is None:
        return False
    return expires_at <= datetime.now(timezone.utc)


def run_cleanup_job() -> None:
    """Delete stored uploads that are due for removal and mark their scans expired.

    Scans already marked "expired" are skipped. "delete_immediately" scans are
    only considered once their status is "completed" or "failed", so an upload
    is not removed while the advanced scan still has to read it. For every
    remaining scan that should_delete_scan_file accepts, the object is removed
    from storage (when a key is recorded) and the row's storage key is cleared.
    """
    # Function-scope imports keep this function self-contained.
    import os
    from urllib.parse import urlsplit

    # NOTE(review): assumes DATABASE_URL is a plain libpq-compatible URL - confirm.
    database_url = os.environ.get("DATABASE_URL", "postgresql://pdfy:pdfy@localhost:5432/pdfy")
    bucket = os.environ.get("S3_BUCKET", "pdfy")

    # S3_ENDPOINT may be a full URL or a bare host:port; the Minio client wants
    # host:port plus a TLS flag.
    raw_endpoint = os.environ.get("S3_ENDPOINT", "http://localhost:9000")
    if "://" in raw_endpoint:
        endpoint = urlsplit(raw_endpoint)
        storage_host, storage_secure = endpoint.netloc, endpoint.scheme == "https"
    else:
        storage_host, storage_secure = raw_endpoint, False

    with connect(database_url) as conn:
        with conn.cursor() as cur:
            cur.execute(
                """
                select "id", "storageKey", "retentionMode", "expiresAt"
                from "Scan"
                where "status" <> 'expired'
                  and (
                    "retentionMode" <> 'delete_immediately'
                    or "status" in ('completed', 'failed')
                  )
                """
            )
            rows = cur.fetchall()

        # NOTE(review): S3_ACCESS_KEY / S3_SECRET_KEY are names chosen here;
        # the defaults are the local development credentials.
        storage = Minio(
            storage_host,
            access_key=os.environ.get("S3_ACCESS_KEY", "minio"),
            secret_key=os.environ.get("S3_SECRET_KEY", "minio123"),
            secure=storage_secure
        )

        for scan_id, storage_key, retention_mode, expires_at in rows:
            if not should_delete_scan_file(retention_mode, expires_at):
                continue

            # Rows without a storage key have nothing to remove but are still
            # marked expired below.
            if storage_key:
                storage.remove_object(bucket, storage_key)

            with conn.cursor() as cur:
                cur.execute(
                    """
                    update "Scan"
                    set "storageKey" = null,
                        "status" = %s,
                        "updatedAt" = now()
                    where "id" = %s
                    """,
                    ("expired", scan_id)
                )
        conn.commit()
/**
 * Reads a list of strings stored under `key` in a finding's JSON detail.
 * Returns an empty list when the detail is not a plain object, the key is
 * absent, or the value is not an array; non-string entries are dropped.
 */
function stringListFrom(detail: unknown, key: string): string[] {
  if (typeof detail !== "object" || detail === null || Array.isArray(detail)) {
    return [];
  }
  const value = (detail as Record<string, unknown>)[key];
  return Array.isArray(value)
    ? value.filter((item): item is string => typeof item === "string")
    : [];
}

/**
 * Builds the report for one scan: summary, stored findings, the URL/IP
 * indicators collected from the findings' detail, and fixed mitigation advice.
 *
 * @param scanId - Id of the scan to report on.
 * @returns The assembled report object.
 * @throws Whatever `findUniqueOrThrow` throws when no scan has this id.
 */
export async function getScanReport(scanId: string) {
  const scan = await db.scan.findUniqueOrThrow({
    where: { id: scanId },
    include: { findings: true }
  });

  return {
    scanId: scan.id,
    summary: {
      verdict: scan.verdict,
      score: scan.score
    },
    findings: scan.findings,
    iocs: {
      // `detail` is free-form JSON, so each list is read defensively.
      urls: scan.findings.flatMap((finding) => stringListFrom(finding.detail, "urls")),
      ips: scan.findings.flatMap((finding) => stringListFrom(finding.detail, "ips"))
    },
    mitigations: [
      "Open suspicious PDFs only in isolated environments.",
      "Block sender or hosting infrastructure if indicators are confirmed malicious."
    ]
  };
}
/**
 * Report endpoint: returns the report for the scan named by the `scanId` route
 * parameter as JSON.
 *
 * Responds with 404 when the scan does not exist; any other failure is rethrown.
 */
export async function GET(
  _request: Request,
  context: { params: Promise<{ scanId: string }> }
) {
  const { scanId } = await context.params;

  try {
    return Response.json(await getScanReport(scanId));
  } catch (error: unknown) {
    // Prisma reports a missing record from findUniqueOrThrow with error code
    // P2025; older releases threw an error named "NotFoundError" instead.
    const details =
      typeof error === "object" && error !== null
        ? (error as { code?: unknown; name?: unknown })
        : {};
    if (details.code === "P2025" || details.name === "NotFoundError") {
      return Response.json({ error: "Scan not found" }, { status: 404 });
    }
    throw error;
  }
}
- `GET /api/scans/:scanId/report` returns summary, findings, IOCs, and mitigation guidance.
- `delete_immediately` remains the default retention mode for all anonymous scans.

Run: uv run --project workers/jobs pytest workers/jobs/tests/test_retention_cleanup.py -q Expected: PASS

Run: pnpm --dir apps/web test Expected: PASS with report retrieval coverage

Run: rg -n "delete_immediately|temporary_cache" docs apps/web services/analyzer workers/jobs Expected: consistent retention terminology across code and docs

git add apps/web/app/api/scans apps/web/app/scans workers/jobs docs/api/contracts.md docs/security/privacy.md docs/runbooks/retention-cleanup.md
git commit -m "feat: add report retrieval and retention cleanup"

Task 8: Add Enrichment Flags, Deployment Hardening, And End-To-End Verification

Files:

import { expect, test } from "@playwright/test";

// End-to-end flow: upload a fixture PDF from the home page and wait for the
// result view to show a risk score.
test("anonymous user can upload a pdf and see a fast-scan verdict", async ({ page }) => {
  await page.goto("/");
  await page.setInputFiles('input[type="file"]', "tests/fixtures/sample.pdf");
  await page.getByRole("button", { name: "Scan PDF" }).click();

  // "Risk score" is the label rendered by the scan summary component.
  await expect(page.getByText(/risk score/i)).toBeVisible();
});

Run: pnpm --dir apps/web playwright test tests/e2e/scan-flow.spec.ts Expected: FAIL until upload wiring, routing, and test fixtures are complete

# Example environment; the values match the local Docker Compose services.
DATABASE_URL=postgresql://pdfy:pdfy@localhost:5432/pdfy
REDIS_URL=redis://localhost:6379/0
S3_ENDPOINT=http://localhost:9000
S3_BUCKET=pdfy
ANALYZER_BASE_URL=http://localhost:8000
# Left empty by default; enrichment only runs when a key is provided.
VT_API_KEY=
ENABLE_ENRICHMENT=true
def should_run_enrichment(enable_enrichment: bool, vt_api_key: str | None) -> bool:
    return enable_enrichment and bool(vt_api_key)
const enableEnrichment = process.env.ENABLE_ENRICHMENT === "true";
## Local Development

1. Start infrastructure with `docker compose up -d`.
2. Install frontend dependencies with `pnpm install`.
3. Sync Python dependencies with `uv sync --project services/analyzer` and `uv sync --project workers/jobs`.
4. Run the web app, analyzer, and worker in separate terminals.

Run: pnpm --dir apps/web test Expected: PASS

Run: pnpm --dir apps/web playwright test Expected: PASS

Run: uv run --project services/analyzer pytest -q Expected: PASS

Run: uv run --project workers/jobs pytest -q Expected: PASS

Run: docker compose config Expected: valid compose configuration

git add apps/web/.env.example services/analyzer/.env.example workers/jobs/.env.example apps/web/tests/e2e docs/operations/deployment.md README.md
git commit -m "chore: harden deployment and verify end-to-end flow"

Self-Review Notes