For agentic workers: REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
Goal: Build a production-leaning anonymous PDF malware scanning MVP with a Next.js website, a Python analyzer, fast synchronous scanning, queued advanced analysis, and immediate-delete-by-default retention.
Architecture: Use a pnpm monorepo with apps/web for the public product, services/analyzer for FastAPI-based PDF analysis, and workers/jobs for Redis-backed advanced scan and cleanup workers. Persist scan state in Postgres, store temporary files in S3-compatible storage, and keep schemas shared through a TypeScript contracts package.
Tech Stack: Next.js, TypeScript, Vitest, Prisma, Postgres, Redis, MinIO, FastAPI, Python 3.12, pypdf, pytest, RQ, pnpm, uv, Docker Compose
- package.json: root scripts for workspace dev, lint, test, and formatting
- pnpm-workspace.yaml: workspace membership for apps/* and packages/*
- turbo.json: task orchestration for web and shared TypeScript packages
- .gitignore: ignore Next build output, Python caches, env files, and local object storage
- docker-compose.yml: local Postgres, Redis, MinIO, analyzer, and worker dependencies
- apps/web/: Next.js app for upload, scan status, and report views
- apps/web/prisma/schema.prisma: Postgres schema for scans, findings, and retention metadata
- packages/contracts/: shared Zod schemas and TypeScript types for scan payloads
- services/analyzer/: FastAPI service for fast and advanced PDF analysis
- workers/jobs/: RQ worker entrypoints for advanced scans and cleanup tasks
- docs/: living product, architecture, API, security, and operations docs

Files:
- package.json
- pnpm-workspace.yaml
- turbo.json
- .gitignore
- docker-compose.yml
- apps/web/package.json
- apps/web/vitest.config.ts
- apps/web/app/api/health/route.ts
- apps/web/tests/api/health.test.ts
- services/analyzer/pyproject.toml
- services/analyzer/app/main.py
- Create: services/analyzer/tests/test_health.py
{
"name": "pdfy",
"private": true,
"packageManager": "pnpm@10.0.0",
"scripts": {
"dev:web": "pnpm --filter web dev",
"test:web": "pnpm --filter web test",
"lint:web": "pnpm --filter web lint",
"typecheck:web": "pnpm --filter web typecheck"
},
"devDependencies": {
"turbo": "^2.0.0"
}
}
packages:
- apps/*
- packages/*
{
"$schema": "https://turbo.build/schema.json",
"tasks": {
"build": {
"dependsOn": ["^build"],
"outputs": [".next/**", "dist/**"]
},
"lint": {},
"test": {},
"typecheck": {}
}
}
import { describe, expect, it } from "vitest";
import { GET } from "@/app/api/health/route";
// Smoke test: the web health route answers 200 with its fixed JSON payload.
describe("GET /api/health", () => {
  it("returns an ok status payload", async () => {
    const res = await GET();
    const payload = await res.json();

    expect(res.status).toBe(200);
    expect(payload).toEqual({ status: "ok", service: "web" });
  });
});
from fastapi.testclient import TestClient
from app.main import app
client = TestClient(app)
def test_health_endpoint_returns_ok() -> None:
    """The analyzer health route answers 200 with its static identity payload."""
    expected_body = {"status": "ok", "service": "analyzer"}

    reply = client.get("/health")

    assert reply.status_code == 200
    assert reply.json() == expected_body
Run: pnpm --dir apps/web test
Expected: FAIL with module or route resolution errors because the web health route is not implemented yet
Run: uv run --project services/analyzer pytest services/analyzer/tests/test_health.py -q
Expected: FAIL because the FastAPI app entrypoint does not exist yet
/** Health route for the web app: always responds with a fixed JSON status payload. */
export async function GET() {
  const payload = { status: "ok", service: "web" };
  return Response.json(payload);
}
from fastapi import FastAPI
app = FastAPI(title="PDFy Analyzer")
@app.get("/health")
def health() -> dict[str, str]:
    """Return a static ok status tagged with this service's name."""
    payload = {"status": "ok", "service": "analyzer"}
    return payload
[project]
name = "pdfy-analyzer"
version = "0.1.0"
requires-python = ">=3.12"
dependencies = [
"fastapi>=0.115.0",
"httpx>=0.28.0",
"pydantic>=2.9.0",
"pytest>=8.3.0",
"uvicorn>=0.34.0"
]
[tool.pytest.ini_options]
pythonpath = ["."]
testpaths = ["tests"]
services:
postgres:
image: postgres:16
environment:
POSTGRES_USER: pdfy
POSTGRES_PASSWORD: pdfy
POSTGRES_DB: pdfy
ports: ["5432:5432"]
redis:
image: redis:7
ports: ["6379:6379"]
minio:
image: minio/minio
command: server /data --console-address ":9001"
environment:
MINIO_ROOT_USER: minio
MINIO_ROOT_PASSWORD: minio123
ports: ["9000:9000", "9001:9001"]
Run: pnpm --dir apps/web test
Expected: PASS for health.test.ts
Run: uv run --project services/analyzer pytest services/analyzer/tests/test_health.py -q
Expected: PASS with 1 passed
git add package.json pnpm-workspace.yaml turbo.json .gitignore docker-compose.yml apps/web services/analyzer
git commit -m "chore: bootstrap workspace and service health checks"
Files:
- packages/contracts/package.json
- packages/contracts/tsconfig.json
- packages/contracts/src/scan.ts
- packages/contracts/src/index.ts
- apps/web/prisma/schema.prisma
- apps/web/src/lib/db.ts
- apps/web/src/lib/env.ts
- Create: apps/web/tests/contracts/scan-contracts.test.ts
import { describe, expect, it } from "vitest";
import { createScanInputSchema, retentionModeSchema } from "@pdfy/contracts";
// Contract tests for the shared scan schemas exported by @pdfy/contracts.
describe("scan contracts", () => {
  it("defaults upload requests to immediate deletion", () => {
    const { retentionMode } = createScanInputSchema.parse({});

    expect(retentionMode).toBe("delete_immediately");
  });

  it("allows only supported retention modes", () => {
    const accepted = retentionModeSchema.safeParse("temporary_cache");
    expect(accepted.success).toBe(true);

    const rejected = retentionModeSchema.safeParse("forever");
    expect(rejected.success).toBe(false);
  });
});
Run: pnpm --dir apps/web vitest run tests/contracts/scan-contracts.test.ts
Expected: FAIL with package import errors because @pdfy/contracts is not implemented yet
import { z } from "zod";
/** Retention choices a caller may request for an uploaded file. */
export const retentionModeSchema = z.enum([
  "delete_immediately",
  "temporary_cache"
]);

/**
 * Options accepted when creating a scan.
 * Omitted fields fall back to immediate deletion with enrichment enabled.
 */
export const createScanInputSchema = z.object({
  retentionMode: retentionModeSchema.default("delete_immediately"),
  enableEnrichment: z.boolean().default(true)
});

/** Lifecycle states a scan record can be in. */
export const scanStatusSchema = z.enum([
  "queued_fast_scan",
  "running_fast_scan",
  "completed_fast_scan",
  "queued_advanced_scan",
  "running_advanced_scan",
  "completed",
  "failed",
  "expired"
]);

/**
 * Response body of the analyzer's fast scan.
 * Field names are snake_case because they mirror the analyzer's
 * FastScanResult model (file_name, sha256, keyword_hits, iocs, summary).
 */
export const fastScanResultSchema = z.object({
  file_name: z.string(),
  sha256: z.string(),
  keyword_hits: z.array(z.string()),
  iocs: z.object({
    urls: z.array(z.string()),
    ips: z.array(z.string())
  }),
  summary: z.object({
    verdict: z.string(),
    score: z.number().int(),
    confidence: z.string()
  })
});

/** Parsed fast-scan response, as consumed by the web app's analyzer client. */
export type FastScanResult = z.infer<typeof fastScanResultSchema>;
export * from "./scan";
/// One uploaded PDF and the state of its analysis.
model Scan {
id String @id @default(cuid())
/// SHA-256 hex digest reported by the fast scan.
sha256 String
/// File name as supplied by the uploader.
fileName String
/// Object-storage key of the uploaded file; nullable, and the cleanup job sets it to null.
storageKey String?
/// Retention choice; the shared contracts allow "delete_immediately" and "temporary_cache".
retentionMode String
/// Lifecycle state; stored as free text here, values are enumerated by the shared scan status schema.
status String
/// Fast-scan verdict; nullable.
verdict String?
/// Fast-scan risk score; nullable.
score Int?
enableEnrichment Boolean @default(true)
/// Optional expiry timestamp consulted by the cleanup job.
expiresAt DateTime?
createdAt DateTime @default(now())
updatedAt DateTime @updatedAt
findings Finding[]
}
/// A single analysis finding attached to a scan.
/// Rows are removed automatically when the parent scan is deleted (cascade).
model Finding {
  id        String   @id @default(cuid())
  scanId    String
  category  String
  severity  String
  title     String
  /// Free-form JSON payload describing the finding.
  detail    Json
  createdAt DateTime @default(now())
  scan      Scan     @relation(fields: [scanId], references: [id], onDelete: Cascade)

  /// Findings are always loaded per scan, and PostgreSQL does not index
  /// foreign-key columns on its own.
  @@index([scanId])
}
import { PrismaClient } from "@prisma/client";
declare global {
  // Slot on the global object where a previously created client is kept.
  var prisma: PrismaClient | undefined;
}

/** Creates a Prisma client that logs only warnings and errors. */
function buildClient(): PrismaClient {
  return new PrismaClient({ log: ["warn", "error"] });
}

/**
 * Shared database client.
 * Reuses the client already stored on the global object when there is one;
 * otherwise a new client is created.
 */
export const db = global.prisma ?? buildClient();

// Outside production the client is stored on the global object for later reuse.
const isProduction = process.env.NODE_ENV === "production";
if (!isProduction) {
  global.prisma = db;
}
import { z } from "zod";
/** Builds the validator shared by every entry below: a string that parses as a URL. */
const urlSetting = () => z.string().url();

/** Environment settings the web app validates; each one must be a URL-formatted string. */
export const envSchema = z.object({
  DATABASE_URL: urlSetting(),
  ANALYZER_BASE_URL: urlSetting(),
  REDIS_URL: urlSetting(),
  S3_ENDPOINT: urlSetting()
});
Run: pnpm --dir apps/web vitest run tests/contracts/scan-contracts.test.ts
Expected: PASS
Run: pnpm --dir apps/web prisma validate
Expected: The schema at prisma/schema.prisma is valid
git add packages/contracts apps/web/prisma apps/web/src/lib/db.ts apps/web/src/lib/env.ts apps/web/tests/contracts
git commit -m "feat: add shared scan contracts and persistence schema"
Files:
- services/analyzer/app/models.py
- services/analyzer/app/services/hash_utils.py
- services/analyzer/app/services/fast_scan.py
- services/analyzer/app/services/pdf_extract.py
- services/analyzer/app/routes/analyze.py
- Create: services/analyzer/tests/test_fast_scan.py
from app.services.fast_scan import run_fast_scan
def test_fast_scan_detects_javascript_and_url() -> None:
    """A PDF with an open-action JavaScript object and a URL must not be rated clean."""
    sample = b"""%PDF-1.4
1 0 obj
<< /Type /Catalog /OpenAction 2 0 R >>
endobj
2 0 obj
<< /S /JavaScript /JS (app.alert('x'); var u='http://bad.test') >>
endobj
trailer << /Root 1 0 R >>
%%EOF
"""

    outcome = run_fast_scan(sample, "malicious.pdf")

    flagged_verdicts = {"suspicious", "malicious"}
    assert outcome.summary.verdict in flagged_verdicts
    assert "/JavaScript" in outcome.keyword_hits
    assert "http://bad.test" in outcome.iocs.urls
Run: uv run --project services/analyzer pytest services/analyzer/tests/test_fast_scan.py -q
Expected: FAIL because run_fast_scan and the scan models do not exist yet
from pydantic import BaseModel, Field
class IocSet(BaseModel):
    """Network indicators extracted from a document; both lists default to empty."""

    # Pydantic copies mutable defaults per instance, so a literal [] is not shared.
    urls: list[str] = []
    ips: list[str] = []


class ScanSummary(BaseModel):
    """Overall rating of a scan: verdict label, numeric score, and confidence label."""

    verdict: str
    score: int
    confidence: str


class FastScanResult(BaseModel):
    """Complete output of a fast scan for one file."""

    file_name: str
    sha256: str
    keyword_hits: list[str]
    iocs: IocSet
    summary: ScanSummary
import hashlib
def sha256_bytes(payload: bytes) -> str:
    """Return the lowercase hexadecimal SHA-256 digest of ``payload``."""
    hasher = hashlib.sha256()
    hasher.update(payload)
    return hasher.hexdigest()
import re
# Raw PDF name tokens whose presence in the file bytes counts as a risk signal.
SUSPICIOUS_KEYWORDS = [
    b"/JavaScript",
    b"/JS",
    b"/OpenAction",
    b"/EmbeddedFile",
]

# http(s) URLs embedded in the raw bytes. The match stops at whitespace, the
# PDF delimiters < > ( ), quote characters, control bytes and non-ASCII bytes.
# Stopping at quotes keeps a URL inside a JavaScript string literal
# ('http://bad.test') from capturing the closing quote, and stopping at
# non-ASCII bytes means every match is plain ASCII and always decodes.
URL_RE = re.compile(rb"https?://[^\s<>()'\"\x00-\x1f\x7f-\xff]+")

# Dotted-quad candidates. Octet ranges are not validated, so values such as
# 999.1.1.1 also match.
IP_RE = re.compile(rb"\b(?:\d{1,3}\.){3}\d{1,3}\b")
from app.models import FastScanResult, IocSet, ScanSummary
from app.services.hash_utils import sha256_bytes
from app.services.pdf_extract import IP_RE, SUSPICIOUS_KEYWORDS, URL_RE
def run_fast_scan(pdf_bytes: bytes, file_name: str) -> FastScanResult:
    """Scan raw PDF bytes for suspicious keywords, URLs and IP addresses.

    Scoring: 20 points per matched keyword, 15 per distinct URL and 10 per
    distinct IP, capped at 100. A score of 80 or more is "malicious", 40 or
    more is "suspicious", anything lower is "clean".

    Args:
        pdf_bytes: Full content of the uploaded file.
        file_name: Name to report back in the result.

    Returns:
        The fast-scan result with hash, keyword hits, indicators and summary.
    """

    def _as_text(raw: bytes) -> str:
        # PDF bodies are binary, so a regex match can contain bytes that are
        # not valid UTF-8; replace them instead of failing the whole scan.
        return raw.decode("utf-8", errors="replace")

    keyword_hits = [
        _as_text(keyword) for keyword in SUSPICIOUS_KEYWORDS if keyword in pdf_bytes
    ]
    # Sets de-duplicate repeated indicators; sorting keeps the output stable.
    urls = sorted({_as_text(match) for match in URL_RE.findall(pdf_bytes)})
    ips = sorted({_as_text(match) for match in IP_RE.findall(pdf_bytes)})

    score = min(100, len(keyword_hits) * 20 + len(urls) * 15 + len(ips) * 10)
    if score >= 80:
        verdict = "malicious"
    elif score >= 40:
        verdict = "suspicious"
    else:
        verdict = "clean"

    return FastScanResult(
        file_name=file_name,
        sha256=sha256_bytes(pdf_bytes),
        keyword_hits=keyword_hits,
        iocs=IocSet(urls=urls, ips=ips),
        summary=ScanSummary(verdict=verdict, score=score, confidence="medium"),
    )
from fastapi import APIRouter, File, UploadFile
from app.services.fast_scan import run_fast_scan
router = APIRouter(prefix="/analyze")
@router.post("/fast")
async def analyze_fast(file: UploadFile = File(...)):
    """Read the uploaded file fully and return the fast-scan result for it."""
    # Uploads without a client-supplied name are reported as "upload.pdf".
    reported_name = file.filename or "upload.pdf"
    contents = await file.read()
    return run_fast_scan(contents, reported_name)
app/main.py (extend the existing module; keep the `/health` route so the health test still passes):
from fastapi import FastAPI
from app.routes.analyze import router as analyze_router
app = FastAPI(title="PDFy Analyzer")
app.include_router(analyze_router)
Run: uv run --project services/analyzer pytest services/analyzer/tests/test_fast_scan.py -q
Expected: PASS
Run: uv run --project services/analyzer pytest -q
Expected: PASS for health and fast scan tests
git add services/analyzer/app services/analyzer/tests/test_fast_scan.py
git commit -m "feat: add fast PDF analysis engine"
Files:
- apps/web/src/lib/storage.ts
- apps/web/src/lib/analyzer-client.ts
- apps/web/src/lib/scans/create-scan.ts
- apps/web/src/lib/scans/get-scan.ts
- apps/web/app/api/scans/route.ts
- apps/web/app/api/scans/[scanId]/route.ts
- Create: apps/web/tests/api/create-scan.test.ts
import { describe, expect, it } from "vitest";
import { createScan } from "@/src/lib/scans/create-scan";
// Behavior tests for createScan: default retention and upload validation.
describe("createScan", () => {
  it("defaults to immediate deletion when no retention mode is provided", async () => {
    const pdfHeader = new Uint8Array([37, 80, 68, 70]); // ASCII "%PDF"

    const { retentionMode } = await createScan({
      fileName: "sample.pdf",
      contentType: "application/pdf",
      bytes: pdfHeader
    });

    expect(retentionMode).toBe("delete_immediately");
  });

  it("rejects non-pdf uploads", async () => {
    const attempt = createScan({
      fileName: "sample.exe",
      contentType: "application/octet-stream",
      bytes: new Uint8Array([1, 2, 3])
    });

    await expect(attempt).rejects.toThrow("Only PDF files are allowed");
  });
});
Run: pnpm --dir apps/web vitest run tests/api/create-scan.test.ts
Expected: FAIL because createScan is not implemented yet
/**
 * Sends a file to the analyzer's fast-scan endpoint and returns the parsed result.
 *
 * @param file - The upload to analyze; sent as the multipart field `file`.
 * @returns The analyzer response, validated against `fastScanResultSchema`.
 * @throws Error when `ANALYZER_BASE_URL` is not set, when the analyzer answers
 *   with a non-2xx status, or when the response body fails schema validation.
 */
export async function runFastScan(file: File): Promise<FastScanResult> {
  const baseUrl = process.env.ANALYZER_BASE_URL;
  if (!baseUrl) {
    // Without this guard the request would be sent to "undefined/analyze/fast".
    throw new Error("ANALYZER_BASE_URL is not configured");
  }

  const form = new FormData();
  form.set("file", file);

  // Drop trailing slashes so a base URL ending in "/" does not produce "//analyze".
  const endpoint = `${baseUrl.replace(/\/+$/, "")}/analyze/fast`;
  const response = await fetch(endpoint, {
    method: "POST",
    body: form
  });
  if (!response.ok) {
    throw new Error("Fast scan request failed");
  }

  return fastScanResultSchema.parse(await response.json());
}
/** Location of an object in temporary storage. */
export type StoredObject = {
  storageKey: string;
};

/**
 * Reduces a client-supplied file name to a single safe key segment:
 * keeps only the part after the last path separator, replaces every character
 * outside `A-Z a-z 0-9 . _ -` with `_`, and strips leading dots.
 * Falls back to "upload.pdf" when nothing usable remains.
 */
function toSafeKeySegment(fileName: string): string {
  const baseName = fileName.split(/[\\/]/).pop() ?? "";
  const cleaned = baseName.replace(/[^A-Za-z0-9._-]/g, "_").replace(/^\.+/, "");
  return cleaned.length > 0 ? cleaned : "upload.pdf";
}

/**
 * Derives the temporary storage key for an upload.
 *
 * The key is `uploads/<random UUID>-<sanitized file name>`. The file name comes
 * from the uploader, so it is sanitized before being embedded in the key.
 * As written, this function only computes the key; it does not write `bytes` anywhere.
 *
 * @param input.bytes - Content of the upload (currently unused).
 * @param input.fileName - Client-supplied file name.
 * @returns The storage key assigned to the upload.
 */
export async function putTemporaryObject(input: {
  bytes: Uint8Array;
  fileName: string;
}): Promise<StoredObject> {
  const safeName = toSafeKeySegment(input.fileName);
  return { storageKey: `uploads/${crypto.randomUUID()}-${safeName}` };
}
Implement createScan with validation, persistence, and fast-scan orchestration:
import { createScanInputSchema } from "@pdfy/contracts";
/** First four bytes of every PDF file: ASCII "%PDF". */
const PDF_MAGIC_BYTES = [0x25, 0x50, 0x44, 0x46];

/** Returns true when `bytes` starts with the PDF magic number. */
function startsWithPdfMagic(bytes: Uint8Array): boolean {
  return PDF_MAGIC_BYTES.every((expected, index) => bytes[index] === expected);
}

/**
 * Validates an upload, runs the fast scan, and persists the scan record.
 *
 * Steps, in order: reject anything that is not a PDF, apply contract defaults
 * to the options, run the fast scan, derive a storage key, then create the row.
 *
 * @param input.fileName - Client-supplied file name.
 * @param input.contentType - Client-declared MIME type.
 * @param input.bytes - Full content of the upload.
 * @param input.retentionMode - Optional retention choice; defaults via the contract schema.
 * @param input.enableEnrichment - Optional enrichment flag; defaults via the contract schema.
 * @returns The created scan record.
 * @throws Error "Only PDF files are allowed" when the declared type is not
 *   `application/pdf` or the content does not start with "%PDF".
 */
export async function createScan(input: {
  fileName: string;
  contentType: string;
  bytes: Uint8Array;
  retentionMode?: "delete_immediately" | "temporary_cache";
  enableEnrichment?: boolean;
}) {
  // The content type is declared by the client and can be spoofed, so the
  // leading bytes are checked as well.
  if (input.contentType !== "application/pdf" || !startsWithPdfMagic(input.bytes)) {
    throw new Error("Only PDF files are allowed");
  }

  const parsed = createScanInputSchema.parse({
    retentionMode: input.retentionMode,
    enableEnrichment: input.enableEnrichment
  });

  const file = new File([input.bytes], input.fileName, { type: input.contentType });
  const fastScan = await runFastScan(file);
  const stored = await putTemporaryObject({ bytes: input.bytes, fileName: input.fileName });

  return db.scan.create({
    data: {
      fileName: input.fileName,
      sha256: fastScan.sha256,
      storageKey: stored.storageKey,
      retentionMode: parsed.retentionMode,
      status: "completed_fast_scan",
      verdict: fastScan.summary.verdict,
      score: fastScan.summary.score,
      enableEnrichment: parsed.enableEnrichment
    }
  });
}
/**
 * Upload endpoint: accepts a multipart form with a `file` field and an optional
 * `retention` field, creates a scan, and returns where its result can be viewed.
 *
 * Responses:
 * - 200 with `{ scanId, status, retentionMode, resultUrl }` on success.
 * - 400 with `{ error }` when the body is not form data, the file is missing,
 *   the retention value is unsupported, or the upload is not a PDF.
 * Any other failure is rethrown.
 */
export async function POST(request: Request) {
  let form: FormData;
  try {
    form = await request.formData();
  } catch {
    // The body is not parseable form data, so there is no file to read.
    return Response.json({ error: "Missing file" }, { status: 400 });
  }

  const file = form.get("file");
  if (!(file instanceof File)) {
    return Response.json({ error: "Missing file" }, { status: 400 });
  }

  // "retention" is the name of the radio group rendered by the upload form.
  const requestedRetention = form.get("retention");
  let retentionMode: "delete_immediately" | "temporary_cache" | undefined;
  if (requestedRetention === "delete_immediately" || requestedRetention === "temporary_cache") {
    retentionMode = requestedRetention;
  } else if (requestedRetention !== null) {
    return Response.json({ error: "Unsupported retention mode" }, { status: 400 });
  }

  try {
    const scan = await createScan({
      fileName: file.name,
      contentType: file.type,
      bytes: new Uint8Array(await file.arrayBuffer()),
      retentionMode
    });
    return Response.json({
      scanId: scan.id,
      status: scan.status,
      retentionMode: scan.retentionMode,
      resultUrl: `/scans/${scan.id}`
    });
  } catch (error: unknown) {
    // createScan signals a rejected upload with this exact message; that is a
    // client error, not a server failure.
    if (error instanceof Error && error.message === "Only PDF files are allowed") {
      return Response.json({ error: error.message }, { status: 400 });
    }
    throw error;
  }
}
Run: pnpm --dir apps/web vitest run tests/api/create-scan.test.ts
Expected: PASS
Run: pnpm --dir apps/web prisma migrate dev --name init_scans
Expected: migration created successfully
git add apps/web/app/api/scans apps/web/src/lib/storage.ts apps/web/src/lib/analyzer-client.ts apps/web/src/lib/scans apps/web/tests/api/create-scan.test.ts apps/web/prisma
git commit -m "feat: add upload creation and fast scan persistence"
Files:
- apps/web/app/page.tsx
- apps/web/app/scans/[scanId]/page.tsx
- apps/web/src/components/upload-form.tsx
- apps/web/src/components/scan-summary.tsx
- apps/web/src/components/finding-list.tsx
- Create: apps/web/tests/ui/upload-form.test.tsx
import { render, screen } from "@testing-library/react";
import { describe, expect, it } from "vitest";
import { UploadForm } from "@/src/components/upload-form";
// Rendering test: the privacy-preserving retention option must be preselected.
describe("UploadForm", () => {
  it("defaults the retention choice to immediate deletion", () => {
    render(<UploadForm />);

    const deleteOption = screen.getByLabelText(/delete immediately after analysis/i);
    expect(deleteOption).toBeChecked();
  });
});
Run: pnpm --dir apps/web vitest run tests/ui/upload-form.test.tsx
Expected: FAIL because the upload components are not implemented yet
"use client";
import { useState } from "react";
/**
 * Upload form for the scanner.
 *
 * Lets the user pick a retention mode (immediate deletion is preselected) and
 * a PDF, then posts both to `/api/scans` as multipart form data. On success the
 * browser navigates to the `resultUrl` returned by the API; on failure an
 * inline error is shown and the form can be resubmitted.
 */
export function UploadForm() {
  const [retentionMode, setRetentionMode] = useState("delete_immediately");
  const [errorMessage, setErrorMessage] = useState<string | null>(null);
  const [isSubmitting, setIsSubmitting] = useState(false);

  return (
    <form
      className="scanner-form"
      action="/api/scans"
      method="post"
      encType="multipart/form-data"
      onSubmit={async (event) => {
        // Without a handler the browser would submit with its default
        // behavior and the selected file would never reach the API.
        event.preventDefault();
        // Capture the element now; the event must not be read after an await.
        const formElement = event.currentTarget;
        setErrorMessage(null);
        setIsSubmitting(true);
        try {
          const response = await fetch("/api/scans", {
            method: "POST",
            body: new FormData(formElement)
          });
          const body: unknown = await response.json();
          const resultUrl =
            typeof body === "object" &&
            body !== null &&
            "resultUrl" in body &&
            typeof body.resultUrl === "string"
              ? body.resultUrl
              : null;
          if (!response.ok || resultUrl === null) {
            throw new Error("Scan request failed");
          }
          window.location.assign(resultUrl);
        } catch {
          setErrorMessage("The scan could not be started. Please try again.");
          setIsSubmitting(false);
        }
      }}
    >
      <label>
        <input
          type="radio"
          name="retention"
          value="delete_immediately"
          checked={retentionMode === "delete_immediately"}
          onChange={() => setRetentionMode("delete_immediately")}
        />
        Delete immediately after analysis
      </label>
      <label>
        <input
          type="radio"
          name="retention"
          value="temporary_cache"
          checked={retentionMode === "temporary_cache"}
          onChange={() => setRetentionMode("temporary_cache")}
        />
        Keep a temporary cached result
      </label>
      <input type="file" name="file" accept="application/pdf" required />
      {errorMessage !== null && <p role="alert">{errorMessage}</p>}
      <button type="submit" disabled={isSubmitting}>Scan PDF</button>
    </form>
  );
}
import { UploadForm } from "@/src/components/upload-form";
/** Landing page: product name, a one-line description, and the upload form. */
export default function HomePage() {
  const tagline =
    "Analyze uploaded PDFs for suspicious structure, JavaScript, and network indicators.";

  return (
    <main>
      <h1>PDFy</h1>
      <p>{tagline}</p>
      <UploadForm />
    </main>
  );
}
/**
 * Compact summary of a scan: the verdict in upper case as the heading,
 * followed by the risk score and the retention mode.
 */
export function ScanSummary(props: {
  verdict: string;
  score: number;
  retentionMode: string;
}) {
  const { verdict, score, retentionMode } = props;
  const heading = verdict.toUpperCase();

  return (
    <section>
      <h2>{heading}</h2>
      <p>Risk score: {score}</p>
      <p>Retention: {retentionMode}</p>
    </section>
  );
}
import { getScan } from "@/src/lib/scans/get-scan";
import { ScanSummary } from "@/src/components/scan-summary";
/**
 * Result page for a single scan.
 *
 * Loads the scan named by the `scanId` route parameter and renders its summary.
 * `verdict` and `score` are nullable columns, so a fallback message is rendered
 * when either is absent instead of asserting they exist.
 */
export default async function ScanPage({
  params
}: {
  params: Promise<{ scanId: string }>;
}) {
  const { scanId } = await params;
  const scan = await getScan(scanId);

  // NOTE(review): getScan's contract is not visible here; the falsy check also
  // covers the case where it returns null for an unknown id. Confirm.
  if (!scan || scan.verdict == null || scan.score == null) {
    return <p>Scan results are not available.</p>;
  }

  return (
    <ScanSummary
      verdict={scan.verdict}
      score={scan.score}
      retentionMode={scan.retentionMode}
    />
  );
}
Run: pnpm --dir apps/web vitest run tests/ui/upload-form.test.tsx
Expected: PASS
Run: pnpm --dir apps/web test
Expected: PASS for contract, API, UI, and health tests
git add apps/web/app/page.tsx apps/web/app/scans apps/web/src/components apps/web/tests/ui/upload-form.test.tsx
git commit -m "feat: add scanner homepage and result pages"
Files:
- services/analyzer/app/services/advanced_scan.py
- services/analyzer/app/routes/jobs.py
- services/analyzer/tests/test_advanced_scan.py
- workers/jobs/pyproject.toml
- workers/jobs/worker.py
- workers/jobs/tests/test_cleanup_job.py
- apps/web/src/lib/scans/create-scan.ts
- Modify: apps/web/prisma/schema.prisma
from app.services.advanced_scan import summarize_advanced_findings
def test_advanced_scan_flags_embedded_file_keyword() -> None:
    """Bytes containing /EmbeddedFile must yield the "embedded_file" category."""
    pdf_bytes = b"%PDF-1.4 /EmbeddedFile /OpenAction /JS"
    result = summarize_advanced_findings(pdf_bytes)
    # summarize_advanced_findings returns a plain dict, so the categories are
    # read by key; attribute access would raise AttributeError.
    assert "embedded_file" in result["categories"]
import { describe, expect, it, vi } from "vitest";
import { createScan } from "@/src/lib/scans/create-scan";
// After the fast scan succeeds, the scan must be left waiting for advanced analysis.
describe("createScan advanced queueing", () => {
  it("marks advanced analysis as queued after fast scan succeeds", async () => {
    const pdfHeader = new Uint8Array([37, 80, 68, 70]); // ASCII "%PDF"

    const { status } = await createScan({
      fileName: "sample.pdf",
      contentType: "application/pdf",
      bytes: pdfHeader
    });

    expect(status).toBe("queued_advanced_scan");
  });
});
Run: uv run --project services/analyzer pytest services/analyzer/tests/test_advanced_scan.py -q
Expected: FAIL because advanced scan helpers do not exist yet
Run: pnpm --dir apps/web vitest run tests/api/create-scan.test.ts
Expected: FAIL because the scan status still stops at completed_fast_scan
/// One uploaded PDF and the state of its fast and advanced analysis.
model Scan {
id String @id @default(cuid())
sha256 String
fileName String
/// Object-storage key of the uploaded file; nullable, and the cleanup job sets it to null.
storageKey String?
retentionMode String
/// Overall lifecycle state of the scan.
status String
verdict String?
score Int?
/// State of the advanced analysis; new rows start as "queued" and the worker writes "completed".
advancedStatus String @default("queued")
/// Set by the advanced-scan worker when it finishes; null until then.
reportGeneratedAt DateTime?
enableEnrichment Boolean @default(true)
expiresAt DateTime?
createdAt DateTime @default(now())
updatedAt DateTime @updatedAt
findings Finding[]
}
def summarize_advanced_findings(pdf_bytes: bytes) -> dict[str, list[str]]:
    """Map raw PDF bytes to the advanced finding categories they trigger.

    A category is reported when every marker listed for it occurs in the bytes.
    The result is a dict with a single "categories" key, in rule order.
    """
    rules = (
        ("embedded_file", (b"/EmbeddedFile",)),
        ("open_action_javascript", (b"/OpenAction", b"/JS")),
    )
    matched = [
        label
        for label, markers in rules
        if all(marker in pdf_bytes for marker in markers)
    ]
    return {"categories": matched}
from redis import Redis
from rq import Queue
import os

# Connection target comes from REDIS_URL (the name used in the project's env
# example); the previous hard-coded local address remains the default.
redis = Redis.from_url(os.environ.get("REDIS_URL", "redis://localhost:6379/0"))
queue = Queue("advanced-scans", connection=redis)


def enqueue_advanced_scan(scan_id: str, storage_key: str) -> str:
    """Queue an advanced scan for a stored upload and return the job id.

    Args:
        scan_id: Identifier of the scan row the worker should update.
        storage_key: Object-storage key of the uploaded file.

    Returns:
        The id of the enqueued RQ job.
    """
    # The job is referenced by dotted path, so it is resolved when the job runs.
    job = queue.enqueue("workers.jobs.worker.run_advanced_scan_job", scan_id, storage_key)
    return job.id
from minio import Minio
from psycopg import connect
def run_advanced_scan_job(scan_id: str, storage_key: str) -> None:
    """Run the advanced scan for one stored upload and record the outcome.

    Downloads the object, derives finding categories from its bytes, inserts
    one "Finding" row per category, and marks the scan as completed.

    Configuration is read from S3_ENDPOINT, S3_BUCKET, S3_ACCESS_KEY,
    S3_SECRET_KEY and DATABASE_URL. Each falls back to the local development
    value that was previously hard-coded.
    NOTE(review): S3_ACCESS_KEY / S3_SECRET_KEY are new variable names; align
    them with the project's env files.

    Args:
        scan_id: Identifier of the "Scan" row to update.
        storage_key: Object-storage key of the uploaded file.
    """
    # Local imports keep this block self-contained.
    import os
    from urllib.parse import urlsplit

    # S3_ENDPOINT is a full URL, while the MinIO client takes host:port plus a
    # TLS flag, so split it.
    endpoint = urlsplit(os.environ.get("S3_ENDPOINT", "http://localhost:9000"))
    storage = Minio(
        endpoint.netloc,
        access_key=os.environ.get("S3_ACCESS_KEY", "minio"),
        secret_key=os.environ.get("S3_SECRET_KEY", "minio123"),
        secure=endpoint.scheme == "https",
    )

    response = storage.get_object(os.environ.get("S3_BUCKET", "pdfy"), storage_key)
    try:
        payload = response.read()
    finally:
        # Always hand the connection back, even when the read fails.
        response.close()
        response.release_conn()

    # NOTE(review): summarize_advanced_findings is not imported in the visible
    # part of this module; confirm how the worker is meant to reach it.
    findings = summarize_advanced_findings(payload)

    database_url = os.environ.get("DATABASE_URL", "postgresql://pdfy:pdfy@localhost:5432/pdfy")
    with connect(database_url) as conn:
        with conn.cursor() as cur:
            for category in findings["categories"]:
                cur.execute(
                    """
                    insert into "Finding" ("id", "scanId", "category", "severity", "title", "detail", "createdAt")
                    values (gen_random_uuid()::text, %s, %s, %s, %s, %s::jsonb, now())
                    """,
                    (
                        scan_id,
                        category,
                        "medium",
                        f"Advanced finding: {category}",
                        '{"source":"advanced_scan"}',
                    ),
                )
            cur.execute(
                """
                update "Scan"
                set "advancedStatus" = %s,
                "status" = %s,
                "reportGeneratedAt" = now(),
                "updatedAt" = now()
                where "id" = %s
                """,
                ("completed", "completed", scan_id),
            )
        conn.commit()
[project]
name = "pdfy-jobs"
version = "0.1.0"
requires-python = ">=3.12"
dependencies = [
"minio>=7.2.0",
"psycopg[binary]>=3.2.0",
"pytest>=8.3.0",
"redis>=5.0.0",
"rq>=1.16.0"
]
[tool.pytest.ini_options]
pythonpath = ["."]
testpaths = ["tests"]
Update createScan so fast scans queue advanced work:
const scan = await db.scan.create({
data: {
fileName: input.fileName,
sha256: fastScan.sha256,
storageKey: stored.storageKey,
retentionMode: parsed.retentionMode,
status: "queued_advanced_scan",
advancedStatus: "queued",
verdict: fastScan.summary.verdict,
score: fastScan.summary.score,
enableEnrichment: parsed.enableEnrichment
}
});
await scheduleAdvancedScan({
scanId: scan.id,
storageKey: stored.storageKey
});
return scan;
Run: pnpm --dir apps/web prisma migrate dev --name add_advanced_status
Expected: migration created successfully
Run: uv run --project services/analyzer pytest -q
Expected: PASS for health, fast scan, and advanced scan tests
Run: pnpm --dir apps/web test
Expected: PASS with scan creation asserting queued_advanced_scan
git add services/analyzer workers/jobs apps/web/src/lib/scans/create-scan.ts apps/web/prisma/schema.prisma apps/web/tests/api/create-scan.test.ts
git commit -m "feat: add advanced scan scheduling and worker processing"
Files:
- apps/web/app/api/scans/[scanId]/report/route.ts
- apps/web/app/scans/[scanId]/report/page.tsx
- apps/web/src/lib/scans/get-scan-report.ts
- workers/jobs/cleanup.py
- workers/jobs/tests/test_retention_cleanup.py
- apps/web/tests/report/get-scan-report.test.ts
- docs/api/contracts.md
- docs/security/privacy.md
- Modify: docs/runbooks/retention-cleanup.md
from workers.jobs.cleanup import should_delete_scan_file
def test_delete_immediately_scans_are_removed_after_processing() -> None:
    """Scans in delete_immediately mode are eligible for deletion without an expiry."""
    decision = should_delete_scan_file("delete_immediately", expires_at=None)

    assert decision is True
import { describe, expect, it } from "vitest";
import { getScanReport } from "@/src/lib/scans/get-scan-report";
// Shape test: a report must expose both the summary and the IOC sections.
describe("getScanReport", () => {
  it("returns a structured report with summary and IOC sections", async () => {
    const scanReport = await getScanReport("scan_123");

    for (const section of ["summary", "iocs"]) {
      expect(scanReport).toHaveProperty(section);
    }
  });
});
Run: uv run --project workers/jobs pytest workers/jobs/tests/test_retention_cleanup.py -q
Expected: FAIL because cleanup helpers do not exist yet
Run: pnpm --dir apps/web vitest run tests/report/get-scan-report.test.ts
Expected: FAIL because getScanReport does not exist yet
from datetime import datetime, timezone
from minio import Minio
from psycopg import connect
def should_delete_scan_file(retention_mode: str, expires_at: datetime | None) -> bool:
if retention_mode == "delete_immediately":
return True
if expires_at is None:
return False
return expires_at <= datetime.now(timezone.utc)
def run_cleanup_job() -> None:
    """Delete stored files that are due for removal and mark their scans expired.

    Every scan that is not already "expired" is examined. Scans still waiting
    for, or undergoing, analysis are skipped so the worker does not lose the
    file it has yet to read. For the rest, should_delete_scan_file decides;
    when it says yes the object is removed (if a key is present), the storage
    key is cleared, and the status becomes "expired".

    Configuration is read from S3_ENDPOINT, S3_BUCKET, S3_ACCESS_KEY,
    S3_SECRET_KEY and DATABASE_URL. Each falls back to the local development
    value that was previously hard-coded.
    NOTE(review): S3_ACCESS_KEY / S3_SECRET_KEY are new variable names; align
    them with the project's env files.
    """
    # Local imports keep this block self-contained.
    import os
    from urllib.parse import urlsplit

    # Statuses in which analysis still needs the stored file.
    in_flight_statuses = {
        "queued_fast_scan",
        "running_fast_scan",
        "queued_advanced_scan",
        "running_advanced_scan",
    }

    database_url = os.environ.get("DATABASE_URL", "postgresql://pdfy:pdfy@localhost:5432/pdfy")
    bucket = os.environ.get("S3_BUCKET", "pdfy")
    # S3_ENDPOINT is a full URL, while the MinIO client takes host:port plus a
    # TLS flag, so split it.
    endpoint = urlsplit(os.environ.get("S3_ENDPOINT", "http://localhost:9000"))

    with connect(database_url) as conn:
        with conn.cursor() as cur:
            cur.execute(
                """
                select "id", "storageKey", "retentionMode", "expiresAt", "status"
                from "Scan"
                where "status" <> 'expired'
                """
            )
            rows = cur.fetchall()

        storage = Minio(
            endpoint.netloc,
            access_key=os.environ.get("S3_ACCESS_KEY", "minio"),
            secret_key=os.environ.get("S3_SECRET_KEY", "minio123"),
            secure=endpoint.scheme == "https",
        )

        for scan_id, storage_key, retention_mode, expires_at, status in rows:
            if status in in_flight_statuses:
                continue
            if not should_delete_scan_file(retention_mode, expires_at):
                continue
            if storage_key:
                storage.remove_object(bucket, storage_key)
            with conn.cursor() as cur:
                cur.execute(
                    """
                    update "Scan"
                    set "storageKey" = null,
                    "status" = %s,
                    "updatedAt" = now()
                    where "id" = %s
                    """,
                    ("expired", scan_id),
                )
        conn.commit()
/**
 * Reads a list of strings stored under `key` in a finding's JSON detail.
 * Returns an empty list when the detail is not a plain object, the key is
 * absent, or the value is not an array; non-string entries are dropped.
 */
function readStringList(detail: unknown, key: "urls" | "ips"): string[] {
  if (typeof detail !== "object" || detail === null || Array.isArray(detail)) {
    return [];
  }
  // Safe after the runtime check above: detail is a non-null, non-array object.
  const value = (detail as Record<string, unknown>)[key];
  return Array.isArray(value)
    ? value.filter((item): item is string => typeof item === "string")
    : [];
}

/**
 * Builds the report for one scan.
 *
 * @param scanId - Identifier of the scan to report on.
 * @returns The scan id, verdict/score summary, raw findings, URL and IP
 *   indicators collected from the findings' detail payloads, and static
 *   mitigation guidance.
 * @throws When no scan with `scanId` exists (raised by `findUniqueOrThrow`).
 */
export async function getScanReport(scanId: string) {
  const scan = await db.scan.findUniqueOrThrow({
    where: { id: scanId },
    include: { findings: true }
  });

  return {
    scanId: scan.id,
    summary: {
      verdict: scan.verdict,
      score: scan.score
    },
    findings: scan.findings,
    iocs: {
      // detail is an arbitrary JSON value, so it is narrowed before use.
      urls: scan.findings.flatMap((finding) => readStringList(finding.detail, "urls")),
      ips: scan.findings.flatMap((finding) => readStringList(finding.detail, "ips"))
    },
    mitigations: [
      "Open suspicious PDFs only in isolated environments.",
      "Block sender or hosting infrastructure if indicators are confirmed malicious."
    ]
  };
}
/**
 * Returns true when `error` looks like Prisma's "record not found" failure:
 * either error code "P2025" or an error named "NotFoundError".
 */
function isRecordNotFound(error: unknown): boolean {
  if (typeof error !== "object" || error === null) {
    return false;
  }
  const hasNotFoundCode = "code" in error && error.code === "P2025";
  const hasNotFoundName = "name" in error && error.name === "NotFoundError";
  return hasNotFoundCode || hasNotFoundName;
}

/**
 * Report endpoint: responds with the JSON report for the scan named by the
 * `scanId` route parameter.
 *
 * Responds 404 with `{ error: "Scan not found" }` when the scan does not
 * exist; any other failure is rethrown.
 */
export async function GET(
  _request: Request,
  context: { params: Promise<{ scanId: string }> }
) {
  const { scanId } = await context.params;
  try {
    return Response.json(await getScanReport(scanId));
  } catch (error: unknown) {
    // The report lookup throws for unknown ids; that is a client error.
    if (isRecordNotFound(error)) {
      return Response.json({ error: "Scan not found" }, { status: 404 });
    }
    throw error;
  }
}
- `GET /api/scans/:scanId/report` returns summary, findings, IOCs, and mitigation guidance.
- `delete_immediately` remains the default retention mode for all anonymous scans.
Run: uv run --project workers/jobs pytest workers/jobs/tests/test_retention_cleanup.py -q
Expected: PASS
Run: pnpm --dir apps/web test
Expected: PASS with report retrieval coverage
Run: rg -n "delete_immediately|temporary_cache" docs apps/web services/analyzer workers/jobs
Expected: consistent retention terminology across code and docs
git add apps/web/app/api/scans apps/web/app/scans workers/jobs docs/api/contracts.md docs/security/privacy.md docs/runbooks/retention-cleanup.md
git commit -m "feat: add report retrieval and retention cleanup"
Files:
- apps/web/.env.example
- services/analyzer/.env.example
- workers/jobs/.env.example
- apps/web/tests/e2e/scan-flow.spec.ts
- docs/operations/deployment.md
- Modify: README.md
import { expect, test } from "@playwright/test";
// End-to-end flow: upload a fixture PDF from the homepage and wait for the risk score.
test("anonymous user can upload a pdf and see a fast-scan verdict", async ({ page }) => {
  await page.goto("/");

  await page.setInputFiles('input[type="file"]', "tests/fixtures/sample.pdf");
  const submitButton = page.getByRole("button", { name: "Scan PDF" });
  await submitButton.click();

  const riskScore = page.getByText(/risk score/i);
  await expect(riskScore).toBeVisible();
});
Run: pnpm --dir apps/web playwright test tests/e2e/scan-flow.spec.ts
Expected: FAIL until upload wiring, routing, and test fixtures are complete
DATABASE_URL=postgresql://pdfy:pdfy@localhost:5432/pdfy
REDIS_URL=redis://localhost:6379/0
S3_ENDPOINT=http://localhost:9000
S3_BUCKET=pdfy
ANALYZER_BASE_URL=http://localhost:8000
VT_API_KEY=
ENABLE_ENRICHMENT=true
def should_run_enrichment(enable_enrichment: bool, vt_api_key: str | None) -> bool:
return enable_enrichment and bool(vt_api_key)
const enableEnrichment = process.env.ENABLE_ENRICHMENT === "true";
## Local Development
1. Start infrastructure with `docker compose up -d`.
2. Install frontend dependencies with `pnpm install`.
3. Sync Python dependencies with `uv sync --project services/analyzer` and `uv sync --project workers/jobs`.
4. Run the web app, analyzer, and worker in separate terminals.
Run: pnpm --dir apps/web test
Expected: PASS
Run: pnpm --dir apps/web playwright test
Expected: PASS
Run: uv run --project services/analyzer pytest -q
Expected: PASS
Run: uv run --project workers/jobs pytest -q
Expected: PASS
Run: docker compose config
Expected: valid compose configuration
git add apps/web/.env.example services/analyzer/.env.example workers/jobs/.env.example apps/web/tests/e2e docs/operations/deployment.md README.md
git commit -m "chore: harden deployment and verify end-to-end flow"
Keep `retentionMode` values exactly `delete_immediately` and `temporary_cache`, as defined in `@pdfy/contracts`.