feat: add Cloudinary audit functionality

- Add comprehensive audit system to identify orphaned Cloudinary files - Create audit script with dry-run and execute modes - Add formatBytes utility for human-readable file sizes - Implement comparison logic between Cloudinary and database references - Add API endpoint for programmatic access to audit functionality - Include documentation for Cloudinary management 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
2025-06-16 16:56:05 +01:00 · 2025-06-16 16:56:05 +01:00 · 1f04a96dad
commit 1f04a96dad
parent 5bd4c40342
5 changed files with 609 additions and 0 deletions
--- a/docs/cloudinary-management.md
+++ b/docs/cloudinary-management.md
@ -0,0 +1,140 @@
+# Cloudinary Management Guide
+
+This guide explains how to manage and audit your Cloudinary files to prevent orphaned files that aren't referenced in your database.
+
+## Overview
+
+The Cloudinary management system provides:
+
+- Audit functionality to identify orphaned files
+- Cleanup scripts with dry-run and execute modes
+- API endpoints for admin UI integration
+- Detailed reporting of storage usage
+
+## Command Line Usage
+
+### Running an Audit (Dry Run)
+
+To see what files would be deleted without actually deleting them:
+
+```bash
+npm run tsx scripts/cloudinary-cleanup.ts
+```
+
+This will:
+
+- List all files in your Cloudinary account
+- Check all database references
+- Identify orphaned files (in Cloudinary but not in database)
+- Show total storage being wasted
+- Identify missing files (in database but not in Cloudinary)
+
+### Running with Verbose Output
+
+To see detailed information about each orphaned file:
+
+```bash
+npm run tsx scripts/cloudinary-cleanup.ts -- --verbose
+```
+
+### Executing Cleanup
+
+To actually delete orphaned files:
+
+```bash
+npm run tsx scripts/cloudinary-cleanup.ts -- --execute
+```
+
+This will prompt for confirmation before deleting files.
+
+## API Usage
+
+### Get Audit Report
+
+```bash
+GET /api/admin/cloudinary-audit
+```
+
+Returns:
+
+```json
+{
+  "summary": {
+    "totalCloudinaryFiles": 1234,
+    "totalDatabaseReferences": 1200,
+    "orphanedFilesCount": 34,
+    "orphanedFilesSize": 12582912,
+    "orphanedFilesSizeFormatted": "12 MB",
+    "missingReferencesCount": 2
+  },
+  "orphanedFiles": [...],
+  "missingReferences": [...]
+}
+```
+
+### Delete Orphaned Files
+
+```bash
+DELETE /api/admin/cloudinary-audit
+Content-Type: application/json
+
+{
+  "publicIds": ["folder/file1", "folder/file2"],
+  "dryRun": false
+}
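+```
+
+`dryRun` defaults to `true` when omitted; pass `false` to actually delete the files. A single request accepts at most 100 public IDs.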
+
+## How It Works
+
+### 1. Cloudinary Scanning
+
+- Uses Cloudinary API to fetch all uploaded resources
+- Handles pagination for large collections
+- Extracts public IDs for comparison
+
+### 2. Database Scanning
+
+Checks for Cloudinary URLs in:
+
+- `Media` table: `url` and `thumbnailUrl` fields
+- `Project` table: `featuredImage`, `logoUrl`, and `gallery` JSON
+- `Post` table: `featuredImage` and `attachments` JSON
+- `Album` references through `AlbumMedia` relation
+
+### 3. Comparison Logic
+
+- Orphaned files: Exist in Cloudinary but not referenced in database
+- Missing files: Referenced in database but don't exist in Cloudinary
+- Thumbnails with `_thumbnail_` pattern are automatically excluded
+
+### 4. Cleanup Process
+
+- Deletes files one at a time; the API endpoint accepts at most 100 public IDs per request
+- Provides detailed success/failure reporting
+- Includes safety checks and confirmation prompts
+
+## Best Practices
+
+1. **Regular Audits**: Run audits monthly to identify issues early
+2. **Dry Run First**: Always run in dry-run mode before executing deletions
+3. **Backup References**: Consider exporting audit results before cleanup
+4. **Monitor Failed Uploads**: Track missing references to identify upload issues
+
+## Troubleshooting
+
+### Common Issues
+
+1. **Authentication Errors**
+
+   - Ensure `CLOUDINARY_CLOUD_NAME`, `CLOUDINARY_API_KEY`, and `CLOUDINARY_API_SECRET` are set
+   - Check that your API credentials have appropriate permissions
+
+2. **Rate Limiting**
+
+   - The script handles pagination automatically
+   - For large deletions, the API limits to 100 files per request
+
+3. **Missing References**
+   - These indicate database entries pointing to non-existent Cloudinary files
+   - May be caused by failed uploads or manual Cloudinary deletions
+   - Consider implementing database cleanup for these entries
--- a/scripts/cloudinary-cleanup.ts
+++ b/scripts/cloudinary-cleanup.ts
@ -0,0 +1,151 @@
+#!/usr/bin/env tsx
+
+import { config } from 'dotenv'
+import { v2 as cloudinary } from 'cloudinary'
+import {
+	auditCloudinaryResources,
+	deleteOrphanedFiles,
+	type AuditResult
+} from '../src/lib/server/cloudinary-audit'
+import { formatBytes } from '../src/lib/utils/format'
+
+// Load environment variables via dotenv before they are read below.
+// NOTE(review): the imports above are evaluated before this call runs — confirm
+// none of the imported modules read these variables at module load time.
+config()
+
+// Hand the Cloudinary SDK its credentials from the environment
+const { CLOUDINARY_CLOUD_NAME, CLOUDINARY_API_KEY, CLOUDINARY_API_SECRET } = process.env
+cloudinary.config({
+	cloud_name: CLOUDINARY_CLOUD_NAME,
+	api_key: CLOUDINARY_API_KEY,
+	api_secret: CLOUDINARY_API_SECRET
+})
+
+/**
+ * Script entry point: audits Cloudinary against the database, prints a report
+ * and, when `--execute` is given and the user confirms, deletes orphaned files.
+ *
+ * Flags (read from `process.argv`):
+ * - `--execute`: perform deletions; without it the script only reports (dry run)
+ * - `--verbose`: print per-file detail (orphans, failed deletions, missing IDs)
+ *
+ * Any error thrown along the way is logged and the process exits with code 1.
+ */
+async function main() {
+	const cliArgs = process.argv.slice(2)
+	const verbose = cliArgs.includes('--verbose')
+	const isDryRun = !cliArgs.includes('--execute')
+
+	console.log('🔍 Cloudinary Cleanup Script')
+	console.log('===========================')
+	console.log(`Mode: ${isDryRun ? 'DRY RUN' : 'EXECUTE'}`)
+	console.log('')
+
+	try {
+		console.log('📊 Running audit...')
+		const audit = await auditCloudinaryResources()
+		displayAuditResults(audit, verbose)
+
+		// Every branch below starts with a blank separator line
+		console.log('')
+
+		const orphanCount = audit.orphanedFiles.length
+		if (orphanCount === 0) {
+			console.log('✅ No orphaned files found! Your Cloudinary storage is clean.')
+		} else if (isDryRun) {
+			console.log('⚠️  DRY RUN MODE: No files will be deleted')
+			console.log('   Run with --execute flag to delete orphaned files')
+		} else {
+			console.log('🗑️  Preparing to delete orphaned files...')
+			const question = `Delete ${orphanCount} orphaned files (${formatBytes(audit.orphanedTotalBytes)})?`
+			const confirmed = await promptConfirmation(question)
+
+			if (!confirmed) {
+				console.log('❌ Cleanup cancelled')
+			} else {
+				const idsToDelete = audit.orphanedFiles.map((file) => file.public_id)
+				const outcome = await deleteOrphanedFiles(idsToDelete, false)
+
+				console.log('')
+				console.log('✅ Deletion Results:')
+				console.log(`   Attempted: ${outcome.attempted}`)
+				console.log(`   Succeeded: ${outcome.succeeded}`)
+				console.log(`   Failed: ${outcome.failed.length}`)
+
+				if (verbose && outcome.failed.length > 0) {
+					console.log('')
+					console.log('❌ Failed deletions:')
+					for (const failedId of outcome.failed) {
+						console.log(`   - ${failedId}`)
+					}
+				}
+			}
+		}
+
+		// Report database entries that point at files Cloudinary does not have
+		const missing = audit.missingFromCloudinary
+		if (missing.length > 0) {
+			console.log('')
+			console.log('⚠️  Warning: Database references files missing from Cloudinary')
+			console.log(`   Found ${missing.length} missing references`)
+			console.log('   Consider cleaning up these database entries')
+
+			if (verbose) {
+				console.log('')
+				console.log('Missing public IDs:')
+				for (const missingId of missing) {
+					console.log(`   - ${missingId}`)
+				}
+			}
+		}
+	} catch (error) {
+		console.error('❌ Error:', error)
+		process.exit(1)
+	}
+}
+
+/**
+ * Prints the audit summary and, in verbose mode, every orphaned file grouped
+ * by folder.
+ *
+ * @param audit - Result returned by `auditCloudinaryResources`
+ * @param verbose - When true, lists each orphaned file under its folder
+ */
+function displayAuditResults(audit: AuditResult, verbose: boolean) {
+	console.log('')
+	console.log('📈 Audit Summary:')
+	console.log(`   Total files in Cloudinary: ${audit.totalCloudinaryFiles}`)
+	console.log(`   Total database references: ${audit.totalDatabaseReferences}`)
+	console.log(`   Orphaned files: ${audit.orphanedFiles.length}`)
+	console.log(`   Orphaned storage size: ${formatBytes(audit.orphanedTotalBytes)}`)
+	console.log(`   Missing from Cloudinary: ${audit.missingFromCloudinary.length}`)
+
+	if (verbose && audit.orphanedFiles.length > 0) {
+		console.log('')
+		console.log('📁 Orphaned Files:')
+
+		// Group by folder. A Map is used instead of a plain object so folder names
+		// that collide with Object.prototype members (e.g. "constructor") cannot
+		// break the grouping, and folders print in first-seen order.
+		const byFolder = new Map<string, typeof audit.orphanedFiles>()
+		for (const file of audit.orphanedFiles) {
+			// Empty or missing folder is reported as 'root'
+			const folder = file.folder || 'root'
+			const bucket = byFolder.get(folder)
+			if (bucket) {
+				bucket.push(file)
+			} else {
+				byFolder.set(folder, [file])
+			}
+		}
+
+		for (const [folder, files] of byFolder) {
+			console.log(`   📂 ${folder}/ (${files.length} files)`)
+			for (const file of files) {
+				console.log(`      - ${file.public_id} (${formatBytes(file.bytes)})`)
+			}
+		}
+	}
+}
+
+/**
+ * Asks a yes/no question on the terminal and waits for one chunk of stdin input.
+ *
+ * @param message - Question shown to the user; " (y/N): " is appended
+ * @returns true only when the trimmed, lower-cased answer is "y" or "yes";
+ *          false for anything else, including stdin closing without input
+ */
+async function promptConfirmation(message: string): Promise<boolean> {
+	console.log('')
+	console.log(`❓ ${message} (y/N): `)
+
+	return new Promise((resolve) => {
+		const stdin = process.stdin
+
+		const finish = (confirmed: boolean) => {
+			stdin.off('data', onData)
+			stdin.off('end', onEnd)
+			// Attaching a 'data' listener switches stdin to flowing mode; removing
+			// the listener does not pause it again. Without this the open stdin
+			// keeps the event loop alive and the script never exits.
+			stdin.pause()
+			resolve(confirmed)
+		}
+
+		const onData = (data: Buffer | string) => {
+			const answer = data.toString().trim().toLowerCase()
+			finish(answer === 'y' || answer === 'yes')
+		}
+
+		// Non-interactive runs (closed or empty stdin) must not wait forever;
+		// treat end-of-input as "no", which is the safe default for a deletion.
+		const onEnd = () => finish(false)
+
+		stdin.once('data', onData)
+		stdin.once('end', onEnd)
+	})
+}
+
+// Run the script. main() handles its own errors (logs and exits with code 1),
+// so this catch only logs a rejection that escapes that handler.
+main().catch(console.error)
--- a/src/lib/server/cloudinary-audit.ts
+++ b/src/lib/server/cloudinary-audit.ts
@ -0,0 +1,220 @@
+import { v2 as cloudinary } from 'cloudinary'
+import { prisma } from './database'
+import { extractPublicId } from './cloudinary'
+import { formatBytes } from '$lib/utils/format'
+
+export { formatBytes }
+
+/**
+ * Shape of a single Cloudinary asset entry as consumed by the audit code.
+ * Presumably mirrors the objects returned by `cloudinary.api.resources()` —
+ * verify against the SDK typings.
+ */
+export interface CloudinaryResource {
+	/** Identifier compared against the public IDs extracted from database URLs */
+	public_id: string
+	secure_url: string
+	resource_type: string
+	type: string
+	format: string
+	version: number
+	width?: number
+	height?: number
+	/** Size in bytes; summed to compute the orphaned storage total */
+	bytes: number
+	created_at: string
+	/** Folder name; callers fall back to 'root' when it is empty or missing */
+	folder?: string
+}
+
+/** Outcome of comparing the Cloudinary listing with the database references. */
+export interface AuditResult {
+	/** Number of resources fetched from Cloudinary (skipped thumbnails included) */
+	totalCloudinaryFiles: number
+	/** Number of distinct public IDs found in the database */
+	totalDatabaseReferences: number
+	/** Cloudinary resources whose public ID is not referenced in the database */
+	orphanedFiles: CloudinaryResource[]
+	/** Sum of `bytes` over `orphanedFiles` */
+	orphanedTotalBytes: number
+	/** Public IDs referenced in the database but absent from the Cloudinary listing */
+	missingFromCloudinary: string[]
+}
+
+/**
+ * Lists every `upload`-type resource in the Cloudinary account, following
+ * `next_cursor` until the listing is exhausted (500 entries per request).
+ *
+ * NOTE(review): no `resource_type` is passed, so the SDK default applies —
+ * confirm whether videos and raw files are meant to be included.
+ *
+ * @returns All resources gathered across pages
+ * @throws Re-throws any error from the Cloudinary API after logging it
+ */
+export async function fetchAllCloudinaryResources(): Promise<CloudinaryResource[]> {
+	const collected: CloudinaryResource[] = []
+	let cursor: string | undefined
+	let hasMorePages = true
+
+	while (hasMorePages) {
+		try {
+			const page = await cloudinary.api.resources({
+				type: 'upload',
+				max_results: 500,
+				next_cursor: cursor
+			})
+
+			for (const resource of page.resources) {
+				collected.push(resource)
+			}
+			cursor = page.next_cursor
+		} catch (error) {
+			console.error('Error fetching Cloudinary resources:', error)
+			throw error
+		}
+
+		// An absent or empty cursor marks the last page
+		hasMorePages = Boolean(cursor)
+	}
+
+	return collected
+}
+
+/**
+ * Collects the public IDs of every Cloudinary asset referenced in the database.
+ *
+ * Scans `Media` (`url`, `thumbnailUrl`), `Project` (`featuredImage`, `logoUrl`,
+ * `gallery` JSON) and `Post` (`featuredImage`, `attachments` JSON). Only string
+ * values containing `cloudinary.com` are considered; each is converted with
+ * `extractPublicId`, and values that yield no public ID are skipped.
+ *
+ * JSON columns are validated at runtime: anything that is not an array, and any
+ * array item that is not an object with a string `url`, is ignored rather than
+ * aborting the whole audit.
+ *
+ * @returns Set of distinct public IDs referenced by the database
+ */
+export async function fetchAllDatabaseCloudinaryReferences(): Promise<Set<string>> {
+	const publicIds = new Set<string>()
+
+	// Records the public ID behind `url` when it is a Cloudinary URL
+	const addUrl = (url: unknown): void => {
+		if (typeof url !== 'string' || !url.includes('cloudinary.com')) return
+		const publicId = extractPublicId(url)
+		if (publicId) publicIds.add(publicId)
+	}
+
+	// Records the `url` of every object found in a JSON array column
+	const addJsonUrls = (value: unknown): void => {
+		if (!Array.isArray(value)) return
+		for (const item of value) {
+			if (item !== null && typeof item === 'object') {
+				addUrl((item as { url?: unknown }).url)
+			}
+		}
+	}
+
+	// Media table URLs
+	const mediaRecords = await prisma.media.findMany({
+		select: {
+			url: true,
+			thumbnailUrl: true
+		}
+	})
+	for (const media of mediaRecords) {
+		addUrl(media.url)
+		addUrl(media.thumbnailUrl)
+	}
+
+	// Project images
+	const projects = await prisma.project.findMany({
+		select: {
+			featuredImage: true,
+			logoUrl: true,
+			gallery: true
+		}
+	})
+	for (const project of projects) {
+		addUrl(project.featuredImage)
+		addUrl(project.logoUrl)
+		addJsonUrls(project.gallery)
+	}
+
+	// Post images
+	const posts = await prisma.post.findMany({
+		select: {
+			featuredImage: true,
+			attachments: true
+		}
+	})
+	for (const post of posts) {
+		addUrl(post.featuredImage)
+		addJsonUrls(post.attachments)
+	}
+
+	return publicIds
+}
+
+/**
+ * Compares the Cloudinary listing with the public IDs referenced in the
+ * database and reports the differences in both directions.
+ *
+ * Resources whose public ID contains `_thumbnail_` are never reported as
+ * orphaned, but they still count towards `totalCloudinaryFiles`.
+ *
+ * @returns Totals, the orphaned resources with their combined size, and the
+ *          public IDs the database references that Cloudinary does not have
+ */
+export async function auditCloudinaryResources(): Promise<AuditResult> {
+	console.log('Starting Cloudinary audit...')
+
+	const allResources = await fetchAllCloudinaryResources()
+	console.log(`Found ${allResources.length} files in Cloudinary`)
+
+	const referencedIds = await fetchAllDatabaseCloudinaryReferences()
+	console.log(`Found ${referencedIds.size} Cloudinary references in database`)
+
+	// Orphaned: present in Cloudinary, not a thumbnail, and unreferenced
+	const orphanedFiles = allResources.filter(
+		(resource) =>
+			!resource.public_id.includes('_thumbnail_') && !referencedIds.has(resource.public_id)
+	)
+	const orphanedTotalBytes = orphanedFiles.reduce(
+		(total, resource) => total + (resource.bytes || 0),
+		0
+	)
+
+	// Missing: referenced by the database but absent from the listing
+	const listedIds = new Set(allResources.map((resource) => resource.public_id))
+	const missingFromCloudinary = [...referencedIds].filter((id) => !listedIds.has(id))
+
+	return {
+		totalCloudinaryFiles: allResources.length,
+		totalDatabaseReferences: referencedIds.size,
+		orphanedFiles,
+		orphanedTotalBytes,
+		missingFromCloudinary
+	}
+}
+
+/**
+ * Deletes the given public IDs from Cloudinary, one request at a time.
+ *
+ * @param publicIds - Public IDs to delete
+ * @param dryRun - When true (the default) nothing is deleted and every ID is
+ *                 reported as succeeded
+ * @returns `attempted` (number of IDs given), `succeeded` (number confirmed
+ *          deleted) and `failed` (IDs whose request threw or whose response
+ *          did not report `result: 'ok'`)
+ */
+export async function deleteOrphanedFiles(
+	publicIds: string[],
+	dryRun = true
+): Promise<{
+	attempted: number
+	succeeded: number
+	failed: string[]
+}> {
+	const results = {
+		attempted: publicIds.length,
+		succeeded: 0,
+		failed: [] as string[]
+	}
+
+	if (dryRun) {
+		console.log(`DRY RUN: Would delete ${publicIds.length} files`)
+		return { ...results, succeeded: publicIds.length }
+	}
+
+	for (const publicId of publicIds) {
+		try {
+			const response = await cloudinary.uploader.destroy(publicId)
+
+			// destroy() resolves even when nothing was deleted (e.g. the response
+			// says "not found"), so only an explicit 'ok' counts as a success.
+			if (response?.result === 'ok') {
+				results.succeeded++
+				console.log(`Deleted: ${publicId}`)
+			} else {
+				results.failed.push(publicId)
+				console.error(`Failed to delete ${publicId}:`, response)
+			}
+		} catch (error) {
+			results.failed.push(publicId)
+			console.error(`Failed to delete ${publicId}:`, error)
+		}
+	}
+
+	return results
+}
--- a/src/lib/utils/format.ts
+++ b/src/lib/utils/format.ts
@ -0,0 +1,10 @@
+/**
+ * Formats a byte count as a human-readable string using 1024-based units,
+ * rounded to at most two decimals (e.g. 1536 -> "1.5 KB").
+ *
+ * @param bytes - Size in bytes
+ * @returns The formatted size; "0 Bytes" for zero, negative or non-finite input
+ */
+export function formatBytes(bytes: number): string {
+	// Math.log is undefined for these inputs and would yield "NaN undefined"
+	if (!Number.isFinite(bytes) || bytes <= 0) return '0 Bytes'
+
+	const k = 1024
+	const sizes = ['Bytes', 'KB', 'MB', 'GB', 'TB', 'PB']
+	const exponent = Math.floor(Math.log(bytes) / Math.log(k))
+	// Clamp so fractional values stay in "Bytes" and very large values use the
+	// biggest known unit instead of indexing past the end of `sizes`
+	const i = Math.min(Math.max(exponent, 0), sizes.length - 1)
+
+	return parseFloat((bytes / Math.pow(k, i)).toFixed(2)) + ' ' + sizes[i]
+}
--- a/src/routes/api/admin/cloudinary-audit/+server.ts
+++ b/src/routes/api/admin/cloudinary-audit/+server.ts
@ -0,0 +1,88 @@
+import { json } from '@sveltejs/kit'
+import type { RequestHandler } from './$types'
+import { checkAdminAuth } from '$lib/server/api-utils'
+import { auditCloudinaryResources, deleteOrphanedFiles } from '$lib/server/cloudinary-audit'
+import { formatBytes } from '$lib/utils/format'
+import { isCloudinaryConfigured } from '$lib/server/cloudinary'
+
+/**
+ * GET handler: runs the Cloudinary audit and returns a JSON report.
+ *
+ * Responds 401 when `checkAdminAuth` rejects the request, 503 when Cloudinary
+ * is not configured, and 500 when anything throws. On success the body holds
+ * `summary`, `orphanedFiles` and `missingReferences`.
+ */
+export const GET: RequestHandler = async (event) => {
+	try {
+		if (!checkAdminAuth(event)) {
+			return json({ error: 'Unauthorized' }, { status: 401 })
+		}
+
+		if (!isCloudinaryConfigured()) {
+			return json({ error: 'Cloudinary is not configured' }, { status: 503 })
+		}
+
+		const {
+			totalCloudinaryFiles,
+			totalDatabaseReferences,
+			orphanedFiles,
+			orphanedTotalBytes,
+			missingFromCloudinary
+		} = await auditCloudinaryResources()
+
+		// Headline numbers, with the orphaned size in raw and formatted form
+		const summary = {
+			totalCloudinaryFiles,
+			totalDatabaseReferences,
+			orphanedFilesCount: orphanedFiles.length,
+			orphanedFilesSize: orphanedTotalBytes,
+			orphanedFilesSizeFormatted: formatBytes(orphanedTotalBytes),
+			missingReferencesCount: missingFromCloudinary.length
+		}
+
+		// Per-file entries reshaped to camelCase for the client
+		const orphanedPayload = orphanedFiles.map((file) => {
+			// Dimensions are reported only when both sides are present and non-zero
+			const dimensions =
+				file.width && file.height ? { width: file.width, height: file.height } : null
+
+			return {
+				publicId: file.public_id,
+				url: file.secure_url,
+				folder: file.folder || 'root',
+				format: file.format,
+				size: file.bytes,
+				sizeFormatted: formatBytes(file.bytes),
+				dimensions,
+				createdAt: file.created_at
+			}
+		})
+
+		return json({
+			summary,
+			orphanedFiles: orphanedPayload,
+			missingReferences: missingFromCloudinary
+		})
+	} catch (error) {
+		console.error('Cloudinary audit error:', error)
+		return json({ error: 'Failed to audit Cloudinary resources' }, { status: 500 })
+	}
+}
+
+/**
+ * DELETE handler: deletes (or, by default, dry-runs deleting) the given
+ * Cloudinary public IDs.
+ *
+ * Expects a JSON body `{ publicIds: string[], dryRun?: boolean }`. Files are
+ * deleted only when `dryRun` is exactly `false`; any other value, or leaving
+ * it out, results in a dry run.
+ *
+ * Responds 401 when `checkAdminAuth` rejects the request, 503 when Cloudinary
+ * is not configured, 400 for an invalid body (malformed JSON, missing or empty
+ * `publicIds`, non-string IDs, more than 100 IDs) and 500 on unexpected errors.
+ * On success the body is `{ dryRun, results }`, where `dryRun` is the mode
+ * that was actually applied.
+ */
+export const DELETE: RequestHandler = async (event) => {
+	try {
+		if (!checkAdminAuth(event)) {
+			return json({ error: 'Unauthorized' }, { status: 401 })
+		}
+
+		if (!isCloudinaryConfigured()) {
+			return json({ error: 'Cloudinary is not configured' }, { status: 503 })
+		}
+
+		// A malformed body is a client error, not a server failure
+		let body: unknown
+		try {
+			body = await event.request.json()
+		} catch {
+			return json({ error: 'Invalid JSON body' }, { status: 400 })
+		}
+
+		if (body === null || typeof body !== 'object') {
+			return json({ error: 'No public IDs provided' }, { status: 400 })
+		}
+
+		const { publicIds, dryRun } = body as { publicIds?: unknown; dryRun?: unknown }
+
+		if (!Array.isArray(publicIds) || publicIds.length === 0) {
+			return json({ error: 'No public IDs provided' }, { status: 400 })
+		}
+
+		if (!publicIds.every((id): id is string => typeof id === 'string' && id.length > 0)) {
+			return json({ error: 'Public IDs must be non-empty strings' }, { status: 400 })
+		}
+
+		// Limit the number of files that can be deleted at once
+		if (publicIds.length > 100) {
+			return json({ error: 'Cannot delete more than 100 files at once' }, { status: 400 })
+		}
+
+		// Require an explicit boolean false: values such as null or 0 must not
+		// slip past the default and trigger a destructive run.
+		const isDryRun = dryRun !== false
+
+		const results = await deleteOrphanedFiles(publicIds, isDryRun)
+
+		return json({
+			dryRun: isDryRun,
+			results
+		})
+	} catch (error) {
+		console.error('Cloudinary delete error:', error)
+		return json({ error: 'Failed to delete Cloudinary resources' }, { status: 500 })
+	}
+}