/**
 * Backfill Embeddings Script
 *
 * One-time script to generate embeddings for all existing projects and deliverables.
 * Run with: npx tsx scripts/backfill-embeddings.ts
 *
 * Prerequisites:
 * - PostgreSQL with pgvector extension enabled
 * - Ollama running with nomic-embed-text model pulled
 * - DATABASE_URL set in environment
 *
 * Optional environment:
 * - OLLAMA_EMBED_MODEL: model name to look for (defaults to "nomic-embed-text")
 * - OLLAMA_HOST: only read here to print the address in the "not available" message
 */

import "dotenv/config";
import { PrismaPg } from "@prisma/adapter-pg";
import { PrismaClient } from "../src/generated/prisma/client";
import {
  buildProjectText,
  buildDeliverableText,
  generateEmbedding,
  checkOllamaHealth,
} from "../src/lib/services/embedding-service";

// Fail fast with a readable message instead of handing `undefined` to the adapter.
const connectionString = process.env.DATABASE_URL;
if (!connectionString) {
  console.error("❌ DATABASE_URL is not set. Add it to your environment or .env file.");
  process.exit(1);
}

const adapter = new PrismaPg({ connectionString });
const prisma = new PrismaClient({ adapter });

/** Tables that carry an "embedding" vector column written by this script. */
type EmbeddingTable = "projects" | "deliverables";

/**
 * Writes one embedding into the "embedding" column of the given row.
 *
 * The table name is interpolated into the SQL text, which is safe only because
 * `EmbeddingTable` is a closed set of literals defined in this file; the vector
 * and the id are always sent as bound parameters.
 *
 * @param table - Target table.
 * @param id - Primary key of the row to update.
 * @param embedding - Vector components, serialised as "[a,b,c]" and cast to `vector`.
 */
async function storeEmbedding(
  table: EmbeddingTable,
  id: string | number,
  embedding: readonly number[]
): Promise<void> {
  const vectorLiteral = `[${embedding.join(",")}]`;
  await prisma.$executeRawUnsafe(
    `UPDATE "${table}" SET "embedding" = $1::vector WHERE "id" = $2`,
    vectorLiteral,
    id
  );
}

/** Returns the message of an `Error`, or the thrown value itself for anything else. */
function describeError(error: unknown): unknown {
  return error instanceof Error ? error.message : error;
}

/**
 * Disconnects Prisma and terminates the process with exit code 1.
 *
 * The disconnect is awaited so it can finish before `process.exit` tears the
 * process down. A failure while disconnecting is ignored on purpose: the
 * process is already exiting with an error.
 */
async function exitWithFailure(): Promise<never> {
  try {
    await prisma.$disconnect();
  } catch {
    // Best effort only; the original failure has already been reported.
  }
  process.exit(1);
}

/**
 * Generates and stores an embedding for every project.
 *
 * Projects are loaded together with the name, status and priority of their
 * deliverables, turned into text with `buildProjectText`, embedded with
 * `generateEmbedding`, and written to the "projects" table. A project whose
 * embedding comes back empty, or whose processing throws, is counted as failed
 * and logged; the loop then continues with the next project.
 */
async function backfillProjects(): Promise<void> {
  const projects = await prisma.project.findMany({
    include: {
      deliverables: {
        select: { name: true, status: true, priority: true },
      },
    },
  });

  console.log(`\n📦 Processing ${projects.length} projects...`);

  let success = 0;
  let failed = 0;

  // Rows are processed one at a time, so the embedding service gets one request at a time.
  for (const project of projects) {
    try {
      const text = buildProjectText(project);
      const embedding = await generateEmbedding(text);

      if (embedding) {
        await storeEmbedding("projects", project.id, embedding);
        success++;
        // "\r" rewrites the same terminal line to act as a progress counter.
        process.stdout.write(
          `\r ✅ Projects: ${success} done, ${failed} failed (${success + failed}/${projects.length})`
        );
      } else {
        failed++;
        console.warn(`\n ⚠️ Failed to generate embedding for project: ${project.name}`);
      }
    } catch (error) {
      failed++;
      console.error(
        `\n ❌ Error processing project "${project.name}":`,
        describeError(error)
      );
    }
  }

  console.log(`\n Projects complete: ${success} succeeded, ${failed} failed`);
}

/**
 * Generates and stores an embedding for every deliverable.
 *
 * Deliverables are loaded with their project's name and code and with their
 * stages (including each stage's template name). Each stage is reduced to
 * `{ template, status }` before the text is built with `buildDeliverableText`.
 * Failures are counted and logged per deliverable without stopping the run.
 */
async function backfillDeliverables(): Promise<void> {
  const deliverables = await prisma.deliverable.findMany({
    include: {
      project: { select: { name: true, projectCode: true } },
      stages: {
        include: { template: { select: { name: true } } },
      },
    },
  });

  console.log(`\n📋 Processing ${deliverables.length} deliverables...`);

  let success = 0;
  let failed = 0;

  for (const deliverable of deliverables) {
    try {
      const text = buildDeliverableText({
        ...deliverable,
        stages: deliverable.stages.map((s) => ({
          template: s.template,
          status: s.status,
        })),
      });
      const embedding = await generateEmbedding(text);

      if (embedding) {
        await storeEmbedding("deliverables", deliverable.id, embedding);
        success++;
        process.stdout.write(
          `\r ✅ Deliverables: ${success} done, ${failed} failed (${success + failed}/${deliverables.length})`
        );
      } else {
        failed++;
        console.warn(
          `\n ⚠️ Failed to generate embedding for deliverable: ${deliverable.name}`
        );
      }
    } catch (error) {
      failed++;
      console.error(
        `\n ❌ Error processing deliverable "${deliverable.name}":`,
        describeError(error)
      );
    }
  }

  console.log(
    `\n Deliverables complete: ${success} succeeded, ${failed} failed`
  );
}

/**
 * Script entry point: verifies that Ollama is reachable and that the embedding
 * model is listed, then backfills projects followed by deliverables and prints
 * the elapsed time. Exits with code 1 when either pre-flight check fails.
 */
async function main(): Promise<void> {
  console.log("🔍 Backfill Embeddings — HP CG Production Tracker");
  console.log("═".repeat(50));

  // Check Ollama health first
  console.log("\n🏥 Checking Ollama health...");
  const health = await checkOllamaHealth();

  if (!health.available) {
    console.error(
      "❌ Ollama is not available. Make sure it's running at",
      // `||` rather than `??` so an empty OLLAMA_HOST also falls back to the default.
      process.env.OLLAMA_HOST || "http://localhost:11434"
    );
    console.error(
      " Start it with: ollama serve (or docker run -p 11434:11434 ollama/ollama)"
    );
    return exitWithFailure();
  }

  console.log(`✅ Ollama is running. Available models: ${health.models.join(", ")}`);

  const embedModel = process.env.OLLAMA_EMBED_MODEL || "nomic-embed-text";
  // Compare on the part before any ":tag" suffix, so "nomic-embed-text:latest"
  // still matches a configured "nomic-embed-text".
  const [baseModel = embedModel] = embedModel.split(":");
  const hasModel = health.models.some((m) => m.includes(baseModel));

  if (!hasModel) {
    console.error(
      `❌ Embedding model "${embedModel}" not found. Pull it with: ollama pull ${embedModel}`
    );
    return exitWithFailure();
  }

  console.log(`✅ Using embedding model: ${embedModel}`);

  const startTime = Date.now();

  await backfillProjects();
  await backfillDeliverables();

  const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
  console.log(`\n${"═".repeat(50)}`);
  console.log(`✨ Backfill complete in ${elapsed}s`);

  await prisma.$disconnect();
}

void main().catch((error: unknown) => {
  console.error("\n💥 Fatal error:", error);
  return exitWithFailure();
});