hp-prod-tracker/scripts/backfill-embeddings.ts
Leivur Djurhuus 9d5acf1683 feat: add Smart Search Panel with semantic search capabilities
- Implemented Smart Search Panel component for enhanced project and deliverable search functionality.
- Introduced useSemanticSearch and useOllamaHealth hooks for managing search queries and AI availability.
- Developed embedding-service to generate and store vector embeddings for projects and deliverables.
- Created semantic-search-service to handle vector search, structural query detection, and LLM summarization.
- Added support for hybrid search combining structural filters and semantic queries.
- Integrated UI components for displaying search results and user interactions.
2026-03-06 16:13:36 -06:00

177 lines
5 KiB
TypeScript

/**
* Backfill Embeddings Script
*
* One-time script to generate embeddings for all existing projects and deliverables.
* Run with: npx tsx scripts/backfill-embeddings.ts
*
* Prerequisites:
* - PostgreSQL with pgvector extension enabled
* - Ollama running with nomic-embed-text model pulled
* - DATABASE_URL set in environment
*/
import "dotenv/config";
import { PrismaPg } from "@prisma/adapter-pg";
import { PrismaClient } from "../src/generated/prisma/client";
import {
buildProjectText,
buildDeliverableText,
generateEmbedding,
checkOllamaHealth,
} from "../src/lib/services/embedding-service";
// Fail fast with a readable message when the documented DATABASE_URL
// prerequisite is missing, instead of asserting it non-null and handing
// `undefined` to the driver adapter.
const connectionString = process.env.DATABASE_URL;
if (!connectionString) {
  console.error(
    "❌ DATABASE_URL is not set. Define it in the environment or in a .env file."
  );
  process.exit(1);
}

// Single Prisma client (backed by the pg driver adapter) shared by every
// backfill step in this script.
const adapter = new PrismaPg({ connectionString });
const prisma = new PrismaClient({ adapter });
/**
 * Generates and stores an embedding for every project.
 *
 * Each project is loaded together with the name, status and priority of its
 * deliverables, turned into text with `buildProjectText`, embedded, and the
 * resulting vector is written to the `embedding` column of `projects`.
 *
 * Projects are handled one at a time. A project whose embedding comes back
 * empty, or whose processing throws, is logged and counted as failed; the loop
 * then continues with the next project.
 */
async function backfillProjects() {
  const allProjects = await prisma.project.findMany({
    include: {
      deliverables: {
        select: { name: true, status: true, priority: true },
      },
    },
  });
  const total = allProjects.length;
  console.log(`\n📦 Processing ${total} projects...`);

  let okCount = 0;
  let failCount = 0;

  for (const current of allProjects) {
    try {
      const sourceText = buildProjectText(current);
      const vector = await generateEmbedding(sourceText);

      // No embedding returned: record the failure and move on.
      if (!vector) {
        failCount += 1;
        console.warn(`\n ⚠️ Failed to generate embedding for project: ${current.name}`);
        continue;
      }

      // Serialize as "[v1,v2,...]" so the SQL can cast the parameter to vector.
      const vectorLiteral = "[" + vector.join(",") + "]";
      await prisma.$executeRawUnsafe(
        `UPDATE "projects" SET "embedding" = $1::vector WHERE "id" = $2`,
        vectorLiteral,
        current.id
      );

      okCount += 1;
      const processed = okCount + failCount;
      // Carriage return keeps the progress counter on a single terminal line.
      process.stdout.write(
        `\r ✅ Projects: ${okCount} done, ${failCount} failed (${processed}/${total})`
      );
    } catch (err) {
      failCount += 1;
      const detail = err instanceof Error ? err.message : err;
      console.error(`\n ❌ Error processing project "${current.name}":`, detail);
    }
  }

  console.log(`\n Projects complete: ${okCount} succeeded, ${failCount} failed`);
}
/**
 * Generates and stores an embedding for every deliverable.
 *
 * Each deliverable is loaded with its project's name and code plus its stages
 * (including each stage template's name). The stages are reduced to
 * `{ template, status }` before being passed to `buildDeliverableText`; the
 * resulting vector is written to the `embedding` column of `deliverables`.
 *
 * Deliverables are handled one at a time. A deliverable whose embedding comes
 * back empty, or whose processing throws, is logged and counted as failed; the
 * loop then continues with the next deliverable.
 */
async function backfillDeliverables() {
  const allDeliverables = await prisma.deliverable.findMany({
    include: {
      project: { select: { name: true, projectCode: true } },
      stages: {
        include: { template: { select: { name: true } } },
      },
    },
  });
  const total = allDeliverables.length;
  console.log(`\n📋 Processing ${total} deliverables...`);

  let okCount = 0;
  let failCount = 0;

  for (const current of allDeliverables) {
    try {
      // Keep only the stage fields passed on to the text builder.
      const slimStages = current.stages.map(({ template, status }) => ({
        template,
        status,
      }));
      const sourceText = buildDeliverableText({ ...current, stages: slimStages });
      const vector = await generateEmbedding(sourceText);

      // No embedding returned: record the failure and move on.
      if (!vector) {
        failCount += 1;
        console.warn(
          `\n ⚠️ Failed to generate embedding for deliverable: ${current.name}`
        );
        continue;
      }

      // Serialize as "[v1,v2,...]" so the SQL can cast the parameter to vector.
      const vectorLiteral = "[" + vector.join(",") + "]";
      await prisma.$executeRawUnsafe(
        `UPDATE "deliverables" SET "embedding" = $1::vector WHERE "id" = $2`,
        vectorLiteral,
        current.id
      );

      okCount += 1;
      const processed = okCount + failCount;
      // Carriage return keeps the progress counter on a single terminal line.
      process.stdout.write(
        `\r ✅ Deliverables: ${okCount} done, ${failCount} failed (${processed}/${total})`
      );
    } catch (err) {
      failCount += 1;
      const detail = err instanceof Error ? err.message : err;
      console.error(`\n ❌ Error processing deliverable "${current.name}":`, detail);
    }
  }

  console.log(
    `\n Deliverables complete: ${okCount} succeeded, ${failCount} failed`
  );
}
/**
 * Script entry point.
 *
 * Verifies that Ollama is reachable and that the configured embedding model
 * (OLLAMA_EMBED_MODEL, default "nomic-embed-text") appears in its model list,
 * exiting with code 1 if either check fails. It then backfills projects
 * followed by deliverables, reports the elapsed time and disconnects Prisma.
 */
async function main() {
  const divider = "═".repeat(50);

  console.log("🔍 Backfill Embeddings — HP CG Production Tracker");
  console.log(divider);

  // Pre-flight: nothing can be embedded unless Ollama answers.
  console.log("\n🏥 Checking Ollama health...");
  const { available, models } = await checkOllamaHealth();
  if (!available) {
    const host = process.env.OLLAMA_HOST || "http://localhost:11434";
    console.error("❌ Ollama is not available. Make sure it's running at", host);
    console.error(
      " Start it with: ollama serve (or docker run -p 11434:11434 ollama/ollama)"
    );
    process.exit(1);
  }
  console.log(`✅ Ollama is running. Available models: ${models.join(", ")}`);

  // Compare on the model name without its ":tag" suffix, using a substring match.
  const embedModel = process.env.OLLAMA_EMBED_MODEL || "nomic-embed-text";
  const baseModelName = embedModel.split(":")[0];
  const modelInstalled = models.some((installed) => installed.includes(baseModelName));
  if (!modelInstalled) {
    console.error(
      `❌ Embedding model "${embedModel}" not found. Pull it with: ollama pull ${embedModel}`
    );
    process.exit(1);
  }
  console.log(`✅ Using embedding model: ${embedModel}`);

  const startedAt = Date.now();
  await backfillProjects();
  await backfillDeliverables();
  const elapsedMs = Date.now() - startedAt;
  const elapsedSeconds = (elapsedMs / 1000).toFixed(1);

  console.log("\n" + divider);
  console.log(`✨ Backfill complete in ${elapsedSeconds}s`);

  await prisma.$disconnect();
}
// Run the script. Any error that escapes main() is reported and turned into
// a non-zero exit code.
main().catch(async (error: unknown) => {
  console.error("\n💥 Fatal error:", error);
  try {
    // Await the disconnect: process.exit() terminates immediately, so an
    // un-awaited $disconnect() would never get the chance to finish.
    await prisma.$disconnect();
  } catch (disconnectError) {
    console.error("⚠️ Failed to disconnect Prisma:", disconnectError);
  } finally {
    process.exit(1);
  }
});