modcomms/backend/alembic/versions/006_add_knowledge_base.py
michael 9e2473c3e9 Add Knowledge Base management system for AI agent specs
Full-stack implementation enabling UI-driven management of the 5 AI agent knowledge bases
(Legal, Brand Barclays, Brand Barclaycard, Channel Best Practices, Channel Tech Specs).

Backend:
- 4 new DB models: KnowledgeBase, SourceDocument, SpecVersion, ProcessingJob
- Migration 006: creates tables, seeds 5 KB rows, imports existing prompts/*.md as v1 specs
- KnowledgeBaseRepository with full CRUD for all 4 tables
- LlamaParseService for document parsing, KnowledgeBaseService for pipeline orchestration
- ReferenceDocsService updated with DB-backed spec loading + cache invalidation
- 11 REST endpoints under /api/knowledge-base (list, detail, upload, delete, process, job status, versions, diff, activate)
- StorageService extended with KB document storage

Frontend:
- TypeScript types for all KB entities (KnowledgeBaseListItem, SourceDocument, ProcessingJob, SpecVersion, DiffResult)
- ApiService methods for all KB endpoints including multipart file upload
- KnowledgeBase component with 3-level UI: agent grid, detail view (documents + versions tabs), diff viewer
- Drag-and-drop file upload, processing progress bar with 3s polling, version comparison
- KnowledgeBaseIcon + Sidebar nav item with adminOnly filtering

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-02-12 15:00:36 -06:00

179 lines
8.5 KiB
Python

"""Add knowledge base tables and seed initial data
Revision ID: 006_add_knowledge_base
Revises: 005_add_file_hash
Create Date: 2025-02-12
"""
from pathlib import Path
from typing import Sequence, Union
from uuid import uuid4
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision: str = '006_add_knowledge_base'
down_revision: Union[str, None] = '005_add_file_hash'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
"""Create knowledge base tables and seed initial data."""
# ===========================================
# 1. Create knowledge_bases table
# ===========================================
op.create_table(
'knowledge_bases',
sa.Column('id', postgresql.UUID(as_uuid=True), primary_key=True),
sa.Column('agent_key', sa.String(100), unique=True, nullable=False),
sa.Column('display_name', sa.String(255), nullable=False),
sa.Column('description', sa.Text(), nullable=True),
sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.func.now()),
)
# ===========================================
# 2. Create source_documents table
# ===========================================
op.create_table(
'source_documents',
sa.Column('id', postgresql.UUID(as_uuid=True), primary_key=True),
sa.Column('knowledge_base_id', postgresql.UUID(as_uuid=True),
sa.ForeignKey('knowledge_bases.id', ondelete='CASCADE'), nullable=False),
sa.Column('filename', sa.String(500), nullable=False),
sa.Column('file_storage_key', sa.String(500), nullable=False),
sa.Column('file_size_bytes', sa.Integer(), nullable=False),
sa.Column('mime_type', sa.String(255), nullable=False),
sa.Column('uploaded_by_id', postgresql.UUID(as_uuid=True),
sa.ForeignKey('users.id'), nullable=True),
sa.Column('uploaded_by_name', sa.String(255), nullable=True),
sa.Column('parsed_markdown', sa.Text(), nullable=True),
sa.Column('parse_status', sa.String(50), nullable=False, server_default='pending'),
sa.Column('parse_error', sa.Text(), nullable=True),
sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.func.now()),
)
op.create_index('ix_source_documents_kb_id', 'source_documents', ['knowledge_base_id'])
# ===========================================
# 3. Create processing_jobs table (before spec_versions due to FK)
# ===========================================
op.create_table(
'processing_jobs',
sa.Column('id', postgresql.UUID(as_uuid=True), primary_key=True),
sa.Column('knowledge_base_id', postgresql.UUID(as_uuid=True),
sa.ForeignKey('knowledge_bases.id', ondelete='CASCADE'), nullable=False),
sa.Column('status', sa.String(50), nullable=False, server_default='pending'),
sa.Column('triggered_by_id', postgresql.UUID(as_uuid=True),
sa.ForeignKey('users.id'), nullable=True),
sa.Column('triggered_by_name', sa.String(255), nullable=True),
sa.Column('total_documents', sa.Integer(), nullable=False, server_default='0'),
sa.Column('parsed_documents', sa.Integer(), nullable=False, server_default='0'),
sa.Column('spec_version_id', postgresql.UUID(as_uuid=True), nullable=True),
sa.Column('error_message', sa.Text(), nullable=True),
sa.Column('log', postgresql.JSONB(), nullable=True),
sa.Column('started_at', sa.DateTime(timezone=True), nullable=True),
sa.Column('completed_at', sa.DateTime(timezone=True), nullable=True),
sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.func.now()),
)
op.create_index('ix_processing_jobs_kb_id', 'processing_jobs', ['knowledge_base_id'])
# ===========================================
# 4. Create spec_versions table
# ===========================================
op.create_table(
'spec_versions',
sa.Column('id', postgresql.UUID(as_uuid=True), primary_key=True),
sa.Column('knowledge_base_id', postgresql.UUID(as_uuid=True),
sa.ForeignKey('knowledge_bases.id', ondelete='CASCADE'), nullable=False),
sa.Column('version_number', sa.Integer(), nullable=False),
sa.Column('content', sa.Text(), nullable=False),
sa.Column('source_document_ids', postgresql.JSONB(), nullable=True),
sa.Column('generated_by_id', postgresql.UUID(as_uuid=True),
sa.ForeignKey('users.id'), nullable=True),
sa.Column('generated_by_name', sa.String(255), nullable=True),
sa.Column('processing_job_id', postgresql.UUID(as_uuid=True),
sa.ForeignKey('processing_jobs.id'), nullable=True),
sa.Column('is_active', sa.Boolean(), nullable=False, server_default='true'),
sa.Column('char_count', sa.Integer(), nullable=False),
sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.func.now()),
sa.UniqueConstraint('knowledge_base_id', 'version_number', name='uq_kb_version_number'),
)
op.create_index('ix_spec_versions_kb_id', 'spec_versions', ['knowledge_base_id'])
# Add the FK from processing_jobs.spec_version_id -> spec_versions.id
# (deferred because of circular reference)
op.create_foreign_key(
'fk_processing_jobs_spec_version',
'processing_jobs', 'spec_versions',
['spec_version_id'], ['id'],
)
# ===========================================
# 5. Seed 5 knowledge base rows
# ===========================================
conn = op.get_bind()
kb_seeds = [
("legal", "Legal", "Legal compliance, advertising standards, disclaimers, and financial promotion rules."),
("brand_barclays", "Brand (Barclays)", "Barclays brand guidelines: logo usage, colors, typography, and design principles."),
("brand_barclaycard", "Brand (Barclaycard)", "Barclaycard brand guidelines: logo usage, colors, typography, and design principles."),
("channel_best_practices", "Channel Best Practices", "Channel-specific best practices for social, display, email, print, and OOH."),
("channel_tech_specs", "Channel Tech Specs", "Technical specifications, dimensions, file formats, and platform requirements."),
]
kb_ids = {}
for agent_key, display_name, description in kb_seeds:
kb_id = str(uuid4())
kb_ids[agent_key] = kb_id
conn.execute(
sa.text("""
INSERT INTO knowledge_bases (id, agent_key, display_name, description)
VALUES (:id, :agent_key, :display_name, :description)
ON CONFLICT (agent_key) DO NOTHING
"""),
{"id": kb_id, "agent_key": agent_key, "display_name": display_name, "description": description}
)
# ===========================================
# 6. Seed existing prompts/*.md as spec_versions v1
# ===========================================
prompts_dir = Path(__file__).parent.parent.parent.parent / "prompts"
spec_file_map = {
"legal": "legal.md",
"brand_barclays": "brand_barclays.md",
"brand_barclaycard": "brand_barclaycard.md",
"channel_best_practices": "channel_best_practices.md",
"channel_tech_specs": "channel_tech_specs.md",
}
for agent_key, filename in spec_file_map.items():
spec_path = prompts_dir / filename
if spec_path.exists():
content = spec_path.read_text(encoding="utf-8")
spec_id = str(uuid4())
conn.execute(
sa.text("""
INSERT INTO spec_versions (id, knowledge_base_id, version_number, content, generated_by_name, is_active, char_count)
VALUES (:id, :kb_id, 1, :content, :generated_by_name, true, :char_count)
"""),
{
"id": spec_id,
"kb_id": kb_ids[agent_key],
"content": content,
"generated_by_name": "System (Migration)",
"char_count": len(content),
}
)
def downgrade() -> None:
"""Drop knowledge base tables."""
op.drop_constraint('fk_processing_jobs_spec_version', 'processing_jobs', type_='foreignkey')
op.drop_table('spec_versions')
op.drop_table('processing_jobs')
op.drop_table('source_documents')
op.drop_table('knowledge_bases')