diff --git a/backend/alembic/versions/006_add_knowledge_base.py b/backend/alembic/versions/006_add_knowledge_base.py new file mode 100644 index 0000000..e59c668 --- /dev/null +++ b/backend/alembic/versions/006_add_knowledge_base.py @@ -0,0 +1,179 @@ +"""Add knowledge base tables and seed initial data + +Revision ID: 006_add_knowledge_base +Revises: 005_add_file_hash +Create Date: 2025-02-12 + +""" +from pathlib import Path +from typing import Sequence, Union +from uuid import uuid4 + +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision: str = '006_add_knowledge_base' +down_revision: Union[str, None] = '005_add_file_hash' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Create knowledge base tables and seed initial data.""" + + # =========================================== + # 1. Create knowledge_bases table + # =========================================== + op.create_table( + 'knowledge_bases', + sa.Column('id', postgresql.UUID(as_uuid=True), primary_key=True), + sa.Column('agent_key', sa.String(100), unique=True, nullable=False), + sa.Column('display_name', sa.String(255), nullable=False), + sa.Column('description', sa.Text(), nullable=True), + sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.func.now()), + ) + + # =========================================== + # 2. Create source_documents table + # =========================================== + op.create_table( + 'source_documents', + sa.Column('id', postgresql.UUID(as_uuid=True), primary_key=True), + sa.Column('knowledge_base_id', postgresql.UUID(as_uuid=True), + sa.ForeignKey('knowledge_bases.id', ondelete='CASCADE'), nullable=False), + sa.Column('filename', sa.String(500), nullable=False), + sa.Column('file_storage_key', sa.String(500), nullable=False), + sa.Column('file_size_bytes', sa.Integer(), nullable=False), + sa.Column('mime_type', sa.String(255), nullable=False), + sa.Column('uploaded_by_id', postgresql.UUID(as_uuid=True), + sa.ForeignKey('users.id'), nullable=True), + sa.Column('uploaded_by_name', sa.String(255), nullable=True), + sa.Column('parsed_markdown', sa.Text(), nullable=True), + sa.Column('parse_status', sa.String(50), nullable=False, server_default='pending'), + sa.Column('parse_error', sa.Text(), nullable=True), + sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.func.now()), + ) + op.create_index('ix_source_documents_kb_id', 'source_documents', ['knowledge_base_id']) + + # =========================================== + # 3. Create processing_jobs table (before spec_versions due to FK) + # =========================================== + op.create_table( + 'processing_jobs', + sa.Column('id', postgresql.UUID(as_uuid=True), primary_key=True), + sa.Column('knowledge_base_id', postgresql.UUID(as_uuid=True), + sa.ForeignKey('knowledge_bases.id', ondelete='CASCADE'), nullable=False), + sa.Column('status', sa.String(50), nullable=False, server_default='pending'), + sa.Column('triggered_by_id', postgresql.UUID(as_uuid=True), + sa.ForeignKey('users.id'), nullable=True), + sa.Column('triggered_by_name', sa.String(255), nullable=True), + sa.Column('total_documents', sa.Integer(), nullable=False, server_default='0'), + sa.Column('parsed_documents', sa.Integer(), nullable=False, server_default='0'), + sa.Column('spec_version_id', postgresql.UUID(as_uuid=True), nullable=True), + sa.Column('error_message', sa.Text(), nullable=True), + sa.Column('log', postgresql.JSONB(), nullable=True), + sa.Column('started_at', sa.DateTime(timezone=True), nullable=True), + sa.Column('completed_at', sa.DateTime(timezone=True), nullable=True), + sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.func.now()), + ) + op.create_index('ix_processing_jobs_kb_id', 'processing_jobs', ['knowledge_base_id']) + + # =========================================== + # 4. Create spec_versions table + # =========================================== + op.create_table( + 'spec_versions', + sa.Column('id', postgresql.UUID(as_uuid=True), primary_key=True), + sa.Column('knowledge_base_id', postgresql.UUID(as_uuid=True), + sa.ForeignKey('knowledge_bases.id', ondelete='CASCADE'), nullable=False), + sa.Column('version_number', sa.Integer(), nullable=False), + sa.Column('content', sa.Text(), nullable=False), + sa.Column('source_document_ids', postgresql.JSONB(), nullable=True), + sa.Column('generated_by_id', postgresql.UUID(as_uuid=True), + sa.ForeignKey('users.id'), nullable=True), + sa.Column('generated_by_name', sa.String(255), nullable=True), + sa.Column('processing_job_id', postgresql.UUID(as_uuid=True), + sa.ForeignKey('processing_jobs.id'), nullable=True), + sa.Column('is_active', sa.Boolean(), nullable=False, server_default='true'), + sa.Column('char_count', sa.Integer(), nullable=False), + sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.func.now()), + sa.UniqueConstraint('knowledge_base_id', 'version_number', name='uq_kb_version_number'), + ) + op.create_index('ix_spec_versions_kb_id', 'spec_versions', ['knowledge_base_id']) + + # Add the FK from processing_jobs.spec_version_id -> spec_versions.id + # (deferred because of circular reference) + op.create_foreign_key( + 'fk_processing_jobs_spec_version', + 'processing_jobs', 'spec_versions', + ['spec_version_id'], ['id'], + ) + + # =========================================== + # 5. Seed 5 knowledge base rows + # =========================================== + conn = op.get_bind() + + kb_seeds = [ + ("legal", "Legal", "Legal compliance, advertising standards, disclaimers, and financial promotion rules."), + ("brand_barclays", "Brand (Barclays)", "Barclays brand guidelines: logo usage, colors, typography, and design principles."), + ("brand_barclaycard", "Brand (Barclaycard)", "Barclaycard brand guidelines: logo usage, colors, typography, and design principles."), + ("channel_best_practices", "Channel Best Practices", "Channel-specific best practices for social, display, email, print, and OOH."), + ("channel_tech_specs", "Channel Tech Specs", "Technical specifications, dimensions, file formats, and platform requirements."), + ] + + kb_ids = {} + for agent_key, display_name, description in kb_seeds: + kb_id = str(uuid4()) + kb_ids[agent_key] = kb_id + conn.execute( + sa.text(""" + INSERT INTO knowledge_bases (id, agent_key, display_name, description) + VALUES (:id, :agent_key, :display_name, :description) + ON CONFLICT (agent_key) DO NOTHING + """), + {"id": kb_id, "agent_key": agent_key, "display_name": display_name, "description": description} + ) + + # =========================================== + # 6. Seed existing prompts/*.md as spec_versions v1 + # =========================================== + prompts_dir = Path(__file__).parent.parent.parent.parent / "prompts" + + spec_file_map = { + "legal": "legal.md", + "brand_barclays": "brand_barclays.md", + "brand_barclaycard": "brand_barclaycard.md", + "channel_best_practices": "channel_best_practices.md", + "channel_tech_specs": "channel_tech_specs.md", + } + + for agent_key, filename in spec_file_map.items(): + spec_path = prompts_dir / filename + if spec_path.exists(): + content = spec_path.read_text(encoding="utf-8") + spec_id = str(uuid4()) + conn.execute( + sa.text(""" + INSERT INTO spec_versions (id, knowledge_base_id, version_number, content, generated_by_name, is_active, char_count) + VALUES (:id, :kb_id, 1, :content, :generated_by_name, true, :char_count) + """), + { + "id": spec_id, + "kb_id": kb_ids[agent_key], + "content": content, + "generated_by_name": "System (Migration)", + "char_count": len(content), + } + ) + + +def downgrade() -> None: + """Drop knowledge base tables.""" + op.drop_constraint('fk_processing_jobs_spec_version', 'processing_jobs', type_='foreignkey') + op.drop_table('spec_versions') + op.drop_table('processing_jobs') + op.drop_table('source_documents') + op.drop_table('knowledge_bases') diff --git a/backend/app/api/__init__.py b/backend/app/api/__init__.py index 2bb843d..68f90f8 100755 --- a/backend/app/api/__init__.py +++ b/backend/app/api/__init__.py @@ -1,3 +1,4 @@ from app.api.routes import router +from app.api.knowledge_base_routes import kb_router -__all__ = ["router"] +__all__ = ["router", "kb_router"] diff --git a/backend/app/api/knowledge_base_routes.py b/backend/app/api/knowledge_base_routes.py new file mode 100644 index 0000000..b6aaffd --- /dev/null +++ b/backend/app/api/knowledge_base_routes.py @@ -0,0 +1,473 @@ +"""REST API routes for Knowledge Base management.""" +import difflib +import uuid + +from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException, UploadFile, File +from sqlalchemy.ext.asyncio import AsyncSession + +from app.api.knowledge_base_schemas import ( + KnowledgeBaseListItem, + KnowledgeBaseDetail, + SourceDocumentResponse, + ProcessingJobResponse, + SpecVersionListItem, + SpecVersionDetail, + DiffResponse, + DiffLine, +) +from app.dependencies.auth import get_current_user +from app.models.database import get_db +from app.repositories.knowledge_base_repository import KnowledgeBaseRepository +from app.services.storage_service import storage_service + +kb_router = APIRouter(prefix="/knowledge-base", tags=["knowledge-base"]) + +# Allowed MIME types for source document upload +ALLOWED_MIME_TYPES = { + "application/pdf", + "application/vnd.openxmlformats-officedocument.wordprocessingml.document", # docx + "application/vnd.openxmlformats-officedocument.presentationml.presentation", # pptx + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", # xlsx + "text/html", + "text/plain", + "text/markdown", + "image/png", + "image/jpeg", + "image/webp", +} + + +async def _get_user_info(session: AsyncSession, user_claims: dict) -> tuple: + """Extract user ID and name from claims.""" + from app.repositories.user_repository import UserRepository + user_repo = UserRepository(session) + azure_oid = user_claims.get("oid") or user_claims.get("sub") + user_id = None + user_name = user_claims.get("name", "Unknown") + if azure_oid: + user = await user_repo.get_or_create_from_azure( + azure_ad_oid=azure_oid, + email=user_claims.get("email", user_claims.get("preferred_username", "")), + name=user_name, + ) + user_id = user.id + return user_id, user_name + + +@kb_router.get("", response_model=list[KnowledgeBaseListItem]) +async def list_knowledge_bases( + db: AsyncSession = Depends(get_db), + user: dict = Depends(get_current_user), +): + """List all knowledge bases with summary info.""" + repo = KnowledgeBaseRepository(db) + kbs = await repo.list_knowledge_bases() + + results = [] + for kb in kbs: + active_spec = next((sv for sv in kb.spec_versions if sv.is_active), None) + latest_job = kb.processing_jobs[0] if kb.processing_jobs else None + + results.append(KnowledgeBaseListItem( + id=kb.id, + agent_key=kb.agent_key, + display_name=kb.display_name, + description=kb.description, + source_document_count=len(kb.source_documents), + active_spec_version=active_spec.version_number if active_spec else None, + active_spec_char_count=active_spec.char_count if active_spec else None, + latest_job_status=latest_job.status if latest_job else None, + latest_job_completed_at=latest_job.completed_at if latest_job else None, + created_at=kb.created_at, + )) + + return results + + +@kb_router.get("/{kb_id}", response_model=KnowledgeBaseDetail) +async def get_knowledge_base( + kb_id: uuid.UUID, + db: AsyncSession = Depends(get_db), + user: dict = Depends(get_current_user), +): + """Get full detail for a knowledge base.""" + repo = KnowledgeBaseRepository(db) + kb = await repo.get_knowledge_base(kb_id) + if not kb: + raise HTTPException(status_code=404, detail="Knowledge base not found") + + active_spec = next((sv for sv in kb.spec_versions if sv.is_active), None) + latest_job = kb.processing_jobs[0] if kb.processing_jobs else None + + return KnowledgeBaseDetail( + id=kb.id, + agent_key=kb.agent_key, + display_name=kb.display_name, + description=kb.description, + source_documents=[ + SourceDocumentResponse( + id=doc.id, + knowledge_base_id=doc.knowledge_base_id, + filename=doc.filename, + file_storage_key=doc.file_storage_key, + file_size_bytes=doc.file_size_bytes, + mime_type=doc.mime_type, + uploaded_by_name=doc.uploaded_by_name, + parse_status=doc.parse_status, + parse_error=doc.parse_error, + created_at=doc.created_at, + ) + for doc in sorted(kb.source_documents, key=lambda d: d.created_at, reverse=True) + ], + active_spec_version=active_spec.version_number if active_spec else None, + active_spec_char_count=active_spec.char_count if active_spec else None, + latest_job=ProcessingJobResponse( + id=latest_job.id, + knowledge_base_id=latest_job.knowledge_base_id, + status=latest_job.status, + triggered_by_name=latest_job.triggered_by_name, + total_documents=latest_job.total_documents, + parsed_documents=latest_job.parsed_documents, + spec_version_id=latest_job.spec_version_id, + error_message=latest_job.error_message, + started_at=latest_job.started_at, + completed_at=latest_job.completed_at, + created_at=latest_job.created_at, + ) if latest_job else None, + created_at=kb.created_at, + ) + + +@kb_router.post("/{kb_id}/documents", response_model=SourceDocumentResponse, status_code=201) +async def upload_source_document( + kb_id: uuid.UUID, + file: UploadFile = File(...), + db: AsyncSession = Depends(get_db), + user: dict = Depends(get_current_user), +): + """Upload a source document to a knowledge base.""" + repo = KnowledgeBaseRepository(db) + kb = await repo.get_knowledge_base(kb_id) + if not kb: + raise HTTPException(status_code=404, detail="Knowledge base not found") + + # Validate file type + if file.content_type and file.content_type not in ALLOWED_MIME_TYPES: + raise HTTPException( + status_code=400, + detail=f"File type '{file.content_type}' is not supported. Supported types: PDF, DOCX, PPTX, XLSX, HTML, TXT, MD, PNG, JPG, WebP." + ) + + user_id, user_name = await _get_user_info(db, user) + + # Read file data + file_data = await file.read() + doc_id = uuid.uuid4() + + # Store file + storage_key = await storage_service.store_kb_document( + file_data=file_data, + kb_id=kb_id, + doc_id=doc_id, + filename=file.filename or "unknown", + mime_type=file.content_type or "application/octet-stream", + ) + + # Create DB record + doc = await repo.add_source_document( + knowledge_base_id=kb_id, + filename=file.filename or "unknown", + file_storage_key=storage_key, + file_size_bytes=len(file_data), + mime_type=file.content_type or "application/octet-stream", + uploaded_by_id=user_id, + uploaded_by_name=user_name, + ) + # Set the pre-generated ID + doc.id = doc_id + + return SourceDocumentResponse( + id=doc.id, + knowledge_base_id=doc.knowledge_base_id, + filename=doc.filename, + file_storage_key=doc.file_storage_key, + file_size_bytes=doc.file_size_bytes, + mime_type=doc.mime_type, + uploaded_by_name=doc.uploaded_by_name, + parse_status=doc.parse_status, + parse_error=doc.parse_error, + created_at=doc.created_at, + ) + + +@kb_router.delete("/{kb_id}/documents/{doc_id}", status_code=204) +async def delete_source_document( + kb_id: uuid.UUID, + doc_id: uuid.UUID, + db: AsyncSession = Depends(get_db), + user: dict = Depends(get_current_user), +): + """Remove a source document from a knowledge base.""" + repo = KnowledgeBaseRepository(db) + doc = await repo.remove_source_document(doc_id) + if not doc: + raise HTTPException(status_code=404, detail="Source document not found") + + # Delete file from storage + await storage_service.delete_file(doc.file_storage_key) + + +@kb_router.post("/{kb_id}/process", response_model=ProcessingJobResponse, status_code=201) +async def trigger_processing( + kb_id: uuid.UUID, + background_tasks: BackgroundTasks, + db: AsyncSession = Depends(get_db), + user: dict = Depends(get_current_user), +): + """Trigger the document processing pipeline for a knowledge base.""" + from app.main import knowledge_base_service as kb_service + + if kb_service is None: + raise HTTPException( + status_code=503, + detail="Knowledge Base processing is not available. LLAMA_CLOUD_API_KEY is not configured." + ) + + repo = KnowledgeBaseRepository(db) + kb = await repo.get_knowledge_base(kb_id) + if not kb: + raise HTTPException(status_code=404, detail="Knowledge base not found") + + # Check for active jobs + has_active = await repo.has_active_job(kb_id) + if has_active: + raise HTTPException(status_code=409, detail="A processing job is already running for this knowledge base.") + + # Check that there are source documents + docs = await repo.get_source_documents(kb_id) + if not docs: + raise HTTPException(status_code=400, detail="No source documents to process.") + + user_id, user_name = await _get_user_info(db, user) + + # Create the job + job = await repo.create_processing_job( + knowledge_base_id=kb_id, + total_documents=len(docs), + triggered_by_id=user_id, + triggered_by_name=user_name, + ) + + # Start background processing + background_tasks.add_task( + kb_service.process_documents, + kb_id=kb_id, + job_id=job.id, + agent_key=kb.agent_key, + user_id=user_id, + user_name=user_name, + ) + + return ProcessingJobResponse( + id=job.id, + knowledge_base_id=job.knowledge_base_id, + status=job.status, + triggered_by_name=job.triggered_by_name, + total_documents=job.total_documents, + parsed_documents=job.parsed_documents, + spec_version_id=job.spec_version_id, + error_message=job.error_message, + started_at=job.started_at, + completed_at=job.completed_at, + created_at=job.created_at, + ) + + +@kb_router.get("/{kb_id}/jobs/{job_id}", response_model=ProcessingJobResponse) +async def get_processing_job( + kb_id: uuid.UUID, + job_id: uuid.UUID, + db: AsyncSession = Depends(get_db), + user: dict = Depends(get_current_user), +): + """Get the status of a processing job.""" + repo = KnowledgeBaseRepository(db) + job = await repo.get_processing_job(job_id) + if not job or job.knowledge_base_id != kb_id: + raise HTTPException(status_code=404, detail="Processing job not found") + + return ProcessingJobResponse( + id=job.id, + knowledge_base_id=job.knowledge_base_id, + status=job.status, + triggered_by_name=job.triggered_by_name, + total_documents=job.total_documents, + parsed_documents=job.parsed_documents, + spec_version_id=job.spec_version_id, + error_message=job.error_message, + started_at=job.started_at, + completed_at=job.completed_at, + created_at=job.created_at, + ) + + +@kb_router.get("/{kb_id}/versions", response_model=list[SpecVersionListItem]) +async def list_spec_versions( + kb_id: uuid.UUID, + db: AsyncSession = Depends(get_db), + user: dict = Depends(get_current_user), +): + """List all spec versions for a knowledge base.""" + repo = KnowledgeBaseRepository(db) + versions = await repo.list_spec_versions(kb_id) + + return [ + SpecVersionListItem( + id=v.id, + knowledge_base_id=v.knowledge_base_id, + version_number=v.version_number, + generated_by_name=v.generated_by_name, + source_document_ids=v.source_document_ids, + is_active=v.is_active, + char_count=v.char_count, + created_at=v.created_at, + ) + for v in versions + ] + + +@kb_router.get("/{kb_id}/versions/{version_id}", response_model=SpecVersionDetail) +async def get_spec_version( + kb_id: uuid.UUID, + version_id: uuid.UUID, + db: AsyncSession = Depends(get_db), + user: dict = Depends(get_current_user), +): + """Get full spec content for a version.""" + repo = KnowledgeBaseRepository(db) + version = await repo.get_spec_version(version_id) + if not version or version.knowledge_base_id != kb_id: + raise HTTPException(status_code=404, detail="Spec version not found") + + return SpecVersionDetail( + id=version.id, + knowledge_base_id=version.knowledge_base_id, + version_number=version.version_number, + content=version.content, + generated_by_name=version.generated_by_name, + source_document_ids=version.source_document_ids, + is_active=version.is_active, + char_count=version.char_count, + created_at=version.created_at, + ) + + +@kb_router.get("/{kb_id}/versions/{v_a}/diff/{v_b}", response_model=DiffResponse) +async def get_spec_diff( + kb_id: uuid.UUID, + v_a: uuid.UUID, + v_b: uuid.UUID, + db: AsyncSession = Depends(get_db), + user: dict = Depends(get_current_user), +): + """Compute diff between two spec versions.""" + repo = KnowledgeBaseRepository(db) + version_a = await repo.get_spec_version(v_a) + version_b = await repo.get_spec_version(v_b) + + if not version_a or version_a.knowledge_base_id != kb_id: + raise HTTPException(status_code=404, detail="Version A not found") + if not version_b or version_b.knowledge_base_id != kb_id: + raise HTTPException(status_code=404, detail="Version B not found") + + lines_a = version_a.content.splitlines(keepends=True) + lines_b = version_b.content.splitlines(keepends=True) + + diff = list(difflib.unified_diff( + lines_a, lines_b, + fromfile=f"v{version_a.version_number}", + tofile=f"v{version_b.version_number}", + lineterm="", + )) + + additions = 0 + deletions = 0 + diff_lines = [] + old_line = 0 + new_line = 0 + + for line in diff: + if line.startswith("@@"): + # Parse hunk header for line numbers + import re + match = re.match(r"@@ -(\d+)", line) + if match: + old_line = int(match.group(1)) - 1 + new_match = re.search(r"\+(\d+)", line) + new_line = int(new_match.group(1)) - 1 if new_match else 0 + diff_lines.append(DiffLine(type="context", content=line.rstrip("\n"))) + elif line.startswith("---") or line.startswith("+++"): + diff_lines.append(DiffLine(type="context", content=line.rstrip("\n"))) + elif line.startswith("+"): + additions += 1 + new_line += 1 + diff_lines.append(DiffLine( + type="add", content=line[1:].rstrip("\n"), + line_number_new=new_line, + )) + elif line.startswith("-"): + deletions += 1 + old_line += 1 + diff_lines.append(DiffLine( + type="remove", content=line[1:].rstrip("\n"), + line_number_old=old_line, + )) + else: + old_line += 1 + new_line += 1 + diff_lines.append(DiffLine( + type="context", content=line.rstrip("\n"), + line_number_old=old_line, + line_number_new=new_line, + )) + + return DiffResponse( + version_a=version_a.version_number, + version_b=version_b.version_number, + additions=additions, + deletions=deletions, + lines=diff_lines, + ) + + +@kb_router.post("/{kb_id}/versions/{version_id}/activate", response_model=SpecVersionDetail) +async def activate_spec_version( + kb_id: uuid.UUID, + version_id: uuid.UUID, + db: AsyncSession = Depends(get_db), + user: dict = Depends(get_current_user), +): + """Activate (revert to) a specific spec version.""" + repo = KnowledgeBaseRepository(db) + version = await repo.activate_spec_version(version_id) + if not version or version.knowledge_base_id != kb_id: + raise HTTPException(status_code=404, detail="Spec version not found") + + # Invalidate reference docs cache + kb = await repo.get_knowledge_base(kb_id) + if kb: + from app.main import analysis_service + if analysis_service: + analysis_service.reference_docs.invalidate_cache(kb.agent_key) + + return SpecVersionDetail( + id=version.id, + knowledge_base_id=version.knowledge_base_id, + version_number=version.version_number, + content=version.content, + generated_by_name=version.generated_by_name, + source_document_ids=version.source_document_ids, + is_active=version.is_active, + char_count=version.char_count, + created_at=version.created_at, + ) diff --git a/backend/app/api/knowledge_base_schemas.py b/backend/app/api/knowledge_base_schemas.py new file mode 100644 index 0000000..ff6a2f6 --- /dev/null +++ b/backend/app/api/knowledge_base_schemas.py @@ -0,0 +1,108 @@ +"""Pydantic schemas for Knowledge Base API.""" +import uuid +from datetime import datetime +from typing import Optional + +from pydantic import BaseModel + + +class SourceDocumentResponse(BaseModel): + id: uuid.UUID + knowledge_base_id: uuid.UUID + filename: str + file_storage_key: str + file_size_bytes: int + mime_type: str + uploaded_by_name: Optional[str] + parse_status: str + parse_error: Optional[str] + created_at: datetime + + class Config: + from_attributes = True + + +class ProcessingJobResponse(BaseModel): + id: uuid.UUID + knowledge_base_id: uuid.UUID + status: str + triggered_by_name: Optional[str] + total_documents: int + parsed_documents: int + spec_version_id: Optional[uuid.UUID] + error_message: Optional[str] + started_at: Optional[datetime] + completed_at: Optional[datetime] + created_at: datetime + + class Config: + from_attributes = True + + +class SpecVersionListItem(BaseModel): + id: uuid.UUID + knowledge_base_id: uuid.UUID + version_number: int + generated_by_name: Optional[str] + source_document_ids: Optional[list] + is_active: bool + char_count: int + created_at: datetime + + class Config: + from_attributes = True + + +class SpecVersionDetail(BaseModel): + id: uuid.UUID + knowledge_base_id: uuid.UUID + version_number: int + content: str + generated_by_name: Optional[str] + source_document_ids: Optional[list] + is_active: bool + char_count: int + created_at: datetime + + class Config: + from_attributes = True + + +class KnowledgeBaseListItem(BaseModel): + id: uuid.UUID + agent_key: str + display_name: str + description: Optional[str] + source_document_count: int + active_spec_version: Optional[int] + active_spec_char_count: Optional[int] + latest_job_status: Optional[str] + latest_job_completed_at: Optional[datetime] + created_at: datetime + + +class KnowledgeBaseDetail(BaseModel): + id: uuid.UUID + agent_key: str + display_name: str + description: Optional[str] + source_documents: list[SourceDocumentResponse] + active_spec_version: Optional[int] + active_spec_char_count: Optional[int] + latest_job: Optional[ProcessingJobResponse] + created_at: datetime + + +class DiffLine(BaseModel): + type: str # 'add', 'remove', 'context' + content: str + line_number_old: Optional[int] = None + line_number_new: Optional[int] = None + + +class DiffResponse(BaseModel): + version_a: int + version_b: int + additions: int + deletions: int + lines: list[DiffLine] diff --git a/backend/app/config.py b/backend/app/config.py index 5801621..75ca00d 100755 --- a/backend/app/config.py +++ b/backend/app/config.py @@ -35,6 +35,9 @@ class Settings: _default_storage = Path(__file__).parent.parent.parent / "storage" FILE_STORAGE_PATH: str = os.getenv("FILE_STORAGE_PATH", str(_default_storage)) + # LlamaParse API key (optional - KB processing pipeline disabled if not set) + LLAMA_CLOUD_API_KEY: str = os.getenv("LLAMA_CLOUD_API_KEY", "") + # Mailgun Configuration for support emails MAILGUN_API_URL: str = os.getenv("MAILGUN_API_URL", "") MAILGUN_API_KEY: str = os.getenv("MAILGUN_API_KEY", "") diff --git a/backend/app/main.py b/backend/app/main.py index fa8ae9c..e3d37a0 100755 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -9,7 +9,7 @@ from app.config import settings from app.services.auth_service import verify_access_token from app.dependencies.auth import get_current_user from app.models.database import init_db, close_db -from app.api import router as api_router +from app.api import router as api_router, kb_router # Configure logging logging.basicConfig( @@ -36,11 +36,13 @@ from app.websocket.handlers import handle_analyze_message from app.services.gemini_service import GeminiService from app.services.reference_docs import ReferenceDocsService from app.services.analysis_service import AnalysisService +from app.services.knowledge_base_service import KnowledgeBaseService # Global services - initialized at startup manager = ConnectionManager() analysis_service: AnalysisService | None = None +knowledge_base_service: KnowledgeBaseService | None = None @asynccontextmanager @@ -50,16 +52,18 @@ async def lifespan(app: FastAPI): Loads reference documents and initializes the analysis service. """ - global analysis_service + global analysis_service, knowledge_base_service # Validate settings settings.validate() # Initialize database print("Initializing database connection...") + db_available = False try: await init_db() print("Database initialized successfully") + db_available = True except Exception as e: logger.warning(f"Database initialization failed (may not be available): {e}") print(f"Warning: Database not available - running in stateless mode") @@ -68,6 +72,16 @@ async def lifespan(app: FastAPI): print("Loading reference documents...") reference_docs = ReferenceDocsService(settings.REFERENCE_DOCS_PATH) + # Load specs from DB if database is available + if db_available: + try: + from app.models.database import async_session_factory + async with async_session_factory() as session: + print("Loading specs from database...") + await reference_docs.load_specs_from_db(session) + except Exception as e: + logger.warning(f"Failed to load specs from DB (falling back to files): {e}") + # Log document info doc_summary = reference_docs.get_context_summary() print(f" Brand documents: {len(doc_summary['brand_files'])} files ({doc_summary['brand_context_length']} chars)") @@ -79,6 +93,16 @@ async def lifespan(app: FastAPI): print("Initializing analysis service...") analysis_service = AnalysisService(gemini_service, reference_docs) + # Initialize Knowledge Base service (requires LlamaParse API key) + if settings.LLAMA_CLOUD_API_KEY: + from app.services.llamaparse_service import LlamaParseService + print("Initializing LlamaParse service...") + llamaparse_service = LlamaParseService(settings.LLAMA_CLOUD_API_KEY) + knowledge_base_service = KnowledgeBaseService(llamaparse_service, gemini_service, reference_docs) + print("Knowledge Base pipeline ready!") + else: + print("LLAMA_CLOUD_API_KEY not set - Knowledge Base processing pipeline disabled") + print("Backend ready!") yield @@ -107,6 +131,7 @@ app.add_middleware( # Include API routes app.include_router(api_router, prefix="/api") +app.include_router(kb_router, prefix="/api") @app.get("/health") diff --git a/backend/app/models/models.py b/backend/app/models/models.py index b81f926..3658436 100755 --- a/backend/app/models/models.py +++ b/backend/app/models/models.py @@ -179,3 +179,104 @@ class DropdownOption(Base): back_populates="parent", order_by="DropdownOption.display_order" ) + + +class KnowledgeBase(Base): + """An agent knowledge base entry (one per agent spec type).""" + __tablename__ = "knowledge_bases" + + id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) + agent_key: Mapped[str] = mapped_column(String(100), unique=True, nullable=False) + display_name: Mapped[str] = mapped_column(String(255), nullable=False) + description: Mapped[Optional[str]] = mapped_column(Text, nullable=True) + created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now()) + + # Relationships + source_documents: Mapped[list["SourceDocument"]] = relationship( + "SourceDocument", back_populates="knowledge_base", cascade="all, delete-orphan" + ) + spec_versions: Mapped[list["SpecVersion"]] = relationship( + "SpecVersion", back_populates="knowledge_base", cascade="all, delete-orphan", + order_by="desc(SpecVersion.version_number)" + ) + processing_jobs: Mapped[list["ProcessingJob"]] = relationship( + "ProcessingJob", back_populates="knowledge_base", cascade="all, delete-orphan", + order_by="desc(ProcessingJob.created_at)" + ) + + +class SourceDocument(Base): + """An uploaded source document linked to a knowledge base.""" + __tablename__ = "source_documents" + + id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) + knowledge_base_id: Mapped[uuid.UUID] = mapped_column( + UUID(as_uuid=True), ForeignKey("knowledge_bases.id", ondelete="CASCADE"), nullable=False + ) + filename: Mapped[str] = mapped_column(String(500), nullable=False) + file_storage_key: Mapped[str] = mapped_column(String(500), nullable=False) + file_size_bytes: Mapped[int] = mapped_column(Integer, nullable=False) + mime_type: Mapped[str] = mapped_column(String(255), nullable=False) + uploaded_by_id: Mapped[Optional[uuid.UUID]] = mapped_column(UUID(as_uuid=True), ForeignKey("users.id"), nullable=True) + uploaded_by_name: Mapped[Optional[str]] = mapped_column(String(255), nullable=True) + parsed_markdown: Mapped[Optional[str]] = mapped_column(Text, nullable=True) + parse_status: Mapped[str] = mapped_column(String(50), default="pending", nullable=False) + parse_error: Mapped[Optional[str]] = mapped_column(Text, nullable=True) + created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now()) + + # Relationships + knowledge_base: Mapped["KnowledgeBase"] = relationship("KnowledgeBase", back_populates="source_documents") + + +class SpecVersion(Base): + """A generated spec document with version history.""" + __tablename__ = "spec_versions" + + id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) + knowledge_base_id: Mapped[uuid.UUID] = mapped_column( + UUID(as_uuid=True), ForeignKey("knowledge_bases.id", ondelete="CASCADE"), nullable=False + ) + version_number: Mapped[int] = mapped_column(Integer, nullable=False) + content: Mapped[str] = mapped_column(Text, nullable=False) + source_document_ids: Mapped[Optional[dict]] = mapped_column(JSONB, nullable=True) + generated_by_id: Mapped[Optional[uuid.UUID]] = mapped_column(UUID(as_uuid=True), ForeignKey("users.id"), nullable=True) + generated_by_name: Mapped[Optional[str]] = mapped_column(String(255), nullable=True) + processing_job_id: Mapped[Optional[uuid.UUID]] = mapped_column( + UUID(as_uuid=True), ForeignKey("processing_jobs.id"), nullable=True + ) + is_active: Mapped[bool] = mapped_column(Boolean, default=True, nullable=False) + char_count: Mapped[int] = mapped_column(Integer, nullable=False) + created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now()) + + # Relationships + knowledge_base: Mapped["KnowledgeBase"] = relationship("KnowledgeBase", back_populates="spec_versions") + processing_job: Mapped[Optional["ProcessingJob"]] = relationship("ProcessingJob", back_populates="spec_version") + + __table_args__ = ( + UniqueConstraint("knowledge_base_id", "version_number", name="uq_kb_version_number"), + ) + + +class ProcessingJob(Base): + """Tracks a document processing pipeline run.""" + __tablename__ = "processing_jobs" + + id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) + knowledge_base_id: Mapped[uuid.UUID] = mapped_column( + UUID(as_uuid=True), ForeignKey("knowledge_bases.id", ondelete="CASCADE"), nullable=False + ) + status: Mapped[str] = mapped_column(String(50), default="pending", nullable=False) + triggered_by_id: Mapped[Optional[uuid.UUID]] = mapped_column(UUID(as_uuid=True), ForeignKey("users.id"), nullable=True) + triggered_by_name: Mapped[Optional[str]] = mapped_column(String(255), nullable=True) + total_documents: Mapped[int] = mapped_column(Integer, nullable=False, default=0) + parsed_documents: Mapped[int] = mapped_column(Integer, default=0, nullable=False) + spec_version_id: Mapped[Optional[uuid.UUID]] = mapped_column(UUID(as_uuid=True), ForeignKey("spec_versions.id"), nullable=True) + error_message: Mapped[Optional[str]] = mapped_column(Text, nullable=True) + log: Mapped[Optional[dict]] = mapped_column(JSONB, nullable=True) + started_at: Mapped[Optional[datetime]] = mapped_column(DateTime(timezone=True), nullable=True) + completed_at: Mapped[Optional[datetime]] = mapped_column(DateTime(timezone=True), nullable=True) + created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now()) + + # Relationships + knowledge_base: Mapped["KnowledgeBase"] = relationship("KnowledgeBase", back_populates="processing_jobs") + spec_version: Mapped[Optional["SpecVersion"]] = relationship("SpecVersion", back_populates="processing_job") diff --git a/backend/app/repositories/__init__.py b/backend/app/repositories/__init__.py index dbb9ef9..d390bb2 100755 --- a/backend/app/repositories/__init__.py +++ b/backend/app/repositories/__init__.py @@ -3,6 +3,7 @@ from app.repositories.proof_repository import ProofRepository from app.repositories.user_repository import UserRepository from app.repositories.audit_repository import AuditRepository from app.repositories.dropdown_repository import DropdownRepository +from app.repositories.knowledge_base_repository import KnowledgeBaseRepository __all__ = [ "CampaignRepository", @@ -10,4 +11,5 @@ __all__ = [ "UserRepository", "AuditRepository", "DropdownRepository", + "KnowledgeBaseRepository", ] diff --git a/backend/app/repositories/knowledge_base_repository.py b/backend/app/repositories/knowledge_base_repository.py new file mode 100644 index 0000000..bfcf1a3 --- /dev/null +++ b/backend/app/repositories/knowledge_base_repository.py @@ -0,0 +1,299 @@ +import uuid +from datetime import datetime, timezone +from typing import Optional + +from sqlalchemy import select, func +from sqlalchemy.ext.asyncio import AsyncSession +from sqlalchemy.orm import selectinload + +from app.models.models import KnowledgeBase, SourceDocument, SpecVersion, ProcessingJob + + +class KnowledgeBaseRepository: + """Repository for knowledge base database operations.""" + + def __init__(self, session: AsyncSession): + self.session = session + + # ---- Knowledge Bases ---- + + async def list_knowledge_bases(self) -> list[KnowledgeBase]: + """List all knowledge bases with eager-loaded relationships.""" + query = ( + select(KnowledgeBase) + .options( + selectinload(KnowledgeBase.source_documents), + selectinload(KnowledgeBase.spec_versions), + selectinload(KnowledgeBase.processing_jobs), + ) + .order_by(KnowledgeBase.display_name) + ) + result = await self.session.execute(query) + return list(result.scalars().all()) + + async def get_knowledge_base(self, kb_id: uuid.UUID) -> Optional[KnowledgeBase]: + """Get a knowledge base by ID with all relationships.""" + query = ( + select(KnowledgeBase) + .options( + selectinload(KnowledgeBase.source_documents), + selectinload(KnowledgeBase.spec_versions), + selectinload(KnowledgeBase.processing_jobs), + ) + .where(KnowledgeBase.id == kb_id) + ) + result = await self.session.execute(query) + return result.scalar_one_or_none() + + async def get_knowledge_base_by_key(self, agent_key: str) -> Optional[KnowledgeBase]: + """Get a knowledge base by agent_key.""" + query = select(KnowledgeBase).where(KnowledgeBase.agent_key == agent_key) + result = await self.session.execute(query) + return result.scalar_one_or_none() + + # ---- Source Documents ---- + + async def add_source_document( + self, + knowledge_base_id: uuid.UUID, + filename: str, + file_storage_key: str, + file_size_bytes: int, + mime_type: str, + uploaded_by_id: Optional[uuid.UUID] = None, + uploaded_by_name: Optional[str] = None, + ) -> SourceDocument: + """Add a source document to a knowledge base.""" + doc = SourceDocument( + knowledge_base_id=knowledge_base_id, + filename=filename, + file_storage_key=file_storage_key, + file_size_bytes=file_size_bytes, + mime_type=mime_type, + uploaded_by_id=uploaded_by_id, + uploaded_by_name=uploaded_by_name, + ) + self.session.add(doc) + await self.session.flush() + return doc + + async def remove_source_document(self, doc_id: uuid.UUID) -> Optional[SourceDocument]: + """Remove a source document by ID. Returns the deleted doc or None.""" + query = select(SourceDocument).where(SourceDocument.id == doc_id) + result = await self.session.execute(query) + doc = result.scalar_one_or_none() + if doc: + await self.session.delete(doc) + await self.session.flush() + return doc + + async def get_source_documents(self, kb_id: uuid.UUID) -> list[SourceDocument]: + """Get all source documents for a knowledge base.""" + query = ( + select(SourceDocument) + .where(SourceDocument.knowledge_base_id == kb_id) + .order_by(SourceDocument.created_at.desc()) + ) + result = await self.session.execute(query) + return list(result.scalars().all()) + + async def get_source_document(self, doc_id: uuid.UUID) -> Optional[SourceDocument]: + """Get a single source document by ID.""" + query = select(SourceDocument).where(SourceDocument.id == doc_id) + result = await self.session.execute(query) + return result.scalar_one_or_none() + + async def update_source_document_parse_status( + self, + doc_id: uuid.UUID, + status: str, + parsed_markdown: Optional[str] = None, + parse_error: Optional[str] = None, + ) -> None: + """Update parse status of a source document.""" + query = select(SourceDocument).where(SourceDocument.id == doc_id) + result = await self.session.execute(query) + doc = result.scalar_one_or_none() + if doc: + doc.parse_status = status + if parsed_markdown is not None: + doc.parsed_markdown = parsed_markdown + if parse_error is not None: + doc.parse_error = parse_error + await self.session.flush() + + # ---- Spec Versions ---- + + async def get_active_spec_by_key(self, agent_key: str) -> Optional[SpecVersion]: + """Get the active spec version for a given agent key.""" + query = ( + select(SpecVersion) + .join(KnowledgeBase) + .where(KnowledgeBase.agent_key == agent_key) + .where(SpecVersion.is_active == True) + ) + result = await self.session.execute(query) + return result.scalar_one_or_none() + + async def list_spec_versions(self, kb_id: uuid.UUID) -> list[SpecVersion]: + """List all spec versions for a knowledge base.""" + query = ( + select(SpecVersion) + .where(SpecVersion.knowledge_base_id == kb_id) + .order_by(SpecVersion.version_number.desc()) + ) + result = await self.session.execute(query) + return list(result.scalars().all()) + + async def get_spec_version(self, version_id: uuid.UUID) -> Optional[SpecVersion]: + """Get a spec version by ID.""" + query = select(SpecVersion).where(SpecVersion.id == version_id) + result = await self.session.execute(query) + return result.scalar_one_or_none() + + async def create_spec_version( + self, + knowledge_base_id: uuid.UUID, + content: str, + source_document_ids: Optional[list] = None, + generated_by_id: Optional[uuid.UUID] = None, + generated_by_name: Optional[str] = None, + processing_job_id: Optional[uuid.UUID] = None, + ) -> SpecVersion: + """Create a new spec version, auto-incrementing version number and deactivating prior.""" + # Get next version number + max_query = ( + select(func.coalesce(func.max(SpecVersion.version_number), 0)) + .where(SpecVersion.knowledge_base_id == knowledge_base_id) + ) + result = await self.session.execute(max_query) + next_version = result.scalar() + 1 + + # Deactivate all existing versions for this KB + deactivate_query = ( + select(SpecVersion) + .where(SpecVersion.knowledge_base_id == knowledge_base_id) + .where(SpecVersion.is_active == True) + ) + deactivate_result = await self.session.execute(deactivate_query) + for sv in deactivate_result.scalars().all(): + sv.is_active = False + + # Create the new version + spec = SpecVersion( + knowledge_base_id=knowledge_base_id, + version_number=next_version, + content=content, + source_document_ids=source_document_ids, + generated_by_id=generated_by_id, + generated_by_name=generated_by_name, + processing_job_id=processing_job_id, + is_active=True, + char_count=len(content), + ) + self.session.add(spec) + await self.session.flush() + return spec + + async def activate_spec_version(self, version_id: uuid.UUID) -> Optional[SpecVersion]: + """Activate a specific version (revert), deactivating all others for same KB.""" + query = select(SpecVersion).where(SpecVersion.id == version_id) + result = await self.session.execute(query) + target = result.scalar_one_or_none() + if not target: + return None + + # Deactivate all versions for this KB + all_query = ( + select(SpecVersion) + .where(SpecVersion.knowledge_base_id == target.knowledge_base_id) + .where(SpecVersion.is_active == True) + ) + all_result = await self.session.execute(all_query) + for sv in all_result.scalars().all(): + sv.is_active = False + + # Activate the target + target.is_active = True + await self.session.flush() + return target + + # ---- Processing Jobs ---- + + async def create_processing_job( + self, + knowledge_base_id: uuid.UUID, + total_documents: int, + triggered_by_id: Optional[uuid.UUID] = None, + triggered_by_name: Optional[str] = None, + ) -> ProcessingJob: + """Create a new processing job.""" + job = ProcessingJob( + knowledge_base_id=knowledge_base_id, + total_documents=total_documents, + triggered_by_id=triggered_by_id, + triggered_by_name=triggered_by_name, + started_at=datetime.now(timezone.utc), + ) + self.session.add(job) + await self.session.flush() + return job + + async def update_processing_job( + self, + job_id: uuid.UUID, + status: Optional[str] = None, + parsed_documents: Optional[int] = None, + spec_version_id: Optional[uuid.UUID] = None, + error_message: Optional[str] = None, + completed_at: Optional[datetime] = None, + ) -> Optional[ProcessingJob]: + """Update a processing job's fields.""" + query = select(ProcessingJob).where(ProcessingJob.id == job_id) + result = await self.session.execute(query) + job = result.scalar_one_or_none() + if not job: + return None + + if status is not None: + job.status = status + if parsed_documents is not None: + job.parsed_documents = parsed_documents + if spec_version_id is not None: + job.spec_version_id = spec_version_id + if error_message is not None: + job.error_message = error_message + if completed_at is not None: + job.completed_at = completed_at + + await self.session.flush() + return job + + async def get_processing_job(self, job_id: uuid.UUID) -> Optional[ProcessingJob]: + """Get a processing job by ID.""" + query = select(ProcessingJob).where(ProcessingJob.id == job_id) + result = await self.session.execute(query) + return result.scalar_one_or_none() + + async def get_latest_processing_job(self, kb_id: uuid.UUID) -> Optional[ProcessingJob]: + """Get the most recent processing job for a knowledge base.""" + query = ( + select(ProcessingJob) + .where(ProcessingJob.knowledge_base_id == kb_id) + .order_by(ProcessingJob.created_at.desc()) + .limit(1) + ) + result = await self.session.execute(query) + return result.scalar_one_or_none() + + async def has_active_job(self, kb_id: uuid.UUID) -> bool: + """Check if there's an active (non-terminal) processing job for this KB.""" + active_statuses = ["pending", "parsing_documents", "distilling"] + query = ( + select(func.count()) + .select_from(ProcessingJob) + .where(ProcessingJob.knowledge_base_id == kb_id) + .where(ProcessingJob.status.in_(active_statuses)) + ) + result = await self.session.execute(query) + return result.scalar() > 0 diff --git a/backend/app/services/knowledge_base_service.py b/backend/app/services/knowledge_base_service.py new file mode 100644 index 0000000..f48ad71 --- /dev/null +++ b/backend/app/services/knowledge_base_service.py @@ -0,0 +1,218 @@ +import logging +import uuid +from datetime import datetime, timezone +from typing import Optional + +from app.models.database import async_session_factory +from app.repositories.knowledge_base_repository import KnowledgeBaseRepository +from app.services.storage_service import storage_service + +logger = logging.getLogger(__name__) + +# Distillation prompt templates per agent type +DISTILLATION_PROMPTS = { + "legal": """You are a compliance documentation specialist. Below is raw reference material about legal compliance, advertising standards, financial promotions, and disclaimers relevant to Barclays marketing materials. + +Your task is to distil this into a clear, structured specification document that an AI compliance agent can use to review marketing proofs. Organise the content into logical sections with clear headings. Focus on actionable rules, required disclaimers, prohibited content, and compliance requirements. + +Remove any redundancy, marketing fluff, or content not relevant to compliance checking. Preserve all specific rules, thresholds, and requirements verbatim. + +RAW REFERENCE MATERIAL: +{combined_markdown} + +OUTPUT: A well-structured markdown specification document.""", + + "brand_barclays": """You are a brand guidelines specialist. Below is raw reference material about Barclays brand guidelines including logo usage, colour palettes, typography, design principles, and visual identity standards. + +Your task is to distil this into a clear, structured specification document that an AI brand compliance agent can use to review marketing proofs. Organise the content into logical sections: logo rules, colour specifications (with exact hex/RGB values), typography rules, spacing/layout requirements, do's and don'ts. + +Remove any redundancy or content not directly relevant to visual brand compliance checking. Preserve all specific measurements, colour values, and rules verbatim. + +RAW REFERENCE MATERIAL: +{combined_markdown} + +OUTPUT: A well-structured markdown specification document.""", + + "brand_barclaycard": """You are a brand guidelines specialist. Below is raw reference material about Barclaycard brand guidelines including logo usage, colour palettes, typography, design principles, and visual identity standards. + +Your task is to distil this into a clear, structured specification document that an AI brand compliance agent can use to review marketing proofs. Organise the content into logical sections: logo rules, colour specifications (with exact hex/RGB values), typography rules, spacing/layout requirements, do's and don'ts. + +Remove any redundancy or content not directly relevant to visual brand compliance checking. Preserve all specific measurements, colour values, and rules verbatim. + +RAW REFERENCE MATERIAL: +{combined_markdown} + +OUTPUT: A well-structured markdown specification document.""", + + "channel_best_practices": """You are a marketing channel specialist. Below is raw reference material about best practices for various marketing channels (social media, display, email, print, OOH) relevant to Barclays marketing. + +Your task is to distil this into a clear, structured specification document that an AI channel compliance agent can use to review marketing proofs. Organise by channel type, then by platform/format. Focus on content guidelines, accessibility requirements, and engagement best practices. + +Remove any redundancy or content not directly relevant to proof review. Preserve all specific recommendations and requirements. + +RAW REFERENCE MATERIAL: +{combined_markdown} + +OUTPUT: A well-structured markdown specification document.""", + + "channel_tech_specs": """You are a marketing production specialist. Below is raw reference material about technical specifications for various marketing channels (dimensions, file formats, file sizes, resolution requirements, platform constraints). + +Your task is to distil this into a clear, structured specification document that an AI technical compliance agent can use to review marketing proofs. Organise by channel, then platform, then format. Use tables where appropriate for dimensions and specs. + +Remove any redundancy or content not directly relevant to technical spec checking. Preserve all specific dimensions, file size limits, format requirements, and platform constraints verbatim. + +RAW REFERENCE MATERIAL: +{combined_markdown} + +OUTPUT: A well-structured markdown specification document.""", +} + + +class KnowledgeBaseService: + """Orchestrates the document processing pipeline.""" + + def __init__(self, llamaparse_service, gemini_service, reference_docs_service): + self.llamaparse = llamaparse_service + self.gemini = gemini_service + self.reference_docs = reference_docs_service + + async def process_documents( + self, + kb_id: uuid.UUID, + job_id: uuid.UUID, + agent_key: str, + user_id: Optional[uuid.UUID] = None, + user_name: Optional[str] = None, + ) -> None: + """ + Run the full processing pipeline as a background task. + + 1. Parse each source document with LlamaParse + 2. Combine parsed markdown + 3. Distil with Gemini into a spec + 4. Save as new SpecVersion + 5. Invalidate ReferenceDocsService cache + """ + async with async_session_factory() as session: + try: + repo = KnowledgeBaseRepository(session) + + # Update job status to parsing + await repo.update_processing_job(job_id, status="parsing_documents") + await session.commit() + + # Get all source documents for this KB + docs = await repo.get_source_documents(kb_id) + if not docs: + await repo.update_processing_job( + job_id, status="failed", + error_message="No source documents found.", + completed_at=datetime.now(timezone.utc), + ) + await session.commit() + return + + # Parse each document + parsed_count = 0 + combined_parts = [] + source_doc_ids = [] + + for doc in docs: + try: + # Load file data from storage + file_data = await storage_service.get_file(doc.file_storage_key) + if not file_data: + logger.error(f"[KB_SERVICE] File not found: {doc.file_storage_key}") + await repo.update_source_document_parse_status( + doc.id, "error", parse_error="File not found in storage." + ) + await session.commit() + continue + + # Parse with LlamaParse + await repo.update_source_document_parse_status(doc.id, "parsing") + await session.commit() + + markdown = await self.llamaparse.parse_document(file_data, doc.filename) + + # Update parse status + await repo.update_source_document_parse_status( + doc.id, "parsed", parsed_markdown=markdown + ) + parsed_count += 1 + await repo.update_processing_job(job_id, parsed_documents=parsed_count) + await session.commit() + + if markdown.strip(): + combined_parts.append(f"# {doc.filename}\n\n{markdown}") + source_doc_ids.append(str(doc.id)) + + except Exception as e: + logger.error(f"[KB_SERVICE] Error parsing {doc.filename}: {e}") + await repo.update_source_document_parse_status( + doc.id, "error", parse_error=str(e) + ) + parsed_count += 1 + await repo.update_processing_job(job_id, parsed_documents=parsed_count) + await session.commit() + + if not combined_parts: + await repo.update_processing_job( + job_id, status="failed", + error_message="No documents were successfully parsed.", + completed_at=datetime.now(timezone.utc), + ) + await session.commit() + return + + # Distil with Gemini + await repo.update_processing_job(job_id, status="distilling") + await session.commit() + + combined_markdown = "\n\n---\n\n".join(combined_parts) + prompt_template = DISTILLATION_PROMPTS.get(agent_key, DISTILLATION_PROMPTS["legal"]) + prompt = prompt_template.format(combined_markdown=combined_markdown) + + logger.info(f"[KB_SERVICE] Sending {len(combined_markdown)} chars to Gemini for distillation") + response = await self.gemini.client.aio.models.generate_content( + model=self.gemini.model, + contents=prompt, + ) + spec_content = response.text.strip() + logger.info(f"[KB_SERVICE] Distillation complete: {len(spec_content)} chars") + + # Save as new spec version + spec = await repo.create_spec_version( + knowledge_base_id=kb_id, + content=spec_content, + source_document_ids=source_doc_ids, + generated_by_id=user_id, + generated_by_name=user_name, + processing_job_id=job_id, + ) + await session.commit() + + # Update job as completed + await repo.update_processing_job( + job_id, + status="completed", + spec_version_id=spec.id, + completed_at=datetime.now(timezone.utc), + ) + await session.commit() + + # Invalidate reference docs cache + self.reference_docs.invalidate_cache(agent_key) + logger.info(f"[KB_SERVICE] Pipeline complete for {agent_key}, spec version {spec.version_number}") + + except Exception as e: + logger.error(f"[KB_SERVICE] Pipeline failed for job {job_id}: {e}") + try: + await repo.update_processing_job( + job_id, status="failed", + error_message=str(e), + completed_at=datetime.now(timezone.utc), + ) + await session.commit() + except Exception: + logger.error("[KB_SERVICE] Failed to update job status after error") diff --git a/backend/app/services/llamaparse_service.py b/backend/app/services/llamaparse_service.py new file mode 100644 index 0000000..9828d47 --- /dev/null +++ b/backend/app/services/llamaparse_service.py @@ -0,0 +1,58 @@ +import logging +import tempfile +from pathlib import Path + +logger = logging.getLogger(__name__) + + +class LlamaParseService: + """Service for parsing documents using LlamaParse.""" + + def __init__(self, api_key: str): + self.api_key = api_key + + async def parse_document(self, file_data: bytes, filename: str) -> str: + """ + Parse a document and return its content as markdown. + + Args: + file_data: Raw bytes of the document + filename: Original filename (used for format detection) + + Returns: + Parsed markdown text + """ + from llama_cloud_services import LlamaParse + + logger.info(f"[LLAMAPARSE] Starting parse for '{filename}' ({len(file_data)} bytes)") + + parser = LlamaParse( + api_key=self.api_key, + num_workers=1, + verbose=True, + language="en", + ) + + # Write bytes to a temp file since LlamaParse needs a file path + suffix = Path(filename).suffix or ".pdf" + with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp: + tmp.write(file_data) + tmp_path = tmp.name + + try: + documents = await parser.aload_data(tmp_path) + if not documents: + logger.warning(f"[LLAMAPARSE] No documents returned for '{filename}'") + return "" + + # Combine all document pages into a single markdown string + combined = "\n\n".join(doc.text for doc in documents if doc.text) + logger.info(f"[LLAMAPARSE] Parsed '{filename}' -> {len(combined)} chars from {len(documents)} pages") + return combined + + finally: + # Clean up temp file + try: + Path(tmp_path).unlink() + except OSError: + pass diff --git a/backend/app/services/reference_docs.py b/backend/app/services/reference_docs.py index ceeb92c..b565ed8 100755 --- a/backend/app/services/reference_docs.py +++ b/backend/app/services/reference_docs.py @@ -29,6 +29,47 @@ class ReferenceDocsService: self._channel_tech_specs_spec: str | None = None self._legal_spec: str | None = None + # DB-backed spec cache (takes priority over file-based) + self._db_specs: dict[str, str] = {} + + async def load_specs_from_db(self, session) -> None: + """Load active spec content from DB for all agent keys.""" + from app.repositories.knowledge_base_repository import KnowledgeBaseRepository + + repo = KnowledgeBaseRepository(session) + agent_keys = [ + "legal", "brand_barclays", "brand_barclaycard", + "channel_best_practices", "channel_tech_specs", + ] + for key in agent_keys: + spec = await repo.get_active_spec_by_key(key) + if spec and spec.content: + self._db_specs[key] = spec.content + print(f" Loaded DB spec for {key}: {len(spec.content)} chars (v{spec.version_number})") + + def invalidate_cache(self, agent_key: str | None = None) -> None: + """Clear cached specs. If agent_key is None, clear all.""" + if agent_key is None: + self._db_specs.clear() + self._barclaycard_brand_spec = None + self._barclays_brand_spec = None + self._channel_best_practices_spec = None + self._channel_tech_specs_spec = None + self._legal_spec = None + else: + self._db_specs.pop(agent_key, None) + # Also clear the file-based cache so next call re-checks DB + cache_map = { + "legal": "_legal_spec", + "brand_barclays": "_barclays_brand_spec", + "brand_barclaycard": "_barclaycard_brand_spec", + "channel_best_practices": "_channel_best_practices_spec", + "channel_tech_specs": "_channel_tech_specs_spec", + } + attr = cache_map.get(agent_key) + if attr: + setattr(self, attr, None) + def get_brand_context(self) -> str: """Load and return all brand guideline documents as a single context string.""" if self._brand_context is None: @@ -37,7 +78,11 @@ class ReferenceDocsService: return self._brand_context def get_barclaycard_brand_spec(self) -> str: - """Load and return the Barclaycard brand specification from prompts directory.""" + """Load and return the Barclaycard brand specification.""" + # Check DB cache first + if "brand_barclaycard" in self._db_specs: + return self._db_specs["brand_barclaycard"] + if self._barclaycard_brand_spec is None: spec_path = self.prompts_path / "brand_barclaycard.md" try: @@ -53,7 +98,11 @@ class ReferenceDocsService: return self._barclaycard_brand_spec def get_barclays_brand_spec(self) -> str: - """Load and return the Barclays brand specification from prompts directory.""" + """Load and return the Barclays brand specification.""" + # Check DB cache first + if "brand_barclays" in self._db_specs: + return self._db_specs["brand_barclays"] + # Check cache first if not hasattr(self, '_barclays_brand_spec'): self._barclays_brand_spec = None @@ -80,7 +129,11 @@ class ReferenceDocsService: return self._channel_context def get_channel_best_practices_spec(self) -> str: - """Load and return the Channel Best Practices specification from prompts directory.""" + """Load and return the Channel Best Practices specification.""" + # Check DB cache first + if "channel_best_practices" in self._db_specs: + return self._db_specs["channel_best_practices"] + if self._channel_best_practices_spec is None: spec_path = self.prompts_path / "channel_best_practices.md" try: @@ -95,7 +148,11 @@ class ReferenceDocsService: return self._channel_best_practices_spec def get_channel_tech_specs_spec(self) -> str: - """Load and return the Channel Tech Specs specification from prompts directory.""" + """Load and return the Channel Tech Specs specification.""" + # Check DB cache first + if "channel_tech_specs" in self._db_specs: + return self._db_specs["channel_tech_specs"] + if self._channel_tech_specs_spec is None: spec_path = self.prompts_path / "channel_tech_specs.md" try: @@ -110,7 +167,11 @@ class ReferenceDocsService: return self._channel_tech_specs_spec def get_legal_spec(self) -> str: - """Load and return the Legal specification from prompts directory.""" + """Load and return the Legal specification.""" + # Check DB cache first + if "legal" in self._db_specs: + return self._db_specs["legal"] + if self._legal_spec is None: spec_path = self.prompts_path / "legal.md" try: diff --git a/backend/app/services/storage_service.py b/backend/app/services/storage_service.py index 8691270..ccd9bc9 100755 --- a/backend/app/services/storage_service.py +++ b/backend/app/services/storage_service.py @@ -110,6 +110,27 @@ class StorageService: """Calculate MD5 checksum of file data.""" return hashlib.md5(file_data).hexdigest() + async def store_kb_document( + self, + file_data: bytes, + kb_id: uuid.UUID, + doc_id: uuid.UUID, + filename: str, + mime_type: str, + ) -> str: + """Store a knowledge base source document and return the storage key.""" + # Sanitize filename + safe_name = "".join(c if c.isalnum() or c in "-_." else "_" for c in filename) + storage_key = f"kb/{kb_id}/{doc_id}_{safe_name}" + + file_path = self._get_file_path(storage_key) + file_path.parent.mkdir(parents=True, exist_ok=True) + + async with aiofiles.open(file_path, "wb") as f: + await f.write(file_data) + + return storage_key + # Singleton instance storage_service = StorageService() diff --git a/backend/requirements.txt b/backend/requirements.txt index 2d0e86e..92bc384 100755 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -12,3 +12,4 @@ sqlalchemy[asyncio]>=2.0.0 asyncpg>=0.29.0 alembic>=1.13.0 PyMuPDF>=1.23.0 +llama-cloud-services>=0.6.0 diff --git a/frontend/App.tsx b/frontend/App.tsx index bbf11bc..c12676d 100755 --- a/frontend/App.tsx +++ b/frontend/App.tsx @@ -19,8 +19,9 @@ import { Campaigns } from './components/Campaigns'; import { Auditing } from './components/Auditing'; import { Login } from './components/Login'; import { WIPReviewer } from './components/WIPReviewer'; +import { KnowledgeBase } from './components/KnowledgeBase'; -type View = 'Home' | 'Analytics' | 'Campaigns' | 'WIP Reviewer' | 'CopyGenAI' | 'Settings' | 'Profile' | 'Auditing'; +type View = 'Home' | 'Analytics' | 'Campaigns' | 'WIP Reviewer' | 'CopyGenAI' | 'Settings' | 'Profile' | 'Auditing' | 'Knowledge Base'; export interface DropdownOptions { campaigns: string[]; @@ -828,8 +829,10 @@ const App: React.FC = () => { errorItems={errorItems} onNavigate={handleNavigateToAuditedItem} />; + case 'Knowledge Base': + return ; case 'Settings': - return { onNavigate={(view) => handleNavigate(view as View)} userName={userInfo?.name} userEmail={userInfo?.email} + isAdmin={true} />
diff --git a/frontend/components/KnowledgeBase.tsx b/frontend/components/KnowledgeBase.tsx new file mode 100644 index 0000000..d44b7f0 --- /dev/null +++ b/frontend/components/KnowledgeBase.tsx @@ -0,0 +1,643 @@ +import React, { useState, useEffect, useCallback, useRef } from 'react'; +import apiService from '../services/apiService'; +import type { + KnowledgeBaseListItem, KnowledgeBaseDetail, SourceDocument, + ProcessingJob, SpecVersionListItem, SpecVersionDetail, DiffResult, +} from '../types'; +import { ArrowLeftIcon } from './icons/ArrowLeftIcon'; +import { TrashIcon } from './icons/TrashIcon'; +import { UploadIcon } from './icons/UploadIcon'; +import { SpinnerIcon } from './icons/SpinnerIcon'; + +// --- Helper Components --- + +const StatusBadge: React.FC<{ status: string }> = ({ status }) => { + const colors: Record = { + pending: 'bg-yellow-100 text-yellow-800', + parsing: 'bg-blue-100 text-blue-800', + parsed: 'bg-green-100 text-green-800', + error: 'bg-red-100 text-red-800', + parsing_documents: 'bg-blue-100 text-blue-800', + distilling: 'bg-purple-100 text-purple-800', + completed: 'bg-green-100 text-green-800', + failed: 'bg-red-100 text-red-800', + }; + return ( + + {status} + + ); +}; + +function formatBytes(bytes: number): string { + if (bytes < 1024) return `${bytes} B`; + if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)} KB`; + return `${(bytes / (1024 * 1024)).toFixed(1)} MB`; +} + +function formatDate(dateStr: string | null): string { + if (!dateStr) return '-'; + return new Date(dateStr).toLocaleDateString('en-GB', { + day: '2-digit', month: 'short', year: 'numeric', hour: '2-digit', minute: '2-digit', + }); +} + +// --- Main Component --- + +export const KnowledgeBase: React.FC = () => { + // Level 1: List view + const [knowledgeBases, setKnowledgeBases] = useState([]); + const [loading, setLoading] = useState(true); + + // Level 2: Detail view + const [selectedKb, setSelectedKb] = useState(null); + const [activeTab, setActiveTab] = useState<'documents' | 'versions'>('documents'); + const [versions, setVersions] = useState([]); + const [uploading, setUploading] = useState(false); + + // Processing job polling + const [activeJob, setActiveJob] = useState(null); + const pollRef = useRef | null>(null); + + // Level 3: Diff view + const [diffResult, setDiffResult] = useState(null); + const [selectedForDiff, setSelectedForDiff] = useState([]); + + // Spec content modal + const [viewingSpec, setViewingSpec] = useState(null); + + // --- Data Loading --- + + const loadKnowledgeBases = useCallback(async () => { + try { + setLoading(true); + const data = await apiService.getKnowledgeBases(); + setKnowledgeBases(data); + } catch (err) { + console.error('Failed to load knowledge bases:', err); + } finally { + setLoading(false); + } + }, []); + + useEffect(() => { + loadKnowledgeBases(); + }, [loadKnowledgeBases]); + + const loadKbDetail = useCallback(async (kbId: string) => { + try { + const [detail, vers] = await Promise.all([ + apiService.getKnowledgeBase(kbId), + apiService.getSpecVersions(kbId), + ]); + setSelectedKb(detail); + setVersions(vers); + + // Check for active job + if (detail.latest_job && ['pending', 'parsing_documents', 'distilling'].includes(detail.latest_job.status)) { + setActiveJob(detail.latest_job); + } else { + setActiveJob(null); + } + } catch (err) { + console.error('Failed to load KB detail:', err); + } + }, []); + + // Poll active job + useEffect(() => { + if (activeJob && selectedKb) { + pollRef.current = setInterval(async () => { + try { + const job = await apiService.getProcessingJob(selectedKb.id, activeJob.id); + setActiveJob(job); + if (['completed', 'failed'].includes(job.status)) { + if (pollRef.current) clearInterval(pollRef.current); + pollRef.current = null; + // Refresh data + loadKbDetail(selectedKb.id); + loadKnowledgeBases(); + } + } catch (err) { + console.error('Failed to poll job:', err); + } + }, 3000); + } + return () => { + if (pollRef.current) { + clearInterval(pollRef.current); + pollRef.current = null; + } + }; + }, [activeJob, selectedKb, loadKbDetail, loadKnowledgeBases]); + + // --- Handlers --- + + const handleSelectKb = (kb: KnowledgeBaseListItem) => { + loadKbDetail(kb.id); + setActiveTab('documents'); + setDiffResult(null); + setSelectedForDiff([]); + }; + + const handleBack = () => { + setSelectedKb(null); + setDiffResult(null); + setSelectedForDiff([]); + setViewingSpec(null); + loadKnowledgeBases(); + }; + + const handleBackFromDiff = () => { + setDiffResult(null); + setSelectedForDiff([]); + }; + + const handleFileUpload = async (files: FileList | null) => { + if (!files || !selectedKb) return; + setUploading(true); + try { + for (let i = 0; i < files.length; i++) { + await apiService.uploadSourceDocument(selectedKb.id, files[i]); + } + await loadKbDetail(selectedKb.id); + loadKnowledgeBases(); + } catch (err) { + console.error('Failed to upload file:', err); + } finally { + setUploading(false); + } + }; + + const handleRemoveDoc = async (docId: string) => { + if (!selectedKb) return; + try { + await apiService.removeSourceDocument(selectedKb.id, docId); + await loadKbDetail(selectedKb.id); + loadKnowledgeBases(); + } catch (err) { + console.error('Failed to remove document:', err); + } + }; + + const handleProcess = async () => { + if (!selectedKb) return; + try { + const job = await apiService.triggerProcessing(selectedKb.id); + setActiveJob(job); + } catch (err: any) { + console.error('Failed to trigger processing:', err); + alert(err.message || 'Failed to trigger processing.'); + } + }; + + const handleViewSpec = async (versionId: string) => { + if (!selectedKb) return; + try { + const spec = await apiService.getSpecVersion(selectedKb.id, versionId); + setViewingSpec(spec); + } catch (err) { + console.error('Failed to load spec:', err); + } + }; + + const handleDiffToggle = (versionId: string) => { + setSelectedForDiff(prev => { + if (prev.includes(versionId)) { + return prev.filter(id => id !== versionId); + } + if (prev.length >= 2) { + return [prev[1], versionId]; + } + return [...prev, versionId]; + }); + }; + + const handleCompare = async () => { + if (selectedForDiff.length !== 2 || !selectedKb) return; + try { + const diff = await apiService.getSpecDiff(selectedKb.id, selectedForDiff[0], selectedForDiff[1]); + setDiffResult(diff); + } catch (err) { + console.error('Failed to compute diff:', err); + } + }; + + const handleActivateVersion = async (versionId: string) => { + if (!selectedKb) return; + if (!confirm('Are you sure you want to activate this version? This will change the spec used by the AI agent.')) return; + try { + await apiService.activateSpecVersion(selectedKb.id, versionId); + await loadKbDetail(selectedKb.id); + } catch (err) { + console.error('Failed to activate version:', err); + } + }; + + // --- Drag & Drop --- + + const [isDragging, setIsDragging] = useState(false); + + const handleDragOver = (e: React.DragEvent) => { + e.preventDefault(); + setIsDragging(true); + }; + + const handleDragLeave = () => setIsDragging(false); + + const handleDrop = (e: React.DragEvent) => { + e.preventDefault(); + setIsDragging(false); + handleFileUpload(e.dataTransfer.files); + }; + + // ===================== RENDER ===================== + + // Level 3: Diff view + if (diffResult && selectedKb) { + return ( +
+ + +
+

{selectedKb.display_name} - Diff

+

+ Version {diffResult.version_a} vs Version {diffResult.version_b} + +{diffResult.additions} + -{diffResult.deletions} +

+
+ +
+
+ {diffResult.lines.map((line, i) => ( +
+ + {line.line_number_old || ''} + + + {line.line_number_new || ''} + + + {line.type === 'add' ? '+' : line.type === 'remove' ? '-' : ' '} + +
{line.content}
+
+ ))} +
+
+
+ ); + } + + // Spec content modal + if (viewingSpec) { + return ( +
+ + +
+

+ {selectedKb?.display_name} - Version {viewingSpec.version_number} +

+

+ {formatDate(viewingSpec.created_at)} | {viewingSpec.char_count.toLocaleString()} chars + {viewingSpec.generated_by_name && | Generated by {viewingSpec.generated_by_name}} + {viewingSpec.is_active && Active} +

+
+ +
+
{viewingSpec.content}
+
+
+ ); + } + + // Level 2: Agent detail + if (selectedKb) { + const isJobRunning = activeJob && ['pending', 'parsing_documents', 'distilling'].includes(activeJob.status); + + return ( +
+ + +
+

{selectedKb.display_name}

+ {selectedKb.description &&

{selectedKb.description}

} +
+ + {/* Tabs */} +
+ {(['documents', 'versions'] as const).map(tab => ( + + ))} +
+ + {activeTab === 'documents' ? ( +
+ {/* Upload area */} +
+ +

Drag & drop files here, or

+ +

PDF, DOCX, PPTX, XLSX, HTML, TXT, MD, PNG, JPG, WebP

+
+ + {/* Process button + job status */} +
+ + {selectedKb.active_spec_version && ( + + Active spec: v{selectedKb.active_spec_version} ({selectedKb.active_spec_char_count?.toLocaleString()} chars) + + )} +
+ + {/* Processing progress */} + {isJobRunning && activeJob && ( +
+
+ + + {activeJob.status === 'parsing_documents' ? 'Parsing documents...' : + activeJob.status === 'distilling' ? 'Distilling spec with AI...' : + 'Starting...'} + +
+ {activeJob.status === 'parsing_documents' && ( +
+
0 ? (activeJob.parsed_documents / activeJob.total_documents) * 100 : 0}%` }} + /> +
+ )} +

+ {activeJob.parsed_documents} / {activeJob.total_documents} documents parsed +

+
+ )} + + {/* Last job result */} + {selectedKb.latest_job && !isJobRunning && ( +
+ Last processing: + {selectedKb.latest_job.completed_at && {formatDate(selectedKb.latest_job.completed_at)}} + {selectedKb.latest_job.error_message &&

{selectedKb.latest_job.error_message}

} +
+ )} + + {/* Documents table */} + {selectedKb.source_documents.length > 0 ? ( +
+ + + + + + + + + + + + + {selectedKb.source_documents.map(doc => ( + + + + + + + + + ))} + +
FilenameSizeUploadedUploaded ByParse Status
{doc.filename}{formatBytes(doc.file_size_bytes)}{formatDate(doc.created_at)}{doc.uploaded_by_name || '-'} + +
+
+ ) : ( +
+ No source documents uploaded yet. +
+ )} +
+ ) : ( + /* Versions tab */ +
+ {/* Compare button */} + {selectedForDiff.length === 2 && ( +
+ +
+ )} + + {versions.length > 0 ? ( +
+ + + + + + + + + + + + + + {versions.map(v => ( + + + + + + + + + + ))} + +
+ Compare + VersionDateGenerated ByCharsStatusActions
+ handleDiffToggle(v.id)} + className="h-4 w-4 rounded border-grey-300 text-active-blue focus:ring-active-blue" + /> + v{v.version_number}{formatDate(v.created_at)}{v.generated_by_name || '-'}{v.char_count.toLocaleString()} + {v.is_active ? ( + + Active + + ) : ( + Inactive + )} + +
+ + {!v.is_active && ( + + )} +
+
+
+ ) : ( +
+ No spec versions generated yet. +
+ )} +
+ )} +
+ ); + } + + // Level 1: Agent list + if (loading) { + return ( +
+ +
+ ); + } + + return ( +
+
+

Knowledge Base

+

Manage the AI agent knowledge bases. Upload source documents, process them, and version the resulting specs.

+
+ +
+ {knowledgeBases.map(kb => { + const statusColor = + kb.latest_job_status === 'failed' ? 'border-red-300 bg-red-50' : + kb.latest_job_status === 'completed' ? 'border-grey-300 bg-white' : + 'border-grey-300 bg-white'; + + return ( + + ); + })} +
+
+ ); +}; diff --git a/frontend/components/Sidebar.tsx b/frontend/components/Sidebar.tsx index 14c330e..7e4485f 100755 --- a/frontend/components/Sidebar.tsx +++ b/frontend/components/Sidebar.tsx @@ -7,6 +7,7 @@ import { SettingsIcon } from './icons/SettingsIcon'; import { UserIcon } from './icons/UserIcon'; import { CampaignsIcon } from './icons/CampaignsIcon'; import { AuditIcon } from './icons/AuditIcon'; +import { KnowledgeBaseIcon } from './icons/KnowledgeBaseIcon'; const navigation = [ { name: 'Home', icon: DashboardIcon }, @@ -15,6 +16,7 @@ const navigation = [ // { name: 'CopyGenAI', icon: CopyGenAIIcon }, // Hidden: Moved to Settings > Beta { name: 'Analytics', icon: AnalyticsIcon }, { name: 'Auditing', icon: AuditIcon }, + { name: 'Knowledge Base', icon: KnowledgeBaseIcon, adminOnly: true }, { name: 'Settings', icon: SettingsIcon }, ]; @@ -23,9 +25,10 @@ interface SidebarProps { onNavigate: (viewName: string) => void; userName?: string; userEmail?: string; + isAdmin?: boolean; } -export const Sidebar: React.FC = ({ activeItem, onNavigate, userName, userEmail }) => { +export const Sidebar: React.FC = ({ activeItem, onNavigate, userName, userEmail, isAdmin = true }) => { return (