Add Knowledge Base management system for AI agent specs

Full-stack implementation enabling UI-driven management of the 5 AI agent knowledge bases
(Legal, Brand Barclays, Brand Barclaycard, Channel Best Practices, Channel Tech Specs).

Backend:
- 4 new DB models: KnowledgeBase, SourceDocument, SpecVersion, ProcessingJob
- Migration 006: creates tables, seeds 5 KB rows, imports existing prompts/*.md as v1 specs
- KnowledgeBaseRepository with full CRUD for all 4 tables
- LlamaParseService for document parsing, KnowledgeBaseService for pipeline orchestration
- ReferenceDocsService updated with DB-backed spec loading + cache invalidation
- 11 REST endpoints under /api/knowledge-base (list, detail, upload, delete, process, job status, versions, diff, activate)
- StorageService extended with KB document storage

Frontend:
- TypeScript types for all KB entities (KnowledgeBaseListItem, SourceDocument, ProcessingJob, SpecVersion, DiffResult)
- ApiService methods for all KB endpoints including multipart file upload
- KnowledgeBase component with 3-level UI: agent grid, detail view (documents + versions tabs), diff viewer
- Drag-and-drop file upload, processing progress bar with 3s polling, version comparison
- KnowledgeBaseIcon + Sidebar nav item with adminOnly filtering

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
michael 2026-02-12 15:00:36 -06:00
parent 2b2d82ecec
commit 9e2473c3e9
20 changed files with 2372 additions and 13 deletions

View file

@ -0,0 +1,179 @@
"""Add knowledge base tables and seed initial data
Revision ID: 006_add_knowledge_base
Revises: 005_add_file_hash
Create Date: 2025-02-12
"""
from pathlib import Path
from typing import Sequence, Union
from uuid import uuid4
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision: str = '006_add_knowledge_base'
down_revision: Union[str, None] = '005_add_file_hash'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
"""Create knowledge base tables and seed initial data."""
# ===========================================
# 1. Create knowledge_bases table
# ===========================================
op.create_table(
'knowledge_bases',
sa.Column('id', postgresql.UUID(as_uuid=True), primary_key=True),
sa.Column('agent_key', sa.String(100), unique=True, nullable=False),
sa.Column('display_name', sa.String(255), nullable=False),
sa.Column('description', sa.Text(), nullable=True),
sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.func.now()),
)
# ===========================================
# 2. Create source_documents table
# ===========================================
op.create_table(
'source_documents',
sa.Column('id', postgresql.UUID(as_uuid=True), primary_key=True),
sa.Column('knowledge_base_id', postgresql.UUID(as_uuid=True),
sa.ForeignKey('knowledge_bases.id', ondelete='CASCADE'), nullable=False),
sa.Column('filename', sa.String(500), nullable=False),
sa.Column('file_storage_key', sa.String(500), nullable=False),
sa.Column('file_size_bytes', sa.Integer(), nullable=False),
sa.Column('mime_type', sa.String(255), nullable=False),
sa.Column('uploaded_by_id', postgresql.UUID(as_uuid=True),
sa.ForeignKey('users.id'), nullable=True),
sa.Column('uploaded_by_name', sa.String(255), nullable=True),
sa.Column('parsed_markdown', sa.Text(), nullable=True),
sa.Column('parse_status', sa.String(50), nullable=False, server_default='pending'),
sa.Column('parse_error', sa.Text(), nullable=True),
sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.func.now()),
)
op.create_index('ix_source_documents_kb_id', 'source_documents', ['knowledge_base_id'])
# ===========================================
# 3. Create processing_jobs table (before spec_versions due to FK)
# ===========================================
op.create_table(
'processing_jobs',
sa.Column('id', postgresql.UUID(as_uuid=True), primary_key=True),
sa.Column('knowledge_base_id', postgresql.UUID(as_uuid=True),
sa.ForeignKey('knowledge_bases.id', ondelete='CASCADE'), nullable=False),
sa.Column('status', sa.String(50), nullable=False, server_default='pending'),
sa.Column('triggered_by_id', postgresql.UUID(as_uuid=True),
sa.ForeignKey('users.id'), nullable=True),
sa.Column('triggered_by_name', sa.String(255), nullable=True),
sa.Column('total_documents', sa.Integer(), nullable=False, server_default='0'),
sa.Column('parsed_documents', sa.Integer(), nullable=False, server_default='0'),
sa.Column('spec_version_id', postgresql.UUID(as_uuid=True), nullable=True),
sa.Column('error_message', sa.Text(), nullable=True),
sa.Column('log', postgresql.JSONB(), nullable=True),
sa.Column('started_at', sa.DateTime(timezone=True), nullable=True),
sa.Column('completed_at', sa.DateTime(timezone=True), nullable=True),
sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.func.now()),
)
op.create_index('ix_processing_jobs_kb_id', 'processing_jobs', ['knowledge_base_id'])
# ===========================================
# 4. Create spec_versions table
# ===========================================
op.create_table(
'spec_versions',
sa.Column('id', postgresql.UUID(as_uuid=True), primary_key=True),
sa.Column('knowledge_base_id', postgresql.UUID(as_uuid=True),
sa.ForeignKey('knowledge_bases.id', ondelete='CASCADE'), nullable=False),
sa.Column('version_number', sa.Integer(), nullable=False),
sa.Column('content', sa.Text(), nullable=False),
sa.Column('source_document_ids', postgresql.JSONB(), nullable=True),
sa.Column('generated_by_id', postgresql.UUID(as_uuid=True),
sa.ForeignKey('users.id'), nullable=True),
sa.Column('generated_by_name', sa.String(255), nullable=True),
sa.Column('processing_job_id', postgresql.UUID(as_uuid=True),
sa.ForeignKey('processing_jobs.id'), nullable=True),
sa.Column('is_active', sa.Boolean(), nullable=False, server_default='true'),
sa.Column('char_count', sa.Integer(), nullable=False),
sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.func.now()),
sa.UniqueConstraint('knowledge_base_id', 'version_number', name='uq_kb_version_number'),
)
op.create_index('ix_spec_versions_kb_id', 'spec_versions', ['knowledge_base_id'])
# Add the FK from processing_jobs.spec_version_id -> spec_versions.id
# (deferred because of circular reference)
op.create_foreign_key(
'fk_processing_jobs_spec_version',
'processing_jobs', 'spec_versions',
['spec_version_id'], ['id'],
)
# ===========================================
# 5. Seed 5 knowledge base rows
# ===========================================
conn = op.get_bind()
kb_seeds = [
("legal", "Legal", "Legal compliance, advertising standards, disclaimers, and financial promotion rules."),
("brand_barclays", "Brand (Barclays)", "Barclays brand guidelines: logo usage, colors, typography, and design principles."),
("brand_barclaycard", "Brand (Barclaycard)", "Barclaycard brand guidelines: logo usage, colors, typography, and design principles."),
("channel_best_practices", "Channel Best Practices", "Channel-specific best practices for social, display, email, print, and OOH."),
("channel_tech_specs", "Channel Tech Specs", "Technical specifications, dimensions, file formats, and platform requirements."),
]
kb_ids = {}
for agent_key, display_name, description in kb_seeds:
kb_id = str(uuid4())
kb_ids[agent_key] = kb_id
conn.execute(
sa.text("""
INSERT INTO knowledge_bases (id, agent_key, display_name, description)
VALUES (:id, :agent_key, :display_name, :description)
ON CONFLICT (agent_key) DO NOTHING
"""),
{"id": kb_id, "agent_key": agent_key, "display_name": display_name, "description": description}
)
# ===========================================
# 6. Seed existing prompts/*.md as spec_versions v1
# ===========================================
prompts_dir = Path(__file__).parent.parent.parent.parent / "prompts"
spec_file_map = {
"legal": "legal.md",
"brand_barclays": "brand_barclays.md",
"brand_barclaycard": "brand_barclaycard.md",
"channel_best_practices": "channel_best_practices.md",
"channel_tech_specs": "channel_tech_specs.md",
}
for agent_key, filename in spec_file_map.items():
spec_path = prompts_dir / filename
if spec_path.exists():
content = spec_path.read_text(encoding="utf-8")
spec_id = str(uuid4())
conn.execute(
sa.text("""
INSERT INTO spec_versions (id, knowledge_base_id, version_number, content, generated_by_name, is_active, char_count)
VALUES (:id, :kb_id, 1, :content, :generated_by_name, true, :char_count)
"""),
{
"id": spec_id,
"kb_id": kb_ids[agent_key],
"content": content,
"generated_by_name": "System (Migration)",
"char_count": len(content),
}
)
def downgrade() -> None:
"""Drop knowledge base tables."""
op.drop_constraint('fk_processing_jobs_spec_version', 'processing_jobs', type_='foreignkey')
op.drop_table('spec_versions')
op.drop_table('processing_jobs')
op.drop_table('source_documents')
op.drop_table('knowledge_bases')

View file

@ -1,3 +1,4 @@
from app.api.routes import router
from app.api.knowledge_base_routes import kb_router
__all__ = ["router"]
__all__ = ["router", "kb_router"]

View file

@ -0,0 +1,473 @@
"""REST API routes for Knowledge Base management."""
import difflib
import uuid
from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException, UploadFile, File
from sqlalchemy.ext.asyncio import AsyncSession
from app.api.knowledge_base_schemas import (
KnowledgeBaseListItem,
KnowledgeBaseDetail,
SourceDocumentResponse,
ProcessingJobResponse,
SpecVersionListItem,
SpecVersionDetail,
DiffResponse,
DiffLine,
)
from app.dependencies.auth import get_current_user
from app.models.database import get_db
from app.repositories.knowledge_base_repository import KnowledgeBaseRepository
from app.services.storage_service import storage_service
kb_router = APIRouter(prefix="/knowledge-base", tags=["knowledge-base"])
# Allowed MIME types for source document upload
ALLOWED_MIME_TYPES = {
"application/pdf",
"application/vnd.openxmlformats-officedocument.wordprocessingml.document", # docx
"application/vnd.openxmlformats-officedocument.presentationml.presentation", # pptx
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", # xlsx
"text/html",
"text/plain",
"text/markdown",
"image/png",
"image/jpeg",
"image/webp",
}
async def _get_user_info(session: AsyncSession, user_claims: dict) -> tuple:
"""Extract user ID and name from claims."""
from app.repositories.user_repository import UserRepository
user_repo = UserRepository(session)
azure_oid = user_claims.get("oid") or user_claims.get("sub")
user_id = None
user_name = user_claims.get("name", "Unknown")
if azure_oid:
user = await user_repo.get_or_create_from_azure(
azure_ad_oid=azure_oid,
email=user_claims.get("email", user_claims.get("preferred_username", "")),
name=user_name,
)
user_id = user.id
return user_id, user_name
@kb_router.get("", response_model=list[KnowledgeBaseListItem])
async def list_knowledge_bases(
db: AsyncSession = Depends(get_db),
user: dict = Depends(get_current_user),
):
"""List all knowledge bases with summary info."""
repo = KnowledgeBaseRepository(db)
kbs = await repo.list_knowledge_bases()
results = []
for kb in kbs:
active_spec = next((sv for sv in kb.spec_versions if sv.is_active), None)
latest_job = kb.processing_jobs[0] if kb.processing_jobs else None
results.append(KnowledgeBaseListItem(
id=kb.id,
agent_key=kb.agent_key,
display_name=kb.display_name,
description=kb.description,
source_document_count=len(kb.source_documents),
active_spec_version=active_spec.version_number if active_spec else None,
active_spec_char_count=active_spec.char_count if active_spec else None,
latest_job_status=latest_job.status if latest_job else None,
latest_job_completed_at=latest_job.completed_at if latest_job else None,
created_at=kb.created_at,
))
return results
@kb_router.get("/{kb_id}", response_model=KnowledgeBaseDetail)
async def get_knowledge_base(
kb_id: uuid.UUID,
db: AsyncSession = Depends(get_db),
user: dict = Depends(get_current_user),
):
"""Get full detail for a knowledge base."""
repo = KnowledgeBaseRepository(db)
kb = await repo.get_knowledge_base(kb_id)
if not kb:
raise HTTPException(status_code=404, detail="Knowledge base not found")
active_spec = next((sv for sv in kb.spec_versions if sv.is_active), None)
latest_job = kb.processing_jobs[0] if kb.processing_jobs else None
return KnowledgeBaseDetail(
id=kb.id,
agent_key=kb.agent_key,
display_name=kb.display_name,
description=kb.description,
source_documents=[
SourceDocumentResponse(
id=doc.id,
knowledge_base_id=doc.knowledge_base_id,
filename=doc.filename,
file_storage_key=doc.file_storage_key,
file_size_bytes=doc.file_size_bytes,
mime_type=doc.mime_type,
uploaded_by_name=doc.uploaded_by_name,
parse_status=doc.parse_status,
parse_error=doc.parse_error,
created_at=doc.created_at,
)
for doc in sorted(kb.source_documents, key=lambda d: d.created_at, reverse=True)
],
active_spec_version=active_spec.version_number if active_spec else None,
active_spec_char_count=active_spec.char_count if active_spec else None,
latest_job=ProcessingJobResponse(
id=latest_job.id,
knowledge_base_id=latest_job.knowledge_base_id,
status=latest_job.status,
triggered_by_name=latest_job.triggered_by_name,
total_documents=latest_job.total_documents,
parsed_documents=latest_job.parsed_documents,
spec_version_id=latest_job.spec_version_id,
error_message=latest_job.error_message,
started_at=latest_job.started_at,
completed_at=latest_job.completed_at,
created_at=latest_job.created_at,
) if latest_job else None,
created_at=kb.created_at,
)
@kb_router.post("/{kb_id}/documents", response_model=SourceDocumentResponse, status_code=201)
async def upload_source_document(
kb_id: uuid.UUID,
file: UploadFile = File(...),
db: AsyncSession = Depends(get_db),
user: dict = Depends(get_current_user),
):
"""Upload a source document to a knowledge base."""
repo = KnowledgeBaseRepository(db)
kb = await repo.get_knowledge_base(kb_id)
if not kb:
raise HTTPException(status_code=404, detail="Knowledge base not found")
# Validate file type
if file.content_type and file.content_type not in ALLOWED_MIME_TYPES:
raise HTTPException(
status_code=400,
detail=f"File type '{file.content_type}' is not supported. Supported types: PDF, DOCX, PPTX, XLSX, HTML, TXT, MD, PNG, JPG, WebP."
)
user_id, user_name = await _get_user_info(db, user)
# Read file data
file_data = await file.read()
doc_id = uuid.uuid4()
# Store file
storage_key = await storage_service.store_kb_document(
file_data=file_data,
kb_id=kb_id,
doc_id=doc_id,
filename=file.filename or "unknown",
mime_type=file.content_type or "application/octet-stream",
)
# Create DB record
doc = await repo.add_source_document(
knowledge_base_id=kb_id,
filename=file.filename or "unknown",
file_storage_key=storage_key,
file_size_bytes=len(file_data),
mime_type=file.content_type or "application/octet-stream",
uploaded_by_id=user_id,
uploaded_by_name=user_name,
)
# Set the pre-generated ID
doc.id = doc_id
return SourceDocumentResponse(
id=doc.id,
knowledge_base_id=doc.knowledge_base_id,
filename=doc.filename,
file_storage_key=doc.file_storage_key,
file_size_bytes=doc.file_size_bytes,
mime_type=doc.mime_type,
uploaded_by_name=doc.uploaded_by_name,
parse_status=doc.parse_status,
parse_error=doc.parse_error,
created_at=doc.created_at,
)
@kb_router.delete("/{kb_id}/documents/{doc_id}", status_code=204)
async def delete_source_document(
kb_id: uuid.UUID,
doc_id: uuid.UUID,
db: AsyncSession = Depends(get_db),
user: dict = Depends(get_current_user),
):
"""Remove a source document from a knowledge base."""
repo = KnowledgeBaseRepository(db)
doc = await repo.remove_source_document(doc_id)
if not doc:
raise HTTPException(status_code=404, detail="Source document not found")
# Delete file from storage
await storage_service.delete_file(doc.file_storage_key)
@kb_router.post("/{kb_id}/process", response_model=ProcessingJobResponse, status_code=201)
async def trigger_processing(
kb_id: uuid.UUID,
background_tasks: BackgroundTasks,
db: AsyncSession = Depends(get_db),
user: dict = Depends(get_current_user),
):
"""Trigger the document processing pipeline for a knowledge base."""
from app.main import knowledge_base_service as kb_service
if kb_service is None:
raise HTTPException(
status_code=503,
detail="Knowledge Base processing is not available. LLAMA_CLOUD_API_KEY is not configured."
)
repo = KnowledgeBaseRepository(db)
kb = await repo.get_knowledge_base(kb_id)
if not kb:
raise HTTPException(status_code=404, detail="Knowledge base not found")
# Check for active jobs
has_active = await repo.has_active_job(kb_id)
if has_active:
raise HTTPException(status_code=409, detail="A processing job is already running for this knowledge base.")
# Check that there are source documents
docs = await repo.get_source_documents(kb_id)
if not docs:
raise HTTPException(status_code=400, detail="No source documents to process.")
user_id, user_name = await _get_user_info(db, user)
# Create the job
job = await repo.create_processing_job(
knowledge_base_id=kb_id,
total_documents=len(docs),
triggered_by_id=user_id,
triggered_by_name=user_name,
)
# Start background processing
background_tasks.add_task(
kb_service.process_documents,
kb_id=kb_id,
job_id=job.id,
agent_key=kb.agent_key,
user_id=user_id,
user_name=user_name,
)
return ProcessingJobResponse(
id=job.id,
knowledge_base_id=job.knowledge_base_id,
status=job.status,
triggered_by_name=job.triggered_by_name,
total_documents=job.total_documents,
parsed_documents=job.parsed_documents,
spec_version_id=job.spec_version_id,
error_message=job.error_message,
started_at=job.started_at,
completed_at=job.completed_at,
created_at=job.created_at,
)
@kb_router.get("/{kb_id}/jobs/{job_id}", response_model=ProcessingJobResponse)
async def get_processing_job(
kb_id: uuid.UUID,
job_id: uuid.UUID,
db: AsyncSession = Depends(get_db),
user: dict = Depends(get_current_user),
):
"""Get the status of a processing job."""
repo = KnowledgeBaseRepository(db)
job = await repo.get_processing_job(job_id)
if not job or job.knowledge_base_id != kb_id:
raise HTTPException(status_code=404, detail="Processing job not found")
return ProcessingJobResponse(
id=job.id,
knowledge_base_id=job.knowledge_base_id,
status=job.status,
triggered_by_name=job.triggered_by_name,
total_documents=job.total_documents,
parsed_documents=job.parsed_documents,
spec_version_id=job.spec_version_id,
error_message=job.error_message,
started_at=job.started_at,
completed_at=job.completed_at,
created_at=job.created_at,
)
@kb_router.get("/{kb_id}/versions", response_model=list[SpecVersionListItem])
async def list_spec_versions(
kb_id: uuid.UUID,
db: AsyncSession = Depends(get_db),
user: dict = Depends(get_current_user),
):
"""List all spec versions for a knowledge base."""
repo = KnowledgeBaseRepository(db)
versions = await repo.list_spec_versions(kb_id)
return [
SpecVersionListItem(
id=v.id,
knowledge_base_id=v.knowledge_base_id,
version_number=v.version_number,
generated_by_name=v.generated_by_name,
source_document_ids=v.source_document_ids,
is_active=v.is_active,
char_count=v.char_count,
created_at=v.created_at,
)
for v in versions
]
@kb_router.get("/{kb_id}/versions/{version_id}", response_model=SpecVersionDetail)
async def get_spec_version(
kb_id: uuid.UUID,
version_id: uuid.UUID,
db: AsyncSession = Depends(get_db),
user: dict = Depends(get_current_user),
):
"""Get full spec content for a version."""
repo = KnowledgeBaseRepository(db)
version = await repo.get_spec_version(version_id)
if not version or version.knowledge_base_id != kb_id:
raise HTTPException(status_code=404, detail="Spec version not found")
return SpecVersionDetail(
id=version.id,
knowledge_base_id=version.knowledge_base_id,
version_number=version.version_number,
content=version.content,
generated_by_name=version.generated_by_name,
source_document_ids=version.source_document_ids,
is_active=version.is_active,
char_count=version.char_count,
created_at=version.created_at,
)
@kb_router.get("/{kb_id}/versions/{v_a}/diff/{v_b}", response_model=DiffResponse)
async def get_spec_diff(
kb_id: uuid.UUID,
v_a: uuid.UUID,
v_b: uuid.UUID,
db: AsyncSession = Depends(get_db),
user: dict = Depends(get_current_user),
):
"""Compute diff between two spec versions."""
repo = KnowledgeBaseRepository(db)
version_a = await repo.get_spec_version(v_a)
version_b = await repo.get_spec_version(v_b)
if not version_a or version_a.knowledge_base_id != kb_id:
raise HTTPException(status_code=404, detail="Version A not found")
if not version_b or version_b.knowledge_base_id != kb_id:
raise HTTPException(status_code=404, detail="Version B not found")
lines_a = version_a.content.splitlines(keepends=True)
lines_b = version_b.content.splitlines(keepends=True)
diff = list(difflib.unified_diff(
lines_a, lines_b,
fromfile=f"v{version_a.version_number}",
tofile=f"v{version_b.version_number}",
lineterm="",
))
additions = 0
deletions = 0
diff_lines = []
old_line = 0
new_line = 0
for line in diff:
if line.startswith("@@"):
# Parse hunk header for line numbers
import re
match = re.match(r"@@ -(\d+)", line)
if match:
old_line = int(match.group(1)) - 1
new_match = re.search(r"\+(\d+)", line)
new_line = int(new_match.group(1)) - 1 if new_match else 0
diff_lines.append(DiffLine(type="context", content=line.rstrip("\n")))
elif line.startswith("---") or line.startswith("+++"):
diff_lines.append(DiffLine(type="context", content=line.rstrip("\n")))
elif line.startswith("+"):
additions += 1
new_line += 1
diff_lines.append(DiffLine(
type="add", content=line[1:].rstrip("\n"),
line_number_new=new_line,
))
elif line.startswith("-"):
deletions += 1
old_line += 1
diff_lines.append(DiffLine(
type="remove", content=line[1:].rstrip("\n"),
line_number_old=old_line,
))
else:
old_line += 1
new_line += 1
diff_lines.append(DiffLine(
type="context", content=line.rstrip("\n"),
line_number_old=old_line,
line_number_new=new_line,
))
return DiffResponse(
version_a=version_a.version_number,
version_b=version_b.version_number,
additions=additions,
deletions=deletions,
lines=diff_lines,
)
@kb_router.post("/{kb_id}/versions/{version_id}/activate", response_model=SpecVersionDetail)
async def activate_spec_version(
kb_id: uuid.UUID,
version_id: uuid.UUID,
db: AsyncSession = Depends(get_db),
user: dict = Depends(get_current_user),
):
"""Activate (revert to) a specific spec version."""
repo = KnowledgeBaseRepository(db)
version = await repo.activate_spec_version(version_id)
if not version or version.knowledge_base_id != kb_id:
raise HTTPException(status_code=404, detail="Spec version not found")
# Invalidate reference docs cache
kb = await repo.get_knowledge_base(kb_id)
if kb:
from app.main import analysis_service
if analysis_service:
analysis_service.reference_docs.invalidate_cache(kb.agent_key)
return SpecVersionDetail(
id=version.id,
knowledge_base_id=version.knowledge_base_id,
version_number=version.version_number,
content=version.content,
generated_by_name=version.generated_by_name,
source_document_ids=version.source_document_ids,
is_active=version.is_active,
char_count=version.char_count,
created_at=version.created_at,
)

View file

@ -0,0 +1,108 @@
"""Pydantic schemas for Knowledge Base API."""
import uuid
from datetime import datetime
from typing import Optional
from pydantic import BaseModel
class SourceDocumentResponse(BaseModel):
id: uuid.UUID
knowledge_base_id: uuid.UUID
filename: str
file_storage_key: str
file_size_bytes: int
mime_type: str
uploaded_by_name: Optional[str]
parse_status: str
parse_error: Optional[str]
created_at: datetime
class Config:
from_attributes = True
class ProcessingJobResponse(BaseModel):
id: uuid.UUID
knowledge_base_id: uuid.UUID
status: str
triggered_by_name: Optional[str]
total_documents: int
parsed_documents: int
spec_version_id: Optional[uuid.UUID]
error_message: Optional[str]
started_at: Optional[datetime]
completed_at: Optional[datetime]
created_at: datetime
class Config:
from_attributes = True
class SpecVersionListItem(BaseModel):
id: uuid.UUID
knowledge_base_id: uuid.UUID
version_number: int
generated_by_name: Optional[str]
source_document_ids: Optional[list]
is_active: bool
char_count: int
created_at: datetime
class Config:
from_attributes = True
class SpecVersionDetail(BaseModel):
id: uuid.UUID
knowledge_base_id: uuid.UUID
version_number: int
content: str
generated_by_name: Optional[str]
source_document_ids: Optional[list]
is_active: bool
char_count: int
created_at: datetime
class Config:
from_attributes = True
class KnowledgeBaseListItem(BaseModel):
id: uuid.UUID
agent_key: str
display_name: str
description: Optional[str]
source_document_count: int
active_spec_version: Optional[int]
active_spec_char_count: Optional[int]
latest_job_status: Optional[str]
latest_job_completed_at: Optional[datetime]
created_at: datetime
class KnowledgeBaseDetail(BaseModel):
id: uuid.UUID
agent_key: str
display_name: str
description: Optional[str]
source_documents: list[SourceDocumentResponse]
active_spec_version: Optional[int]
active_spec_char_count: Optional[int]
latest_job: Optional[ProcessingJobResponse]
created_at: datetime
class DiffLine(BaseModel):
type: str # 'add', 'remove', 'context'
content: str
line_number_old: Optional[int] = None
line_number_new: Optional[int] = None
class DiffResponse(BaseModel):
version_a: int
version_b: int
additions: int
deletions: int
lines: list[DiffLine]

View file

@ -35,6 +35,9 @@ class Settings:
_default_storage = Path(__file__).parent.parent.parent / "storage"
FILE_STORAGE_PATH: str = os.getenv("FILE_STORAGE_PATH", str(_default_storage))
# LlamaParse API key (optional - KB processing pipeline disabled if not set)
LLAMA_CLOUD_API_KEY: str = os.getenv("LLAMA_CLOUD_API_KEY", "")
# Mailgun Configuration for support emails
MAILGUN_API_URL: str = os.getenv("MAILGUN_API_URL", "")
MAILGUN_API_KEY: str = os.getenv("MAILGUN_API_KEY", "")

View file

@ -9,7 +9,7 @@ from app.config import settings
from app.services.auth_service import verify_access_token
from app.dependencies.auth import get_current_user
from app.models.database import init_db, close_db
from app.api import router as api_router
from app.api import router as api_router, kb_router
# Configure logging
logging.basicConfig(
@ -36,11 +36,13 @@ from app.websocket.handlers import handle_analyze_message
from app.services.gemini_service import GeminiService
from app.services.reference_docs import ReferenceDocsService
from app.services.analysis_service import AnalysisService
from app.services.knowledge_base_service import KnowledgeBaseService
# Global services - initialized at startup
manager = ConnectionManager()
analysis_service: AnalysisService | None = None
knowledge_base_service: KnowledgeBaseService | None = None
@asynccontextmanager
@ -50,16 +52,18 @@ async def lifespan(app: FastAPI):
Loads reference documents and initializes the analysis service.
"""
global analysis_service
global analysis_service, knowledge_base_service
# Validate settings
settings.validate()
# Initialize database
print("Initializing database connection...")
db_available = False
try:
await init_db()
print("Database initialized successfully")
db_available = True
except Exception as e:
logger.warning(f"Database initialization failed (may not be available): {e}")
print(f"Warning: Database not available - running in stateless mode")
@ -68,6 +72,16 @@ async def lifespan(app: FastAPI):
print("Loading reference documents...")
reference_docs = ReferenceDocsService(settings.REFERENCE_DOCS_PATH)
# Load specs from DB if database is available
if db_available:
try:
from app.models.database import async_session_factory
async with async_session_factory() as session:
print("Loading specs from database...")
await reference_docs.load_specs_from_db(session)
except Exception as e:
logger.warning(f"Failed to load specs from DB (falling back to files): {e}")
# Log document info
doc_summary = reference_docs.get_context_summary()
print(f" Brand documents: {len(doc_summary['brand_files'])} files ({doc_summary['brand_context_length']} chars)")
@ -79,6 +93,16 @@ async def lifespan(app: FastAPI):
print("Initializing analysis service...")
analysis_service = AnalysisService(gemini_service, reference_docs)
# Initialize Knowledge Base service (requires LlamaParse API key)
if settings.LLAMA_CLOUD_API_KEY:
from app.services.llamaparse_service import LlamaParseService
print("Initializing LlamaParse service...")
llamaparse_service = LlamaParseService(settings.LLAMA_CLOUD_API_KEY)
knowledge_base_service = KnowledgeBaseService(llamaparse_service, gemini_service, reference_docs)
print("Knowledge Base pipeline ready!")
else:
print("LLAMA_CLOUD_API_KEY not set - Knowledge Base processing pipeline disabled")
print("Backend ready!")
yield
@ -107,6 +131,7 @@ app.add_middleware(
# Include API routes
app.include_router(api_router, prefix="/api")
app.include_router(kb_router, prefix="/api")
@app.get("/health")

View file

@ -179,3 +179,104 @@ class DropdownOption(Base):
back_populates="parent",
order_by="DropdownOption.display_order"
)
class KnowledgeBase(Base):
"""An agent knowledge base entry (one per agent spec type)."""
__tablename__ = "knowledge_bases"
id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
agent_key: Mapped[str] = mapped_column(String(100), unique=True, nullable=False)
display_name: Mapped[str] = mapped_column(String(255), nullable=False)
description: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now())
# Relationships
source_documents: Mapped[list["SourceDocument"]] = relationship(
"SourceDocument", back_populates="knowledge_base", cascade="all, delete-orphan"
)
spec_versions: Mapped[list["SpecVersion"]] = relationship(
"SpecVersion", back_populates="knowledge_base", cascade="all, delete-orphan",
order_by="desc(SpecVersion.version_number)"
)
processing_jobs: Mapped[list["ProcessingJob"]] = relationship(
"ProcessingJob", back_populates="knowledge_base", cascade="all, delete-orphan",
order_by="desc(ProcessingJob.created_at)"
)
class SourceDocument(Base):
"""An uploaded source document linked to a knowledge base."""
__tablename__ = "source_documents"
id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
knowledge_base_id: Mapped[uuid.UUID] = mapped_column(
UUID(as_uuid=True), ForeignKey("knowledge_bases.id", ondelete="CASCADE"), nullable=False
)
filename: Mapped[str] = mapped_column(String(500), nullable=False)
file_storage_key: Mapped[str] = mapped_column(String(500), nullable=False)
file_size_bytes: Mapped[int] = mapped_column(Integer, nullable=False)
mime_type: Mapped[str] = mapped_column(String(255), nullable=False)
uploaded_by_id: Mapped[Optional[uuid.UUID]] = mapped_column(UUID(as_uuid=True), ForeignKey("users.id"), nullable=True)
uploaded_by_name: Mapped[Optional[str]] = mapped_column(String(255), nullable=True)
parsed_markdown: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
parse_status: Mapped[str] = mapped_column(String(50), default="pending", nullable=False)
parse_error: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now())
# Relationships
knowledge_base: Mapped["KnowledgeBase"] = relationship("KnowledgeBase", back_populates="source_documents")
class SpecVersion(Base):
"""A generated spec document with version history."""
__tablename__ = "spec_versions"
id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
knowledge_base_id: Mapped[uuid.UUID] = mapped_column(
UUID(as_uuid=True), ForeignKey("knowledge_bases.id", ondelete="CASCADE"), nullable=False
)
version_number: Mapped[int] = mapped_column(Integer, nullable=False)
content: Mapped[str] = mapped_column(Text, nullable=False)
source_document_ids: Mapped[Optional[dict]] = mapped_column(JSONB, nullable=True)
generated_by_id: Mapped[Optional[uuid.UUID]] = mapped_column(UUID(as_uuid=True), ForeignKey("users.id"), nullable=True)
generated_by_name: Mapped[Optional[str]] = mapped_column(String(255), nullable=True)
processing_job_id: Mapped[Optional[uuid.UUID]] = mapped_column(
UUID(as_uuid=True), ForeignKey("processing_jobs.id"), nullable=True
)
is_active: Mapped[bool] = mapped_column(Boolean, default=True, nullable=False)
char_count: Mapped[int] = mapped_column(Integer, nullable=False)
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now())
# Relationships
knowledge_base: Mapped["KnowledgeBase"] = relationship("KnowledgeBase", back_populates="spec_versions")
processing_job: Mapped[Optional["ProcessingJob"]] = relationship("ProcessingJob", back_populates="spec_version")
__table_args__ = (
UniqueConstraint("knowledge_base_id", "version_number", name="uq_kb_version_number"),
)
class ProcessingJob(Base):
"""Tracks a document processing pipeline run."""
__tablename__ = "processing_jobs"
id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
knowledge_base_id: Mapped[uuid.UUID] = mapped_column(
UUID(as_uuid=True), ForeignKey("knowledge_bases.id", ondelete="CASCADE"), nullable=False
)
status: Mapped[str] = mapped_column(String(50), default="pending", nullable=False)
triggered_by_id: Mapped[Optional[uuid.UUID]] = mapped_column(UUID(as_uuid=True), ForeignKey("users.id"), nullable=True)
triggered_by_name: Mapped[Optional[str]] = mapped_column(String(255), nullable=True)
total_documents: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
parsed_documents: Mapped[int] = mapped_column(Integer, default=0, nullable=False)
spec_version_id: Mapped[Optional[uuid.UUID]] = mapped_column(UUID(as_uuid=True), ForeignKey("spec_versions.id"), nullable=True)
error_message: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
log: Mapped[Optional[dict]] = mapped_column(JSONB, nullable=True)
started_at: Mapped[Optional[datetime]] = mapped_column(DateTime(timezone=True), nullable=True)
completed_at: Mapped[Optional[datetime]] = mapped_column(DateTime(timezone=True), nullable=True)
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now())
# Relationships
knowledge_base: Mapped["KnowledgeBase"] = relationship("KnowledgeBase", back_populates="processing_jobs")
spec_version: Mapped[Optional["SpecVersion"]] = relationship("SpecVersion", back_populates="processing_job")

View file

@ -3,6 +3,7 @@ from app.repositories.proof_repository import ProofRepository
from app.repositories.user_repository import UserRepository
from app.repositories.audit_repository import AuditRepository
from app.repositories.dropdown_repository import DropdownRepository
from app.repositories.knowledge_base_repository import KnowledgeBaseRepository
__all__ = [
"CampaignRepository",
@ -10,4 +11,5 @@ __all__ = [
"UserRepository",
"AuditRepository",
"DropdownRepository",
"KnowledgeBaseRepository",
]

View file

@ -0,0 +1,299 @@
import uuid
from datetime import datetime, timezone
from typing import Optional
from sqlalchemy import select, func
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import selectinload
from app.models.models import KnowledgeBase, SourceDocument, SpecVersion, ProcessingJob
class KnowledgeBaseRepository:
"""Repository for knowledge base database operations."""
def __init__(self, session: AsyncSession):
self.session = session
# ---- Knowledge Bases ----
async def list_knowledge_bases(self) -> list[KnowledgeBase]:
"""List all knowledge bases with eager-loaded relationships."""
query = (
select(KnowledgeBase)
.options(
selectinload(KnowledgeBase.source_documents),
selectinload(KnowledgeBase.spec_versions),
selectinload(KnowledgeBase.processing_jobs),
)
.order_by(KnowledgeBase.display_name)
)
result = await self.session.execute(query)
return list(result.scalars().all())
async def get_knowledge_base(self, kb_id: uuid.UUID) -> Optional[KnowledgeBase]:
"""Get a knowledge base by ID with all relationships."""
query = (
select(KnowledgeBase)
.options(
selectinload(KnowledgeBase.source_documents),
selectinload(KnowledgeBase.spec_versions),
selectinload(KnowledgeBase.processing_jobs),
)
.where(KnowledgeBase.id == kb_id)
)
result = await self.session.execute(query)
return result.scalar_one_or_none()
async def get_knowledge_base_by_key(self, agent_key: str) -> Optional[KnowledgeBase]:
"""Get a knowledge base by agent_key."""
query = select(KnowledgeBase).where(KnowledgeBase.agent_key == agent_key)
result = await self.session.execute(query)
return result.scalar_one_or_none()
# ---- Source Documents ----
async def add_source_document(
self,
knowledge_base_id: uuid.UUID,
filename: str,
file_storage_key: str,
file_size_bytes: int,
mime_type: str,
uploaded_by_id: Optional[uuid.UUID] = None,
uploaded_by_name: Optional[str] = None,
) -> SourceDocument:
"""Add a source document to a knowledge base."""
doc = SourceDocument(
knowledge_base_id=knowledge_base_id,
filename=filename,
file_storage_key=file_storage_key,
file_size_bytes=file_size_bytes,
mime_type=mime_type,
uploaded_by_id=uploaded_by_id,
uploaded_by_name=uploaded_by_name,
)
self.session.add(doc)
await self.session.flush()
return doc
async def remove_source_document(self, doc_id: uuid.UUID) -> Optional[SourceDocument]:
"""Remove a source document by ID. Returns the deleted doc or None."""
query = select(SourceDocument).where(SourceDocument.id == doc_id)
result = await self.session.execute(query)
doc = result.scalar_one_or_none()
if doc:
await self.session.delete(doc)
await self.session.flush()
return doc
async def get_source_documents(self, kb_id: uuid.UUID) -> list[SourceDocument]:
"""Get all source documents for a knowledge base."""
query = (
select(SourceDocument)
.where(SourceDocument.knowledge_base_id == kb_id)
.order_by(SourceDocument.created_at.desc())
)
result = await self.session.execute(query)
return list(result.scalars().all())
async def get_source_document(self, doc_id: uuid.UUID) -> Optional[SourceDocument]:
"""Get a single source document by ID."""
query = select(SourceDocument).where(SourceDocument.id == doc_id)
result = await self.session.execute(query)
return result.scalar_one_or_none()
async def update_source_document_parse_status(
self,
doc_id: uuid.UUID,
status: str,
parsed_markdown: Optional[str] = None,
parse_error: Optional[str] = None,
) -> None:
"""Update parse status of a source document."""
query = select(SourceDocument).where(SourceDocument.id == doc_id)
result = await self.session.execute(query)
doc = result.scalar_one_or_none()
if doc:
doc.parse_status = status
if parsed_markdown is not None:
doc.parsed_markdown = parsed_markdown
if parse_error is not None:
doc.parse_error = parse_error
await self.session.flush()
# ---- Spec Versions ----
async def get_active_spec_by_key(self, agent_key: str) -> Optional[SpecVersion]:
"""Get the active spec version for a given agent key."""
query = (
select(SpecVersion)
.join(KnowledgeBase)
.where(KnowledgeBase.agent_key == agent_key)
.where(SpecVersion.is_active == True)
)
result = await self.session.execute(query)
return result.scalar_one_or_none()
async def list_spec_versions(self, kb_id: uuid.UUID) -> list[SpecVersion]:
"""List all spec versions for a knowledge base."""
query = (
select(SpecVersion)
.where(SpecVersion.knowledge_base_id == kb_id)
.order_by(SpecVersion.version_number.desc())
)
result = await self.session.execute(query)
return list(result.scalars().all())
async def get_spec_version(self, version_id: uuid.UUID) -> Optional[SpecVersion]:
"""Get a spec version by ID."""
query = select(SpecVersion).where(SpecVersion.id == version_id)
result = await self.session.execute(query)
return result.scalar_one_or_none()
async def create_spec_version(
self,
knowledge_base_id: uuid.UUID,
content: str,
source_document_ids: Optional[list] = None,
generated_by_id: Optional[uuid.UUID] = None,
generated_by_name: Optional[str] = None,
processing_job_id: Optional[uuid.UUID] = None,
) -> SpecVersion:
"""Create a new spec version, auto-incrementing version number and deactivating prior."""
# Get next version number
max_query = (
select(func.coalesce(func.max(SpecVersion.version_number), 0))
.where(SpecVersion.knowledge_base_id == knowledge_base_id)
)
result = await self.session.execute(max_query)
next_version = result.scalar() + 1
# Deactivate all existing versions for this KB
deactivate_query = (
select(SpecVersion)
.where(SpecVersion.knowledge_base_id == knowledge_base_id)
.where(SpecVersion.is_active == True)
)
deactivate_result = await self.session.execute(deactivate_query)
for sv in deactivate_result.scalars().all():
sv.is_active = False
# Create the new version
spec = SpecVersion(
knowledge_base_id=knowledge_base_id,
version_number=next_version,
content=content,
source_document_ids=source_document_ids,
generated_by_id=generated_by_id,
generated_by_name=generated_by_name,
processing_job_id=processing_job_id,
is_active=True,
char_count=len(content),
)
self.session.add(spec)
await self.session.flush()
return spec
async def activate_spec_version(self, version_id: uuid.UUID) -> Optional[SpecVersion]:
"""Activate a specific version (revert), deactivating all others for same KB."""
query = select(SpecVersion).where(SpecVersion.id == version_id)
result = await self.session.execute(query)
target = result.scalar_one_or_none()
if not target:
return None
# Deactivate all versions for this KB
all_query = (
select(SpecVersion)
.where(SpecVersion.knowledge_base_id == target.knowledge_base_id)
.where(SpecVersion.is_active == True)
)
all_result = await self.session.execute(all_query)
for sv in all_result.scalars().all():
sv.is_active = False
# Activate the target
target.is_active = True
await self.session.flush()
return target
# ---- Processing Jobs ----
async def create_processing_job(
self,
knowledge_base_id: uuid.UUID,
total_documents: int,
triggered_by_id: Optional[uuid.UUID] = None,
triggered_by_name: Optional[str] = None,
) -> ProcessingJob:
"""Create a new processing job."""
job = ProcessingJob(
knowledge_base_id=knowledge_base_id,
total_documents=total_documents,
triggered_by_id=triggered_by_id,
triggered_by_name=triggered_by_name,
started_at=datetime.now(timezone.utc),
)
self.session.add(job)
await self.session.flush()
return job
async def update_processing_job(
self,
job_id: uuid.UUID,
status: Optional[str] = None,
parsed_documents: Optional[int] = None,
spec_version_id: Optional[uuid.UUID] = None,
error_message: Optional[str] = None,
completed_at: Optional[datetime] = None,
) -> Optional[ProcessingJob]:
"""Update a processing job's fields."""
query = select(ProcessingJob).where(ProcessingJob.id == job_id)
result = await self.session.execute(query)
job = result.scalar_one_or_none()
if not job:
return None
if status is not None:
job.status = status
if parsed_documents is not None:
job.parsed_documents = parsed_documents
if spec_version_id is not None:
job.spec_version_id = spec_version_id
if error_message is not None:
job.error_message = error_message
if completed_at is not None:
job.completed_at = completed_at
await self.session.flush()
return job
async def get_processing_job(self, job_id: uuid.UUID) -> Optional[ProcessingJob]:
"""Get a processing job by ID."""
query = select(ProcessingJob).where(ProcessingJob.id == job_id)
result = await self.session.execute(query)
return result.scalar_one_or_none()
async def get_latest_processing_job(self, kb_id: uuid.UUID) -> Optional[ProcessingJob]:
"""Get the most recent processing job for a knowledge base."""
query = (
select(ProcessingJob)
.where(ProcessingJob.knowledge_base_id == kb_id)
.order_by(ProcessingJob.created_at.desc())
.limit(1)
)
result = await self.session.execute(query)
return result.scalar_one_or_none()
async def has_active_job(self, kb_id: uuid.UUID) -> bool:
"""Check if there's an active (non-terminal) processing job for this KB."""
active_statuses = ["pending", "parsing_documents", "distilling"]
query = (
select(func.count())
.select_from(ProcessingJob)
.where(ProcessingJob.knowledge_base_id == kb_id)
.where(ProcessingJob.status.in_(active_statuses))
)
result = await self.session.execute(query)
return result.scalar() > 0

View file

@ -0,0 +1,218 @@
import logging
import uuid
from datetime import datetime, timezone
from typing import Optional
from app.models.database import async_session_factory
from app.repositories.knowledge_base_repository import KnowledgeBaseRepository
from app.services.storage_service import storage_service
logger = logging.getLogger(__name__)
# Distillation prompt templates per agent type
DISTILLATION_PROMPTS = {
"legal": """You are a compliance documentation specialist. Below is raw reference material about legal compliance, advertising standards, financial promotions, and disclaimers relevant to Barclays marketing materials.
Your task is to distil this into a clear, structured specification document that an AI compliance agent can use to review marketing proofs. Organise the content into logical sections with clear headings. Focus on actionable rules, required disclaimers, prohibited content, and compliance requirements.
Remove any redundancy, marketing fluff, or content not relevant to compliance checking. Preserve all specific rules, thresholds, and requirements verbatim.
RAW REFERENCE MATERIAL:
{combined_markdown}
OUTPUT: A well-structured markdown specification document.""",
"brand_barclays": """You are a brand guidelines specialist. Below is raw reference material about Barclays brand guidelines including logo usage, colour palettes, typography, design principles, and visual identity standards.
Your task is to distil this into a clear, structured specification document that an AI brand compliance agent can use to review marketing proofs. Organise the content into logical sections: logo rules, colour specifications (with exact hex/RGB values), typography rules, spacing/layout requirements, do's and don'ts.
Remove any redundancy or content not directly relevant to visual brand compliance checking. Preserve all specific measurements, colour values, and rules verbatim.
RAW REFERENCE MATERIAL:
{combined_markdown}
OUTPUT: A well-structured markdown specification document.""",
"brand_barclaycard": """You are a brand guidelines specialist. Below is raw reference material about Barclaycard brand guidelines including logo usage, colour palettes, typography, design principles, and visual identity standards.
Your task is to distil this into a clear, structured specification document that an AI brand compliance agent can use to review marketing proofs. Organise the content into logical sections: logo rules, colour specifications (with exact hex/RGB values), typography rules, spacing/layout requirements, do's and don'ts.
Remove any redundancy or content not directly relevant to visual brand compliance checking. Preserve all specific measurements, colour values, and rules verbatim.
RAW REFERENCE MATERIAL:
{combined_markdown}
OUTPUT: A well-structured markdown specification document.""",
"channel_best_practices": """You are a marketing channel specialist. Below is raw reference material about best practices for various marketing channels (social media, display, email, print, OOH) relevant to Barclays marketing.
Your task is to distil this into a clear, structured specification document that an AI channel compliance agent can use to review marketing proofs. Organise by channel type, then by platform/format. Focus on content guidelines, accessibility requirements, and engagement best practices.
Remove any redundancy or content not directly relevant to proof review. Preserve all specific recommendations and requirements.
RAW REFERENCE MATERIAL:
{combined_markdown}
OUTPUT: A well-structured markdown specification document.""",
"channel_tech_specs": """You are a marketing production specialist. Below is raw reference material about technical specifications for various marketing channels (dimensions, file formats, file sizes, resolution requirements, platform constraints).
Your task is to distil this into a clear, structured specification document that an AI technical compliance agent can use to review marketing proofs. Organise by channel, then platform, then format. Use tables where appropriate for dimensions and specs.
Remove any redundancy or content not directly relevant to technical spec checking. Preserve all specific dimensions, file size limits, format requirements, and platform constraints verbatim.
RAW REFERENCE MATERIAL:
{combined_markdown}
OUTPUT: A well-structured markdown specification document.""",
}
class KnowledgeBaseService:
"""Orchestrates the document processing pipeline."""
def __init__(self, llamaparse_service, gemini_service, reference_docs_service):
self.llamaparse = llamaparse_service
self.gemini = gemini_service
self.reference_docs = reference_docs_service
async def process_documents(
self,
kb_id: uuid.UUID,
job_id: uuid.UUID,
agent_key: str,
user_id: Optional[uuid.UUID] = None,
user_name: Optional[str] = None,
) -> None:
"""
Run the full processing pipeline as a background task.
1. Parse each source document with LlamaParse
2. Combine parsed markdown
3. Distil with Gemini into a spec
4. Save as new SpecVersion
5. Invalidate ReferenceDocsService cache
"""
async with async_session_factory() as session:
try:
repo = KnowledgeBaseRepository(session)
# Update job status to parsing
await repo.update_processing_job(job_id, status="parsing_documents")
await session.commit()
# Get all source documents for this KB
docs = await repo.get_source_documents(kb_id)
if not docs:
await repo.update_processing_job(
job_id, status="failed",
error_message="No source documents found.",
completed_at=datetime.now(timezone.utc),
)
await session.commit()
return
# Parse each document
parsed_count = 0
combined_parts = []
source_doc_ids = []
for doc in docs:
try:
# Load file data from storage
file_data = await storage_service.get_file(doc.file_storage_key)
if not file_data:
logger.error(f"[KB_SERVICE] File not found: {doc.file_storage_key}")
await repo.update_source_document_parse_status(
doc.id, "error", parse_error="File not found in storage."
)
await session.commit()
continue
# Parse with LlamaParse
await repo.update_source_document_parse_status(doc.id, "parsing")
await session.commit()
markdown = await self.llamaparse.parse_document(file_data, doc.filename)
# Update parse status
await repo.update_source_document_parse_status(
doc.id, "parsed", parsed_markdown=markdown
)
parsed_count += 1
await repo.update_processing_job(job_id, parsed_documents=parsed_count)
await session.commit()
if markdown.strip():
combined_parts.append(f"# {doc.filename}\n\n{markdown}")
source_doc_ids.append(str(doc.id))
except Exception as e:
logger.error(f"[KB_SERVICE] Error parsing {doc.filename}: {e}")
await repo.update_source_document_parse_status(
doc.id, "error", parse_error=str(e)
)
parsed_count += 1
await repo.update_processing_job(job_id, parsed_documents=parsed_count)
await session.commit()
if not combined_parts:
await repo.update_processing_job(
job_id, status="failed",
error_message="No documents were successfully parsed.",
completed_at=datetime.now(timezone.utc),
)
await session.commit()
return
# Distil with Gemini
await repo.update_processing_job(job_id, status="distilling")
await session.commit()
combined_markdown = "\n\n---\n\n".join(combined_parts)
prompt_template = DISTILLATION_PROMPTS.get(agent_key, DISTILLATION_PROMPTS["legal"])
prompt = prompt_template.format(combined_markdown=combined_markdown)
logger.info(f"[KB_SERVICE] Sending {len(combined_markdown)} chars to Gemini for distillation")
response = await self.gemini.client.aio.models.generate_content(
model=self.gemini.model,
contents=prompt,
)
spec_content = response.text.strip()
logger.info(f"[KB_SERVICE] Distillation complete: {len(spec_content)} chars")
# Save as new spec version
spec = await repo.create_spec_version(
knowledge_base_id=kb_id,
content=spec_content,
source_document_ids=source_doc_ids,
generated_by_id=user_id,
generated_by_name=user_name,
processing_job_id=job_id,
)
await session.commit()
# Update job as completed
await repo.update_processing_job(
job_id,
status="completed",
spec_version_id=spec.id,
completed_at=datetime.now(timezone.utc),
)
await session.commit()
# Invalidate reference docs cache
self.reference_docs.invalidate_cache(agent_key)
logger.info(f"[KB_SERVICE] Pipeline complete for {agent_key}, spec version {spec.version_number}")
except Exception as e:
logger.error(f"[KB_SERVICE] Pipeline failed for job {job_id}: {e}")
try:
await repo.update_processing_job(
job_id, status="failed",
error_message=str(e),
completed_at=datetime.now(timezone.utc),
)
await session.commit()
except Exception:
logger.error("[KB_SERVICE] Failed to update job status after error")

View file

@ -0,0 +1,58 @@
import logging
import tempfile
from pathlib import Path
logger = logging.getLogger(__name__)
class LlamaParseService:
"""Service for parsing documents using LlamaParse."""
def __init__(self, api_key: str):
self.api_key = api_key
async def parse_document(self, file_data: bytes, filename: str) -> str:
"""
Parse a document and return its content as markdown.
Args:
file_data: Raw bytes of the document
filename: Original filename (used for format detection)
Returns:
Parsed markdown text
"""
from llama_cloud_services import LlamaParse
logger.info(f"[LLAMAPARSE] Starting parse for '{filename}' ({len(file_data)} bytes)")
parser = LlamaParse(
api_key=self.api_key,
num_workers=1,
verbose=True,
language="en",
)
# Write bytes to a temp file since LlamaParse needs a file path
suffix = Path(filename).suffix or ".pdf"
with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
tmp.write(file_data)
tmp_path = tmp.name
try:
documents = await parser.aload_data(tmp_path)
if not documents:
logger.warning(f"[LLAMAPARSE] No documents returned for '{filename}'")
return ""
# Combine all document pages into a single markdown string
combined = "\n\n".join(doc.text for doc in documents if doc.text)
logger.info(f"[LLAMAPARSE] Parsed '{filename}' -> {len(combined)} chars from {len(documents)} pages")
return combined
finally:
# Clean up temp file
try:
Path(tmp_path).unlink()
except OSError:
pass

View file

@ -29,6 +29,47 @@ class ReferenceDocsService:
self._channel_tech_specs_spec: str | None = None
self._legal_spec: str | None = None
# DB-backed spec cache (takes priority over file-based)
self._db_specs: dict[str, str] = {}
async def load_specs_from_db(self, session) -> None:
"""Load active spec content from DB for all agent keys."""
from app.repositories.knowledge_base_repository import KnowledgeBaseRepository
repo = KnowledgeBaseRepository(session)
agent_keys = [
"legal", "brand_barclays", "brand_barclaycard",
"channel_best_practices", "channel_tech_specs",
]
for key in agent_keys:
spec = await repo.get_active_spec_by_key(key)
if spec and spec.content:
self._db_specs[key] = spec.content
print(f" Loaded DB spec for {key}: {len(spec.content)} chars (v{spec.version_number})")
def invalidate_cache(self, agent_key: str | None = None) -> None:
"""Clear cached specs. If agent_key is None, clear all."""
if agent_key is None:
self._db_specs.clear()
self._barclaycard_brand_spec = None
self._barclays_brand_spec = None
self._channel_best_practices_spec = None
self._channel_tech_specs_spec = None
self._legal_spec = None
else:
self._db_specs.pop(agent_key, None)
# Also clear the file-based cache so next call re-checks DB
cache_map = {
"legal": "_legal_spec",
"brand_barclays": "_barclays_brand_spec",
"brand_barclaycard": "_barclaycard_brand_spec",
"channel_best_practices": "_channel_best_practices_spec",
"channel_tech_specs": "_channel_tech_specs_spec",
}
attr = cache_map.get(agent_key)
if attr:
setattr(self, attr, None)
def get_brand_context(self) -> str:
"""Load and return all brand guideline documents as a single context string."""
if self._brand_context is None:
@ -37,7 +78,11 @@ class ReferenceDocsService:
return self._brand_context
def get_barclaycard_brand_spec(self) -> str:
"""Load and return the Barclaycard brand specification from prompts directory."""
"""Load and return the Barclaycard brand specification."""
# Check DB cache first
if "brand_barclaycard" in self._db_specs:
return self._db_specs["brand_barclaycard"]
if self._barclaycard_brand_spec is None:
spec_path = self.prompts_path / "brand_barclaycard.md"
try:
@ -53,7 +98,11 @@ class ReferenceDocsService:
return self._barclaycard_brand_spec
def get_barclays_brand_spec(self) -> str:
"""Load and return the Barclays brand specification from prompts directory."""
"""Load and return the Barclays brand specification."""
# Check DB cache first
if "brand_barclays" in self._db_specs:
return self._db_specs["brand_barclays"]
# Check cache first
if not hasattr(self, '_barclays_brand_spec'):
self._barclays_brand_spec = None
@ -80,7 +129,11 @@ class ReferenceDocsService:
return self._channel_context
def get_channel_best_practices_spec(self) -> str:
"""Load and return the Channel Best Practices specification from prompts directory."""
"""Load and return the Channel Best Practices specification."""
# Check DB cache first
if "channel_best_practices" in self._db_specs:
return self._db_specs["channel_best_practices"]
if self._channel_best_practices_spec is None:
spec_path = self.prompts_path / "channel_best_practices.md"
try:
@ -95,7 +148,11 @@ class ReferenceDocsService:
return self._channel_best_practices_spec
def get_channel_tech_specs_spec(self) -> str:
"""Load and return the Channel Tech Specs specification from prompts directory."""
"""Load and return the Channel Tech Specs specification."""
# Check DB cache first
if "channel_tech_specs" in self._db_specs:
return self._db_specs["channel_tech_specs"]
if self._channel_tech_specs_spec is None:
spec_path = self.prompts_path / "channel_tech_specs.md"
try:
@ -110,7 +167,11 @@ class ReferenceDocsService:
return self._channel_tech_specs_spec
def get_legal_spec(self) -> str:
"""Load and return the Legal specification from prompts directory."""
"""Load and return the Legal specification."""
# Check DB cache first
if "legal" in self._db_specs:
return self._db_specs["legal"]
if self._legal_spec is None:
spec_path = self.prompts_path / "legal.md"
try:

View file

@ -110,6 +110,27 @@ class StorageService:
"""Calculate MD5 checksum of file data."""
return hashlib.md5(file_data).hexdigest()
async def store_kb_document(
self,
file_data: bytes,
kb_id: uuid.UUID,
doc_id: uuid.UUID,
filename: str,
mime_type: str,
) -> str:
"""Store a knowledge base source document and return the storage key."""
# Sanitize filename
safe_name = "".join(c if c.isalnum() or c in "-_." else "_" for c in filename)
storage_key = f"kb/{kb_id}/{doc_id}_{safe_name}"
file_path = self._get_file_path(storage_key)
file_path.parent.mkdir(parents=True, exist_ok=True)
async with aiofiles.open(file_path, "wb") as f:
await f.write(file_data)
return storage_key
# Singleton instance
storage_service = StorageService()

View file

@ -12,3 +12,4 @@ sqlalchemy[asyncio]>=2.0.0
asyncpg>=0.29.0
alembic>=1.13.0
PyMuPDF>=1.23.0
llama-cloud-services>=0.6.0

View file

@ -19,8 +19,9 @@ import { Campaigns } from './components/Campaigns';
import { Auditing } from './components/Auditing';
import { Login } from './components/Login';
import { WIPReviewer } from './components/WIPReviewer';
import { KnowledgeBase } from './components/KnowledgeBase';
type View = 'Home' | 'Analytics' | 'Campaigns' | 'WIP Reviewer' | 'CopyGenAI' | 'Settings' | 'Profile' | 'Auditing';
type View = 'Home' | 'Analytics' | 'Campaigns' | 'WIP Reviewer' | 'CopyGenAI' | 'Settings' | 'Profile' | 'Auditing' | 'Knowledge Base';
export interface DropdownOptions {
campaigns: string[];
@ -828,8 +829,10 @@ const App: React.FC = () => {
errorItems={errorItems}
onNavigate={handleNavigateToAuditedItem}
/>;
case 'Knowledge Base':
return <KnowledgeBase />;
case 'Settings':
return <Settings
return <Settings
options={dropdownOptions}
onAddCampaign={handleAddCampaignOption}
onRemoveCampaign={handleRemoveCampaignOption}
@ -883,6 +886,7 @@ const App: React.FC = () => {
onNavigate={(view) => handleNavigate(view as View)}
userName={userInfo?.name}
userEmail={userInfo?.email}
isAdmin={true}
/>
<div className="flex-1 flex flex-col overflow-y-auto">
<main className="flex-1 flex flex-col min-h-full">

View file

@ -0,0 +1,643 @@
import React, { useState, useEffect, useCallback, useRef } from 'react';
import apiService from '../services/apiService';
import type {
KnowledgeBaseListItem, KnowledgeBaseDetail, SourceDocument,
ProcessingJob, SpecVersionListItem, SpecVersionDetail, DiffResult,
} from '../types';
import { ArrowLeftIcon } from './icons/ArrowLeftIcon';
import { TrashIcon } from './icons/TrashIcon';
import { UploadIcon } from './icons/UploadIcon';
import { SpinnerIcon } from './icons/SpinnerIcon';
// --- Helper Components ---
const StatusBadge: React.FC<{ status: string }> = ({ status }) => {
const colors: Record<string, string> = {
pending: 'bg-yellow-100 text-yellow-800',
parsing: 'bg-blue-100 text-blue-800',
parsed: 'bg-green-100 text-green-800',
error: 'bg-red-100 text-red-800',
parsing_documents: 'bg-blue-100 text-blue-800',
distilling: 'bg-purple-100 text-purple-800',
completed: 'bg-green-100 text-green-800',
failed: 'bg-red-100 text-red-800',
};
return (
<span className={`inline-flex items-center px-2.5 py-0.5 rounded-full text-xs font-medium ${colors[status] || 'bg-grey-100 text-grey-700'}`}>
{status}
</span>
);
};
function formatBytes(bytes: number): string {
if (bytes < 1024) return `${bytes} B`;
if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)} KB`;
return `${(bytes / (1024 * 1024)).toFixed(1)} MB`;
}
function formatDate(dateStr: string | null): string {
if (!dateStr) return '-';
return new Date(dateStr).toLocaleDateString('en-GB', {
day: '2-digit', month: 'short', year: 'numeric', hour: '2-digit', minute: '2-digit',
});
}
// --- Main Component ---
export const KnowledgeBase: React.FC = () => {
// Level 1: List view
const [knowledgeBases, setKnowledgeBases] = useState<KnowledgeBaseListItem[]>([]);
const [loading, setLoading] = useState(true);
// Level 2: Detail view
const [selectedKb, setSelectedKb] = useState<KnowledgeBaseDetail | null>(null);
const [activeTab, setActiveTab] = useState<'documents' | 'versions'>('documents');
const [versions, setVersions] = useState<SpecVersionListItem[]>([]);
const [uploading, setUploading] = useState(false);
// Processing job polling
const [activeJob, setActiveJob] = useState<ProcessingJob | null>(null);
const pollRef = useRef<ReturnType<typeof setInterval> | null>(null);
// Level 3: Diff view
const [diffResult, setDiffResult] = useState<DiffResult | null>(null);
const [selectedForDiff, setSelectedForDiff] = useState<string[]>([]);
// Spec content modal
const [viewingSpec, setViewingSpec] = useState<SpecVersionDetail | null>(null);
// --- Data Loading ---
const loadKnowledgeBases = useCallback(async () => {
try {
setLoading(true);
const data = await apiService.getKnowledgeBases();
setKnowledgeBases(data);
} catch (err) {
console.error('Failed to load knowledge bases:', err);
} finally {
setLoading(false);
}
}, []);
useEffect(() => {
loadKnowledgeBases();
}, [loadKnowledgeBases]);
const loadKbDetail = useCallback(async (kbId: string) => {
try {
const [detail, vers] = await Promise.all([
apiService.getKnowledgeBase(kbId),
apiService.getSpecVersions(kbId),
]);
setSelectedKb(detail);
setVersions(vers);
// Check for active job
if (detail.latest_job && ['pending', 'parsing_documents', 'distilling'].includes(detail.latest_job.status)) {
setActiveJob(detail.latest_job);
} else {
setActiveJob(null);
}
} catch (err) {
console.error('Failed to load KB detail:', err);
}
}, []);
// Poll active job
useEffect(() => {
if (activeJob && selectedKb) {
pollRef.current = setInterval(async () => {
try {
const job = await apiService.getProcessingJob(selectedKb.id, activeJob.id);
setActiveJob(job);
if (['completed', 'failed'].includes(job.status)) {
if (pollRef.current) clearInterval(pollRef.current);
pollRef.current = null;
// Refresh data
loadKbDetail(selectedKb.id);
loadKnowledgeBases();
}
} catch (err) {
console.error('Failed to poll job:', err);
}
}, 3000);
}
return () => {
if (pollRef.current) {
clearInterval(pollRef.current);
pollRef.current = null;
}
};
}, [activeJob, selectedKb, loadKbDetail, loadKnowledgeBases]);
// --- Handlers ---
const handleSelectKb = (kb: KnowledgeBaseListItem) => {
loadKbDetail(kb.id);
setActiveTab('documents');
setDiffResult(null);
setSelectedForDiff([]);
};
const handleBack = () => {
setSelectedKb(null);
setDiffResult(null);
setSelectedForDiff([]);
setViewingSpec(null);
loadKnowledgeBases();
};
const handleBackFromDiff = () => {
setDiffResult(null);
setSelectedForDiff([]);
};
const handleFileUpload = async (files: FileList | null) => {
if (!files || !selectedKb) return;
setUploading(true);
try {
for (let i = 0; i < files.length; i++) {
await apiService.uploadSourceDocument(selectedKb.id, files[i]);
}
await loadKbDetail(selectedKb.id);
loadKnowledgeBases();
} catch (err) {
console.error('Failed to upload file:', err);
} finally {
setUploading(false);
}
};
const handleRemoveDoc = async (docId: string) => {
if (!selectedKb) return;
try {
await apiService.removeSourceDocument(selectedKb.id, docId);
await loadKbDetail(selectedKb.id);
loadKnowledgeBases();
} catch (err) {
console.error('Failed to remove document:', err);
}
};
const handleProcess = async () => {
if (!selectedKb) return;
try {
const job = await apiService.triggerProcessing(selectedKb.id);
setActiveJob(job);
} catch (err: any) {
console.error('Failed to trigger processing:', err);
alert(err.message || 'Failed to trigger processing.');
}
};
const handleViewSpec = async (versionId: string) => {
if (!selectedKb) return;
try {
const spec = await apiService.getSpecVersion(selectedKb.id, versionId);
setViewingSpec(spec);
} catch (err) {
console.error('Failed to load spec:', err);
}
};
const handleDiffToggle = (versionId: string) => {
setSelectedForDiff(prev => {
if (prev.includes(versionId)) {
return prev.filter(id => id !== versionId);
}
if (prev.length >= 2) {
return [prev[1], versionId];
}
return [...prev, versionId];
});
};
const handleCompare = async () => {
if (selectedForDiff.length !== 2 || !selectedKb) return;
try {
const diff = await apiService.getSpecDiff(selectedKb.id, selectedForDiff[0], selectedForDiff[1]);
setDiffResult(diff);
} catch (err) {
console.error('Failed to compute diff:', err);
}
};
const handleActivateVersion = async (versionId: string) => {
if (!selectedKb) return;
if (!confirm('Are you sure you want to activate this version? This will change the spec used by the AI agent.')) return;
try {
await apiService.activateSpecVersion(selectedKb.id, versionId);
await loadKbDetail(selectedKb.id);
} catch (err) {
console.error('Failed to activate version:', err);
}
};
// --- Drag & Drop ---
const [isDragging, setIsDragging] = useState(false);
const handleDragOver = (e: React.DragEvent) => {
e.preventDefault();
setIsDragging(true);
};
const handleDragLeave = () => setIsDragging(false);
const handleDrop = (e: React.DragEvent) => {
e.preventDefault();
setIsDragging(false);
handleFileUpload(e.dataTransfer.files);
};
// ===================== RENDER =====================
// Level 3: Diff view
if (diffResult && selectedKb) {
return (
<div className="p-8 max-w-6xl mx-auto w-full">
<button onClick={handleBackFromDiff} className="flex items-center gap-2 text-active-blue hover:underline mb-6 text-sm font-medium">
<ArrowLeftIcon className="h-4 w-4" /> Back to Version History
</button>
<div className="mb-6">
<h1 className="text-2xl font-bold text-primary-blue">{selectedKb.display_name} - Diff</h1>
<p className="text-grey-700 mt-1">
Version {diffResult.version_a} vs Version {diffResult.version_b}
<span className="ml-4 text-green-600 font-medium">+{diffResult.additions}</span>
<span className="ml-2 text-red-600 font-medium">-{diffResult.deletions}</span>
</p>
</div>
<div className="bg-white rounded-xl border border-grey-300 overflow-hidden">
<div className="overflow-x-auto max-h-[70vh] overflow-y-auto font-mono text-sm">
{diffResult.lines.map((line, i) => (
<div
key={i}
className={`flex ${
line.type === 'add' ? 'bg-green-50' :
line.type === 'remove' ? 'bg-red-50' :
''
}`}
>
<span className="w-12 text-right pr-2 text-grey-700 select-none border-r border-grey-200 flex-shrink-0 py-0.5">
{line.line_number_old || ''}
</span>
<span className="w-12 text-right pr-2 text-grey-700 select-none border-r border-grey-200 flex-shrink-0 py-0.5">
{line.line_number_new || ''}
</span>
<span className={`w-6 text-center flex-shrink-0 py-0.5 ${
line.type === 'add' ? 'text-green-600' :
line.type === 'remove' ? 'text-red-600' :
'text-grey-400'
}`}>
{line.type === 'add' ? '+' : line.type === 'remove' ? '-' : ' '}
</span>
<pre className="flex-1 py-0.5 pr-4 whitespace-pre-wrap break-words">{line.content}</pre>
</div>
))}
</div>
</div>
</div>
);
}
// Spec content modal
if (viewingSpec) {
return (
<div className="p-8 max-w-6xl mx-auto w-full">
<button onClick={() => setViewingSpec(null)} className="flex items-center gap-2 text-active-blue hover:underline mb-6 text-sm font-medium">
<ArrowLeftIcon className="h-4 w-4" /> Back to Version History
</button>
<div className="mb-6">
<h1 className="text-2xl font-bold text-primary-blue">
{selectedKb?.display_name} - Version {viewingSpec.version_number}
</h1>
<p className="text-grey-700 mt-1">
{formatDate(viewingSpec.created_at)} | {viewingSpec.char_count.toLocaleString()} chars
{viewingSpec.generated_by_name && <span> | Generated by {viewingSpec.generated_by_name}</span>}
{viewingSpec.is_active && <span className="ml-2 inline-flex items-center px-2 py-0.5 rounded-full text-xs font-medium bg-green-100 text-green-800">Active</span>}
</p>
</div>
<div className="bg-white rounded-xl border border-grey-300 p-6 overflow-x-auto max-h-[70vh] overflow-y-auto">
<pre className="whitespace-pre-wrap text-sm text-black-title font-mono leading-relaxed">{viewingSpec.content}</pre>
</div>
</div>
);
}
// Level 2: Agent detail
if (selectedKb) {
const isJobRunning = activeJob && ['pending', 'parsing_documents', 'distilling'].includes(activeJob.status);
return (
<div className="p-8 max-w-6xl mx-auto w-full">
<button onClick={handleBack} className="flex items-center gap-2 text-active-blue hover:underline mb-6 text-sm font-medium">
<ArrowLeftIcon className="h-4 w-4" /> Back to Knowledge Bases
</button>
<div className="mb-6">
<h1 className="text-2xl font-bold text-primary-blue">{selectedKb.display_name}</h1>
{selectedKb.description && <p className="text-grey-700 mt-1">{selectedKb.description}</p>}
</div>
{/* Tabs */}
<div className="flex gap-1 mb-6 bg-grey-100 p-1 rounded-xl w-fit">
{(['documents', 'versions'] as const).map(tab => (
<button
key={tab}
onClick={() => setActiveTab(tab)}
className={`px-5 py-2 rounded-lg text-sm font-medium transition-all duration-200 ${
activeTab === tab
? 'bg-white text-primary-blue shadow-sm'
: 'text-grey-700 hover:text-primary-blue'
}`}
>
{tab === 'documents' ? 'Source Documents' : 'Version History'}
</button>
))}
</div>
{activeTab === 'documents' ? (
<div>
{/* Upload area */}
<div
onDragOver={handleDragOver}
onDragLeave={handleDragLeave}
onDrop={handleDrop}
className={`border-2 border-dashed rounded-xl p-8 mb-6 text-center transition-colors ${
isDragging ? 'border-active-blue bg-active-blue/5' : 'border-grey-300 hover:border-active-blue/50'
}`}
>
<UploadIcon className="h-10 w-10 mx-auto text-grey-700 mb-3" />
<p className="text-grey-700 mb-2">Drag & drop files here, or</p>
<label className="inline-flex items-center gap-2 bg-active-blue text-white font-semibold py-2 px-6 rounded-full hover:bg-active-blue/90 transition-colors cursor-pointer">
{uploading ? <SpinnerIcon className="h-4 w-4 animate-spin" /> : null}
{uploading ? 'Uploading...' : 'Browse Files'}
<input
type="file"
multiple
className="hidden"
onChange={(e) => handleFileUpload(e.target.files)}
accept=".pdf,.docx,.pptx,.xlsx,.html,.txt,.md,.png,.jpg,.jpeg,.webp"
disabled={uploading}
/>
</label>
<p className="text-xs text-grey-700 mt-2">PDF, DOCX, PPTX, XLSX, HTML, TXT, MD, PNG, JPG, WebP</p>
</div>
{/* Process button + job status */}
<div className="flex items-center gap-4 mb-6">
<button
onClick={handleProcess}
disabled={!!isJobRunning || selectedKb.source_documents.length === 0}
className="bg-active-blue text-white font-semibold py-2.5 px-8 rounded-full hover:bg-active-blue/90 transition-colors disabled:bg-grey-700 disabled:cursor-not-allowed"
>
{isJobRunning ? 'Processing...' : 'Process Documents'}
</button>
{selectedKb.active_spec_version && (
<span className="text-sm text-grey-700">
Active spec: v{selectedKb.active_spec_version} ({selectedKb.active_spec_char_count?.toLocaleString()} chars)
</span>
)}
</div>
{/* Processing progress */}
{isJobRunning && activeJob && (
<div className="bg-blue-50 border border-blue-200 rounded-xl p-4 mb-6">
<div className="flex items-center gap-3 mb-2">
<SpinnerIcon className="h-5 w-5 text-active-blue animate-spin" />
<span className="font-medium text-primary-blue">
{activeJob.status === 'parsing_documents' ? 'Parsing documents...' :
activeJob.status === 'distilling' ? 'Distilling spec with AI...' :
'Starting...'}
</span>
</div>
{activeJob.status === 'parsing_documents' && (
<div className="w-full bg-blue-100 rounded-full h-2">
<div
className="bg-active-blue h-2 rounded-full transition-all duration-500"
style={{ width: `${activeJob.total_documents > 0 ? (activeJob.parsed_documents / activeJob.total_documents) * 100 : 0}%` }}
/>
</div>
)}
<p className="text-sm text-grey-700 mt-1">
{activeJob.parsed_documents} / {activeJob.total_documents} documents parsed
</p>
</div>
)}
{/* Last job result */}
{selectedKb.latest_job && !isJobRunning && (
<div className={`border rounded-xl p-3 mb-6 text-sm ${
selectedKb.latest_job.status === 'completed'
? 'bg-green-50 border-green-200 text-green-800'
: selectedKb.latest_job.status === 'failed'
? 'bg-red-50 border-red-200 text-red-800'
: 'bg-grey-100 border-grey-300 text-grey-700'
}`}>
Last processing: <StatusBadge status={selectedKb.latest_job.status} />
{selectedKb.latest_job.completed_at && <span className="ml-2">{formatDate(selectedKb.latest_job.completed_at)}</span>}
{selectedKb.latest_job.error_message && <p className="mt-1 text-sm">{selectedKb.latest_job.error_message}</p>}
</div>
)}
{/* Documents table */}
{selectedKb.source_documents.length > 0 ? (
<div className="bg-white rounded-xl border border-grey-300 overflow-hidden">
<table className="w-full text-sm">
<thead>
<tr className="bg-grey-100 text-left text-grey-700">
<th className="px-4 py-3 font-medium">Filename</th>
<th className="px-4 py-3 font-medium">Size</th>
<th className="px-4 py-3 font-medium">Uploaded</th>
<th className="px-4 py-3 font-medium">Uploaded By</th>
<th className="px-4 py-3 font-medium">Parse Status</th>
<th className="px-4 py-3 font-medium w-12"></th>
</tr>
</thead>
<tbody className="divide-y divide-grey-200">
{selectedKb.source_documents.map(doc => (
<tr key={doc.id} className="hover:bg-grey-50 transition-colors">
<td className="px-4 py-3 font-medium text-primary-blue">{doc.filename}</td>
<td className="px-4 py-3 text-grey-700">{formatBytes(doc.file_size_bytes)}</td>
<td className="px-4 py-3 text-grey-700">{formatDate(doc.created_at)}</td>
<td className="px-4 py-3 text-grey-700">{doc.uploaded_by_name || '-'}</td>
<td className="px-4 py-3"><StatusBadge status={doc.parse_status} /></td>
<td className="px-4 py-3">
<button
onClick={() => handleRemoveDoc(doc.id)}
className="text-grey-700 hover:text-error transition-colors"
title="Remove document"
>
<TrashIcon className="h-4 w-4" />
</button>
</td>
</tr>
))}
</tbody>
</table>
</div>
) : (
<div className="text-center py-12 text-grey-700 bg-grey-100 rounded-xl">
No source documents uploaded yet.
</div>
)}
</div>
) : (
/* Versions tab */
<div>
{/* Compare button */}
{selectedForDiff.length === 2 && (
<div className="mb-4">
<button
onClick={handleCompare}
className="bg-active-blue text-white font-semibold py-2 px-6 rounded-full hover:bg-active-blue/90 transition-colors"
>
Compare Selected Versions
</button>
</div>
)}
{versions.length > 0 ? (
<div className="bg-white rounded-xl border border-grey-300 overflow-hidden">
<table className="w-full text-sm">
<thead>
<tr className="bg-grey-100 text-left text-grey-700">
<th className="px-4 py-3 font-medium w-10">
<span className="sr-only">Compare</span>
</th>
<th className="px-4 py-3 font-medium">Version</th>
<th className="px-4 py-3 font-medium">Date</th>
<th className="px-4 py-3 font-medium">Generated By</th>
<th className="px-4 py-3 font-medium">Chars</th>
<th className="px-4 py-3 font-medium">Status</th>
<th className="px-4 py-3 font-medium">Actions</th>
</tr>
</thead>
<tbody className="divide-y divide-grey-200">
{versions.map(v => (
<tr key={v.id} className="hover:bg-grey-50 transition-colors">
<td className="px-4 py-3">
<input
type="checkbox"
checked={selectedForDiff.includes(v.id)}
onChange={() => handleDiffToggle(v.id)}
className="h-4 w-4 rounded border-grey-300 text-active-blue focus:ring-active-blue"
/>
</td>
<td className="px-4 py-3 font-medium text-primary-blue">v{v.version_number}</td>
<td className="px-4 py-3 text-grey-700">{formatDate(v.created_at)}</td>
<td className="px-4 py-3 text-grey-700">{v.generated_by_name || '-'}</td>
<td className="px-4 py-3 text-grey-700">{v.char_count.toLocaleString()}</td>
<td className="px-4 py-3">
{v.is_active ? (
<span className="inline-flex items-center px-2.5 py-0.5 rounded-full text-xs font-medium bg-green-100 text-green-800">
Active
</span>
) : (
<span className="text-grey-700 text-xs">Inactive</span>
)}
</td>
<td className="px-4 py-3">
<div className="flex items-center gap-2">
<button
onClick={() => handleViewSpec(v.id)}
className="text-active-blue hover:underline text-xs font-medium"
>
View
</button>
{!v.is_active && (
<button
onClick={() => handleActivateVersion(v.id)}
className="text-amber-600 hover:underline text-xs font-medium"
>
Activate
</button>
)}
</div>
</td>
</tr>
))}
</tbody>
</table>
</div>
) : (
<div className="text-center py-12 text-grey-700 bg-grey-100 rounded-xl">
No spec versions generated yet.
</div>
)}
</div>
)}
</div>
);
}
// Level 1: Agent list
if (loading) {
return (
<div className="p-8 flex items-center justify-center min-h-[400px]">
<SpinnerIcon className="h-8 w-8 text-active-blue animate-spin" />
</div>
);
}
return (
<div className="p-8 max-w-6xl mx-auto w-full">
<div className="mb-8">
<h1 className="text-2xl font-bold text-primary-blue">Knowledge Base</h1>
<p className="text-grey-700 mt-1">Manage the AI agent knowledge bases. Upload source documents, process them, and version the resulting specs.</p>
</div>
<div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-4">
{knowledgeBases.map(kb => {
const statusColor =
kb.latest_job_status === 'failed' ? 'border-red-300 bg-red-50' :
kb.latest_job_status === 'completed' ? 'border-grey-300 bg-white' :
'border-grey-300 bg-white';
return (
<button
key={kb.id}
onClick={() => handleSelectKb(kb)}
className={`text-left p-6 rounded-xl border-2 ${statusColor} hover:border-active-blue hover:shadow-md transition-all duration-200 group`}
>
<h3 className="text-lg font-semibold text-primary-blue group-hover:text-active-blue transition-colors">
{kb.display_name}
</h3>
{kb.description && (
<p className="text-sm text-grey-700 mt-1 line-clamp-2">{kb.description}</p>
)}
<div className="mt-4 space-y-1 text-sm text-grey-700">
<div className="flex justify-between">
<span>Source Documents</span>
<span className="font-medium text-primary-blue">{kb.source_document_count}</span>
</div>
<div className="flex justify-between">
<span>Active Spec</span>
<span className="font-medium text-primary-blue">
{kb.active_spec_version ? `v${kb.active_spec_version}` : 'None'}
</span>
</div>
{kb.latest_job_status && (
<div className="flex justify-between items-center">
<span>Last Job</span>
<StatusBadge status={kb.latest_job_status} />
</div>
)}
{kb.latest_job_completed_at && (
<div className="text-xs text-grey-700 text-right">
{formatDate(kb.latest_job_completed_at)}
</div>
)}
</div>
</button>
);
})}
</div>
</div>
);
};

View file

@ -7,6 +7,7 @@ import { SettingsIcon } from './icons/SettingsIcon';
import { UserIcon } from './icons/UserIcon';
import { CampaignsIcon } from './icons/CampaignsIcon';
import { AuditIcon } from './icons/AuditIcon';
import { KnowledgeBaseIcon } from './icons/KnowledgeBaseIcon';
const navigation = [
{ name: 'Home', icon: DashboardIcon },
@ -15,6 +16,7 @@ const navigation = [
// { name: 'CopyGenAI', icon: CopyGenAIIcon }, // Hidden: Moved to Settings > Beta
{ name: 'Analytics', icon: AnalyticsIcon },
{ name: 'Auditing', icon: AuditIcon },
{ name: 'Knowledge Base', icon: KnowledgeBaseIcon, adminOnly: true },
{ name: 'Settings', icon: SettingsIcon },
];
@ -23,9 +25,10 @@ interface SidebarProps {
onNavigate: (viewName: string) => void;
userName?: string;
userEmail?: string;
isAdmin?: boolean;
}
export const Sidebar: React.FC<SidebarProps> = ({ activeItem, onNavigate, userName, userEmail }) => {
export const Sidebar: React.FC<SidebarProps> = ({ activeItem, onNavigate, userName, userEmail, isAdmin = true }) => {
return (
<aside className="w-72 flex-shrink-0 bg-primary-blue text-slate-200 flex flex-col border-r border-white/10 font-sans">
{/* Brand Header */}
@ -44,7 +47,7 @@ export const Sidebar: React.FC<SidebarProps> = ({ activeItem, onNavigate, userNa
{/* Navigation */}
<nav className="flex-1 pr-4 pl-0 py-8 space-y-2 overflow-y-auto">
{navigation.map((item) => {
{navigation.filter(item => !(item as any).adminOnly || isAdmin).map((item) => {
const Icon = item.icon;
const isActive = item.name === activeItem;
const isComingSoon = (item as any).isComingSoon;

View file

@ -0,0 +1,7 @@
import React from 'react';
export const KnowledgeBaseIcon: React.FC<React.SVGProps<SVGSVGElement>> = (props) => (
<svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" strokeWidth={1.5} stroke="currentColor" {...props}>
<path strokeLinecap="round" strokeLinejoin="round" d="M12 6.042A8.967 8.967 0 006 3.75c-1.052 0-2.062.18-3 .512v14.25A8.987 8.987 0 016 18c2.305 0 4.408.867 6 2.292m0-14.25a8.966 8.966 0 016-2.292c1.052 0 2.062.18 3 .512v14.25A8.987 8.987 0 0018 18a8.967 8.967 0 00-6 2.292m0-14.25v14.25" />
</svg>
);

View file

@ -1,6 +1,10 @@
import { IPublicClientApplication } from '@azure/msal-browser';
import { getAccessToken } from './authService';
import type { AgentReview, FlaggedItem, ResolvedItem, ErrorItem, PDFPage } from '../types';
import type {
AgentReview, FlaggedItem, ResolvedItem, ErrorItem, PDFPage,
KnowledgeBaseListItem, KnowledgeBaseDetail, SourceDocument,
ProcessingJob, SpecVersionListItem, SpecVersionDetail, DiffResult,
} from '../types';
const API_URL = import.meta.env.VITE_BACKEND_URL || 'http://localhost:8000';
@ -404,6 +408,71 @@ class ApiService {
body: JSON.stringify(data),
});
}
// Knowledge Base endpoints
async getKnowledgeBases(): Promise<KnowledgeBaseListItem[]> {
return this.fetch<KnowledgeBaseListItem[]>('/knowledge-base');
}
async getKnowledgeBase(kbId: string): Promise<KnowledgeBaseDetail> {
return this.fetch<KnowledgeBaseDetail>(`/knowledge-base/${kbId}`);
}
async uploadSourceDocument(kbId: string, file: File): Promise<SourceDocument> {
const headers = await this.getHeaders();
// Remove Content-Type so browser sets multipart boundary
delete (headers as Record<string, string>)['Content-Type'];
const formData = new FormData();
formData.append('file', file);
const response = await fetch(`${API_URL}/api/knowledge-base/${kbId}/documents`, {
method: 'POST',
headers,
body: formData,
});
if (!response.ok) {
const errorData = await response.json().catch(() => ({}));
throw new Error(errorData.detail || `HTTP ${response.status}: ${response.statusText}`);
}
return response.json();
}
async removeSourceDocument(kbId: string, docId: string): Promise<void> {
return this.fetch<void>(`/knowledge-base/${kbId}/documents/${docId}`, {
method: 'DELETE',
});
}
async triggerProcessing(kbId: string): Promise<ProcessingJob> {
return this.fetch<ProcessingJob>(`/knowledge-base/${kbId}/process`, {
method: 'POST',
});
}
async getProcessingJob(kbId: string, jobId: string): Promise<ProcessingJob> {
return this.fetch<ProcessingJob>(`/knowledge-base/${kbId}/jobs/${jobId}`);
}
async getSpecVersions(kbId: string): Promise<SpecVersionListItem[]> {
return this.fetch<SpecVersionListItem[]>(`/knowledge-base/${kbId}/versions`);
}
async getSpecVersion(kbId: string, versionId: string): Promise<SpecVersionDetail> {
return this.fetch<SpecVersionDetail>(`/knowledge-base/${kbId}/versions/${versionId}`);
}
async getSpecDiff(kbId: string, versionIdA: string, versionIdB: string): Promise<DiffResult> {
return this.fetch<DiffResult>(`/knowledge-base/${kbId}/versions/${versionIdA}/diff/${versionIdB}`);
}
async activateSpecVersion(kbId: string, versionId: string): Promise<SpecVersionDetail> {
return this.fetch<SpecVersionDetail>(`/knowledge-base/${kbId}/versions/${versionId}/activate`, {
method: 'POST',
});
}
}
export interface DropdownOptionsResponse {

View file

@ -83,3 +83,86 @@ export interface ProofVersion {
feedback: AgentReview;
isIdenticalFile?: boolean;
}
// Knowledge Base types
export interface KnowledgeBaseListItem {
id: string;
agent_key: string;
display_name: string;
description: string | null;
source_document_count: number;
active_spec_version: number | null;
active_spec_char_count: number | null;
latest_job_status: string | null;
latest_job_completed_at: string | null;
created_at: string;
}
export interface SourceDocument {
id: string;
knowledge_base_id: string;
filename: string;
file_storage_key: string;
file_size_bytes: number;
mime_type: string;
uploaded_by_name: string | null;
parse_status: string;
parse_error: string | null;
created_at: string;
}
export interface ProcessingJob {
id: string;
knowledge_base_id: string;
status: string;
triggered_by_name: string | null;
total_documents: number;
parsed_documents: number;
spec_version_id: string | null;
error_message: string | null;
started_at: string | null;
completed_at: string | null;
created_at: string;
}
export interface KnowledgeBaseDetail {
id: string;
agent_key: string;
display_name: string;
description: string | null;
source_documents: SourceDocument[];
active_spec_version: number | null;
active_spec_char_count: number | null;
latest_job: ProcessingJob | null;
created_at: string;
}
export interface SpecVersionListItem {
id: string;
knowledge_base_id: string;
version_number: number;
generated_by_name: string | null;
source_document_ids: string[] | null;
is_active: boolean;
char_count: number;
created_at: string;
}
export interface SpecVersionDetail extends SpecVersionListItem {
content: string;
}
export interface DiffLine {
type: 'add' | 'remove' | 'context';
content: string;
line_number_old: number | null;
line_number_new: number | null;
}
export interface DiffResult {
version_a: number;
version_b: number;
additions: number;
deletions: number;
lines: DiffLine[];
}