diff --git a/.env.example b/.env.example index 0c16fb8..caf5699 100644 --- a/.env.example +++ b/.env.example @@ -59,10 +59,9 @@ JWT_EXPIRATION_MINUTES=15 REFRESH_TOKEN_EXPIRATION_DAYS=7 # ========================================== -# NotebookLlama Integration +# LlamaParse (Optional — cloud PDF parsing) # ========================================== -# URL to your internal NotebookLlama instance -NOTEBOOKLLAMA_URL=http://internal-notebook-server:8080 +LLAMAPARSE_API_KEY= # ========================================== # File Upload Configuration diff --git a/backend/.env.example b/backend/.env.example index 955bcff..dfc4a77 100644 --- a/backend/.env.example +++ b/backend/.env.example @@ -52,16 +52,9 @@ GOOGLE_API_KEY=AIza... ANTHROPIC_API_KEY=sk-ant-... # ============================================================================= -# NOTEBOOKLLAMA INTEGRATION +# LLAMAPARSE (Optional — cloud PDF parsing for better table extraction) # ============================================================================= -# URL of the NotebookLlama service (external API for isolated document analysis) -NOTEBOOKLLAMA_URL=http://localhost:8000 - -# Service account credentials for authenticating with NotebookLlama API -# These credentials are used by our backend to authenticate as a service account -# Create this account in NotebookLlama: POST /api/auth/signup -NOTEBOOKLLAMA_SERVICE_EMAIL=service@oliver.internal -NOTEBOOKLLAMA_SERVICE_PASSWORD=your_notebookllama_service_password_here +LLAMAPARSE_API_KEY= # ============================================================================= # FILE UPLOAD CONFIGURATION @@ -72,6 +65,16 @@ MAX_UPLOAD_SIZE_MB=100 # Directory for uploaded files (local filesystem) UPLOAD_DIR=/app/uploads +# ============================================================================= +# MICROSOFT GRAPH TOOL CALLING (Phase 2) +# ============================================================================= +# Encryption key for storing Graph tokens (falls back to JWT_SECRET if not set) +GRAPH_ENCRYPTION_KEY= +# Delegated scopes requested during Graph consent +GRAPH_DELEGATED_SCOPES=User.Read Calendars.ReadWrite Mail.ReadWrite Chat.ReadWrite Tasks.ReadWrite offline_access +# Redirect URI for Graph consent flow +GRAPH_CONSENT_REDIRECT_URI=http://localhost:3000/auth/graph-callback + # ============================================================================= # CORS CONFIGURATION # ============================================================================= diff --git a/backend/alembic/versions/006_add_knowledge_documents.py b/backend/alembic/versions/006_add_knowledge_documents.py new file mode 100644 index 0000000..5366201 --- /dev/null +++ b/backend/alembic/versions/006_add_knowledge_documents.py @@ -0,0 +1,57 @@ +"""Add knowledge_documents table for admin-uploaded RAG documents + +Revision ID: 006_knowledge_docs +Revises: 005_sharepoint +Create Date: 2026-03-04 + +""" +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects.postgresql import UUID + +# revision identifiers, used by Alembic. +revision = '006_knowledge_docs' +down_revision = '005_sharepoint' +branch_labels = None +depends_on = None + + +def upgrade() -> None: + # Create documentstatus enum + documentstatus_enum = sa.Enum('pending', 'processing', 'completed', 'failed', name='documentstatus') + documentstatus_enum.create(op.get_bind(), checkfirst=True) + + # Create knowledge_documents table + op.create_table( + 'knowledge_documents', + sa.Column('id', UUID(as_uuid=True), primary_key=True), + sa.Column('file_name', sa.String(512), nullable=False), + sa.Column('file_type', sa.String(50), nullable=False), + sa.Column('file_size', sa.BigInteger(), nullable=False), + sa.Column('document_key', sa.String(255), nullable=False, unique=True), + sa.Column('status', documentstatus_enum, nullable=False, server_default='pending'), + sa.Column('vector_count', sa.Integer(), nullable=False, server_default='0'), + sa.Column('error_message', sa.Text(), nullable=True), + sa.Column('description', sa.Text(), nullable=True), + sa.Column('department_id', UUID(as_uuid=True), nullable=True), + sa.Column('region_code', sa.String(10), nullable=True), + sa.Column('uploaded_by', UUID(as_uuid=True), nullable=True), + sa.Column('is_active', sa.Boolean(), nullable=False, server_default='true'), + sa.Column('created_at', sa.DateTime(), nullable=False, server_default=sa.text('CURRENT_TIMESTAMP')), + sa.Column('updated_at', sa.DateTime(), nullable=False, server_default=sa.text('CURRENT_TIMESTAMP')), + sa.Column('processed_at', sa.DateTime(), nullable=True), + sa.ForeignKeyConstraint(['department_id'], ['departments.id'], ondelete='SET NULL'), + sa.ForeignKeyConstraint(['uploaded_by'], ['users.id'], ondelete='SET NULL'), + ) + + # Create indexes + op.create_index('ix_knowledge_documents_document_key', 'knowledge_documents', ['document_key'], unique=True) + op.create_index('ix_knowledge_documents_status', 'knowledge_documents', ['status']) + op.create_index('ix_knowledge_documents_department_id', 'knowledge_documents', ['department_id']) + op.create_index('ix_knowledge_documents_uploaded_by', 'knowledge_documents', ['uploaded_by']) + op.create_index('ix_knowledge_documents_is_active', 'knowledge_documents', ['is_active']) + + +def downgrade() -> None: + op.drop_table('knowledge_documents') + op.execute('DROP TYPE documentstatus') diff --git a/backend/alembic/versions/007_remove_notebook_add_features.py b/backend/alembic/versions/007_remove_notebook_add_features.py new file mode 100644 index 0000000..e844a23 --- /dev/null +++ b/backend/alembic/versions/007_remove_notebook_add_features.py @@ -0,0 +1,67 @@ +"""Remove notebook tables, add user features columns + +Revision ID: 007_remove_notebook_features +Revises: 006_knowledge_docs +Create Date: 2026-03-04 + +""" +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects.postgresql import JSONB + +# revision identifiers, used by Alembic. +revision = '007_remove_notebook_features' +down_revision = '006_knowledge_docs' +branch_labels = None +depends_on = None + + +def upgrade() -> None: + # 1. Drop notebook-related tables + op.drop_table('uploaded_files') + op.drop_table('notebook_sessions') + + # 2. Remove 'notebook' from conversationmode enum (if it exists as DB enum) + # Since mode is stored as VARCHAR(20), no enum alteration needed. + # Just delete any conversations with mode='notebook' + op.execute("DELETE FROM messages WHERE conversation_id IN (SELECT id FROM conversations WHERE mode = 'notebook')") + op.execute("DELETE FROM conversations WHERE mode = 'notebook'") + + # 3. Add access control columns to users + op.add_column('users', sa.Column('allowed_features', JSONB, server_default='[]', nullable=False)) + op.add_column('users', sa.Column('features_override', sa.Boolean(), server_default='false', nullable=False)) + + +def downgrade() -> None: + # Remove access control columns + op.drop_column('users', 'features_override') + op.drop_column('users', 'allowed_features') + + # Recreate notebook tables + op.create_table( + 'notebook_sessions', + sa.Column('id', sa.UUID(), primary_key=True), + sa.Column('user_id', sa.UUID(), sa.ForeignKey('users.id'), nullable=False), + sa.Column('conversation_id', sa.UUID(), sa.ForeignKey('conversations.id'), nullable=False), + sa.Column('title', sa.String(255), nullable=True), + sa.Column('notebookllama_notebook_id', sa.Integer(), nullable=True, unique=True), + sa.Column('is_pinned', sa.Boolean(), default=False), + sa.Column('total_file_size', sa.BigInteger(), default=0), + sa.Column('expires_at', sa.DateTime(), nullable=True), + sa.Column('created_at', sa.DateTime(), nullable=False), + ) + + op.create_table( + 'uploaded_files', + sa.Column('id', sa.UUID(), primary_key=True), + sa.Column('session_id', sa.UUID(), sa.ForeignKey('notebook_sessions.id'), nullable=False), + sa.Column('file_name', sa.String(255), nullable=False), + sa.Column('file_size', sa.BigInteger(), nullable=False), + sa.Column('file_type', sa.String(50), nullable=False), + sa.Column('storage_path', sa.String(500), nullable=False), + sa.Column('notebookllama_task_id', sa.Integer(), nullable=True), + sa.Column('processing_status', sa.String(50), default='queued'), + sa.Column('processing_error', sa.String(500), nullable=True), + sa.Column('uploaded_at', sa.DateTime(), nullable=False), + sa.Column('processed_at', sa.DateTime(), nullable=True), + ) diff --git a/backend/app/api/v1/endpoints/admin.py b/backend/app/api/v1/endpoints/admin.py index 6cae10a..382658d 100644 --- a/backend/app/api/v1/endpoints/admin.py +++ b/backend/app/api/v1/endpoints/admin.py @@ -27,6 +27,8 @@ class UserResponse(BaseModel): is_active: bool created_at: str last_login_at: str | None + allowed_features: List[str] = [] + features_override: bool = False class Config: from_attributes = True @@ -37,6 +39,13 @@ class UsersListResponse(BaseModel): total: int +class UserUpdateRequest(BaseModel): + allowed_features: List[str] | None = None + features_override: bool | None = None + role: str | None = None + is_active: bool | None = None + + class LLMConfigRequest(BaseModel): openai_api_key: str | None = None azure_api_key: str | None = None @@ -84,7 +93,9 @@ async def get_all_users( department_id=str(u.department_id) if u.department_id else None, is_active=u.is_active, created_at=u.created_at.isoformat(), - last_login_at=u.last_login_at.isoformat() if u.last_login_at else None + last_login_at=u.last_login_at.isoformat() if u.last_login_at else None, + allowed_features=u.allowed_features or [], + features_override=u.features_override or False, ) for u in users ], @@ -92,6 +103,45 @@ async def get_all_users( ) +@router.patch("/users/{user_id}", response_model=UserResponse) +async def update_user( + user_id: str, + request: UserUpdateRequest, + current_user: User = Depends(require_super_admin), + db: AsyncSession = Depends(get_db), +): + """Update user features and settings (super_admin only)""" + result = await db.execute(select(User).where(User.id == user_id)) + target_user = result.scalar_one_or_none() + if not target_user: + raise HTTPException(status_code=404, detail="User not found") + + if request.allowed_features is not None: + target_user.allowed_features = request.allowed_features + if request.features_override is not None: + target_user.features_override = request.features_override + if request.role is not None: + target_user.role = request.role + if request.is_active is not None: + target_user.is_active = request.is_active + + await db.commit() + await db.refresh(target_user) + + return UserResponse( + id=str(target_user.id), + email=target_user.email, + display_name=target_user.display_name or "", + role=target_user.role, + department_id=str(target_user.department_id) if target_user.department_id else None, + is_active=target_user.is_active, + created_at=target_user.created_at.isoformat(), + last_login_at=target_user.last_login_at.isoformat() if target_user.last_login_at else None, + allowed_features=target_user.allowed_features or [], + features_override=target_user.features_override or False, + ) + + @router.post("/config", response_model=LLMConfigResponse) async def update_llm_config( request: LLMConfigRequest, diff --git a/backend/app/api/v1/endpoints/knowledge.py b/backend/app/api/v1/endpoints/knowledge.py new file mode 100644 index 0000000..4167d05 --- /dev/null +++ b/backend/app/api/v1/endpoints/knowledge.py @@ -0,0 +1,281 @@ +""" +Knowledge Base Admin API Endpoints + +Upload, list, get, and delete documents for RAG knowledge base. +All endpoints require admin role (super_admin or content_manager). +""" +import os +import uuid +import logging +from typing import Optional + +from pydantic import BaseModel +from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, Form, Query, status +from sqlalchemy import select, func +from sqlalchemy.ext.asyncio import AsyncSession + +from app.config import settings +from app.database import get_db +from app.core.dependencies import require_admin +from app.core.document_processor import DocumentProcessor +from app.models.user import User +from app.models.knowledge_document import KnowledgeDocument, DocumentStatus +from app.schemas.knowledge_document import ( + KnowledgeDocumentResponse, + KnowledgeDocumentListResponse, + KnowledgeDocumentUploadResponse, +) + +logger = logging.getLogger(__name__) + +router = APIRouter() + +KNOWLEDGE_UPLOAD_DIR = os.path.join(settings.UPLOAD_DIR, "knowledge") + + +def _doc_to_response(doc: KnowledgeDocument) -> KnowledgeDocumentResponse: + return KnowledgeDocumentResponse( + id=str(doc.id), + file_name=doc.file_name, + file_type=doc.file_type, + file_size=doc.file_size, + document_key=doc.document_key, + status=doc.status.value, + vector_count=doc.vector_count, + error_message=doc.error_message, + description=doc.description, + department_id=str(doc.department_id) if doc.department_id else None, + region_code=doc.region_code, + uploaded_by=str(doc.uploaded_by) if doc.uploaded_by else None, + is_active=doc.is_active, + created_at=doc.created_at, + updated_at=doc.updated_at, + processed_at=doc.processed_at, + ) + + +@router.post("/upload", response_model=KnowledgeDocumentUploadResponse) +async def upload_knowledge_document( + file: UploadFile = File(...), + description: Optional[str] = Form(None), + department_id: Optional[str] = Form(None), + region_code: Optional[str] = Form(None), + current_user: User = Depends(require_admin), + db: AsyncSession = Depends(get_db), +): + """Upload a document to the RAG knowledge base.""" + # Validate file type + file_ext = file.filename.rsplit(".", 1)[-1].lower() if file.filename and "." in file.filename else "" + if file_ext not in settings.SUPPORTED_FILE_TYPES: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f"Unsupported file type: {file_ext}. Supported: {', '.join(settings.SUPPORTED_FILE_TYPES)}", + ) + + # Read file to get size + file_bytes = await file.read() + file_size = len(file_bytes) + + max_size = settings.MAX_UPLOAD_SIZE_MB * 1024 * 1024 + if file_size > max_size: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f"File size exceeds {settings.MAX_UPLOAD_SIZE_MB}MB limit", + ) + + # Generate unique document key + doc_id = uuid.uuid4() + document_key = f"kb-{doc_id}" + + # Save file to disk for Celery task + os.makedirs(KNOWLEDGE_UPLOAD_DIR, exist_ok=True) + file_path = os.path.join(KNOWLEDGE_UPLOAD_DIR, f"{doc_id}.{file_ext}") + with open(file_path, "wb") as f: + f.write(file_bytes) + + # Create DB record + doc = KnowledgeDocument( + id=doc_id, + file_name=file.filename or f"document.{file_ext}", + file_type=file_ext, + file_size=file_size, + document_key=document_key, + status=DocumentStatus.PENDING, + description=description, + department_id=uuid.UUID(department_id) if department_id else None, + region_code=region_code, + uploaded_by=current_user.id, + ) + db.add(doc) + await db.commit() + await db.refresh(doc) + + # Dispatch Celery task + from app.tasks.knowledge_processing import process_knowledge_document + process_knowledge_document.delay(str(doc_id), file_path) + + logger.info("Knowledge document uploaded: %s (%s, %d bytes) by %s", file.filename, file_ext, file_size, current_user.email) + + return KnowledgeDocumentUploadResponse( + id=str(doc.id), + file_name=doc.file_name, + file_size=doc.file_size, + status=doc.status.value, + message="Document uploaded and queued for processing", + ) + + +@router.get("/documents", response_model=KnowledgeDocumentListResponse) +async def list_knowledge_documents( + skip: int = Query(0, ge=0), + limit: int = Query(50, ge=1, le=200), + status_filter: Optional[str] = Query(None, alias="status"), + current_user: User = Depends(require_admin), + db: AsyncSession = Depends(get_db), +): + """List knowledge base documents with pagination and optional status filter.""" + query = select(KnowledgeDocument).order_by(KnowledgeDocument.created_at.desc()) + count_query = select(func.count(KnowledgeDocument.id)) + + if status_filter: + try: + status_enum = DocumentStatus(status_filter) + query = query.where(KnowledgeDocument.status == status_enum) + count_query = count_query.where(KnowledgeDocument.status == status_enum) + except ValueError: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f"Invalid status: {status_filter}", + ) + + # Get total count + total_result = await db.execute(count_query) + total = total_result.scalar() or 0 + + # Get paginated results + query = query.offset(skip).limit(limit) + result = await db.execute(query) + documents = result.scalars().all() + + return KnowledgeDocumentListResponse( + documents=[_doc_to_response(doc) for doc in documents], + total=total, + ) + + +@router.get("/documents/{document_id}", response_model=KnowledgeDocumentResponse) +async def get_knowledge_document( + document_id: str, + current_user: User = Depends(require_admin), + db: AsyncSession = Depends(get_db), +): + """Get a single knowledge base document by ID.""" + result = await db.execute( + select(KnowledgeDocument).where(KnowledgeDocument.id == uuid.UUID(document_id)) + ) + doc = result.scalar_one_or_none() + + if not doc: + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Document not found") + + return _doc_to_response(doc) + + +@router.delete("/documents/{document_id}") +async def delete_knowledge_document( + document_id: str, + current_user: User = Depends(require_admin), + db: AsyncSession = Depends(get_db), +): + """Delete a knowledge base document and its vectors from Qdrant.""" + result = await db.execute( + select(KnowledgeDocument).where(KnowledgeDocument.id == uuid.UUID(document_id)) + ) + doc = result.scalar_one_or_none() + + if not doc: + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Document not found") + + # Delete vectors from Qdrant + try: + processor = DocumentProcessor() + processor.delete_document(doc.document_key) + except Exception as exc: + logger.warning("Failed to delete vectors for %s: %s", doc.document_key, exc) + + # Delete DB record + await db.delete(doc) + await db.commit() + + logger.info("Knowledge document deleted: %s by %s", doc.file_name, current_user.email) + + return {"status": "deleted", "document_id": document_id} + + +# ========================================================================= +# Web Scraping → Knowledge Base +# ========================================================================= + + +class ScrapeURLRequest(BaseModel): + url: str + description: Optional[str] = None + department_id: Optional[str] = None + region_code: Optional[str] = None + + +@router.post("/scrape", response_model=KnowledgeDocumentUploadResponse) +async def scrape_url_to_knowledge( + request: ScrapeURLRequest, + current_user: User = Depends(require_admin), + db: AsyncSession = Depends(get_db), +): + """Scrape a URL and add its content to the knowledge base.""" + from app.core.web_scraper import scrape_url, WebScraperError + + # Scrape URL content + try: + content = scrape_url(request.url) + except WebScraperError as e: + raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(e)) + + # Create document record + doc_id = uuid.uuid4() + document_key = f"kb-{doc_id}" + file_name = request.url[:200] + + # Save content as a text file + os.makedirs(KNOWLEDGE_UPLOAD_DIR, exist_ok=True) + file_path = os.path.join(KNOWLEDGE_UPLOAD_DIR, f"{doc_id}.txt") + with open(file_path, "w", encoding="utf-8") as f: + f.write(content) + + doc = KnowledgeDocument( + id=doc_id, + file_name=file_name, + file_type="txt", + file_size=len(content.encode("utf-8")), + document_key=document_key, + status=DocumentStatus.PENDING, + description=request.description or f"Scraped from {request.url}", + department_id=uuid.UUID(request.department_id) if request.department_id else None, + region_code=request.region_code, + uploaded_by=current_user.id, + ) + db.add(doc) + await db.commit() + await db.refresh(doc) + + # Dispatch Celery task + from app.tasks.knowledge_processing import process_knowledge_document + process_knowledge_document.delay(str(doc_id), file_path) + + logger.info("URL scraped and queued: %s by %s", request.url, current_user.email) + + return KnowledgeDocumentUploadResponse( + id=str(doc.id), + file_name=doc.file_name, + file_size=doc.file_size, + status=doc.status.value, + message="URL scraped and queued for processing", + ) diff --git a/backend/app/api/v1/endpoints/notebook.py b/backend/app/api/v1/endpoints/notebook.py deleted file mode 100644 index 70952a8..0000000 --- a/backend/app/api/v1/endpoints/notebook.py +++ /dev/null @@ -1,745 +0,0 @@ -""" -Notebook Mode API Endpoints -Proxy layer between our frontend and external NotebookLlama service - -Flow: -1. User creates session → We create Conversation + NotebookSession + External Notebook -2. User uploads file → We save locally + Upload to NotebookLlama + Poll status -3. User chats → WebSocket connection via client → SSE stream to frontend -4. User deletes session → Delete external notebook + Local files + DB records -""" -from fastapi import APIRouter, Depends, HTTPException, status, UploadFile, File, Query -from fastapi.responses import StreamingResponse -from sqlalchemy.ext.asyncio import AsyncSession -from sqlalchemy import select -from typing import Optional -from uuid import UUID, uuid4 -from datetime import datetime, timedelta -import os -import shutil -import logging - -from app.database import get_db -from app.core.dependencies import get_current_user, get_current_user_flexible -from app.models.user import User -from app.models.conversation import Conversation, ConversationMode -from app.models.notebook import NotebookSession, UploadedFile, ProcessingStatus -from app.core.notebook_client import ( - NotebookLlamaClient, - NotebookLlamaAuthError, - NotebookLlamaAPIError -) -from app.config import settings - -logger = logging.getLogger(__name__) -router = APIRouter() - -# Constants -MAX_FILE_SIZE = settings.MAX_UPLOAD_SIZE_MB * 1024 * 1024 -UPLOAD_DIR = settings.UPLOAD_DIR - - -# ==================== SESSION MANAGEMENT ==================== - -@router.get("/") -async def list_notebook_sessions( - current_user: User = Depends(get_current_user), - db: AsyncSession = Depends(get_db) -): - """ - List all notebook sessions for the current user - - Returns sessions sorted by updated/created date (most recent first) - - Returns: - list: Array of session summaries - """ - result = await db.execute( - select(NotebookSession) - .where(NotebookSession.user_id == current_user.id) - .order_by(NotebookSession.created_at.desc()) - ) - sessions = result.scalars().all() - - return [ - { - "session_id": str(session.id), - "conversation_id": str(session.conversation_id), - "title": session.title, - "is_pinned": session.is_pinned, - "total_file_size": session.total_file_size, - "notebookllama_notebook_id": session.notebookllama_notebook_id, - "expires_at": session.expires_at.isoformat() if session.expires_at else None, - "created_at": session.created_at.isoformat() - } - for session in sessions - ] - - -@router.post("/create") -async def create_notebook_session( - title: Optional[str] = None, - current_user: User = Depends(get_current_user), - db: AsyncSession = Depends(get_db) -): - """ - Create a new notebook session - - This creates: - 1. A conversation record in our DB - 2. A notebook session record in our DB - 3. An external notebook on NotebookLlama service - - Returns: - dict: Session details including IDs and expiration - """ - try: - # Generate title if not provided - if not title: - title = f"Notebook - {datetime.utcnow().strftime('%Y-%m-%d %H:%M')}" - - # 1. Create conversation - conversation = Conversation( - id=uuid4(), - user_id=current_user.id, - mode="notebook", # Use string directly, not enum - title=title - ) - db.add(conversation) - await db.flush() - - # 2. Create external notebook via NotebookLlama - client = NotebookLlamaClient() - try: - nb_response = await client.create_notebook( - title=title, - description=f"Created by {current_user.email}", - model_type="openai" - ) - except NotebookLlamaAuthError as e: - await db.rollback() - raise HTTPException( - status_code=status.HTTP_503_SERVICE_UNAVAILABLE, - detail=f"NotebookLlama authentication failed: {str(e)}" - ) - except NotebookLlamaAPIError as e: - await db.rollback() - raise HTTPException( - status_code=status.HTTP_502_BAD_GATEWAY, - detail=f"NotebookLlama API error: {str(e)}" - ) - - # 3. Create our session record - session = NotebookSession( - id=uuid4(), - user_id=current_user.id, - conversation_id=conversation.id, - title=title, - notebookllama_notebook_id=nb_response["id"], # Store INTEGER id - is_pinned=False, - total_file_size=0, - expires_at=datetime.utcnow() + timedelta(hours=24) - ) - db.add(session) - await db.commit() - await db.refresh(session) - - logger.info( - f"Created notebook session: session_id={session.id}, " - f"notebook_id={session.notebookllama_notebook_id}" - ) - - return { - "session_id": str(session.id), - "conversation_id": str(conversation.id), - "notebookllama_notebook_id": session.notebookllama_notebook_id, - "title": session.title, - "is_pinned": session.is_pinned, - "expires_at": session.expires_at.isoformat() if session.expires_at else None, - "created_at": session.created_at.isoformat() - } - - except HTTPException: - raise - except Exception as e: - await db.rollback() - logger.error(f"Failed to create notebook session: {str(e)}") - raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail=f"Failed to create session: {str(e)}" - ) - - -# ==================== FILE UPLOAD ==================== - -@router.post("/{session_id}/upload") -async def upload_file( - session_id: UUID, - file: UploadFile = File(...), - wait_for_completion: bool = Query( - default=False, - description="If true, wait for processing to complete before returning" - ), - current_user: User = Depends(get_current_user), - db: AsyncSession = Depends(get_db) -): - """ - Upload a file to a notebook session - - Flow: - 1. Validate session and file size - 2. Save file locally - 3. Upload to NotebookLlama (returns task_id) - 4. Save upload record to DB - 5. Optionally poll until processing completes - - Args: - session_id: Notebook session UUID - file: File to upload - wait_for_completion: If true, poll until processing completes - - Returns: - dict: Upload confirmation with task_id and status - """ - # Verify session - result = await db.execute( - select(NotebookSession).where( - NotebookSession.id == session_id, - NotebookSession.user_id == current_user.id - ) - ) - session = result.scalar_one_or_none() - - if not session: - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail="Session not found" - ) - - if not session.notebookllama_notebook_id: - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, - detail="Session has no associated notebook" - ) - - # Check expiration - if session.expires_at and session.expires_at < datetime.utcnow(): - raise HTTPException( - status_code=status.HTTP_410_GONE, - detail="Session has expired" - ) - - # Read and validate file - content = await file.read() - file_size = len(content) - - if file_size > MAX_FILE_SIZE: - raise HTTPException( - status_code=status.HTTP_413_REQUEST_ENTITY_TOO_LARGE, - detail=f"File exceeds {settings.MAX_UPLOAD_SIZE_MB}MB limit" - ) - - if session.total_file_size + file_size > MAX_FILE_SIZE: - raise HTTPException( - status_code=status.HTTP_413_REQUEST_ENTITY_TOO_LARGE, - detail="Session storage quota exceeded" - ) - - # Save file locally - session_dir = os.path.join(UPLOAD_DIR, str(session_id)) - os.makedirs(session_dir, exist_ok=True) - - file_id = str(uuid4()) - file_ext = os.path.splitext(file.filename)[1] if file.filename else "" - storage_path = os.path.join(session_dir, f"{file_id}{file_ext}") - - try: - with open(storage_path, 'wb') as f: - f.write(content) - - # Upload to NotebookLlama - client = NotebookLlamaClient() - upload_result = await client.upload_file( - notebook_id=session.notebookllama_notebook_id, - file_path=storage_path, - file_name=file.filename or f"file{file_ext}" - ) - - task_id = upload_result["task_id"] - - # Create upload record - uploaded_file = UploadedFile( - id=uuid4(), - session_id=session.id, - file_name=file.filename or f"file{file_ext}", - file_size=file_size, - file_type=file_ext[1:] if file_ext else "unknown", - storage_path=storage_path, - notebookllama_task_id=task_id, - processing_status=ProcessingStatus.QUEUED.value - ) - db.add(uploaded_file) - - # Update session file size - session.total_file_size += file_size - await db.commit() - await db.refresh(uploaded_file) - - logger.info( - f"File uploaded: session={session_id}, file={file.filename}, task_id={task_id}" - ) - - # Optionally wait for completion - if wait_for_completion: - try: - final_status = await client.poll_until_complete(task_id, timeout=300) - - # Update our record - uploaded_file.processing_status = ProcessingStatus.COMPLETED.value - uploaded_file.processed_at = datetime.utcnow() - await db.commit() - - return { - "file_id": str(uploaded_file.id), - "file_name": uploaded_file.file_name, - "file_size": file_size, - "task_id": task_id, - "processing_status": "completed", - "message": "File uploaded and processed successfully" - } - - except TimeoutError: - logger.warning(f"Task {task_id} processing timeout") - return { - "file_id": str(uploaded_file.id), - "file_name": uploaded_file.file_name, - "file_size": file_size, - "task_id": task_id, - "processing_status": "processing", - "message": "File uploaded, still processing (timeout waiting)" - } - except NotebookLlamaAPIError as e: - # Update our record with error - uploaded_file.processing_status = ProcessingStatus.FAILED.value - uploaded_file.processing_error = str(e) - await db.commit() - - return { - "file_id": str(uploaded_file.id), - "file_name": uploaded_file.file_name, - "file_size": file_size, - "task_id": task_id, - "processing_status": "failed", - "error": str(e) - } - - # Return immediately without waiting - return { - "file_id": str(uploaded_file.id), - "file_name": uploaded_file.file_name, - "file_size": file_size, - "task_id": task_id, - "processing_status": "queued", - "message": "File uploaded, processing in background" - } - - except NotebookLlamaAPIError as e: - # Clean up file - if os.path.exists(storage_path): - os.remove(storage_path) - await db.rollback() - - raise HTTPException( - status_code=status.HTTP_502_BAD_GATEWAY, - detail=f"NotebookLlama upload failed: {str(e)}" - ) - except Exception as e: - # Clean up file - if os.path.exists(storage_path): - os.remove(storage_path) - await db.rollback() - - logger.error(f"Upload failed: {str(e)}") - raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail=f"Upload failed: {str(e)}" - ) - - -# ==================== TASK STATUS ==================== - -@router.get("/{session_id}/files/{file_id}/status") -async def get_file_status( - session_id: UUID, - file_id: UUID, - current_user: User = Depends(get_current_user), - db: AsyncSession = Depends(get_db) -): - """ - Get processing status of an uploaded file - - Returns: - dict: Processing status from NotebookLlama - """ - # Verify file belongs to user's session - result = await db.execute( - select(UploadedFile).where( - UploadedFile.id == file_id, - UploadedFile.session_id == session_id - ) - ) - uploaded_file = result.scalar_one_or_none() - - if not uploaded_file: - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail="File not found" - ) - - # Verify session ownership - result = await db.execute( - select(NotebookSession).where( - NotebookSession.id == session_id, - NotebookSession.user_id == current_user.id - ) - ) - session = result.scalar_one_or_none() - - if not session: - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail="Session not found" - ) - - # Get status from NotebookLlama - if uploaded_file.notebookllama_task_id: - try: - client = NotebookLlamaClient() - task_status = await client.get_task_status(uploaded_file.notebookllama_task_id) - - # Update our record if status changed - if task_status["status"] == "completed" and uploaded_file.processing_status != ProcessingStatus.COMPLETED.value: - uploaded_file.processing_status = ProcessingStatus.COMPLETED.value - uploaded_file.processed_at = datetime.utcnow() - await db.commit() - elif task_status["status"] == "failed" and uploaded_file.processing_status != ProcessingStatus.FAILED.value: - uploaded_file.processing_status = ProcessingStatus.FAILED.value - uploaded_file.processing_error = task_status.get("error_message") - await db.commit() - - return { - "file_id": str(uploaded_file.id), - "file_name": uploaded_file.file_name, - "processing_status": task_status["status"], - "task_id": uploaded_file.notebookllama_task_id, - "created_at": task_status.get("created_at"), - "completed_at": task_status.get("completed_at"), - "error_message": task_status.get("error_message") - } - - except NotebookLlamaAPIError as e: - logger.error(f"Failed to get task status: {str(e)}") - # Return our local status as fallback - return { - "file_id": str(uploaded_file.id), - "file_name": uploaded_file.file_name, - "processing_status": uploaded_file.processing_status, - "task_id": uploaded_file.notebookllama_task_id, - "error": str(e) - } - - # No task ID, return local status - return { - "file_id": str(uploaded_file.id), - "file_name": uploaded_file.file_name, - "processing_status": uploaded_file.processing_status, - "error_message": uploaded_file.processing_error - } - - -# ==================== CHAT ==================== - -@router.get("/{session_id}/chat", response_class=StreamingResponse) -async def chat( - session_id: UUID, - message: str = Query(..., description="User's chat message"), - current_user: User = Depends(get_current_user_flexible), - db: AsyncSession = Depends(get_db) -): - """ - Stream chat response from NotebookLlama (GET for EventSource compatibility) - - Connects to NotebookLlama WebSocket and streams response as SSE events - - Args: - session_id: Notebook session UUID - message: User's question (query parameter) - - Returns: - StreamingResponse: SSE events with tokens, sources, completion, errors - """ - # Verify session - result = await db.execute( - select(NotebookSession).where( - NotebookSession.id == session_id, - NotebookSession.user_id == current_user.id - ) - ) - session = result.scalar_one_or_none() - - if not session: - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail="Session not found" - ) - - if not session.notebookllama_notebook_id: - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, - detail="Session has no associated notebook" - ) - - # Check expiration - if session.expires_at and session.expires_at < datetime.utcnow(): - raise HTTPException( - status_code=status.HTTP_410_GONE, - detail="Session has expired" - ) - - # Stream response from NotebookLlama - async def event_generator(): - """SSE event generator""" - try: - client = NotebookLlamaClient() - - # Create chat session in NotebookLlama - chat_session = await client.create_chat_session( - notebook_id=session.notebookllama_notebook_id, - title=f"Chat - {datetime.utcnow().strftime('%Y-%m-%d %H:%M')}" - ) - - logger.info( - f"Created NotebookLlama chat session: id={chat_session['id']}, " - f"notebook={session.notebookllama_notebook_id}" - ) - - # Stream from WebSocket (client converts to SSE format) - async for sse_event in client.chat_stream( - notebook_id=session.notebookllama_notebook_id, - message=message, - chat_session_id=chat_session['id'] - ): - yield sse_event - - except Exception as e: - logger.error(f"Chat streaming error: {str(e)}") - import json - yield f"data: {json.dumps({'error': str(e)})}\n\n" - - return StreamingResponse( - event_generator(), - media_type="text/event-stream", - headers={ - "Cache-Control": "no-cache", - "Connection": "keep-alive", - "X-Accel-Buffering": "no", - "Access-Control-Allow-Origin": "http://localhost:3000", # CORS for EventSource - "Access-Control-Allow-Credentials": "true" - } - ) - - -# ==================== SESSION DETAILS ==================== - -@router.get("/{session_id}") -async def get_session( - session_id: UUID, - current_user: User = Depends(get_current_user), - db: AsyncSession = Depends(get_db) -): - """ - Get notebook session details including uploaded files - - Returns: - dict: Session information with files list - """ - # Verify session - result = await db.execute( - select(NotebookSession).where( - NotebookSession.id == session_id, - NotebookSession.user_id == current_user.id - ) - ) - session = result.scalar_one_or_none() - - if not session: - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail="Session not found" - ) - - # Get uploaded files - result = await db.execute( - select(UploadedFile).where(UploadedFile.session_id == session.id) - ) - files = result.scalars().all() - - return { - "session_id": str(session.id), - "conversation_id": str(session.conversation_id), - "notebookllama_notebook_id": session.notebookllama_notebook_id, - "title": session.title, - "is_pinned": session.is_pinned, - "total_file_size": session.total_file_size, - "expires_at": session.expires_at.isoformat() if session.expires_at else None, - "created_at": session.created_at.isoformat(), - "files": [ - { - "file_id": str(f.id), - "file_name": f.file_name, - "file_size": f.file_size, - "file_type": f.file_type, - "processing_status": f.processing_status, - "processing_error": f.processing_error, - "uploaded_at": f.uploaded_at.isoformat(), - "processed_at": f.processed_at.isoformat() if f.processed_at else None - } - for f in files - ] - } - - -# ==================== PIN/UNPIN ==================== - -@router.post("/{session_id}/pin") -async def pin_session( - session_id: UUID, - current_user: User = Depends(get_current_user), - db: AsyncSession = Depends(get_db) -): - """Pin session to prevent expiration""" - result = await db.execute( - select(NotebookSession).where( - NotebookSession.id == session_id, - NotebookSession.user_id == current_user.id - ) - ) - session = result.scalar_one_or_none() - - if not session: - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail="Session not found" - ) - - session.is_pinned = True - session.expires_at = None - await db.commit() - - return { - "session_id": str(session.id), - "is_pinned": True, - "expires_at": None - } - - -@router.post("/{session_id}/unpin") -async def unpin_session( - session_id: UUID, - current_user: User = Depends(get_current_user), - db: AsyncSession = Depends(get_db) -): - """Unpin session and set expiration""" - result = await db.execute( - select(NotebookSession).where( - NotebookSession.id == session_id, - NotebookSession.user_id == current_user.id - ) - ) - session = result.scalar_one_or_none() - - if not session: - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail="Session not found" - ) - - session.is_pinned = False - session.expires_at = datetime.utcnow() + timedelta(hours=24) - await db.commit() - - return { - "session_id": str(session.id), - "is_pinned": False, - "expires_at": session.expires_at.isoformat() - } - - -# ==================== DELETE SESSION ==================== - -@router.delete("/{session_id}") -async def delete_session( - session_id: UUID, - current_user: User = Depends(get_current_user), - db: AsyncSession = Depends(get_db) -): - """ - Delete notebook session - - Deletes: - 1. External notebook from NotebookLlama - 2. Local files - 3. Database records (cascades to uploaded_files) - - Returns: - dict: Deletion confirmation - """ - result = await db.execute( - select(NotebookSession).where( - NotebookSession.id == session_id, - NotebookSession.user_id == current_user.id - ) - ) - session = result.scalar_one_or_none() - - if not session: - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail="Session not found" - ) - - try: - # 1. Delete from NotebookLlama - if session.notebookllama_notebook_id: - try: - client = NotebookLlamaClient() - await client.delete_notebook(session.notebookllama_notebook_id) - logger.info(f"Deleted external notebook: {session.notebookllama_notebook_id}") - except NotebookLlamaAPIError as e: - logger.warning(f"Failed to delete external notebook: {str(e)}") - # Continue anyway to clean up local resources - - # 2. Delete local files - session_dir = os.path.join(UPLOAD_DIR, str(session_id)) - if os.path.exists(session_dir): - shutil.rmtree(session_dir) - logger.info(f"Deleted local files: {session_dir}") - - # 3. Delete from database (cascade handles uploaded_files) - await db.delete(session) - await db.commit() - - logger.info(f"Deleted session: {session_id}") - - return { - "status": "deleted", - "session_id": str(session_id) - } - - except Exception as e: - await db.rollback() - logger.error(f"Session deletion failed: {str(e)}") - raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail=f"Deletion failed: {str(e)}" - ) diff --git a/backend/app/core/dependencies.py b/backend/app/core/dependencies.py index d8a9d6a..f657688 100644 --- a/backend/app/core/dependencies.py +++ b/backend/app/core/dependencies.py @@ -132,6 +132,19 @@ async def require_admin(current_user: User = Depends(get_current_user)) -> User: return current_user +def require_feature(feature: str): + """Dependency factory: require user to have a specific feature access.""" + async def checker(current_user: User = Depends(get_current_user)): + allowed = current_user.allowed_features or [] + if allowed and feature not in allowed: + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail=f"Access to {feature} not granted" + ) + return current_user + return checker + + async def require_super_admin(current_user: User = Depends(get_current_user)) -> User: """ Dependency to require super admin role diff --git a/backend/app/core/document_processor.py b/backend/app/core/document_processor.py index bdda8af..2909a64 100644 --- a/backend/app/core/document_processor.py +++ b/backend/app/core/document_processor.py @@ -190,14 +190,17 @@ class DocumentProcessor: """Dispatch to the appropriate extractor based on file type.""" try: ft = file_type.lower().lstrip(".") - if ft == "pdf": - return self._extract_pdf(file_bytes) - elif ft in ("docx", "doc"): - return self._extract_docx(file_bytes) - elif ft in ("xlsx", "xls"): - return self._extract_xlsx(file_bytes) - elif ft == "txt": + if ft == "txt": return file_bytes.decode("utf-8", errors="replace") + elif ft == "pdf" and settings.LLAMAPARSE_API_KEY: + # Use LlamaParse for PDFs when API key is available + try: + return self._extract_with_llamaparse(file_bytes, file_name) + except Exception as exc: + logger.warning("LlamaParse failed for '%s', falling back to MarkItDown: %s", file_name, exc) + return self._extract_with_markitdown(file_bytes, file_name, ft) + elif ft in ("pdf", "docx", "doc", "xlsx", "xls", "pptx", "ppt", "csv"): + return self._extract_with_markitdown(file_bytes, file_name, ft) else: raise DocumentProcessingError( f"Unsupported file type '{file_type}' for '{file_name}'" @@ -209,38 +212,33 @@ class DocumentProcessor: f"Failed to extract text from '{file_name}': {exc}" ) from exc - def _extract_pdf(self, file_bytes: bytes) -> str: - """Extract text from PDF bytes using PyPDF2.""" - from PyPDF2 import PdfReader + def _extract_with_markitdown(self, file_bytes: bytes, file_name: str, file_type: str) -> str: + """Extract text from any supported format using MarkItDown.""" + import tempfile + from markitdown import MarkItDown - reader = PdfReader(io.BytesIO(file_bytes)) - pages = [] - for page in reader.pages: - page_text = page.extract_text() - if page_text: - pages.append(page_text) - return "\n\n".join(pages) + md = MarkItDown() + suffix = f".{file_type}" + with tempfile.NamedTemporaryFile(suffix=suffix, delete=True) as tmp: + tmp.write(file_bytes) + tmp.flush() + result = md.convert(tmp.name) + return result.text_content or "" - def _extract_docx(self, file_bytes: bytes) -> str: - """Extract text from DOCX bytes using python-docx.""" - import docx + def _extract_with_llamaparse(self, file_bytes: bytes, file_name: str) -> str: + """Extract text from PDF using LlamaParse (cloud API, better for tables).""" + import tempfile + from llama_parse import LlamaParse - doc = docx.Document(io.BytesIO(file_bytes)) - paragraphs = [p.text for p in doc.paragraphs if p.text.strip()] - return "\n\n".join(paragraphs) - - def _extract_xlsx(self, file_bytes: bytes) -> str: - """Extract text from XLSX bytes using openpyxl.""" - import openpyxl - - wb = openpyxl.load_workbook(io.BytesIO(file_bytes), data_only=True) - rows = [] - for sheet in wb.worksheets: - for row in sheet.iter_rows(values_only=True): - row_text = " | ".join(str(cell) for cell in row if cell is not None) - if row_text.strip(): - rows.append(row_text) - return "\n".join(rows) + parser = LlamaParse( + api_key=settings.LLAMAPARSE_API_KEY, + result_type="markdown", + ) + with tempfile.NamedTemporaryFile(suffix=".pdf", delete=True) as tmp: + tmp.write(file_bytes) + tmp.flush() + documents = parser.load_data(tmp.name) + return "\n\n".join(doc.text for doc in documents) # ========================================================================= # Embeddings diff --git a/backend/app/core/notebook_client.py b/backend/app/core/notebook_client.py deleted file mode 100644 index 8a5f98a..0000000 --- a/backend/app/core/notebook_client.py +++ /dev/null @@ -1,542 +0,0 @@ -""" -NotebookLlama Client - Integration with External NotebookLlama API - -This client handles all communication with the external NotebookLlama service, -including authentication, notebook management, file uploads, and WebSocket chat. - -Architecture: -- JWT authentication with automatic token refresh -- WebSocket → SSE conversion for chat streaming -- Comprehensive error handling and logging -- Task status polling for async document processing -""" -import httpx -import websockets -import json -import logging -from typing import AsyncGenerator, Optional, Dict, Any -from datetime import datetime, timedelta -from app.config import settings - -logger = logging.getLogger(__name__) - - -class NotebookLlamaAuthError(Exception): - """Authentication failed with NotebookLlama""" - pass - - -class NotebookLlamaAPIError(Exception): - """Generic API error from NotebookLlama""" - pass - - -class NotebookLlamaClient: - """ - Client for NotebookLlama API with JWT authentication and WebSocket support - - Manages: - - Service account authentication - - Notebook CRUD operations - - File uploads with background processing - - WebSocket chat with SSE conversion - """ - - def __init__(self): - self.base_url = settings.NOTEBOOKLLAMA_URL - self.service_email = settings.NOTEBOOKLLAMA_SERVICE_EMAIL - self.service_password = settings.NOTEBOOKLLAMA_SERVICE_PASSWORD - self.timeout = httpx.Timeout(60.0, connect=10.0) - - # Token management - self._token: Optional[str] = None - self._token_expiry: Optional[datetime] = None - - logger.info(f"NotebookLlamaClient initialized (base_url={self.base_url})") - - # ==================== AUTHENTICATION ==================== - - async def _get_token(self) -> str: - """ - Get or refresh JWT token for NotebookLlama API - - Returns: - str: Valid JWT token - - Raises: - NotebookLlamaAuthError: If authentication fails - """ - # Return cached token if still valid - if self._token and self._token_expiry and self._token_expiry > datetime.utcnow(): - return self._token - - try: - async with httpx.AsyncClient(timeout=self.timeout) as client: - response = await client.post( - f"{self.base_url}/api/auth/login", - json={ - "email": self.service_email, - "password": self.service_password - } - ) - response.raise_for_status() - data = response.json() - - self._token = data["access_token"] - # Refresh token 10 minutes before expiry (assume 1 hour expiry) - self._token_expiry = datetime.utcnow() + timedelta(minutes=50) - - logger.info("Successfully authenticated with NotebookLlama") - return self._token - - except httpx.HTTPError as e: - logger.error(f"Authentication failed: {str(e)}") - raise NotebookLlamaAuthError(f"Failed to authenticate: {str(e)}") - - async def _request( - self, - method: str, - path: str, - **kwargs - ) -> httpx.Response: - """ - Make authenticated HTTP request to NotebookLlama API - - Args: - method: HTTP method (GET, POST, DELETE, etc.) - path: API path (e.g., "/api/notebooks") - **kwargs: Additional arguments for httpx request - - Returns: - httpx.Response: Response object - - Raises: - NotebookLlamaAPIError: If request fails - """ - try: - # Get valid token - token = await self._get_token() - - # Add authorization header - headers = kwargs.pop("headers", {}) - headers["Authorization"] = f"Bearer {token}" - - # Make request - async with httpx.AsyncClient(timeout=self.timeout) as client: - response = await client.request( - method, - f"{self.base_url}{path}", - headers=headers, - **kwargs - ) - response.raise_for_status() - return response - - except httpx.HTTPStatusError as e: - logger.error(f"API request failed: {method} {path} - Status {e.response.status_code}") - raise NotebookLlamaAPIError( - f"API request failed: {e.response.status_code} - {e.response.text}" - ) - except httpx.HTTPError as e: - logger.error(f"HTTP error: {method} {path} - {str(e)}") - raise NotebookLlamaAPIError(f"HTTP error: {str(e)}") - - # ==================== NOTEBOOK MANAGEMENT ==================== - - async def create_notebook( - self, - title: str, - description: Optional[str] = None, - model_type: str = "openai" - ) -> Dict[str, Any]: - """ - Create a new notebook on NotebookLlama - - Args: - title: Notebook title - description: Optional description - model_type: LLM model to use ("openai", "gemini", "claude") - - Returns: - dict: Notebook data with integer 'id' field - - Example response: - { - "id": 123, - "name": "My Notebook", - "description": null, - "model_type": "openai", - "pipeline_id": "llamaindex-abc123", - "created_at": "2024-...", - "updated_at": "2024-..." - } - """ - try: - response = await self._request( - "POST", - "/api/notebooks/", - json={ - "name": title, - "description": description, - "model_type": model_type - } - ) - data = response.json() - - logger.info(f"Created notebook: id={data['id']}, title={title}") - return data - - except Exception as e: - logger.error(f"Failed to create notebook: {str(e)}") - raise - - async def get_notebook(self, notebook_id: int) -> Dict[str, Any]: - """ - Get notebook details from NotebookLlama - - Args: - notebook_id: Integer notebook ID - - Returns: - dict: Notebook details including documents - """ - try: - response = await self._request("GET", f"/api/notebooks/{notebook_id}") - return response.json() - - except Exception as e: - logger.error(f"Failed to get notebook {notebook_id}: {str(e)}") - raise - - async def delete_notebook(self, notebook_id: int) -> Dict[str, str]: - """ - Delete a notebook from NotebookLlama - - Args: - notebook_id: Integer notebook ID - - Returns: - dict: Deletion confirmation - """ - try: - response = await self._request("DELETE", f"/api/notebooks/{notebook_id}") - data = response.json() - - logger.info(f"Deleted notebook: id={notebook_id}") - return data - - except Exception as e: - logger.error(f"Failed to delete notebook {notebook_id}: {str(e)}") - raise - - # ==================== FILE UPLOAD & PROCESSING ==================== - - async def upload_file( - self, - notebook_id: int, - file_path: str, - file_name: str, - user_id: int = 1 - ) -> Dict[str, Any]: - """ - Upload a file to a NotebookLlama notebook - - Args: - notebook_id: Target notebook ID - file_path: Local path to file - file_name: Original filename - user_id: User ID for tracking (default=1) - - Returns: - dict: Upload response with task_id for background processing - - Example response: - { - "task_id": 456, - "filename": "document.pdf", - "status": "queued", - "message": "Document uploaded and queued for processing" - } - """ - try: - # Get authenticated token - token = await self._get_token() - - # Prepare multipart upload - async with httpx.AsyncClient(timeout=self.timeout) as client: - with open(file_path, 'rb') as f: - files = { - 'file': (file_name, f, 'application/octet-stream') - } - - response = await client.post( - f"{self.base_url}/api/documents/upload/{notebook_id}", - files=files, - params={"user_id": user_id}, - headers={"Authorization": f"Bearer {token}"} - ) - response.raise_for_status() - data = response.json() - - logger.info( - f"Uploaded file to notebook {notebook_id}: " - f"file={file_name}, task_id={data['task_id']}" - ) - return data - - except httpx.HTTPError as e: - logger.error(f"File upload failed: {str(e)}") - raise NotebookLlamaAPIError(f"File upload failed: {str(e)}") - except IOError as e: - logger.error(f"Failed to read file {file_path}: {str(e)}") - raise - - async def get_task_status(self, task_id: int) -> Dict[str, Any]: - """ - Check the status of a document processing task - - Args: - task_id: Task ID from upload response - - Returns: - dict: Task status information - - Example response: - { - "id": 456, - "status": "completed", // "queued", "processing", "failed" - "filename": "document.pdf", - "created_at": "2024-...", - "completed_at": "2024-...", - "error_message": null - } - """ - try: - response = await self._request("GET", f"/api/documents/task/{task_id}") - return response.json() - - except Exception as e: - logger.error(f"Failed to get task status {task_id}: {str(e)}") - raise - - async def poll_until_complete( - self, - task_id: int, - timeout: int = 300, - poll_interval: int = 2 - ) -> Dict[str, Any]: - """ - Poll task status until completion or timeout - - Args: - task_id: Task ID to poll - timeout: Maximum wait time in seconds (default: 5 minutes) - poll_interval: Seconds between polls (default: 2) - - Returns: - dict: Final task status - - Raises: - TimeoutError: If task doesn't complete within timeout - NotebookLlamaAPIError: If task fails - """ - import asyncio - - start_time = datetime.utcnow() - max_time = start_time + timedelta(seconds=timeout) - - while datetime.utcnow() < max_time: - status = await self.get_task_status(task_id) - - if status["status"] == "completed": - logger.info(f"Task {task_id} completed successfully") - return status - elif status["status"] == "failed": - error = status.get("error_message", "Unknown error") - logger.error(f"Task {task_id} failed: {error}") - raise NotebookLlamaAPIError(f"Task processing failed: {error}") - - # Still processing, wait and retry - await asyncio.sleep(poll_interval) - - # Timeout reached - raise TimeoutError(f"Task {task_id} did not complete within {timeout} seconds") - - # ==================== CHAT SESSION MANAGEMENT ==================== - - async def create_chat_session( - self, - notebook_id: int, - title: Optional[str] = None - ) -> Dict[str, Any]: - """ - Create a new chat session for a notebook - - Args: - notebook_id: Notebook ID - title: Optional session title - - Returns: - dict: Chat session data with id field - - Example response: - { - "id": 123, - "notebook_id": 456, - "title": "Chat Session", - "created_at": "2024-..." - } - """ - try: - params = {} - if title: - params["title"] = title - - response = await self._request( - "POST", - f"/api/chat/{notebook_id}/sessions", - params=params - ) - data = response.json() - - logger.info(f"Created chat session: id={data['id']}, notebook={notebook_id}") - return data - - except Exception as e: - logger.error(f"Failed to create chat session for notebook {notebook_id}: {str(e)}") - raise - - # ==================== WEBSOCKET CHAT ==================== - - async def chat_stream( - self, - notebook_id: int, - message: str, - chat_session_id: Optional[int] = None - ) -> AsyncGenerator[str, None]: - """ - Stream chat response from NotebookLlama via WebSocket - - Connects to NotebookLlama WebSocket, sends message, and yields - responses in SSE format compatible with our frontend. - - Args: - notebook_id: Notebook ID to query - message: User's question - chat_session_id: Optional chat session ID for history - - Yields: - str: SSE-formatted events for frontend - - SSE Event Format: - data: {"token": "Hello"} - data: {"sources": "## Sources\n..."} - data: {"done": true} - data: {"error": "Error message"} - """ - try: - # Get JWT token - token = await self._get_token() - - # Construct WebSocket URL - ws_url = self.base_url.replace('http', 'ws').replace('https', 'wss') - ws_path = f"/api/chat/ws/{notebook_id}" - - if chat_session_id: - ws_path += f"?session_id={chat_session_id}" - - full_ws_url = f"{ws_url}{ws_path}" - - logger.info(f"Connecting to WebSocket: {full_ws_url}") - - async with websockets.connect( - full_ws_url, - extra_headers={"Authorization": f"Bearer {token}"} - ) as websocket: - # Wait for connection confirmation - conn_msg = await websocket.recv() - conn_data = json.loads(conn_msg) - - if conn_data.get("type") != "connected": - logger.error(f"Unexpected connection response: {conn_data}") - yield f"data: {json.dumps({'error': 'Failed to connect to chat'})}\n\n" - return - - logger.info( - f"WebSocket connected: notebook={conn_data.get('notebook_id')}, " - f"session={conn_data.get('session_id')}" - ) - - # Send question - await websocket.send(json.dumps({"question": message})) - - # Stream responses - async for ws_msg in websocket: - data = json.loads(ws_msg) - msg_type = data.get("type") - - if msg_type == "processing": - # Acknowledge processing (optional) - logger.debug("Processing message received") - continue - - elif msg_type == "response": - # Stream the main response - response_text = data.get("response", "") - if response_text: - yield f"data: {json.dumps({'token': response_text})}\n\n" - - # Stream sources separately if present - sources = data.get("sources") - if sources: - yield f"data: {json.dumps({'sources': sources})}\n\n" - - # Send completion event - yield f"data: {json.dumps({'done': True})}\n\n" - break - - elif msg_type == "error": - error_msg = data.get("message", "Unknown error") - logger.error(f"WebSocket error: {error_msg}") - yield f"data: {json.dumps({'error': error_msg})}\n\n" - break - - else: - logger.warning(f"Unknown message type: {msg_type}") - - except websockets.exceptions.WebSocketException as e: - logger.error(f"WebSocket connection failed: {str(e)}") - yield f"data: {json.dumps({'error': f'Connection failed: {str(e)}'})}\n\n" - - except Exception as e: - logger.error(f"Chat stream error: {str(e)}") - yield f"data: {json.dumps({'error': f'Stream error: {str(e)}'})}\n\n" - - # ==================== CHAT HISTORY ==================== - - async def get_chat_history( - self, - notebook_id: int, - limit: int = 50 - ) -> list[Dict[str, Any]]: - """ - Get chat history for a notebook - - Args: - notebook_id: Notebook ID - limit: Maximum number of messages to retrieve - - Returns: - list: Chat messages with role, content, sources, etc. - """ - try: - response = await self._request( - "GET", - f"/api/chat/{notebook_id}/history", - params={"limit": limit} - ) - return response.json() - - except Exception as e: - logger.error(f"Failed to get chat history for notebook {notebook_id}: {str(e)}") - raise diff --git a/backend/app/core/web_scraper.py b/backend/app/core/web_scraper.py new file mode 100644 index 0000000..2cb18f6 --- /dev/null +++ b/backend/app/core/web_scraper.py @@ -0,0 +1,67 @@ +""" +Web Scraper — extract clean text from URLs using trafilatura +""" +import logging +from typing import Optional +from urllib.parse import urlparse + +import trafilatura + +logger = logging.getLogger(__name__) + +MAX_CONTENT_LENGTH = 500_000 # ~500KB of text +TIMEOUT = 30 # seconds + + +class WebScraperError(Exception): + """Raised when URL scraping fails.""" + + +def validate_url(url: str) -> str: + """Validate and normalize URL.""" + parsed = urlparse(url) + if parsed.scheme not in ("http", "https"): + raise WebScraperError(f"Invalid URL scheme: {parsed.scheme}. Only http/https allowed.") + if not parsed.netloc: + raise WebScraperError("Invalid URL: no host found.") + return url + + +def scrape_url(url: str, output_format: str = "markdown") -> str: + """ + Fetch and extract main content from a URL. + + Args: + url: The URL to scrape + output_format: "markdown" or "text" + + Returns: + Extracted content as markdown or plain text + + Raises: + WebScraperError: If fetching or extraction fails + """ + url = validate_url(url) + logger.info("Scraping URL: %s", url) + + downloaded = trafilatura.fetch_url(url) + if not downloaded: + raise WebScraperError(f"Failed to fetch URL: {url}") + + result = trafilatura.extract( + downloaded, + output_format=output_format, + include_links=True, + include_tables=True, + include_images=False, + ) + + if not result: + raise WebScraperError(f"No content extracted from URL: {url}") + + if len(result) > MAX_CONTENT_LENGTH: + logger.warning("Content truncated from %d to %d chars for URL: %s", len(result), MAX_CONTENT_LENGTH, url) + result = result[:MAX_CONTENT_LENGTH] + + logger.info("Extracted %d chars from %s", len(result), url) + return result diff --git a/backend/app/models/conversation.py b/backend/app/models/conversation.py index 3ba5a9b..c8a39fc 100644 --- a/backend/app/models/conversation.py +++ b/backend/app/models/conversation.py @@ -14,7 +14,6 @@ class ConversationMode(enum.Enum): """Conversation modes""" RAG = "rag" ASSISTANT = "assistant" - NOTEBOOK = "notebook" class MessageRole(enum.Enum): @@ -38,7 +37,6 @@ class Conversation(Base): # Relationships user = relationship("User", back_populates="conversations") messages = relationship("Message", back_populates="conversation", cascade="all, delete-orphan") - notebook_session = relationship("NotebookSession", back_populates="conversation", uselist=False) class Message(Base): diff --git a/backend/app/models/knowledge_document.py b/backend/app/models/knowledge_document.py new file mode 100644 index 0000000..4ce8568 --- /dev/null +++ b/backend/app/models/knowledge_document.py @@ -0,0 +1,57 @@ +""" +Knowledge Base document model for admin-uploaded RAG documents +""" +from sqlalchemy import Column, String, Boolean, DateTime, Integer, BigInteger, Text, ForeignKey, Enum as SQLEnum +from sqlalchemy.dialects.postgresql import UUID +from sqlalchemy.orm import relationship +from app.database import Base +import uuid +from datetime import datetime +import enum + + +class DocumentStatus(str, enum.Enum): + """Knowledge document processing status""" + PENDING = "pending" + PROCESSING = "processing" + COMPLETED = "completed" + FAILED = "failed" + + +class KnowledgeDocument(Base): + """ + Tracks documents uploaded via admin panel for RAG knowledge base. + document_key maps to sharepoint_id in Qdrant (DocumentProcessor reuse). + """ + __tablename__ = "knowledge_documents" + + id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) + file_name = Column(String(512), nullable=False) + file_type = Column(String(50), nullable=False) # pdf, docx, xlsx, txt + file_size = Column(BigInteger, nullable=False) + + # Unique key for Qdrant mapping (maps to sharepoint_id in DocumentProcessor) + document_key = Column(String(255), unique=True, nullable=False, index=True) + + # Processing status + status = Column(SQLEnum(DocumentStatus), default=DocumentStatus.PENDING, nullable=False, index=True) + vector_count = Column(Integer, default=0, nullable=False) + error_message = Column(Text, nullable=True) + + # Metadata + description = Column(Text, nullable=True) + department_id = Column(UUID(as_uuid=True), ForeignKey("departments.id", ondelete="SET NULL"), nullable=True, index=True) + region_code = Column(String(10), nullable=True) + + # Ownership + uploaded_by = Column(UUID(as_uuid=True), ForeignKey("users.id", ondelete="SET NULL"), nullable=True, index=True) + is_active = Column(Boolean, default=True, nullable=False, index=True) + + # Timestamps + created_at = Column(DateTime, default=datetime.utcnow, nullable=False) + updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False) + processed_at = Column(DateTime, nullable=True) + + # Relationships + uploader = relationship("User", foreign_keys=[uploaded_by]) + department = relationship("Department", foreign_keys=[department_id]) diff --git a/backend/app/models/notebook.py b/backend/app/models/notebook.py deleted file mode 100644 index fef025b..0000000 --- a/backend/app/models/notebook.py +++ /dev/null @@ -1,92 +0,0 @@ -""" -Notebook Mode Models for NotebookLlama Integration -Handles file uploads and isolated analysis sessions -""" -from sqlalchemy import Column, String, Boolean, DateTime, BigInteger, Integer, ForeignKey -from sqlalchemy.dialects.postgresql import UUID -from sqlalchemy.orm import relationship -from app.database import Base -import uuid -from datetime import datetime -import enum - - -class ProcessingStatus(enum.Enum): - """Processing status for document uploads""" - QUEUED = "queued" - PROCESSING = "processing" - COMPLETED = "completed" - FAILED = "failed" - - -class NotebookSession(Base): - """ - Notebook session table - Maps our internal session to external NotebookLlama notebook - """ - __tablename__ = "notebook_sessions" - - id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) - user_id = Column(UUID(as_uuid=True), ForeignKey("users.id", ondelete="CASCADE"), nullable=False, index=True) - conversation_id = Column(UUID(as_uuid=True), ForeignKey("conversations.id", ondelete="CASCADE"), nullable=False, index=True) - title = Column(String(255), nullable=True) - - # External NotebookLlama notebook ID (INTEGER from their API) - notebookllama_notebook_id = Column(Integer, nullable=True, unique=True) - - # Pin feature: pinned sessions never expire - is_pinned = Column(Boolean, default=False, nullable=False) - - # Track total file size for quota management - total_file_size = Column(BigInteger, default=0, nullable=False) - - # Expiration: NULL if pinned, NOW() + 24h otherwise - expires_at = Column(DateTime, nullable=True) - - created_at = Column(DateTime, default=datetime.utcnow, nullable=False) - - # Relationships - user = relationship("User", back_populates="notebook_sessions") - conversation = relationship("Conversation", back_populates="notebook_session") - uploaded_files = relationship("UploadedFile", back_populates="session", cascade="all, delete-orphan") - - def __repr__(self): - return f"" - - -class UploadedFile(Base): - """ - Uploaded files table - Tracks files uploaded to NotebookLlama sessions with processing status - """ - __tablename__ = "uploaded_files" - - id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) - session_id = Column(UUID(as_uuid=True), ForeignKey("notebook_sessions.id", ondelete="CASCADE"), nullable=False, index=True) - - file_name = Column(String(255), nullable=False) - file_size = Column(BigInteger, nullable=False) - file_type = Column(String(50), nullable=True) # pdf, docx, xlsx, etc. - - # Local storage path - storage_path = Column(String(500), nullable=False) - - # External NotebookLlama task ID (from background processing) - notebookllama_task_id = Column(Integer, nullable=True) - - # Processing status tracking - processing_status = Column( - String(50), - default=ProcessingStatus.QUEUED.value, - nullable=False - ) - processing_error = Column(String(500), nullable=True) - - uploaded_at = Column(DateTime, default=datetime.utcnow, nullable=False) - processed_at = Column(DateTime, nullable=True) - - # Relationships - session = relationship("NotebookSession", back_populates="uploaded_files") - - def __repr__(self): - return f"" diff --git a/backend/app/models/user.py b/backend/app/models/user.py index 0e8f054..aa04c15 100644 --- a/backend/app/models/user.py +++ b/backend/app/models/user.py @@ -2,7 +2,7 @@ User Model with Role-Based Access Control (RBAC) """ from sqlalchemy import Column, String, Boolean, DateTime, Enum, ForeignKey -from sqlalchemy.dialects.postgresql import UUID +from sqlalchemy.dialects.postgresql import UUID, JSONB from sqlalchemy.orm import relationship from app.database import Base import uuid @@ -28,16 +28,18 @@ class User(Base): entra_id = Column(String, unique=True, nullable=False, index=True) # Azure AD object ID email = Column(String, unique=True, nullable=False, index=True) display_name = Column(String) - role = Column(String, default="user", nullable=False) # Changed from Enum to String for simplicity - department_id = Column(UUID(as_uuid=True), nullable=True) # Temporarily removed FK + role = Column(String, default="user", nullable=False) + department_id = Column(UUID(as_uuid=True), nullable=True) is_active = Column(Boolean, default=True, nullable=False) created_at = Column(DateTime, default=datetime.utcnow, nullable=False) last_login_at = Column(DateTime, nullable=True) - # Relationships (Department temporarily disabled) - # department = relationship("Department", back_populates="users") + # Access Control — features granted via AD Groups or admin override + allowed_features = Column(JSONB, default=list, nullable=False, server_default='[]') + features_override = Column(Boolean, default=False, nullable=False, server_default='false') + + # Relationships conversations = relationship("Conversation", back_populates="user", cascade="all, delete-orphan") - notebook_sessions = relationship("NotebookSession", back_populates="user", cascade="all, delete-orphan") def __repr__(self): - return f"" + return f"" diff --git a/backend/app/rag/retriever.py b/backend/app/rag/retriever.py index 9477c49..f1a84d6 100644 --- a/backend/app/rag/retriever.py +++ b/backend/app/rag/retriever.py @@ -174,10 +174,11 @@ class RAGRetriever: department_id: Optional[str] = None, region_code: Optional[str] = None, top_k: int = 5, - system_prompt: Optional[str] = None + system_prompt: Optional[str] = None, + conversation_history: Optional[List[Dict]] = None, ) -> AsyncGenerator[str, None]: """ - Complete RAG query pipeline with streaming response + Complete RAG query pipeline with streaming response and multi-turn support. Args: user_query: User's question @@ -185,6 +186,7 @@ class RAGRetriever: region_code: Optional region filter top_k: Number of documents to retrieve system_prompt: Optional system prompt override + conversation_history: Previous messages for multi-turn context Yields: Response tokens as they're generated @@ -210,17 +212,19 @@ If the context does not contain the answer, respond: "I don't have enough inform Always cite sources using numbered references [1], [2], etc. when you use information from the context.""" if not context: - # No documents found yield "I don't have any relevant documents to answer this question. Please try rephrasing or check if documents have been uploaded to the system." return - messages = [ - {"role": "system", "content": system_prompt}, - { - "role": "user", - "content": f"**Context:**\n{context}\n\n**Question:** {user_query}" - } - ] + messages = [{"role": "system", "content": system_prompt}] + + # Include conversation history for multi-turn + if conversation_history: + messages.extend(conversation_history) + + messages.append({ + "role": "user", + "content": f"**Context:**\n{context}\n\n**Question:** {user_query}" + }) # 5. Stream LLM response async for token in LLMFactory.stream_completion(mode="rag", messages=messages): diff --git a/backend/app/schemas/chat.py b/backend/app/schemas/chat.py index 2aa9ebc..5df1cc8 100644 --- a/backend/app/schemas/chat.py +++ b/backend/app/schemas/chat.py @@ -10,14 +10,14 @@ class ChatRequest(BaseModel): """Request schema for chat endpoint""" message: str = Field(..., min_length=1, max_length=10000, description="User's message") conversation_id: Optional[UUID] = Field(None, description="Existing conversation ID (creates new if not provided)") - mode: Optional[str] = Field("rag", description="Chat mode: 'rag', 'assistant', or 'notebook'") + mode: Optional[str] = Field("rag", description="Chat mode: 'rag', 'assistant', or 'assistant'") department_id: Optional[str] = Field(None, description="Department ID for filtering (optional)") region_code: Optional[str] = Field(None, description="Region code for filtering (optional)") class ConversationCreate(BaseModel): """Schema for creating a new conversation""" - mode: str = Field(..., description="Conversation mode: 'rag', 'assistant', or 'notebook'") + mode: str = Field(..., description="Conversation mode: 'rag', 'assistant', or 'assistant'") title: Optional[str] = Field(None, description="Conversation title") diff --git a/backend/app/schemas/knowledge_document.py b/backend/app/schemas/knowledge_document.py new file mode 100644 index 0000000..246184f --- /dev/null +++ b/backend/app/schemas/knowledge_document.py @@ -0,0 +1,41 @@ +""" +Pydantic schemas for Knowledge Base documents +""" +from pydantic import BaseModel +from typing import Optional, List +from datetime import datetime + + +class KnowledgeDocumentResponse(BaseModel): + id: str + file_name: str + file_type: str + file_size: int + document_key: str + status: str + vector_count: int + error_message: Optional[str] = None + description: Optional[str] = None + department_id: Optional[str] = None + region_code: Optional[str] = None + uploaded_by: Optional[str] = None + is_active: bool + created_at: datetime + updated_at: datetime + processed_at: Optional[datetime] = None + + class Config: + from_attributes = True + + +class KnowledgeDocumentListResponse(BaseModel): + documents: List[KnowledgeDocumentResponse] + total: int + + +class KnowledgeDocumentUploadResponse(BaseModel): + id: str + file_name: str + file_size: int + status: str + message: str diff --git a/backend/app/tasks/knowledge_processing.py b/backend/app/tasks/knowledge_processing.py new file mode 100644 index 0000000..8f1c946 --- /dev/null +++ b/backend/app/tasks/knowledge_processing.py @@ -0,0 +1,109 @@ +""" +Celery Task for processing Knowledge Base documents uploaded via admin panel. + +Reads the uploaded file from disk, runs it through DocumentProcessor +(same pipeline as SharePoint sync), and updates the DB record. +""" +import asyncio +import logging +import os +from datetime import datetime +from typing import Any, Dict +from uuid import UUID + +from sqlalchemy import select + +from app.core.document_processor import DocumentProcessor, DocumentProcessingError +from app.database import AsyncSessionLocal +from app.models.knowledge_document import KnowledgeDocument, DocumentStatus +from celery_app import celery_app + +logger = logging.getLogger(__name__) + + +@celery_app.task( + name="app.tasks.knowledge_processing.process_knowledge_document", + bind=True, + max_retries=2, + default_retry_delay=30, +) +def process_knowledge_document(self, document_id: str, file_path: str) -> Dict[str, Any]: + """ + Process an uploaded knowledge base document. + + Args: + document_id: UUID string of the KnowledgeDocument record + file_path: Path to the uploaded temp file on disk + + Returns: + Dict with processing result + """ + return asyncio.run(_async_process_knowledge_document(document_id, file_path)) + + +async def _async_process_knowledge_document(document_id: str, file_path: str) -> Dict[str, Any]: + """Async implementation of process_knowledge_document.""" + async with AsyncSessionLocal() as session: + # Load document record + result = await session.execute( + select(KnowledgeDocument).where(KnowledgeDocument.id == UUID(document_id)) + ) + doc = result.scalar_one_or_none() + + if not doc: + logger.error("KnowledgeDocument %s not found", document_id) + return {"error": "document_not_found"} + + # Update status to PROCESSING + doc.status = DocumentStatus.PROCESSING + await session.commit() + + try: + # Read file bytes + with open(file_path, "rb") as f: + file_bytes = f.read() + + # Process through DocumentProcessor (same pipeline as SharePoint) + processor = DocumentProcessor() + vector_count = await processor.process_document( + file_bytes=file_bytes, + file_name=doc.file_name, + file_type=doc.file_type, + sharepoint_id=doc.document_key, # Maps to sharepoint_id in Qdrant + file_url="", # No URL for uploaded files + source_id="knowledge_base", # Distinguishes from SharePoint docs + department_id=str(doc.department_id) if doc.department_id else None, + region_code=doc.region_code, + ) + + # Success: update record + doc.status = DocumentStatus.COMPLETED + doc.vector_count = vector_count + doc.processed_at = datetime.utcnow() + doc.error_message = None + await session.commit() + + logger.info( + "Knowledge document %s processed: %d vectors", + document_id, vector_count, + ) + return { + "document_id": document_id, + "vector_count": vector_count, + "status": "completed", + } + + except Exception as exc: + logger.exception("Failed to process knowledge document %s: %s", document_id, exc) + doc.status = DocumentStatus.FAILED + doc.error_message = str(exc)[:2000] + await session.commit() + return {"error": str(exc), "document_id": document_id} + + finally: + # Clean up temp file + try: + if os.path.exists(file_path): + os.remove(file_path) + except OSError as e: + logger.warning("Failed to remove temp file %s: %s", file_path, e) diff --git a/backend/celery_app.py b/backend/celery_app.py index 8e56aef..555fe41 100644 --- a/backend/celery_app.py +++ b/backend/celery_app.py @@ -24,7 +24,7 @@ celery_app = Celery( "nexus", broker=settings.CELERY_BROKER_URL, backend=settings.CELERY_RESULT_BACKEND, - include=["app.tasks.sharepoint_sync"], + include=["app.tasks.sharepoint_sync", "app.tasks.knowledge_processing"], ) celery_app.conf.update( @@ -40,6 +40,7 @@ celery_app.conf.update( # Task routing — all SharePoint tasks go to a dedicated queue task_routes={ "app.tasks.sharepoint_sync.*": {"queue": "sharepoint"}, + "app.tasks.knowledge_processing.*": {"queue": "default"}, }, # Result retention: keep results for 24 hours diff --git a/backend/scripts/ingest_local_file.py b/backend/scripts/ingest_local_file.py index 2b48cf0..162e4a2 100755 --- a/backend/scripts/ingest_local_file.py +++ b/backend/scripts/ingest_local_file.py @@ -22,7 +22,7 @@ from datetime import datetime # Add parent directory to path to import app modules sys.path.insert(0, str(Path(__file__).parent.parent)) -from PyPDF2 import PdfReader +from markitdown import MarkItDown from langchain.text_splitter import RecursiveCharacterTextSplitter from langchain_openai import OpenAIEmbeddings from qdrant_client import QdrantClient @@ -33,30 +33,24 @@ from dotenv import load_dotenv load_dotenv() -def extract_text_from_pdf(file_path: str) -> str: +def extract_text_from_file(file_path: str) -> str: """ - Extract text from PDF file + Extract text from any supported file using MarkItDown Args: - file_path: Path to PDF file + file_path: Path to file Returns: - Extracted text + Extracted text (markdown) """ print(f"📄 Extracting text from: {file_path}") - text_parts = [] try: - reader = PdfReader(file_path) - for i, page in enumerate(reader.pages, start=1): - page_text = page.extract_text() - if page_text: - text_parts.append(page_text) - print(f" ✓ Extracted page {i}/{len(reader.pages)}") - - full_text = "\n\n".join(text_parts) - print(f" ✓ Total characters extracted: {len(full_text)}") - return full_text + md = MarkItDown() + result = md.convert(file_path) + text = result.text_content or "" + print(f" ✓ Total characters extracted: {len(text)}") + return text except Exception as e: print(f" ✗ Error extracting text: {e}") @@ -214,7 +208,7 @@ def upload_to_qdrant( "text": chunk, "department_id": department_id, "region_code": region_code, - "file_type": "pdf", + "file_type": file_type, "last_modified": datetime.utcnow().isoformat(), "is_active": True } @@ -245,8 +239,8 @@ async def main(): description="Ingest a local PDF file into Qdrant for RAG testing" ) parser.add_argument( - "pdf_path", - help="Path to the PDF file to ingest" + "file_path", + help="Path to the file to ingest (PDF, DOCX, PPTX, XLSX, CSV, TXT)" ) parser.add_argument( "--title", @@ -279,8 +273,8 @@ async def main(): args = parser.parse_args() # Validate file exists - if not os.path.exists(args.pdf_path): - print(f"❌ Error: File not found: {args.pdf_path}") + if not os.path.exists(args.file_path): + print(f"❌ Error: File not found: {args.file_path}") sys.exit(1) # Get environment variables @@ -291,14 +285,17 @@ async def main(): print("❌ Error: OPENAI_API_KEY environment variable not set") sys.exit(1) - # Extract file name - file_name = args.title or Path(args.pdf_path).name + # Extract file name and type + file_path = Path(args.file_path) + file_name = args.title or file_path.name + file_type = file_path.suffix.lstrip(".").lower() print("=" * 80) - print("📚 PDF Ingestion Script") + print("📚 Document Ingestion Script") print("=" * 80) - print(f"File: {args.pdf_path}") + print(f"File: {args.file_path}") print(f"Title: {file_name}") + print(f"Type: {file_type}") print(f"Department ID: {args.department_id or 'None'}") print(f"Region: {args.region_code or 'None'}") print(f"Qdrant URL: {qdrant_url}") @@ -307,7 +304,7 @@ async def main(): try: # 1. Extract text - text = extract_text_from_pdf(args.pdf_path) + text = extract_text_from_file(args.file_path) # 2. Chunk text chunks = chunk_text(text, args.chunk_size, args.chunk_overlap) diff --git a/docker-compose.yml b/docker-compose.yml index 8867a7a..f7f3498 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -69,9 +69,7 @@ services: JWT_ALGORITHM: ${JWT_ALGORITHM:-HS256} JWT_EXPIRATION_MINUTES: ${JWT_EXPIRATION_MINUTES:-15} REFRESH_TOKEN_EXPIRATION_DAYS: ${REFRESH_TOKEN_EXPIRATION_DAYS:-7} - NOTEBOOKLLAMA_URL: ${NOTEBOOKLLAMA_URL} - NOTEBOOKLLAMA_SERVICE_EMAIL: ${NOTEBOOKLLAMA_SERVICE_EMAIL} - NOTEBOOKLLAMA_SERVICE_PASSWORD: ${NOTEBOOKLLAMA_SERVICE_PASSWORD} + LLAMAPARSE_API_KEY: ${LLAMAPARSE_API_KEY:-} MAX_UPLOAD_SIZE_MB: ${MAX_UPLOAD_SIZE_MB:-100} ports: - "8000:8000" diff --git a/frontend/app/admin/page.tsx b/frontend/app/admin/page.tsx index 7ca3517..042ad86 100644 --- a/frontend/app/admin/page.tsx +++ b/frontend/app/admin/page.tsx @@ -1,10 +1,6 @@ 'use client'; -// ============================================ -// Admin Dashboard Page -// ============================================ - -import { useEffect, useState } from 'react'; +import { useEffect, useState, useCallback, useRef } from 'react'; import { useRouter } from 'next/navigation'; import { useAuthStore } from '@/store/useAuthStore'; import { Tabs, TabsContent, TabsList, TabsTrigger } from '@/components/ui/tabs'; @@ -14,8 +10,13 @@ import { Input } from '@/components/ui/input'; import { Label } from '@/components/ui/label'; import { Badge } from '@/components/ui/badge'; import apiClient from '@/lib/api-client'; -import { Shield, Users, Settings, BarChart3, Loader2, Save } from 'lucide-react'; -import type { User, UserRole } from '@/types'; +import { + Shield, Users, Settings, BarChart3, Loader2, Save, FileText, Trash2, + Globe, BookOpenCheck, Sparkles, Check, X, Puzzle, +} from 'lucide-react'; +import type { User, UserRole, KnowledgeDocument } from '@/types'; +import { KnowledgeUploader } from '@/components/admin/knowledge-uploader'; +import { IntegrationsTab } from '@/components/admin/integrations-tab'; export default function AdminPage() { const router = useRouter(); @@ -30,27 +31,30 @@ export default function AdminPage() { const [saving, setSaving] = useState(false); const [message, setMessage] = useState<{ type: 'success' | 'error'; text: string } | null>(null); - // Role protection + // Knowledge Base state + const [knowledgeDocs, setKnowledgeDocs] = useState([]); + const [knowledgeTotal, setKnowledgeTotal] = useState(0); + const [knowledgeLoading, setKnowledgeLoading] = useState(false); + const [deletingId, setDeletingId] = useState(null); + const pollIntervalRef = useRef | null>(null); + + // URL scraping state + const [scrapeUrl, setScrapeUrl] = useState(''); + const [scrapeDescription, setScrapeDescription] = useState(''); + const [scraping, setScraping] = useState(false); + + const isAdmin = user?.role === 'super_admin' || user?.role === 'content_manager'; + const isSuperAdmin = user?.role === 'super_admin'; + useEffect(() => { - if (!user) { - router.push('/login'); - return; - } - - if (user.role !== 'super_admin') { - router.push('/chat'); - return; - } - + if (!user) { router.push('/login'); return; } + if (!isAdmin) { router.push('/chat'); return; } setLoading(false); - }, [user, router]); + }, [user, router, isAdmin]); - // Fetch users useEffect(() => { - if (!loading && user?.role === 'super_admin') { - fetchUsers(); - } - }, [loading, user]); + if (!loading && isSuperAdmin) fetchUsers(); + }, [loading, isSuperAdmin]); const fetchUsers = async () => { try { @@ -61,49 +65,117 @@ export default function AdminPage() { } }; + const fetchKnowledgeDocs = useCallback(async () => { + try { + const { data } = await apiClient.get('/admin/knowledge/documents'); + setKnowledgeDocs(data.documents || []); + setKnowledgeTotal(data.total || 0); + return data.documents || []; + } catch (error) { + console.error('Failed to fetch knowledge documents:', error); + return []; + } + }, []); + + useEffect(() => { + if (!loading && isAdmin) { + setKnowledgeLoading(true); + fetchKnowledgeDocs().finally(() => setKnowledgeLoading(false)); + } + }, [loading, isAdmin, fetchKnowledgeDocs]); + + useEffect(() => { + const hasPending = knowledgeDocs.some((d) => d.status === 'pending' || d.status === 'processing'); + if (hasPending && !pollIntervalRef.current) { + pollIntervalRef.current = setInterval(() => fetchKnowledgeDocs(), 5000); + } else if (!hasPending && pollIntervalRef.current) { + clearInterval(pollIntervalRef.current); + pollIntervalRef.current = null; + } + return () => { if (pollIntervalRef.current) { clearInterval(pollIntervalRef.current); pollIntervalRef.current = null; } }; + }, [knowledgeDocs, fetchKnowledgeDocs]); + + const handleDeleteKnowledgeDoc = async (docId: string) => { + if (!confirm('Delete this document and its vectors?')) return; + setDeletingId(docId); + try { + await apiClient.delete(`/admin/knowledge/documents/${docId}`); + await fetchKnowledgeDocs(); + } catch (error) { + console.error('Failed to delete document:', error); + } finally { + setDeletingId(null); + } + }; + + const handleScrapeUrl = async () => { + if (!scrapeUrl.trim()) return; + setScraping(true); + try { + await apiClient.post('/admin/knowledge/scrape', { + url: scrapeUrl.trim(), + description: scrapeDescription.trim() || undefined, + }); + setScrapeUrl(''); + setScrapeDescription(''); + await fetchKnowledgeDocs(); + } catch (error: unknown) { + console.error('Scrape error:', error); + const msg = error instanceof Error ? error.message : 'Failed to scrape URL'; + setMessage({ type: 'error', text: msg }); + } finally { + setScraping(false); + } + }; + + const handleToggleFeature = async (userId: string, feature: string, currentFeatures: string[]) => { + const newFeatures = currentFeatures.includes(feature) + ? currentFeatures.filter((f) => f !== feature) + : [...currentFeatures, feature]; + try { + await apiClient.patch(`/admin/users/${userId}`, { + allowed_features: newFeatures, + features_override: true, + }); + await fetchUsers(); + } catch (error) { + console.error('Failed to update user features:', error); + } + }; + const handleSaveLLMConfig = async () => { setSaving(true); setMessage(null); - try { await apiClient.post('/admin/config', llmConfig); - setMessage({ type: 'success', text: 'LLM configuration saved successfully!' }); + setMessage({ type: 'success', text: 'LLM configuration saved!' }); setTimeout(() => setMessage(null), 3000); } catch (error: unknown) { - const errorMessage = - error instanceof Error - ? error.message - : 'Failed to save configuration'; - setMessage({ - type: 'error', - text: errorMessage, - }); + const msg = error instanceof Error ? error.message : 'Failed to save configuration'; + setMessage({ type: 'error', text: msg }); } finally { setSaving(false); } }; - // Show loading or access denied if (loading) { return (
- +
); } - if (user?.role !== 'super_admin') { + if (!isAdmin) { return (
-
- +
+
-

Access Denied

-

- You need Super Admin privileges to access this page. -

- +

Access Denied

+

You need admin privileges to access this page.

+
); @@ -111,234 +183,344 @@ export default function AdminPage() { const getRoleBadge = (role: UserRole) => { const variants: Record = { - super_admin: { label: 'Super Admin', className: 'bg-purple-100 text-purple-700' }, - content_manager: { label: 'Content Manager', className: 'bg-blue-100 text-blue-700' }, - user: { label: 'User', className: 'bg-gray-100 text-gray-700' }, + super_admin: { label: 'Super Admin', className: 'bg-primary/10 text-primary' }, + content_manager: { label: 'Content Manager', className: 'bg-accent/10 text-accent' }, + user: { label: 'User', className: 'bg-muted text-muted-foreground' }, }; - const variant = variants[role]; + const v = variants[role]; + return {v.label}; + }; + + const getStatusBadge = (status: KnowledgeDocument['status']) => { + const config = { + pending: { label: 'Pending', className: 'bg-yellow-100 text-yellow-700 dark:bg-yellow-900/30 dark:text-yellow-400' }, + processing: { label: 'Processing', className: 'bg-blue-100 text-blue-700 dark:bg-blue-900/30 dark:text-blue-400' }, + completed: { label: 'Completed', className: 'bg-green-100 text-green-700 dark:bg-green-900/30 dark:text-green-400' }, + failed: { label: 'Failed', className: 'bg-red-100 text-red-700 dark:bg-red-900/30 dark:text-red-400' }, + }; + const c = config[status]; return ( - - {variant.label} + + {status === 'processing' && } + {c.label} ); }; + const formatFileSize = (bytes: number) => { + if (bytes < 1024) return `${bytes} B`; + if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)} KB`; + return `${(bytes / (1024 * 1024)).toFixed(1)} MB`; + }; + + const defaultTab = isSuperAdmin ? 'users' : 'knowledge'; + return (
- {/* Header */} -
+
-
- +
+
-

Admin Dashboard

-

Manage users, configuration, and analytics

+

Admin Dashboard

+

+ {isSuperAdmin ? 'Manage users, configuration, and knowledge base' : 'Manage knowledge base documents'} +

- {/* Content */}
- + - - - Users - - - - LLM Config - - - - Analytics - + {isSuperAdmin && ( + <> + Users + LLM Config + + )} + Knowledge Base + {isSuperAdmin && ( + Integrations + )} + {isSuperAdmin && ( + Analytics + )} {/* Users Tab */} - - -
-

User Management

- {users.length} Total Users -
+ {isSuperAdmin && ( + + +
+

User Management

+ {users.length} Users +
-
- - - - - - - - - - - {users.length === 0 ? ( - - +
+
NameEmailRoleLast Login
- No users found -
+ + + + + + + - ) : ( - users.map((u) => ( - - - - - - - )) - )} - -
NameEmailRoleFeaturesLast Login
- {u.display_name} - {u.email}{getRoleBadge(u.role)} - {u.last_login_at - ? new Date(u.last_login_at).toLocaleDateString() - : 'Never'} -
-
-
-
+ + + {users.length === 0 ? ( + No users found + ) : ( + users.map((u) => ( + + {u.display_name} + {u.email} + {getRoleBadge(u.role)} + +
+ + +
+ + + {u.last_login_at ? new Date(u.last_login_at).toLocaleDateString() : 'Never'} + + + )) + )} + + +
+ + + )} {/* LLM Config Tab */} - - -
-

- LLM Provider Configuration -

-

- Update API keys for OpenAI, Azure, and Anthropic -

-
- -
-
- - - setLlmConfig({ ...llmConfig, openai_api_key: e.target.value }) - } - className="mt-2" - /> -

- Used for RAG embeddings and chat completions -

+ {isSuperAdmin && ( + + +
+

LLM Provider Configuration

+

Update API keys for LLM providers

-
- - - setLlmConfig({ ...llmConfig, azure_api_key: e.target.value }) - } - className="mt-2" - /> -

- Enterprise Azure OpenAI endpoint -

-
- -
- - - setLlmConfig({ ...llmConfig, anthropic_api_key: e.target.value }) - } - className="mt-2" - /> -

- Used for Claude models (optional) -

-
- - {message && ( -
- {message.text} +
+
+ + setLlmConfig({ ...llmConfig, openai_api_key: e.target.value })} + className="mt-1.5" + /> +
+
+ + setLlmConfig({ ...llmConfig, azure_api_key: e.target.value })} + className="mt-1.5" + /> +
+
+ + setLlmConfig({ ...llmConfig, anthropic_api_key: e.target.value })} + className="mt-1.5" + />
- )} -
- +
+
+ + + )} + + {/* Knowledge Base Tab */} + +
+ {/* Upload Section */} + +

Upload Document

+ fetchKnowledgeDocs()} + onUploadError={(err) => console.error('Upload error:', err)} + /> +
+ + {/* URL Scraping Section */} + +

+ + Add URL to Knowledge Base +

+
+
+ + setScrapeUrl(e.target.value)} + className="mt-1.5" + /> +
+
+ + setScrapeDescription(e.target.value)} + className="mt-1.5" + /> +
+
-
- + + + {/* Documents Table */} + +
+

Documents

+ {knowledgeTotal} Total +
+ + {knowledgeLoading ? ( +
+ +
+ ) : ( +
+ + + + + + + + + + + + + + {knowledgeDocs.length === 0 ? ( + + ) : ( + knowledgeDocs.map((doc) => ( + + + + + + + + + + )) + )} + +
File NameTypeSizeStatusChunksDateActions
No documents uploaded yet.
+ {doc.file_name} + {doc.file_type}{formatFileSize(doc.file_size)} +
+ {getStatusBadge(doc.status)} + {doc.status === 'failed' && doc.error_message && ( +

+ {doc.error_message} +

+ )} +
+
{doc.vector_count}{new Date(doc.created_at).toLocaleDateString()} + +
+
+ )} +
+
+ {/* Integrations Tab */} + {isSuperAdmin && ( + + + + )} + {/* Analytics Tab */} - - -

Usage Analytics

- -
-
-
Total Conversations
-
0
-
All modes
+ {isSuperAdmin && ( + + +

Usage Analytics

+
+
+
Total Conversations
+
0
+
+
+
Total Messages
+
0
+
+
+
Active Users
+
{users.length}
+
- -
-
Total Messages
-
0
-
User + Assistant
+
+ Coming Soon: Detailed analytics with charts, usage trends, and performance metrics.
- -
-
Active Users
-
{users.length}
-
Last 30 days
-
-
- -
- Coming Soon: Detailed analytics with charts, usage trends, and - performance metrics. -
-
-
+ + + )}
diff --git a/frontend/app/globals.css b/frontend/app/globals.css index 204ffcc..1a92e13 100644 --- a/frontend/app/globals.css +++ b/frontend/app/globals.css @@ -8,71 +8,128 @@ } } - - @layer base { :root { - --background: 0 0% 100%; - --foreground: 0 0% 3.9%; + --background: 0 0% 98%; + --foreground: 222 47% 11%; --card: 0 0% 100%; - --card-foreground: 0 0% 3.9%; + --card-foreground: 222 47% 11%; --popover: 0 0% 100%; - --popover-foreground: 0 0% 3.9%; - --primary: 0 0% 9%; - --primary-foreground: 0 0% 98%; - --secondary: 0 0% 96.1%; - --secondary-foreground: 0 0% 9%; - --muted: 0 0% 96.1%; - --muted-foreground: 0 0% 45.1%; - --accent: 0 0% 96.1%; - --accent-foreground: 0 0% 9%; - --destructive: 0 84.2% 60.2%; - --destructive-foreground: 0 0% 98%; - --border: 0 0% 89.8%; - --input: 0 0% 89.8%; - --ring: 0 0% 3.9%; - --chart-1: 12 76% 61%; - --chart-2: 173 58% 39%; - --chart-3: 197 37% 24%; - --chart-4: 43 74% 66%; - --chart-5: 27 87% 67%; - --radius: 0.5rem; + --popover-foreground: 222 47% 11%; + --primary: 221 83% 53%; + --primary-foreground: 210 40% 98%; + --secondary: 210 40% 96%; + --secondary-foreground: 222 47% 11%; + --muted: 210 40% 96%; + --muted-foreground: 215 16% 47%; + --accent: 262 83% 58%; + --accent-foreground: 210 40% 98%; + --destructive: 0 84% 60%; + --destructive-foreground: 210 40% 98%; + --border: 214 32% 91%; + --input: 214 32% 91%; + --ring: 221 83% 53%; + --chart-1: 221 83% 53%; + --chart-2: 262 83% 58%; + --chart-3: 142 71% 45%; + --chart-4: 43 96% 56%; + --chart-5: 0 84% 60%; + --radius: 0.75rem; + + /* Custom brand tokens */ + --brand-rag: 221 83% 53%; + --brand-assistant: 262 83% 58%; + --brand-gradient-from: 221 83% 53%; + --brand-gradient-to: 262 83% 58%; + --sidebar-width: 280px; } + .dark { - --background: 0 0% 3.9%; - --foreground: 0 0% 98%; - --card: 0 0% 3.9%; - --card-foreground: 0 0% 98%; - --popover: 0 0% 3.9%; - --popover-foreground: 0 0% 98%; - --primary: 0 0% 98%; - --primary-foreground: 0 0% 9%; - --secondary: 0 0% 14.9%; - --secondary-foreground: 0 0% 98%; - --muted: 0 0% 14.9%; - --muted-foreground: 0 0% 63.9%; - --accent: 0 0% 14.9%; - --accent-foreground: 0 0% 98%; - --destructive: 0 62.8% 30.6%; - --destructive-foreground: 0 0% 98%; - --border: 0 0% 14.9%; - --input: 0 0% 14.9%; - --ring: 0 0% 83.1%; - --chart-1: 220 70% 50%; - --chart-2: 160 60% 45%; - --chart-3: 30 80% 55%; - --chart-4: 280 65% 60%; - --chart-5: 340 75% 55%; + --background: 222 47% 5%; + --foreground: 210 40% 98%; + --card: 222 47% 8%; + --card-foreground: 210 40% 98%; + --popover: 222 47% 8%; + --popover-foreground: 210 40% 98%; + --primary: 217 91% 60%; + --primary-foreground: 222 47% 5%; + --secondary: 217 33% 17%; + --secondary-foreground: 210 40% 98%; + --muted: 217 33% 17%; + --muted-foreground: 215 20% 65%; + --accent: 263 70% 50%; + --accent-foreground: 210 40% 98%; + --destructive: 0 63% 31%; + --destructive-foreground: 210 40% 98%; + --border: 217 33% 17%; + --input: 217 33% 17%; + --ring: 217 91% 60%; + --chart-1: 217 91% 60%; + --chart-2: 263 70% 50%; + --chart-3: 142 71% 45%; + --chart-4: 43 96% 56%; + --chart-5: 0 63% 31%; + + --brand-rag: 217 91% 60%; + --brand-assistant: 263 70% 50%; + --brand-gradient-from: 217 91% 60%; + --brand-gradient-to: 263 70% 50%; } } - - @layer base { * { @apply border-border; } body { - @apply bg-background text-foreground; + @apply bg-background text-foreground antialiased; + font-feature-settings: "rlig" 1, "calt" 1; } } + +/* Scrollbar styling */ +@layer utilities { + .scrollbar-thin::-webkit-scrollbar { + width: 6px; + } + .scrollbar-thin::-webkit-scrollbar-track { + background: transparent; + } + .scrollbar-thin::-webkit-scrollbar-thumb { + background-color: hsl(var(--border)); + border-radius: 3px; + } + .scrollbar-thin::-webkit-scrollbar-thumb:hover { + background-color: hsl(var(--muted-foreground)); + } +} + +/* Typing indicator animation */ +@keyframes typing-dot { + 0%, 60%, 100% { opacity: 0.3; transform: translateY(0); } + 30% { opacity: 1; transform: translateY(-4px); } +} + +.typing-dot { + animation: typing-dot 1.4s infinite ease-in-out; +} +.typing-dot:nth-child(2) { animation-delay: 0.2s; } +.typing-dot:nth-child(3) { animation-delay: 0.4s; } + +/* Gradient text */ +.text-gradient { + background: linear-gradient(135deg, hsl(var(--brand-gradient-from)), hsl(var(--brand-gradient-to))); + -webkit-background-clip: text; + -webkit-text-fill-color: transparent; + background-clip: text; +} + +/* Smooth page transitions */ +.page-transition { + animation: fadeIn 0.2s ease-in-out; +} + +@keyframes fadeIn { + from { opacity: 0; transform: translateY(4px); } + to { opacity: 1; transform: translateY(0); } +} diff --git a/frontend/app/login/page.tsx b/frontend/app/login/page.tsx index cef4f72..2cc9e33 100644 --- a/frontend/app/login/page.tsx +++ b/frontend/app/login/page.tsx @@ -1,9 +1,5 @@ 'use client'; -// ============================================ -// Login Page with Dev Mode Support -// ============================================ - import { useEffect, useState } from 'react'; import { useRouter } from 'next/navigation'; import { LoginButton } from '@/components/auth/login-button'; @@ -11,6 +7,7 @@ import { useAuthStore } from '@/store/useAuthStore'; import { loginDev } from '@/lib/auth'; import { Button } from '@/components/ui/button'; import { Separator } from '@/components/ui/separator'; +import { BookOpenCheck, Sparkles, Shield } from 'lucide-react'; export default function LoginPage() { const router = useRouter(); @@ -18,11 +15,8 @@ export default function LoginPage() { const [isLoading, setIsLoading] = useState(false); const [error, setError] = useState(null); - // Redirect if already authenticated useEffect(() => { - if (isAuthenticated) { - router.push('/chat'); - } + if (isAuthenticated) router.push('/chat'); }, [isAuthenticated, router]); const handleDevLogin = async (role: string) => { @@ -30,13 +24,11 @@ export default function LoginPage() { setError(null); try { - // Map role to email for consistency const emailMap: Record = { super_admin: 'admin@nexus.dev', content_manager: 'manager@nexus.dev', user: 'user@nexus.dev', }; - const displayNameMap: Record = { super_admin: 'Admin User', content_manager: 'Content Manager', @@ -45,264 +37,167 @@ export default function LoginPage() { const email = emailMap[role]; const displayName = displayNameMap[role]; - - // Call dev login endpoint const user = await loginDev(email, role, displayName); - // Get tokens from localStorage (set by loginDev) const accessToken = localStorage.getItem('access_token'); const refreshToken = localStorage.getItem('refresh_token'); + if (!accessToken || !refreshToken) throw new Error('Failed to retrieve tokens'); - if (!accessToken || !refreshToken) { - throw new Error('Failed to retrieve tokens'); - } - - // Update Zustand store login(user, accessToken, refreshToken); - - // Redirect to chat router.push('/chat'); } catch (err: unknown) { console.error('Dev login error:', err); let errorMessage = 'Failed to login'; - if (err instanceof Error) { - errorMessage = err.message; - } else if (typeof err === 'object' && err !== null && 'response' in err) { - const errObj = err as Record; - if (errObj.response && typeof errObj.response === 'object') { - const response = errObj.response as Record; - if (response.data && typeof response.data === 'object') { - const data = response.data as Record; - if (typeof data.detail === 'string') { - errorMessage = data.detail; - } - } - } - } + if (err instanceof Error) errorMessage = err.message; setError(errorMessage); setIsLoading(false); } }; return ( -
-
- {/* Card */} -
- {/* Logo/Icon */} -
-
- - +
+ {/* Left Panel — Branding */} +
+
+
+
+ +
-

- Welcome to Nexus -

-

Enterprise AI Hub

-
- - {/* Description */} -
-

- Access powerful AI capabilities: -

-
    -
  • - - - - - RAG Chat - Query your corporate knowledge - base - -
  • -
  • - - - - - Notebooks - Analyze documents with AI - -
  • -
  • - - - - - Assistant - Productivity tools & automation - -
  • -
-
- - {/* Production Login Section */} -
-
- - Production Login - - - Not Configured - -
- { - e.preventDefault(); - alert('Azure AD is not configured yet. Use Dev Login below.'); - }} - /> -

- Microsoft Entra ID integration coming soon -

-
- - - - {/* Dev Login Section */} -
-
- - Development Login - - - Dev Only - -
- - {/* Error Display */} - {error && ( -
- {error} -
- )} - - {/* Dev Login Buttons */} -
- - - - - -
- -

- Choose a role to quickly access the app in development mode -

+ Nexus AI
- {/* Additional Info */} -

- By signing in, you agree to our Terms of Service and Privacy Policy -

+
+
+

+ Your enterprise
AI companion +

+

+ Two powerful assistants working together to boost your productivity. +

+
+ +
+
+
+ +
+
+

Oliver Process Helper

+

+ Query your corporate knowledge base with AI-powered search and citations. +

+
+
+ +
+
+ +
+
+

Personal Assistant

+

+ Your personal AI for writing, brainstorming, analysis, and general tasks. +

+
+
+
+
+ +

Enterprise AI Hub by Oliver Solutions

+
+ + {/* Right Panel — Login Form */} +
+
+ {/* Mobile Logo */} +
+
+ + + +
+

Nexus AI

+

Enterprise AI Hub

+
+ +
+
+

Welcome back

+

Sign in to continue to Nexus

+
+ + {/* Microsoft SSO */} +
+ { + e.preventDefault(); + alert('Azure AD SSO not configured yet. Use Development Login below.'); + }} + /> +
+ + + + {/* Dev Login */} +
+

+ Development Login +

+ + {error && ( +
+ {error} +
+ )} + +
+ + + + + +
+
+
+ +

+ By signing in, you agree to our Terms of Service and Privacy Policy +

+
); diff --git a/frontend/app/notebooks/page.tsx b/frontend/app/notebooks/page.tsx deleted file mode 100644 index 9e255de..0000000 --- a/frontend/app/notebooks/page.tsx +++ /dev/null @@ -1,313 +0,0 @@ -'use client'; - -// ============================================ -// Notebook Mode Page with Two-Column Layout -// ============================================ - -import { useEffect } from 'react'; -import { useNotebookStore } from '@/store/useNotebookStore'; -import { FileUploader } from '@/components/notebook/file-uploader'; -import { FileList } from '@/components/notebook/file-list'; -import { NotebookChat } from '@/components/notebook/notebook-chat'; -import { SessionSelector } from '@/components/notebook/session-selector'; -import { - Card, - CardDescription, - CardHeader, - CardTitle, -} from '@/components/ui/card'; -import { Button } from '@/components/ui/button'; -import { Badge } from '@/components/ui/badge'; -import { Pin, PinOff, Trash2, Clock } from 'lucide-react'; -import apiClient from '@/lib/api-client'; -import type { FileUploadResponse } from '@/types'; - -export default function NotebooksPage() { - const { - sessionId, - title, - isPinned, - expiresAt, - files, - setSession, - updateSessionPin, - addFile, - updateFile, - setFiles, - clearSession, - } = useNotebookStore(); - - // Load session data when sessionId changes - useEffect(() => { - const loadSession = async () => { - if (sessionId) { - try { - const { data } = await apiClient.get(`/notebook/${sessionId}`); - - setSession( - data.session_id, - data.conversation_id, - data.title, - data.is_pinned, - data.expires_at - ); - - // Load files - if (data.files && data.files.length > 0) { - setFiles(data.files); - } else { - setFiles([]); - } - } catch (error) { - console.error('Failed to load notebook session:', error); - } - } - }; - - loadSession(); - }, [sessionId, setSession, setFiles]); - - const handleSelectSession = async (selectedSessionId: string) => { - try { - const { data } = await apiClient.get(`/notebook/${selectedSessionId}`); - - setSession( - data.session_id, - data.conversation_id, - data.title, - data.is_pinned, - data.expires_at - ); - - if (data.files && data.files.length > 0) { - setFiles(data.files); - } else { - setFiles([]); - } - } catch (error) { - console.error('Failed to load session:', error); - } - }; - - const handleNewSession = async () => { - try { - const { data } = await apiClient.post('/notebook/create'); - - setSession( - data.session_id, - data.conversation_id, - data.title, - data.is_pinned, - data.expires_at - ); - - setFiles([]); - } catch (error) { - console.error('Failed to create notebook session:', error); - } - }; - - const handleUploadComplete = (uploadResponse: FileUploadResponse) => { - // Add file to state - addFile({ - file_id: uploadResponse.file_id, - file_name: uploadResponse.file_name, - file_size: uploadResponse.file_size, - file_type: uploadResponse.file_name.split('.').pop() || 'unknown', - processing_status: uploadResponse.processing_status, - processing_error: null, - uploaded_at: new Date().toISOString(), - processed_at: null, - }); - }; - - const handlePinToggle = async () => { - if (!sessionId) return; - - try { - const endpoint = isPinned - ? `/notebook/${sessionId}/unpin` - : `/notebook/${sessionId}/pin`; - - const { data } = await apiClient.post(endpoint); - - updateSessionPin(data.is_pinned, data.expires_at); - } catch (error) { - console.error('Failed to toggle pin:', error); - } - }; - - const handleDeleteSession = async () => { - if (!sessionId) return; - - if ( - !confirm('Are you sure you want to delete this notebook session? All files and chat history will be lost.') - ) { - return; - } - - try { - await apiClient.delete(`/notebook/${sessionId}`); - clearSession(); - } catch (error) { - console.error('Failed to delete session:', error); - } - }; - - const getTimeUntilExpiry = (): string | null => { - if (!expiresAt || isPinned) return null; - - const now = new Date(); - const expiry = new Date(expiresAt); - const diffMs = expiry.getTime() - now.getTime(); - - if (diffMs < 0) return 'Expired'; - - const hours = Math.floor(diffMs / (1000 * 60 * 60)); - const minutes = Math.floor((diffMs % (1000 * 60 * 60)) / (1000 * 60)); - - if (hours > 0) { - return `${hours}h ${minutes}m`; - } - return `${minutes}m`; - }; - - if (!sessionId) { - return ( -
-
-
- - - -
-

- No Session Selected -

-

- Select an existing notebook session or create a new one -

-
- -
-
-
- ); - } - - return ( -
- {/* Left Column: File Management (30%) */} -
- {/* Session Selector */} - - - {/* Session Header */} - - -
-
- {title} - - {isPinned ? ( - - - Pinned - - ) : ( - - - Expires in {getTimeUntilExpiry()} - - )} - -
-
- - -
-
-
-
- - {/* File Uploader */} - - - {/* File List */} - -
- - {/* Right Column: Chat Interface (70%) */} -
- {/* Header */} -
-
-
-

- Document Analysis -

-

- Ask questions about your uploaded documents -

-
- - {files.filter((f) => f.processing_status === 'completed').length} /{' '} - {files.length} Ready - -
-
- - {/* Chat */} - -
-
- ); -} diff --git a/frontend/app/profile/page.tsx b/frontend/app/profile/page.tsx index 6b37c40..b1879fc 100644 --- a/frontend/app/profile/page.tsx +++ b/frontend/app/profile/page.tsx @@ -1,14 +1,10 @@ 'use client'; -// ============================================ -// Profile Page -// ============================================ - import { useAuthStore } from '@/store/useAuthStore'; import { Card } from '@/components/ui/card'; import { Avatar, AvatarFallback } from '@/components/ui/avatar'; import { Badge } from '@/components/ui/badge'; -import { User as UserIcon, Mail, Shield, Calendar } from 'lucide-react'; +import { User as UserIcon, Mail, Shield, Calendar, BookOpenCheck, Sparkles, Wrench } from 'lucide-react'; export default function ProfilePage() { const { user } = useAuthStore(); @@ -16,80 +12,64 @@ export default function ProfilePage() { const getUserInitials = () => { if (!user?.display_name) return 'U'; const names = user.display_name.split(' '); - if (names.length >= 2) { - return `${names[0][0]}${names[1][0]}`.toUpperCase(); - } + if (names.length >= 2) return `${names[0][0]}${names[1][0]}`.toUpperCase(); return user.display_name[0].toUpperCase(); }; const getRoleBadge = () => { const roleMap: Record = { - super_admin: { label: 'Super Admin', className: 'bg-purple-100 text-purple-700' }, - content_manager: { label: 'Content Manager', className: 'bg-blue-100 text-blue-700' }, - user: { label: 'User', className: 'bg-gray-100 text-gray-700' }, + super_admin: { label: 'Super Admin', className: 'bg-primary/10 text-primary border-primary/20' }, + content_manager: { label: 'Content Manager', className: 'bg-accent/10 text-accent border-accent/20' }, + user: { label: 'User', className: 'bg-muted text-muted-foreground' }, }; const role = roleMap[user?.role || 'user']; - return ( - - {role.label} - - ); + return {role.label}; }; + const features = user?.allowed_features || []; + const hasAllAccess = !features.length; + return (
- {/* Header */} -
+
-
- +
+
-

My Profile

-

View your account information

+

My Profile

+

View your account information

- {/* Content */}
- {/* Profile Card */}
- - + + {getUserInitials()} -
-

- {user?.display_name || 'User'} -

+

{user?.display_name || 'User'}

{getRoleBadge()}
- -
-
+
+
{user?.email || 'No email'}
- -
+
- - Role: {user?.role.replace('_', ' ').toUpperCase() || 'USER'} - + Role: {(user?.role || 'user').replace('_', ' ').toUpperCase()}
- {user?.last_login_at && ( -
+
- - Last login: {new Date(user.last_login_at).toLocaleString()} - + Last login: {new Date(user.last_login_at).toLocaleString()}
)}
@@ -97,76 +77,47 @@ export default function ProfilePage() {
- {/* Additional Info */} -

Account Details

- -
-
- User ID - {user?.id.slice(0, 8)}... +

Feature Access

+
+
+
+ + Oliver Process Helper (RAG)
- -
- Account Created - - {user?.created_at - ? new Date(user.created_at).toLocaleDateString() - : 'Unknown'} - +
+
+ + Personal Assistant
- - {user?.department_id && ( -
- Department - - {user.department_id.slice(0, 8)}... - -
- )} - - {user?.country_id && ( -
- Country - {user.country_id.slice(0, 8)}... +
+
+ + Productivity Tools +
+ {(user?.role === 'super_admin' || user?.role === 'content_manager') && ( +
+
+ + Admin Dashboard
)}
- {/* Permissions */} -

Permissions

- -
-
-
- Access RAG Chat +

Account Details

+
+
+ User ID + {user?.id?.slice(0, 8)}...
-
-
- Access Notebooks +
+ Account Created + + {user?.created_at ? new Date(user.created_at).toLocaleDateString() : 'Unknown'} +
-
-
- Access Assistant Tools -
- {user?.role === 'super_admin' && ( -
-
- - Admin Dashboard Access - -
- )} - {user?.role === 'content_manager' && ( -
-
- - Content Management Access - -
- )}
diff --git a/frontend/components/notebook/file-uploader.tsx b/frontend/components/admin/knowledge-uploader.tsx similarity index 54% rename from frontend/components/notebook/file-uploader.tsx rename to frontend/components/admin/knowledge-uploader.tsx index dd66c1b..79f8bf1 100644 --- a/frontend/components/notebook/file-uploader.tsx +++ b/frontend/components/admin/knowledge-uploader.tsx @@ -1,27 +1,21 @@ 'use client'; -// ============================================ -// File Uploader Component with Dropzone -// ============================================ - import { useCallback, useState } from 'react'; import { useDropzone } from 'react-dropzone'; import { Upload, FileText, X } from 'lucide-react'; import { cn } from '@/lib/utils'; import { Progress } from '@/components/ui/progress'; -import type { FileUploadResponse } from '@/types'; +import type { KnowledgeDocumentUploadResponse } from '@/types'; -interface FileUploaderProps { - sessionId: string; - onUploadComplete: (file: FileUploadResponse) => void; +interface KnowledgeUploaderProps { + onUploadComplete: (doc: KnowledgeDocumentUploadResponse) => void; onUploadError?: (error: string) => void; } -export function FileUploader({ - sessionId, +export function KnowledgeUploader({ onUploadComplete, onUploadError, -}: FileUploaderProps) { +}: KnowledgeUploaderProps) { const [uploading, setUploading] = useState(false); const [uploadProgress, setUploadProgress] = useState(0); const [currentFileName, setCurrentFileName] = useState(null); @@ -31,38 +25,32 @@ export function FileUploader({ async (acceptedFiles: File[]) => { if (acceptedFiles.length === 0) return; - const file = acceptedFiles[0]; // Only handle one file at a time + const file = acceptedFiles[0]; setUploading(true); setUploadProgress(0); setCurrentFileName(file.name); setError(null); - // Validate file size (100MB) const maxSize = 100 * 1024 * 1024; if (file.size > maxSize) { setError('File size exceeds 100MB limit'); setUploading(false); setCurrentFileName(null); - if (onUploadError) { - onUploadError('File size exceeds 100MB limit'); - } + onUploadError?.('File size exceeds 100MB limit'); return; } try { const formData = new FormData(); formData.append('file', file); - const token = localStorage.getItem('access_token'); + setUploadProgress(30); - // Upload file with progress tracking const response = await fetch( - `${process.env.NEXT_PUBLIC_API_URL}/notebook/${sessionId}/upload`, + `${process.env.NEXT_PUBLIC_API_URL}/admin/knowledge/upload`, { method: 'POST', - headers: { - Authorization: `Bearer ${token}`, - }, + headers: { Authorization: `Bearer ${token}` }, body: formData, } ); @@ -72,33 +60,25 @@ export function FileUploader({ throw new Error(typeof errorData.detail === 'string' ? errorData.detail : 'Upload failed'); } - const data: FileUploadResponse = await response.json(); - - // Simulate upload progress (since we can't track real progress with fetch) + const data: KnowledgeDocumentUploadResponse = await response.json(); setUploadProgress(100); - - // Call success callback onUploadComplete(data); - // Reset state setTimeout(() => { setUploading(false); setCurrentFileName(null); setUploadProgress(0); }, 500); } catch (err: unknown) { - console.error('Upload error:', err); const errorMessage = err instanceof Error ? err.message : 'Failed to upload file'; setError(errorMessage); - if (onUploadError) { - onUploadError(errorMessage); - } + onUploadError?.(errorMessage); setUploading(false); setCurrentFileName(null); setUploadProgress(0); } }, - [sessionId, onUploadComplete, onUploadError] + [onUploadComplete, onUploadError] ); const { getRootProps, getInputProps, isDragActive } = useDropzone({ @@ -107,91 +87,69 @@ export function FileUploader({ disabled: uploading, accept: { 'application/pdf': ['.pdf'], - 'application/vnd.openxmlformats-officedocument.wordprocessingml.document': - ['.docx'], + 'application/vnd.openxmlformats-officedocument.wordprocessingml.document': ['.docx'], 'application/msword': ['.doc'], - 'application/vnd.openxmlformats-officedocument.presentationml.presentation': - ['.pptx'], - 'application/vnd.ms-powerpoint': ['.ppt'], - 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': - ['.xlsx'], + 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': ['.xlsx'], 'application/vnd.ms-excel': ['.xls'], + 'application/vnd.openxmlformats-officedocument.presentationml.presentation': ['.pptx'], + 'application/vnd.ms-powerpoint': ['.ppt'], 'text/plain': ['.txt'], 'text/csv': ['.csv'], - 'text/markdown': ['.md'], - 'image/*': ['.jpg', '.jpeg', '.png', '.gif'], }, }); return (
- {/* Dropzone */}
-
- {/* Icon */} -
+
- - {/* Text */}
-

+

{isDragActive ? 'Drop file here' : 'Upload a document'}

-

- Drag & drop or click to browse -

+

Drag & drop or click to browse

- - {/* Supported formats */} -

- PDF, DOCX, XLSX, TXT, CSV, Images (Max 100MB) +

+ PDF, DOCX, DOC, XLSX, XLS, PPTX, PPT, CSV, TXT (Max 100MB)

- {/* Upload Progress */} {uploading && currentFileName && ( -
+
- -
-

- {currentFileName} -

-

Uploading...

+ +
+

{currentFileName}

+

Uploading...

)} - {/* Error Display */} {error && !uploading && ( -
+
- +
-

Upload Failed

-

{error}

+

Upload Failed

+

{error}

diff --git a/frontend/components/chat/chat-input.tsx b/frontend/components/chat/chat-input.tsx index 3daee74..07513ba 100644 --- a/frontend/components/chat/chat-input.tsx +++ b/frontend/components/chat/chat-input.tsx @@ -1,9 +1,5 @@ 'use client'; -// ============================================ -// Chat Input Component with Auto-Resize -// ============================================ - import { useState, useRef, useEffect, KeyboardEvent } from 'react'; import { Button } from '@/components/ui/button'; import { Send, StopCircle } from 'lucide-react'; @@ -15,6 +11,7 @@ interface ChatInputProps { disabled?: boolean; isStreaming?: boolean; placeholder?: string; + mode?: 'rag' | 'assistant'; } export function ChatInput({ @@ -22,12 +19,12 @@ export function ChatInput({ onStop, disabled = false, isStreaming = false, - placeholder = 'Ask about your knowledge base...', + placeholder = 'Type a message...', + mode = 'rag', }: ChatInputProps) { const [message, setMessage] = useState(''); const textareaRef = useRef(null); - // Auto-resize textarea useEffect(() => { const textarea = textareaRef.current; if (textarea) { @@ -41,16 +38,11 @@ export function ChatInput({ if (trimmedMessage && !disabled && !isStreaming) { onSend(trimmedMessage); setMessage(''); - - // Reset textarea height - if (textareaRef.current) { - textareaRef.current.style.height = 'auto'; - } + if (textareaRef.current) textareaRef.current.style.height = 'auto'; } }; const handleKeyDown = (e: KeyboardEvent) => { - // Send on Enter (without Shift) if (e.key === 'Enter' && !e.shiftKey) { e.preventDefault(); handleSend(); @@ -58,16 +50,17 @@ export function ChatInput({ }; const handleStop = () => { - if (onStop && isStreaming) { - onStop(); - } + if (onStop && isStreaming) onStop(); }; + const sendBtnColor = mode === 'rag' + ? 'bg-primary hover:bg-primary/90' + : 'bg-accent hover:bg-accent/90'; + return ( -
-
+
+
- {/* Textarea */}