diff --git a/backend/app/services/openai_service.py b/backend/app/services/openai_service.py index 346e5e5..45437b1 100644 --- a/backend/app/services/openai_service.py +++ b/backend/app/services/openai_service.py @@ -46,39 +46,141 @@ class OpenAIService: Returns: System instruction text enforcing RAG-only behavior """ - return """You are "Seapac Ops Bot", an operations assistant for Oliver Agency's APAC region. + return """You are "Seapac Ops Bot", an operations assistant and knowledge base expert for Oliver Agency's APAC region. + +šÆ YOUR DUAL ROLE: +1. **Onboarding Guide for New Employees:** Provide detailed step-by-step instructions with navigation +2. **Operations Assistant for All Staff:** Help experienced employees find precise information quickly +3. **Knowledge Base Search Tool:** Deliver accurate information from the operations knowledge base + +Adapt your response style: +- For "how to" questions ā Provide full onboarding-style guidance with navigation and links +- For "what is" / "where is" questions ā Provide direct, precise answers with sources +- Always include relevant links, contact info, and resources regardless of question type ā ļø CRITICAL RULES - STRICTLY ENFORCE: -1. ONLY answer questions using information from the file_search results -2. If file_search returns NO results or empty results, you MUST respond EXACTLY: - "I don't have information about that in my knowledge base. I can only help with Oliver Agency APAC operations topics like policies, procedures, HR, travel, expenses, IT, and facilities. Please ask about these topics or contact HR directly." +1. ONLY answer using EXACT information from the file_search results provided +2. If file_search returns NO results or IRRELEVANT results, respond: + "I don't have information about that in my knowledge base. Please contact HR directly at [contact from docs if available]." -3. NEVER use general knowledge or make assumptions -4. 
NEVER answer questions outside of Oliver Agency APAC operations -5. NEVER provide jokes, weather, general advice, or off-topic responses +3. NEVER use general knowledge, common sense, or assumptions +4. NEVER answer questions outside Oliver Agency APAC operations scope +5. ALWAYS provide COMPLETE, DETAILED answers as if teaching a NEW EMPLOYEE +6. Think of yourself as an ONBOARDING GUIDE - be thorough, clear, and helpful -WHAT YOU CAN HELP WITH (only if found in documents): -- APAC travel and expenses policies -- Vendor onboarding and procurement -- IT equipment requests and service desk -- HR and payroll guidance for APAC -- Onboarding or offboarding processes -- Facilities and office operations +RESPONSE QUALITY RULES (ONBOARDING FOCUS): +- Use ALL relevant search results to provide COMPREHENSIVE answers +- Structure answers with clear **bold section headers** +- Provide DETAILED, COMPLETE answers - include ALL relevant information found +- Structure answers clearly with sections: + 1. **Main Answer:** Direct answer with key information + 2. **How to Access:** Detailed navigation instructions (where to click, which menu, which tab) + 3. **Step-by-Step Instructions:** Detailed procedures with specific button/menu names + 4. **Important Links:** ALL URLs, SharePoint links, system links (make them clickable) + 5. **Alternative Methods:** Other ways to accomplish the task (if applicable) + 6. **Tips for New Users:** Additional context, common mistakes to avoid + 7. **Need Help?:** Contact information or next steps + 8. 
**Sources:** All source documents at the end -WHAT YOU CANNOT HELP WITH: -- Jokes or entertainment -- Weather or news -- General knowledge questions -- Personal advice -- Topics not in the knowledge base +ONBOARDING-STYLE DETAILS TO INCLUDE: +- ALL URLs and links from documents (SharePoint, external sites, dashboards) +- Specific navigation paths: "Go to [System] ā Click [Tab/Menu] ā Select [Option]" +- Exact names of: dashboards, tabs, menus, buttons, sections, systems +- Login instructions if mentioned in documents +- Access requirements (permissions, accounts needed) +- Visual cues: "Look for the [Name] button in the top right corner" +- If question has multiple parts, answer EACH part thoroughly with separate sections +- Include specific details: names, dates, numbers, procedures, contacts, login info +- If documents contain procedures/steps, list ALL steps in order with exact navigation +- NEVER summarize or shorten information - provide FULL details as if training a new employee +- Include context: WHAT it is, WHY they need it, WHERE to find it, HOW to use it -RESPONSE FORMAT: -- If relevant information found: Answer + cite source document -- If no information found: "I don't have information about [topic] in my knowledge base. I can only help with Oliver Agency APAC operations. Please contact HR for other questions." -- If off-topic question: "I can only help with Oliver Agency APAC operations topics. Please ask about policies, procedures, HR, travel, IT, or facilities." +MANDATORY SOURCE CITATION FORMAT: +End EVERY response with: -Remember: When in doubt, DON'T answer. Redirect to HR or explain your scope.""" +**Source:** [Document Name(s)] + +TOPICS IN SCOPE (only if in documents): +ā APAC travel and expenses policies +ā Vendor onboarding and procurement +ā IT equipment requests and service desk +ā HR and payroll guidance for APAC +ā Onboarding/offboarding processes +ā Facilities and office operations +ā Database systems (COSMICC, Zoho, etc.) 
+ā Reporting and analytics + +RESPONSE FORMAT EXAMPLES: + +ā EXCELLENT response (Onboarding-style with full navigation): + +"You can find timesheet data through multiple methods. Let me walk you through each option: + +**Primary Method - Agency Time Dashboard:** +The "Agency Time Dashboard" displays missing timesheets and provides detailed time reports for the entire account. + +**How to Access:** +1. Log into COSMICC platform +2. Navigate to the "Reports" section in the main menu +3. Click on "Agency Time Dashboard" +4. Select your reporting period from the dropdown +5. Click "Generate Report" + +**Alternative Method - Zoho Dashboard:** +Updated timesheets can also be viewed in Zoho: +1. Go to BTG Visor Dashboard in Zoho +2. Click on the "Home Agency Utilization" tab +3. Your timesheet data will be displayed here + +**Detailed Step-by-Step Guide:** +For comprehensive instructions on logging time, including how to view assets by names or job roles, access the timesheet guide on SharePoint: +š https://olivermarketing.sharepoint.com/:f:/r/sites/OLIVERAPAC/Shared%20Documents/Timesheet%20Guide?csf=1&web=1&e=Ko8rEe + +**Tips for New Users:** +- Make sure you have access to both COSMICC and Zoho - if not, request access from IT +- Timesheet data is typically updated daily +- For historical data, use the date range selector in the Agency Time Dashboard + +**Need Help?** +If you need specific access or encounter any issues, contact Operations with your account details and the specific dashboard you need access to. 
+ +**Sources:** Ops Database_OpEx Framework_COSMICC.docx, Ops Database_Zoho_Updated_010826.docx, Ops Database_General Operations_Updated_01062026.docx" + +ā ONBOARDING-STYLE GUIDELINES: +- Include ALL links (SharePoint, external URLs, system links) +- Provide step-by-step navigation: "Go to [X] ā Click [Y] ā Select [Z]" +- Use exact names: dashboard names, tab names, menu items, button labels +- Add tips and context for new users +- Explain access requirements (accounts, permissions needed) +- Present information as if teaching someone on day 1 +- Include visual cues: "Look for the [Name] button", "Find the [Tab] section" + +ā BAD response (Too brief, missing navigation and links): +"Check Agency Time Dashboard. + +**Source:** COSMICC.docx" + +CRITICAL RULES: +- Provide COMPLETE answers with ALL relevant details from documents +- Include ALL URLs, links, and navigation paths from documents - NEVER skip them +- Provide step-by-step navigation: exact menu names, button labels, tab names +- Include access requirements: which systems, accounts, or permissions needed +- Add context for new users: explain WHAT, WHY, WHERE, HOW +- Use clear formatting (bullet points, numbered lists) for readability +- ALWAYS cite ALL source documents used +- If information is complex, break it down into sections with headings +- NEVER say "briefly" or "in summary" - give FULL information + +š ONBOARDING MINDSET: +Treat EVERY user as a NEW EMPLOYEE on their FIRST DAY. They need: +- Complete navigation instructions (where to click, which menu) +- All links and resources (SharePoint, dashboards, external sites) +- Context and explanations (not just facts) +- Tips and common gotchas +- Who to contact for help + +Remember: You are an ONBOARDING GUIDE. Be thorough, include ALL links and navigation details, explain like teaching someone new. 
COMPLETE, DETAILED, WELL-STRUCTURED answers with ALL information from documents.""" async def generate_response( self, @@ -114,7 +216,7 @@ Remember: When in doubt, DON'T answer. Redirect to HR or explain your scope.""" { "type": "file_search", "vector_store_ids": [self.vector_store_id], - "max_num_results": 20 + "max_num_results": 30 # Increased to 30 for maximum comprehensive context } ], "store": True, # Store for conversation history @@ -212,23 +314,48 @@ Remember: When in doubt, DON'T answer. Redirect to HR or explain your scope.""" def _format_search_results(self, results: List) -> List[Dict]: """ Format file search results for storage/display. + Only includes results with high relevance scores. Args: results: Raw search results from file_search_call Returns: - Formatted search results list + Formatted search results list (filtered for quality) """ formatted = [] + min_score_threshold = 0.2 # Lowered to 0.2 to get more comprehensive results for result in results: + score = getattr(result, "score", 0.0) + + # Filter out only very low-relevance results + if score < min_score_threshold: + logger.debug(f"Filtering out low-score result: {score}") + continue + + filename = getattr(result, "filename", "Unknown") + file_id = getattr(result, "file_id", None) + content = getattr(result, "content", "") + formatted.append({ - "file_id": getattr(result, "file_id", None), - "filename": getattr(result, "filename", "Unknown"), - "content_snippet": getattr(result, "content", "")[:200], - "score": getattr(result, "score", 0.0) + "file_id": file_id, + "filename": filename, + "content_snippet": content[:500], # Increased to 500 for more context + "score": score }) + logger.debug(f"Including result from '{filename}' (score: {score:.3f})") + + # Sort by score descending (most relevant first) + formatted.sort(key=lambda x: x["score"], reverse=True) + + logger.info(f"Filtered search results: {len(formatted)} results (threshold: {min_score_threshold})") + + # Log top 5 sources for 
debugging + if formatted: + top_sources = [f"{r['filename']} ({r['score']:.3f})" for r in formatted[:5]] + logger.info(f"Top sources: {', '.join(top_sources)}") + return formatted def _check_citations(self, message: Optional[str], search_results: List[Dict]) -> bool: @@ -248,14 +375,45 @@ Remember: When in doubt, DON'T answer. Redirect to HR or explain your scope.""" # If we have search results, the response should reference them if len(search_results) > 0: citation_keywords = [ - "according to", + "**source:**", "source:", + "according to", "document", "as stated in", "refers to", - "based on" + "based on", + "from the", + "in the", + "policy manual", + "section", + "guideline", + "quoted:", + '"' # Check for quotation marks indicating exact quotes ] - return any(kw in message.lower() for kw in citation_keywords) + has_citation = any(kw in message.lower() for kw in citation_keywords) + + # Strong check for explicit source citation (preferred format) + has_explicit_source = "**source:**" in message.lower() or ( + "source:" in message.lower() and + any(doc["filename"].lower() in message.lower() for doc in search_results if doc.get("filename")) + ) + + # Additional check: response should have reasonable length if citing documents + # Too short responses might be incomplete or hallucinating + if has_citation and len(message.strip()) < 50: + logger.warning(f"Response too short despite having search results: {message}") + return False + + # Check for minimal response length with search results (should be detailed) + if len(search_results) > 0 and len(message.strip()) < 100: + logger.warning(f"Response appears incomplete with {len(search_results)} search results but only {len(message)} chars") + return False + + # Prefer explicit source citations + if has_explicit_source: + return True + + return has_citation # If no search results, check for valid "no info" response return self._check_valid_response(message) @@ -281,11 +439,8 @@ Remember: When in doubt, DON'T answer. 
Redirect to HR or explain your scope.""" f"Response: {content[:100]}...\n" f"Has search results: {has_search_results}" ) - - # Add disclaimer (modify content in parsed_response) - parsed_response["content"] += ( - "\n\nā ļø Note: This response may not be fully verified against documents." - ) + # Mark for potential review but don't modify content + parsed_response["needs_review"] = True def _check_valid_response(self, content: str) -> bool: """ @@ -338,7 +493,7 @@ Remember: When in doubt, DON'T answer. Redirect to HR or explain your scope.""" { "type": "file_search", "vector_store_ids": [self.vector_store_id], - "max_num_results": 20 + "max_num_results": 30 # Increased to 30 for maximum comprehensive context } ], "stream": True, # Enable streaming diff --git a/frontend/package.json b/frontend/package.json index 210f43c..71a9655 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -13,6 +13,7 @@ "react-dom": "^18.2.0", "react-markdown": "^9.0.1", "react-router-dom": "^6.30.3", + "react-syntax-highlighter": "^15.5.0", "recharts": "^2.10.4" }, "devDependencies": { @@ -20,6 +21,7 @@ "@types/prismjs": "^1.26.3", "@types/react": "^18.2.48", "@types/react-dom": "^18.2.18", + "@types/react-syntax-highlighter": "^15.5.11", "@typescript-eslint/eslint-plugin": "^6.19.0", "@typescript-eslint/parser": "^6.19.0", "assert": "^2.1.0", diff --git a/frontend/src/components/ChatInterface.tsx b/frontend/src/components/ChatInterface.tsx index 0727b23..5b62414 100644 --- a/frontend/src/components/ChatInterface.tsx +++ b/frontend/src/components/ChatInterface.tsx @@ -7,6 +7,8 @@ import React, { useState, useEffect, useRef } from 'react'; import { useChat } from '../context/ChatContext'; import ReactMarkdown from 'react-markdown'; +import { Prism as SyntaxHighlighter } from 'react-syntax-highlighter'; +import { vscDarkPlus } from 'react-syntax-highlighter/dist/esm/styles/prism'; const ChatInterface: React.FC = () => { const { @@ -93,10 +95,78 @@ const ChatInterface: React.FC = 
() => { {message.role === 'user' ? '👤' : '🤖'}
{children}
, + + // Custom list styling + ul: ({children}) =>
+ {children}
+
+ );
+ },
+
+ // Custom link styling
+ a: ({children, href}) => (
+
+ {children}
+
+ ),
+
+ // Custom blockquote styling
+ blockquote: ({children}) => (
+ {children}+ ), + + // Custom strong/bold styling + strong: ({children}) => {children}, + + // Custom emphasis/italic styling + em: ({children}) => {children}, + }} + > + {message.content} +