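# netflix_chatbot/config.py
#
# Central configuration for the chatbot: filesystem paths, application
# settings, API keys, model/indexing parameters, CORS, server and Neo4j
# settings. Values are read from the environment where noted.
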
import os
import sys
from pathlib import Path

from dotenv import load_dotenv

# Load environment variables from the .env file located next to this module.
env_path = Path(__file__).resolve().parent / '.env'
if env_path.exists():
    # override=True is passed so the values from .env are (re)applied even if
    # the variables are already present in the process environment.
    load_dotenv(dotenv_path=env_path, override=True)
    print(f"Loaded environment variables from {env_path}")
else:
    # A missing .env is only reported; the settings below fall back to the
    # defaults given in their os.environ.get(...) calls.
    print(f"WARNING: .env file not found at {env_path}", file=sys.stderr)

# --- Directory Paths ---
# Every path is anchored at the directory that contains this module.
BASE_DIR = Path(__file__).resolve().parent

# Upload area and its sub-folders.
UPLOAD_DIR = BASE_DIR / 'uploads'
CHUNK_FOLDER = UPLOAD_DIR / 'chunks'
UPLOAD_METADATA_FOLDER = UPLOAD_DIR / 'metadata'
IMAGES_DIRECTORY = UPLOAD_DIR / 'images'

# Supporting documents that feed the RAG store.
SUPPORTING_FILES_DIR = BASE_DIR / 'supporting_files'
NETFLIX_DOCS_FOLDER = SUPPORTING_FILES_DIR / 'files_for_rag_store'

# Index storage.
INDEX_STORAGE_DIR = BASE_DIR / 'index_storage'
INDEX_PERSIST_PATH = INDEX_STORAGE_DIR / "netflix_docs_index"

LOG_FILE_PATH = BASE_DIR / 'app.log'

# Create necessary directories at import time. exist_ok=True makes this a
# no-op when they already exist; UPLOAD_DIR is created implicitly as the
# parent of the chunk/metadata/image folders.
for _required_dir in (
    CHUNK_FOLDER,
    UPLOAD_METADATA_FOLDER,
    INDEX_STORAGE_DIR,
    IMAGES_DIRECTORY,
):
    os.makedirs(_required_dir, exist_ok=True)

# --- Application Settings ---
# File extensions (lower-case, without the leading dot) listed as allowed.
ALLOWED_EXTENSIONS = {'pdf', 'doc', 'docx', 'txt', 'xls', 'xlsx', 'ppt', 'pptx', 'eml'}
# Root path for running behind a proxy; empty string when unset.
APPLICATION_ROOT = os.environ.get('APPLICATION_ROOT', '')
# 500MB limit (adjust as needed).
MAX_CONTENT_LENGTH = 500 * 1024 * 1024

# --- API Keys ---
# Read from environment variables (set them in the shell or in the .env
# file); a missing key is represented by the empty string.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
ANTHROPIC_API_KEY = os.environ.get("ANTHROPIC_API_KEY", "")
LLAMA_CLOUD_API_KEY = os.environ.get("LLAMA_CLOUD_API_KEY", "")

# Ensure required keys are set. Only OPENAI_API_KEY is checked here; its
# absence is reported on stderr but does not abort the import.
if not OPENAI_API_KEY:
    print("ERROR: OPENAI_API_KEY not set in environment or .env file. This is required.", file=sys.stderr)
    print("Please add OPENAI_API_KEY=your_key to your .env file.", file=sys.stderr)
    # Diagnostic aid: list the NAMES (never the values) of environment
    # variables whose name contains "key", to help spot a misspelled variable.
    _key_like_names = [name for name in os.environ if 'key' in name.lower()]
    print(f"Current environment keys: {_key_like_names}", file=sys.stderr)

# Always set environment variables, even if empty - the error messages will
# be handled by the code that consumes them.
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
os.environ["ANTHROPIC_API_KEY"] = ANTHROPIC_API_KEY
os.environ["LLAMA_CLOUD_API_KEY"] = LLAMA_CLOUD_API_KEY

# Print API key status for debugging (presence only, not the key itself).
print(f"OpenAI API key {'is set' if OPENAI_API_KEY else 'is NOT set'}", file=sys.stderr)

# --- AI Model Configuration ---
LLM_MODEL = "gpt-4.1"
EMBEDDING_MODEL = "text-embedding-3-small"
LLM_TEMPERATURE = 0.3
# Timeouts below are expressed in seconds.
LLM_TIMEOUT = 300.0  # 5 minutes
AGENT_TIMEOUT = 600.0  # 10 minutes for the agent run
TOOL_EXECUTION_TIMEOUT = 300.0  # 5 minutes for individual tool calls

# --- LlamaParse Configuration ---
# TODO: verify that this vendor model name is still the correct identifier.
LLAMA_PARSE_VENDOR_MODEL = "openai-gpt4o"
LLAMA_PARSE_MAX_TIMEOUT = 3600  # 1 hour

# --- Indexing Configuration ---
# Semantic Splitter is used by default (see ai_core.py). The two settings
# below are kept only as an example for a SentenceSplitter setup:
# NODE_PARSER_CHUNK_SIZE = 2048
# NODE_PARSER_CHUNK_OVERLAP = 20
SIMILARITY_TOP_K = 10
SIMILARITY_CUTOFF = 0.0  # Adjust if needed

# --- CORS Configuration ---
# Origins allowed to call the API; add the production frontend URL here.
CORS_ALLOWED_ORIGINS = ["http://localhost:5173", "https://ai-sandbox.oliver.solutions"]
CORS_SUPPORTS_CREDENTIALS = True

# --- Server Configuration ---
# PRODUCTION=true (case-insensitive) selects production behavior. The flag is
# parsed once so the host and reloader settings cannot disagree.
_IS_PRODUCTION = os.environ.get("PRODUCTION", "false").lower() == "true"

# Bind to all interfaces in production, to localhost otherwise.
SERVER_HOST = "0.0.0.0" if _IS_PRODUCTION else "localhost"
# int() raises ValueError at import time if PORT is set to a non-integer.
SERVER_PORT = int(os.environ.get("PORT", "6175"))
# The reloader is enabled only outside production.
USE_RELOADER = not _IS_PRODUCTION
LOG_LEVEL = os.environ.get("LOG_LEVEL", "INFO")

# Hypercorn specific timeouts (in seconds)
KEEP_ALIVE_TIMEOUT = 300
READ_TIMEOUT = 300
WRITE_TIMEOUT = 300

# --- MongoDB Configuration ---
# No MongoDB settings are defined here; mongodb_utils is assumed to handle
# the connection details itself (e.g., via environment variables).

# --- Neo4j Configuration ---
NEO4J_URL = os.environ.get("NEO4J_URL", "bolt://localhost:7687")
NEO4J_USERNAME = os.environ.get("NEO4J_USERNAME", "neo4j")

# SECURITY: the fallback below is a credential committed to source control
# (noted in the original as the default password from graphRAG.py). It is
# kept so existing setups keep working, but its use is reported on stderr so
# deployments notice and set NEO4J_PASSWORD explicitly.
if "NEO4J_PASSWORD" not in os.environ:
    print(
        "WARNING: NEO4J_PASSWORD not set in environment or .env file; "
        "falling back to the hard-coded default password.",
        file=sys.stderr,
    )
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD", "tavern-easy-museum-arthur-coconut-3483")