diff --git a/backend/.env.example b/backend/.env.example index 6f63b0f..3dbdc5a 100644 --- a/backend/.env.example +++ b/backend/.env.example @@ -27,9 +27,11 @@ REDIS_URL=redis://localhost:6379/0 RATE_LIMIT_PER_MINUTE=30 RATE_LIMIT_PER_DAY=1000 -# Token Costs (USD per 1K tokens) - UPDATE WITH REAL PRICES FROM OPENAI PRICING PAGE -# Example prices (UPDATE THESE): -# For gpt-4o: Input $2.50 per 1M tokens = 0.0025 per 1K, Output $10.00 per 1M = 0.010 per 1K -# For gpt-4o-mini: Input $0.15 per 1M = 0.00015 per 1K, Output $0.60 per 1M = 0.0006 per 1K -PROMPT_TOKEN_COST=0.0001 # TODO: Update with actual price for your model -COMPLETION_TOKEN_COST=0.0002 # TODO: Update with actual price for your model +# Token Costs (USD per 1K tokens) +# gpt-5-nano-2025-08-07 pricing: +# - Input: $0.05 per 1M tokens = $0.00005 per 1K tokens +# - Cached input: $0.005 per 1M tokens = $0.000005 per 1K tokens +# - Output: $0.40 per 1M tokens = $0.0004 per 1K tokens +PROMPT_TOKEN_COST=0.00005 +CACHED_PROMPT_TOKEN_COST=0.000005 +COMPLETION_TOKEN_COST=0.0004 diff --git a/backend/alembic/versions/db52d151a2a7_add_cached_tokens_to_token_usage.py b/backend/alembic/versions/db52d151a2a7_add_cached_tokens_to_token_usage.py new file mode 100644 index 0000000..7fe7d69 --- /dev/null +++ b/backend/alembic/versions/db52d151a2a7_add_cached_tokens_to_token_usage.py @@ -0,0 +1,26 @@ +"""add_cached_tokens_to_token_usage + +Revision ID: db52d151a2a7 +Revises: 001_initial +Create Date: 2026-01-27 20:16:32.601549 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. 
# Alembic revision identifiers: this revision and the one it builds on.
revision = 'db52d151a2a7'
down_revision = '001_initial'
branch_labels = None
depends_on = None


def upgrade() -> None:
    """Add the ``cached_tokens`` column to the ``token_usage`` table.

    The column is a non-nullable integer declared with a server-side
    default of ``'0'``.
    """
    cached_tokens_column = sa.Column(
        'cached_tokens',
        sa.Integer(),
        nullable=False,
        server_default='0',
    )
    op.add_column('token_usage', cached_tokens_column)


def downgrade() -> None:
    """Drop the ``cached_tokens`` column from the ``token_usage`` table."""
    op.drop_column('token_usage', 'cached_tokens')
def _calculate_cost(
    self,
    prompt_tokens: int,
    completion_tokens: int,
    cached_tokens: int = 0
) -> Decimal:
    """
    Calculate cost in USD

    Cached input tokens are billed at ``settings.CACHED_PROMPT_TOKEN_COST``;
    the remaining prompt tokens are billed at ``settings.PROMPT_TOKEN_COST``
    and completion tokens at ``settings.COMPLETION_TOKEN_COST``. All rates
    are expressed in USD per 1K tokens.

    Args:
        prompt_tokens: Number of prompt tokens (total input tokens,
            including any cached ones)
        completion_tokens: Number of completion tokens
        cached_tokens: Number of cached input tokens (charged at lower
            rate). ``None`` is treated as 0, and the value is clamped to
            the range ``[0, prompt_tokens]``.

    Returns:
        Total cost in USD
    """
    # The usage payload may carry no cached count at all (None); bill it as 0
    # instead of failing on the subtraction below.
    cached = cached_tokens or 0

    # Cached tokens are a subset of the prompt tokens. Clamp so that an
    # inconsistent usage report can never produce a negative non-cached
    # count (which would under-charge or yield a negative cost).
    cached = max(0, min(cached, prompt_tokens))
    non_cached = prompt_tokens - cached

    def _cost(tokens, rate_per_1k) -> Decimal:
        # Go through str() so float rates keep their short decimal form
        # rather than their binary floating-point expansion.
        return Decimal(str(tokens)) * Decimal(str(rate_per_1k)) / Decimal("1000")

    return (
        _cost(non_cached, settings.PROMPT_TOKEN_COST)
        + _cost(cached, settings.CACHED_PROMPT_TOKEN_COST)
        + _cost(completion_tokens, settings.COMPLETION_TOKEN_COST)
    )
Say "I don't have this information in my output_item.results ) + # Extract cached tokens if available + cached_tokens = 0 + if hasattr(usage, 'input_tokens_details') and usage.input_tokens_details: + cached_tokens = getattr(usage.input_tokens_details, 'cached_tokens', 0) + return { "response_id": response_id, "content": assistant_message or "", "usage": { "prompt_tokens": usage.input_tokens, + "cached_tokens": cached_tokens, "completion_tokens": usage.output_tokens, "total_tokens": usage.total_tokens }, diff --git a/frontend/src/components/ChatInterface.tsx b/frontend/src/components/ChatInterface.tsx index 8029542..0727b23 100644 --- a/frontend/src/components/ChatInterface.tsx +++ b/frontend/src/components/ChatInterface.tsx @@ -19,12 +19,21 @@ const ChatInterface: React.FC = () => { const [messageText, setMessageText] = useState(''); const messagesEndRef = useRef(null); + const textareaRef = useRef(null); // Auto-scroll to bottom when new messages arrive useEffect(() => { messagesEndRef.current?.scrollIntoView({ behavior: 'smooth' }); }, [messages]); + // Auto-resize textarea + useEffect(() => { + if (textareaRef.current) { + textareaRef.current.style.height = 'auto'; + textareaRef.current.style.height = textareaRef.current.scrollHeight + 'px'; + } + }, [messageText]); + const handleSend = async () => { if (!messageText.trim() || isSending) return; @@ -41,7 +50,7 @@ const ChatInterface: React.FC = () => { } }; - const handleKeyPress = (e: React.KeyboardEvent) => { + const handleKeyDown = (e: React.KeyboardEvent) => { if (e.key === 'Enter' && !e.shiftKey) { e.preventDefault(); handleSend(); @@ -113,13 +122,15 @@ const ChatInterface: React.FC = () => {