Update pricing for gpt-5-nano and fix chat interface

- Update token pricing with actual gpt-5-nano-2025-08-07 prices:
  * Input: $0.05 per 1M = $0.00005 per 1K
  * Cached: $0.005 per 1M = $0.000005 per 1K
  * Output: $0.40 per 1M = $0.0004 per 1K
- Add cached_tokens support in OpenAI service
- Update cost calculation to use cached token pricing
- Add cached_tokens column to token_usage table (migration)
- Fix chat interface keyboard handling:
  * Send message on Enter key
  * New line on Shift+Enter
  * Change onKeyPress to onKeyDown for better support
- Add textarea auto-resize with maxHeight limit
- Improve responsive styles for mobile devices
- Add iOS-specific fixes (prevent zoom on input focus)
2026-01-27 20:18:42 +00:00 · 2026-01-27 20:18:42 +00:00 · c15f35a1df
commit c15f35a1df
parent d3aa58716d
9 changed files with 123 additions and 16 deletions
--- a/backend/.env.example
+++ b/backend/.env.example
@ -27,9 +27,11 @@ REDIS_URL=redis://localhost:6379/0
 RATE_LIMIT_PER_MINUTE=30
 RATE_LIMIT_PER_DAY=1000

-# Token Costs (USD per 1K tokens) - UPDATE WITH REAL PRICES FROM OPENAI PRICING PAGE
-# Example prices (UPDATE THESE):
-# For gpt-4o: Input $2.50 per 1M tokens = 0.0025 per 1K, Output $10.00 per 1M = 0.010 per 1K
-# For gpt-4o-mini: Input $0.15 per 1M = 0.00015 per 1K, Output $0.60 per 1M = 0.0006 per 1K
-PROMPT_TOKEN_COST=0.0001      # TODO: Update with actual price for your model
-COMPLETION_TOKEN_COST=0.0002  # TODO: Update with actual price for your model
+# Token Costs (USD per 1K tokens)
+# gpt-5-nano-2025-08-07 pricing:
+# - Input: $0.05 per 1M tokens = $0.00005 per 1K tokens
+# - Cached input: $0.005 per 1M tokens = $0.000005 per 1K tokens
+# - Output: $0.40 per 1M tokens = $0.0004 per 1K tokens
+PROMPT_TOKEN_COST=0.00005
+CACHED_PROMPT_TOKEN_COST=0.000005
+COMPLETION_TOKEN_COST=0.0004
--- a/backend/alembic/versions/db52d151a2a7_add_cached_tokens_to_token_usage.py
+++ b/backend/alembic/versions/db52d151a2a7_add_cached_tokens_to_token_usage.py
@ -0,0 +1,26 @@
+"""add_cached_tokens_to_token_usage
+
+Revision ID: db52d151a2a7
+Revises: 001_initial
+Create Date: 2026-01-27 20:16:32.601549
+
+"""
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = 'db52d151a2a7'
+down_revision = '001_initial'
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    # Add cached_tokens column to token_usage table
+    op.add_column('token_usage', sa.Column('cached_tokens', sa.Integer(), nullable=False, server_default='0'))
+
+
+def downgrade() -> None:
+    # Remove cached_tokens column from token_usage table
+    op.drop_column('token_usage', 'cached_tokens')
--- a/backend/app/config.py
+++ b/backend/app/config.py
@ -40,8 +40,10 @@ class Settings(BaseSettings):
    RATE_LIMIT_PER_DAY: int = 1000

    # Token Costs (USD per 1K tokens)
-    PROMPT_TOKEN_COST: float = 0.0001
-    COMPLETION_TOKEN_COST: float = 0.0002
+    # gpt-5-nano-2025-08-07 pricing
+    PROMPT_TOKEN_COST: float = 0.00005
+    CACHED_PROMPT_TOKEN_COST: float = 0.000005
+    COMPLETION_TOKEN_COST: float = 0.0004

    @property
    def cors_origins_list(self) -> List[str]:
--- a/backend/app/models/token_usage.py
+++ b/backend/app/models/token_usage.py
@ -38,6 +38,7 @@ class TokenUsage(Base):

    # Token counts
    prompt_tokens = Column(Integer, default=0, nullable=False)
+    cached_tokens = Column(Integer, default=0, nullable=False)  # Cached input tokens (charged at lower rate)
    completion_tokens = Column(Integer, default=0, nullable=False)
    total_tokens = Column(Integer, default=0, nullable=False)

--- a/backend/app/repositories/token_usage_repository.py
+++ b/backend/app/repositories/token_usage_repository.py
@ -32,6 +32,7 @@ class TokenUsageRepository(BaseRepository[TokenUsage]):
        model: str,
        cost_usd: Decimal,
        operation_type: str = "chat",
+        cached_tokens: int = 0,
        metadata: Optional[dict] = None
    ) -> TokenUsage:
        """
@ -47,6 +48,7 @@ class TokenUsageRepository(BaseRepository[TokenUsage]):
            model: Model name
            cost_usd: Cost in USD
            operation_type: Type of operation
+            cached_tokens: Number of cached input tokens
            metadata: Additional metadata

        Returns:
@ -57,12 +59,13 @@ class TokenUsageRepository(BaseRepository[TokenUsage]):
            conversation_id=conversation_id,
            message_id=message_id,
            prompt_tokens=prompt_tokens,
+            cached_tokens=cached_tokens,
            completion_tokens=completion_tokens,
            total_tokens=total_tokens,
            model=model,
            cost_usd=cost_usd,
            operation_type=operation_type,
-            metadata=metadata or {},
+            meta_data=metadata or {},
        )

    async def get_user_total_tokens(
--- a/backend/app/services/chat_service.py
+++ b/backend/app/services/chat_service.py
@ -219,7 +219,8 @@ class ChatService:
        # 6. Record token usage
        cost_usd = self._calculate_cost(
            prompt_tokens=openai_response["usage"]["prompt_tokens"],
-            completion_tokens=openai_response["usage"]["completion_tokens"]
+            completion_tokens=openai_response["usage"]["completion_tokens"],
+            cached_tokens=openai_response["usage"].get("cached_tokens", 0)
        )

        await self.token_repo.record_usage(
@ -227,6 +228,7 @@ class ChatService:
            conversation_id=conversation_id,
            message_id=assistant_message.id,
            prompt_tokens=openai_response["usage"]["prompt_tokens"],
+            cached_tokens=openai_response["usage"].get("cached_tokens", 0),
            completion_tokens=openai_response["usage"]["completion_tokens"],
            total_tokens=openai_response["usage"]["total_tokens"],
            model=settings.OPENAI_MODEL,
@ -391,18 +393,29 @@ class ChatService:
        # Rough estimate: ~4 characters per token
        return len(text) // 4

-    def _calculate_cost(self, prompt_tokens: int, completion_tokens: int) -> Decimal:
+    def _calculate_cost(
+        self,
+        prompt_tokens: int,
+        completion_tokens: int,
+        cached_tokens: int = 0
+    ) -> Decimal:
        """
        Calculate cost in USD

        Args:
-            prompt_tokens: Number of prompt tokens
+            prompt_tokens: Number of prompt tokens (total input tokens)
            completion_tokens: Number of completion tokens
+            cached_tokens: Number of cached input tokens (charged at lower rate)

        Returns:
            Total cost in USD
        """
-        prompt_cost = Decimal(str(prompt_tokens)) * Decimal(str(settings.PROMPT_TOKEN_COST)) / Decimal("1000")
+        # Calculate non-cached prompt tokens
+        non_cached_prompt_tokens = prompt_tokens - cached_tokens
+
+        # Calculate costs
+        prompt_cost = Decimal(str(non_cached_prompt_tokens)) * Decimal(str(settings.PROMPT_TOKEN_COST)) / Decimal("1000")
+        cached_cost = Decimal(str(cached_tokens)) * Decimal(str(settings.CACHED_PROMPT_TOKEN_COST)) / Decimal("1000")
        completion_cost = Decimal(str(completion_tokens)) * Decimal(str(settings.COMPLETION_TOKEN_COST)) / Decimal("1000")

-        return prompt_cost + completion_cost
+        return prompt_cost + cached_cost + completion_cost
--- a/backend/app/services/openai_service.py
+++ b/backend/app/services/openai_service.py
@ -171,11 +171,17 @@ Remember: When in doubt, DON'T answer. Say "I don't have this information in my
                        output_item.results
                    )

+        # Extract cached tokens if available
+        cached_tokens = 0
+        if hasattr(usage, 'input_tokens_details') and usage.input_tokens_details:
+            cached_tokens = getattr(usage.input_tokens_details, 'cached_tokens', 0)
+
        return {
            "response_id": response_id,
            "content": assistant_message or "",
            "usage": {
                "prompt_tokens": usage.input_tokens,
+                "cached_tokens": cached_tokens,
                "completion_tokens": usage.output_tokens,
                "total_tokens": usage.total_tokens
            },
--- a/frontend/src/components/ChatInterface.tsx
+++ b/frontend/src/components/ChatInterface.tsx
@ -19,12 +19,21 @@ const ChatInterface: React.FC = () => {

  const [messageText, setMessageText] = useState('');
  const messagesEndRef = useRef<HTMLDivElement>(null);
+  const textareaRef = useRef<HTMLTextAreaElement>(null);

  // Auto-scroll to bottom when new messages arrive
  useEffect(() => {
    messagesEndRef.current?.scrollIntoView({ behavior: 'smooth' });
  }, [messages]);

+  // Auto-resize textarea
+  useEffect(() => {
+    if (textareaRef.current) {
+      textareaRef.current.style.height = 'auto';
+      textareaRef.current.style.height = textareaRef.current.scrollHeight + 'px';
+    }
+  }, [messageText]);
+
  const handleSend = async () => {
    if (!messageText.trim() || isSending) return;

@ -41,7 +50,7 @@ const ChatInterface: React.FC = () => {
    }
  };

-  const handleKeyPress = (e: React.KeyboardEvent) => {
+  const handleKeyDown = (e: React.KeyboardEvent<HTMLTextAreaElement>) => {
    if (e.key === 'Enter' && !e.shiftKey) {
      e.preventDefault();
      handleSend();
@ -113,13 +122,15 @@ const ChatInterface: React.FC = () => {
      <div className="chat-input">
        <div className="input-wrapper">
          <textarea
+            ref={textareaRef}
            id="message"
            placeholder="Ask me about operations, policies, procedures..."
            rows={1}
            value={messageText}
            onChange={(e) => setMessageText(e.target.value)}
-            onKeyPress={handleKeyPress}
+            onKeyDown={handleKeyDown}
            disabled={isSending}
+            style={{ maxHeight: '200px', overflow: 'auto' }}
          />
        </div>
        <button
--- a/frontend/src/styles/theme.css
+++ b/frontend/src/styles/theme.css
@ -291,4 +291,47 @@ body {
  .header-info h1 {
    font-size: var(--font-size-xl);
  }
+
+  .chat-body {
+    padding: var(--spacing-md);
+  }
+
+  .chat-input {
+    padding: var(--spacing-md);
+    gap: var(--spacing-sm);
+  }
+
+  .input-wrapper textarea {
+    font-size: 16px; /* Prevents zoom on iOS */
+  }
+
+  .chat-input button {
+    padding: var(--spacing-md);
+    min-width: 70px;
+  }
+
+  .welcome-message {
+    padding: var(--spacing-lg);
+  }
+
+  .welcome-message h2 {
+    font-size: var(--font-size-xl);
+  }
+}
+
+/* Extra small devices */
+@media (max-width: 480px) {
+  .chat-input {
+    flex-direction: row;
+    align-items: flex-end;
+  }
+
+  .input-wrapper {
+    flex: 1;
+  }
+
+  .chat-input button {
+    height: 44px;
+    min-height: 44px;
+  }
 }