diff --git a/backend/.env.example b/backend/.env.example
index 6f63b0f..3dbdc5a 100644
--- a/backend/.env.example
+++ b/backend/.env.example
@@ -27,9 +27,11 @@ REDIS_URL=redis://localhost:6379/0
 RATE_LIMIT_PER_MINUTE=30
 RATE_LIMIT_PER_DAY=1000
 
-# Token Costs (USD per 1K tokens) - UPDATE WITH REAL PRICES FROM OPENAI PRICING PAGE
-# Example prices (UPDATE THESE):
-# For gpt-4o: Input $2.50 per 1M tokens = 0.0025 per 1K, Output $10.00 per 1M = 0.010 per 1K
-# For gpt-4o-mini: Input $0.15 per 1M = 0.00015 per 1K, Output $0.60 per 1M = 0.0006 per 1K
-PROMPT_TOKEN_COST=0.0001      # TODO: Update with actual price for your model
-COMPLETION_TOKEN_COST=0.0002  # TODO: Update with actual price for your model
+# Token Costs (USD per 1K tokens)
+# gpt-5-nano-2025-08-07 pricing (update all three values below if OPENAI_MODEL changes):
+# - Input: $0.05 per 1M tokens = $0.00005 per 1K tokens
+# - Cached input: $0.005 per 1M tokens = $0.000005 per 1K tokens
+# - Output: $0.40 per 1M tokens = $0.0004 per 1K tokens
+PROMPT_TOKEN_COST=0.00005
+CACHED_PROMPT_TOKEN_COST=0.000005
+COMPLETION_TOKEN_COST=0.0004
diff --git a/backend/alembic/versions/db52d151a2a7_add_cached_tokens_to_token_usage.py b/backend/alembic/versions/db52d151a2a7_add_cached_tokens_to_token_usage.py
new file mode 100644
index 0000000..7fe7d69
--- /dev/null
+++ b/backend/alembic/versions/db52d151a2a7_add_cached_tokens_to_token_usage.py
@@ -0,0 +1,26 @@
+"""add_cached_tokens_to_token_usage
+
+Revision ID: db52d151a2a7
+Revises: 001_initial
+Create Date: 2026-01-27 20:16:32.601549
+
+"""
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = 'db52d151a2a7'
+down_revision = '001_initial'  # parent revision this migration is applied on top of
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    """Add the NOT NULL integer column token_usage.cached_tokens with a server-side default of 0."""
+    op.add_column(table_name='token_usage', column=sa.Column('cached_tokens', sa.Integer(), server_default='0', nullable=False))
+
+
+def downgrade() -> None:
+    """Drop token_usage.cached_tokens, reverting upgrade(); the stored counts are discarded."""
+    op.drop_column(table_name='token_usage', column_name='cached_tokens')
diff --git a/backend/app/config.py b/backend/app/config.py
index 2b99526..90cddf4 100644
--- a/backend/app/config.py
+++ b/backend/app/config.py
@@ -40,8 +40,10 @@ class Settings(BaseSettings):
     RATE_LIMIT_PER_DAY: int = 1000
 
     # Token Costs (USD per 1K tokens)
-    PROMPT_TOKEN_COST: float = 0.0001
-    COMPLETION_TOKEN_COST: float = 0.0002
+    # gpt-5-nano-2025-08-07 pricing
+    PROMPT_TOKEN_COST: float = 0.00005
+    CACHED_PROMPT_TOKEN_COST: float = 0.000005
+    COMPLETION_TOKEN_COST: float = 0.0004
 
     @property
     def cors_origins_list(self) -> List[str]:
diff --git a/backend/app/models/token_usage.py b/backend/app/models/token_usage.py
index 79bd51e..570ab22 100644
--- a/backend/app/models/token_usage.py
+++ b/backend/app/models/token_usage.py
@@ -38,6 +38,7 @@ class TokenUsage(Base):
 
     # Token counts
     prompt_tokens = Column(Integer, default=0, nullable=False)
+    cached_tokens = Column(Integer, default=0, nullable=False)  # Cached input tokens (charged at lower rate)
     completion_tokens = Column(Integer, default=0, nullable=False)
     total_tokens = Column(Integer, default=0, nullable=False)
 
diff --git a/backend/app/repositories/token_usage_repository.py b/backend/app/repositories/token_usage_repository.py
index 45ae6dd..7b0658d 100644
--- a/backend/app/repositories/token_usage_repository.py
+++ b/backend/app/repositories/token_usage_repository.py
@@ -32,6 +32,7 @@ class TokenUsageRepository(BaseRepository[TokenUsage]):
         model: str,
         cost_usd: Decimal,
         operation_type: str = "chat",
+        cached_tokens: int = 0,
         metadata: Optional[dict] = None
     ) -> TokenUsage:
         """
@@ -47,6 +48,7 @@ class TokenUsageRepository(BaseRepository[TokenUsage]):
             model: Model name
             cost_usd: Cost in USD
             operation_type: Type of operation
+            cached_tokens: Number of cached input tokens
             metadata: Additional metadata
 
         Returns:
@@ -57,12 +59,13 @@ class TokenUsageRepository(BaseRepository[TokenUsage]):
             conversation_id=conversation_id,
             message_id=message_id,
             prompt_tokens=prompt_tokens,
+            cached_tokens=cached_tokens,
             completion_tokens=completion_tokens,
             total_tokens=total_tokens,
             model=model,
             cost_usd=cost_usd,
             operation_type=operation_type,
-            metadata=metadata or {},
+            meta_data=metadata or {},
         )
 
     async def get_user_total_tokens(
diff --git a/backend/app/services/chat_service.py b/backend/app/services/chat_service.py
index 30ac746..c5cd6f9 100644
--- a/backend/app/services/chat_service.py
+++ b/backend/app/services/chat_service.py
@@ -219,7 +219,8 @@ class ChatService:
         # 6. Record token usage
         cost_usd = self._calculate_cost(
             prompt_tokens=openai_response["usage"]["prompt_tokens"],
-            completion_tokens=openai_response["usage"]["completion_tokens"]
+            completion_tokens=openai_response["usage"]["completion_tokens"],
+            cached_tokens=openai_response["usage"].get("cached_tokens", 0)
         )
 
         await self.token_repo.record_usage(
@@ -227,6 +228,7 @@ class ChatService:
             conversation_id=conversation_id,
             message_id=assistant_message.id,
             prompt_tokens=openai_response["usage"]["prompt_tokens"],
+            cached_tokens=openai_response["usage"].get("cached_tokens", 0),
             completion_tokens=openai_response["usage"]["completion_tokens"],
             total_tokens=openai_response["usage"]["total_tokens"],
             model=settings.OPENAI_MODEL,
@@ -391,18 +393,29 @@ class ChatService:
         # Rough estimate: ~4 characters per token
         return len(text) // 4
 
-    def _calculate_cost(self, prompt_tokens: int, completion_tokens: int) -> Decimal:
+    def _calculate_cost(
+        self,
+        prompt_tokens: int,
+        completion_tokens: int,
+        cached_tokens: int = 0
+    ) -> Decimal:
         """
         Calculate cost in USD
 
         Args:
-            prompt_tokens: Number of prompt tokens
+            prompt_tokens: Total input tokens, including any cached ones
             completion_tokens: Number of completion tokens
+            cached_tokens: Cached subset of prompt_tokens (lower rate); clamped to [0, prompt_tokens]
 
         Returns:
             Total cost in USD
         """
-        prompt_cost = Decimal(str(prompt_tokens)) * Decimal(str(settings.PROMPT_TOKEN_COST)) / Decimal("1000")
+        # Clamp: an inconsistent cached count must never produce a negative line item
+        cached_tokens = max(0, min(cached_tokens, prompt_tokens))
+        non_cached_prompt_tokens = prompt_tokens - cached_tokens
+        # Rates are USD per 1K tokens, hence the division by 1000
+        prompt_cost = Decimal(str(non_cached_prompt_tokens)) * Decimal(str(settings.PROMPT_TOKEN_COST)) / Decimal("1000")
+        cached_cost = Decimal(str(cached_tokens)) * Decimal(str(settings.CACHED_PROMPT_TOKEN_COST)) / Decimal("1000")
         completion_cost = Decimal(str(completion_tokens)) * Decimal(str(settings.COMPLETION_TOKEN_COST)) / Decimal("1000")
 
-        return prompt_cost + completion_cost
+        return prompt_cost + cached_cost + completion_cost
diff --git a/backend/app/services/openai_service.py b/backend/app/services/openai_service.py
index 275217b..d20cf11 100644
--- a/backend/app/services/openai_service.py
+++ b/backend/app/services/openai_service.py
@@ -171,11 +171,17 @@ Remember: When in doubt, DON'T answer. Say "I don't have this information in my
                         output_item.results
                     )
 
+        # Extract cached tokens if available
+        cached_tokens = 0
+        if hasattr(usage, 'input_tokens_details') and usage.input_tokens_details:
+            cached_tokens = getattr(usage.input_tokens_details, 'cached_tokens', 0)
+
         return {
             "response_id": response_id,
             "content": assistant_message or "",
             "usage": {
                 "prompt_tokens": usage.input_tokens,
+                "cached_tokens": cached_tokens,
                 "completion_tokens": usage.output_tokens,
                 "total_tokens": usage.total_tokens
             },
diff --git a/frontend/src/components/ChatInterface.tsx b/frontend/src/components/ChatInterface.tsx
index 8029542..0727b23 100644
--- a/frontend/src/components/ChatInterface.tsx
+++ b/frontend/src/components/ChatInterface.tsx
@@ -19,12 +19,21 @@ const ChatInterface: React.FC = () => {
 
   const [messageText, setMessageText] = useState('');
   const messagesEndRef = useRef<HTMLDivElement>(null);
+  const textareaRef = useRef<HTMLTextAreaElement>(null);
 
   // Auto-scroll to bottom when new messages arrive
   useEffect(() => {
     messagesEndRef.current?.scrollIntoView({ behavior: 'smooth' });
   }, [messages]);
 
+  // Grow/shrink the textarea to fit its content whenever the draft text changes
+  useEffect(() => {
+    const textarea = textareaRef.current;
+    if (!textarea) return;
+    textarea.style.height = 'auto'; // reset first so the box can shrink as well as grow
+    textarea.style.height = `${textarea.scrollHeight}px`;
+  }, [messageText]);
+
   const handleSend = async () => {
     if (!messageText.trim() || isSending) return;
 
@@ -41,7 +50,7 @@ const ChatInterface: React.FC = () => {
     }
   };
 
-  const handleKeyPress = (e: React.KeyboardEvent) => {
-    if (e.key === 'Enter' && !e.shiftKey) {
+  const handleKeyDown = (e: React.KeyboardEvent<HTMLTextAreaElement>) => {
+    if (e.key === 'Enter' && !e.shiftKey && !e.nativeEvent.isComposing) { // ignore the Enter that confirms an IME composition
       e.preventDefault();
       handleSend();
@@ -113,13 +122,15 @@ const ChatInterface: React.FC = () => {
       <div className="chat-input">
         <div className="input-wrapper">
           <textarea
+            ref={textareaRef}
             id="message"
             placeholder="Ask me about operations, policies, procedures..."
             rows={1}
             value={messageText}
             onChange={(e) => setMessageText(e.target.value)}
-            onKeyPress={handleKeyPress}
+            onKeyDown={handleKeyDown}
             disabled={isSending}
+            style={{ maxHeight: '200px', overflow: 'auto' }}
           />
         </div>
         <button
diff --git a/frontend/src/styles/theme.css b/frontend/src/styles/theme.css
index 170365f..5c788c5 100644
--- a/frontend/src/styles/theme.css
+++ b/frontend/src/styles/theme.css
@@ -291,4 +291,47 @@ body {
   .header-info h1 {
     font-size: var(--font-size-xl);
   }
+
+  .chat-body {
+    padding: var(--spacing-md);
+  }
+
+  .chat-input {
+    padding: var(--spacing-md);
+    gap: var(--spacing-sm);
+  }
+
+  .input-wrapper textarea {
+    font-size: 16px; /* Prevents zoom on iOS */
+  }
+
+  .chat-input button {
+    padding: var(--spacing-md);
+    min-width: 70px;
+  }
+
+  .welcome-message {
+    padding: var(--spacing-lg);
+  }
+
+  .welcome-message h2 {
+    font-size: var(--font-size-xl);
+  }
+}
+
+/* Extra small devices (viewport width <= 480px) */
+@media (max-width: 480px) {
+  .chat-input {
+    flex-direction: row;
+    align-items: flex-end;
+  }
+
+  .input-wrapper {
+    flex: 1;
+  }
+
+  .chat-input button {
+    height: 44px; /* NOTE(review): presumably a minimum touch-target size — confirm */
+    min-height: 44px;
+  }
 }