Update pricing for gpt-5-nano and fix chat interface

- Update token pricing with actual gpt-5-nano-2025-08-07 prices:
  * Input: $0.05 per 1M = $0.00005 per 1K
  * Cached: $0.005 per 1M = $0.000005 per 1K
  * Output: $0.40 per 1M = $0.0004 per 1K
- Add cached_tokens support in OpenAI service
- Update cost calculation to use cached token pricing
- Add cached_tokens column to token_usage table (migration)
- Fix chat interface keyboard handling:
  * Send message on Enter key
  * New line on Shift+Enter
  * Change onKeyPress to onKeyDown for better support
- Add textarea auto-resize with maxHeight limit
- Improve responsive styles for mobile devices
- Add iOS-specific fixes (prevent zoom on input focus)
This commit is contained in:
SamoilenkoVadym 2026-01-27 20:18:42 +00:00
parent d3aa58716d
commit c15f35a1df
9 changed files with 123 additions and 16 deletions

View file

@ -27,9 +27,11 @@ REDIS_URL=redis://localhost:6379/0
RATE_LIMIT_PER_MINUTE=30
RATE_LIMIT_PER_DAY=1000
# Token Costs (USD per 1K tokens) - UPDATE WITH REAL PRICES FROM OPENAI PRICING PAGE
# Example prices (UPDATE THESE):
# For gpt-4o: Input $2.50 per 1M tokens = 0.0025 per 1K, Output $10.00 per 1M = 0.010 per 1K
# For gpt-4o-mini: Input $0.15 per 1M = 0.00015 per 1K, Output $0.60 per 1M = 0.0006 per 1K
PROMPT_TOKEN_COST=0.0001 # TODO: Update with actual price for your model
COMPLETION_TOKEN_COST=0.0002 # TODO: Update with actual price for your model
# Token Costs (USD per 1K tokens)
# gpt-5-nano-2025-08-07 pricing:
# - Input: $0.05 per 1M tokens = $0.00005 per 1K tokens
# - Cached input: $0.005 per 1M tokens = $0.000005 per 1K tokens
# - Output: $0.40 per 1M tokens = $0.0004 per 1K tokens
PROMPT_TOKEN_COST=0.00005
CACHED_PROMPT_TOKEN_COST=0.000005
COMPLETION_TOKEN_COST=0.0004

View file

@ -0,0 +1,26 @@
"""add_cached_tokens_to_token_usage
Revision ID: db52d151a2a7
Revises: 001_initial
Create Date: 2026-01-27 20:16:32.601549
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
# `revision` is this migration's own ID; `down_revision` is the migration it
# revises (see "Revision ID" / "Revises" in the module docstring).
revision = 'db52d151a2a7'
down_revision = '001_initial'
# No branch labels or cross-migration dependencies are declared.
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Add the ``cached_tokens`` column to the ``token_usage`` table.

    The column is declared NOT NULL with a server-side default of ``'0'``,
    so the ALTER TABLE carries a default value alongside the constraint.
    """
    cached_tokens_column = sa.Column(
        'cached_tokens',
        sa.Integer(),
        nullable=False,
        server_default='0',
    )
    op.add_column('token_usage', cached_tokens_column)
def downgrade() -> None:
    """Revert this migration by dropping ``token_usage.cached_tokens``."""
    table_name, column_name = 'token_usage', 'cached_tokens'
    op.drop_column(table_name, column_name)

View file

@ -40,8 +40,10 @@ class Settings(BaseSettings):
RATE_LIMIT_PER_DAY: int = 1000
# Token Costs (USD per 1K tokens)
PROMPT_TOKEN_COST: float = 0.0001
COMPLETION_TOKEN_COST: float = 0.0002
# gpt-5-nano-2025-08-07 pricing
PROMPT_TOKEN_COST: float = 0.00005
CACHED_PROMPT_TOKEN_COST: float = 0.000005
COMPLETION_TOKEN_COST: float = 0.0004
@property
def cors_origins_list(self) -> List[str]:

View file

@ -38,6 +38,7 @@ class TokenUsage(Base):
# Token counts
prompt_tokens = Column(Integer, default=0, nullable=False)
cached_tokens = Column(Integer, default=0, nullable=False) # Cached input tokens (charged at lower rate)
completion_tokens = Column(Integer, default=0, nullable=False)
total_tokens = Column(Integer, default=0, nullable=False)

View file

@ -32,6 +32,7 @@ class TokenUsageRepository(BaseRepository[TokenUsage]):
model: str,
cost_usd: Decimal,
operation_type: str = "chat",
cached_tokens: int = 0,
metadata: Optional[dict] = None
) -> TokenUsage:
"""
@ -47,6 +48,7 @@ class TokenUsageRepository(BaseRepository[TokenUsage]):
model: Model name
cost_usd: Cost in USD
operation_type: Type of operation
cached_tokens: Number of cached input tokens
metadata: Additional metadata
Returns:
@ -57,12 +59,13 @@ class TokenUsageRepository(BaseRepository[TokenUsage]):
conversation_id=conversation_id,
message_id=message_id,
prompt_tokens=prompt_tokens,
cached_tokens=cached_tokens,
completion_tokens=completion_tokens,
total_tokens=total_tokens,
model=model,
cost_usd=cost_usd,
operation_type=operation_type,
metadata=metadata or {},
meta_data=metadata or {},
)
async def get_user_total_tokens(

View file

@ -219,7 +219,8 @@ class ChatService:
# 6. Record token usage
cost_usd = self._calculate_cost(
prompt_tokens=openai_response["usage"]["prompt_tokens"],
completion_tokens=openai_response["usage"]["completion_tokens"]
completion_tokens=openai_response["usage"]["completion_tokens"],
cached_tokens=openai_response["usage"].get("cached_tokens", 0)
)
await self.token_repo.record_usage(
@ -227,6 +228,7 @@ class ChatService:
conversation_id=conversation_id,
message_id=assistant_message.id,
prompt_tokens=openai_response["usage"]["prompt_tokens"],
cached_tokens=openai_response["usage"].get("cached_tokens", 0),
completion_tokens=openai_response["usage"]["completion_tokens"],
total_tokens=openai_response["usage"]["total_tokens"],
model=settings.OPENAI_MODEL,
@ -391,18 +393,29 @@ class ChatService:
# Rough estimate: ~4 characters per token
return len(text) // 4
def _calculate_cost(self, prompt_tokens: int, completion_tokens: int) -> Decimal:
def _calculate_cost(
self,
prompt_tokens: int,
completion_tokens: int,
cached_tokens: int = 0
) -> Decimal:
"""
Calculate cost in USD
Args:
prompt_tokens: Number of prompt tokens
prompt_tokens: Number of prompt tokens (total input tokens)
completion_tokens: Number of completion tokens
cached_tokens: Number of cached input tokens (charged at lower rate)
Returns:
Total cost in USD
"""
prompt_cost = Decimal(str(prompt_tokens)) * Decimal(str(settings.PROMPT_TOKEN_COST)) / Decimal("1000")
# Calculate non-cached prompt tokens
non_cached_prompt_tokens = prompt_tokens - cached_tokens
# Calculate costs
prompt_cost = Decimal(str(non_cached_prompt_tokens)) * Decimal(str(settings.PROMPT_TOKEN_COST)) / Decimal("1000")
cached_cost = Decimal(str(cached_tokens)) * Decimal(str(settings.CACHED_PROMPT_TOKEN_COST)) / Decimal("1000")
completion_cost = Decimal(str(completion_tokens)) * Decimal(str(settings.COMPLETION_TOKEN_COST)) / Decimal("1000")
return prompt_cost + completion_cost
return prompt_cost + cached_cost + completion_cost

View file

@ -171,11 +171,17 @@ Remember: When in doubt, DON'T answer. Say "I don't have this information in my
output_item.results
)
# Extract cached tokens if available
cached_tokens = 0
if hasattr(usage, 'input_tokens_details') and usage.input_tokens_details:
cached_tokens = getattr(usage.input_tokens_details, 'cached_tokens', 0)
return {
"response_id": response_id,
"content": assistant_message or "",
"usage": {
"prompt_tokens": usage.input_tokens,
"cached_tokens": cached_tokens,
"completion_tokens": usage.output_tokens,
"total_tokens": usage.total_tokens
},

View file

@ -19,12 +19,21 @@ const ChatInterface: React.FC = () => {
const [messageText, setMessageText] = useState('');
const messagesEndRef = useRef<HTMLDivElement>(null);
const textareaRef = useRef<HTMLTextAreaElement>(null);
// Auto-scroll to bottom when new messages arrive
useEffect(() => {
messagesEndRef.current?.scrollIntoView({ behavior: 'smooth' });
}, [messages]);
// Auto-resize textarea
useEffect(() => {
if (textareaRef.current) {
textareaRef.current.style.height = 'auto';
textareaRef.current.style.height = textareaRef.current.scrollHeight + 'px';
}
}, [messageText]);
const handleSend = async () => {
if (!messageText.trim() || isSending) return;
@ -41,7 +50,7 @@ const ChatInterface: React.FC = () => {
}
};
const handleKeyPress = (e: React.KeyboardEvent) => {
const handleKeyDown = (e: React.KeyboardEvent<HTMLTextAreaElement>) => {
if (e.key === 'Enter' && !e.shiftKey) {
e.preventDefault();
handleSend();
@ -113,13 +122,15 @@ const ChatInterface: React.FC = () => {
<div className="chat-input">
<div className="input-wrapper">
<textarea
ref={textareaRef}
id="message"
placeholder="Ask me about operations, policies, procedures..."
rows={1}
value={messageText}
onChange={(e) => setMessageText(e.target.value)}
onKeyPress={handleKeyPress}
onKeyDown={handleKeyDown}
disabled={isSending}
style={{ maxHeight: '200px', overflow: 'auto' }}
/>
</div>
<button

View file

@ -291,4 +291,47 @@ body {
.header-info h1 {
font-size: var(--font-size-xl);
}
.chat-body {
padding: var(--spacing-md);
}
.chat-input {
padding: var(--spacing-md);
gap: var(--spacing-sm);
}
.input-wrapper textarea {
font-size: 16px; /* Prevents zoom on iOS */
}
.chat-input button {
padding: var(--spacing-md);
min-width: 70px;
}
.welcome-message {
padding: var(--spacing-lg);
}
.welcome-message h2 {
font-size: var(--font-size-xl);
}
}
/* Extra small devices */
@media (max-width: 480px) {
/* Lay the input area out as a row, with items aligned to the bottom edge. */
.chat-input {
flex-direction: row;
align-items: flex-end;
}
/* Let the input wrapper grow to fill free space (assumes .chat-input is a
   flex container declared elsewhere in the stylesheet; confirm). */
.input-wrapper {
flex: 1;
}
/* Pin the button to 44px tall; presumably a minimum touch-target size for
   small screens (confirm against the design guidelines in use). */
.chat-input button {
height: 44px;
min-height: 44px;
}
}