Update pricing for gpt-5-nano and fix chat interface
- Update token pricing with actual gpt-5-nano-2025-08-07 prices:
  * Input: $0.05 per 1M = $0.00005 per 1K
  * Cached: $0.005 per 1M = $0.000005 per 1K
  * Output: $0.40 per 1M = $0.0004 per 1K
- Add cached_tokens support in OpenAI service
- Update cost calculation to use cached token pricing
- Add cached_tokens column to token_usage table (migration)
- Fix chat interface keyboard handling:
  * Send message on Enter key
  * New line on Shift+Enter
  * Change onKeyPress to onKeyDown for better support
- Add textarea auto-resize with maxHeight limit
- Improve responsive styles for mobile devices
- Add iOS-specific fixes (prevent zoom on input focus)
This commit is contained in:
parent
d3aa58716d
commit
c15f35a1df
9 changed files with 123 additions and 16 deletions
|
|
@ -27,9 +27,11 @@ REDIS_URL=redis://localhost:6379/0
|
|||
RATE_LIMIT_PER_MINUTE=30
|
||||
RATE_LIMIT_PER_DAY=1000
|
||||
|
||||
# Token Costs (USD per 1K tokens) - UPDATE WITH REAL PRICES FROM OPENAI PRICING PAGE
|
||||
# Example prices (UPDATE THESE):
|
||||
# For gpt-4o: Input $2.50 per 1M tokens = 0.0025 per 1K, Output $10.00 per 1M = 0.010 per 1K
|
||||
# For gpt-4o-mini: Input $0.15 per 1M = 0.00015 per 1K, Output $0.60 per 1M = 0.0006 per 1K
|
||||
PROMPT_TOKEN_COST=0.0001 # TODO: Update with actual price for your model
|
||||
COMPLETION_TOKEN_COST=0.0002 # TODO: Update with actual price for your model
|
||||
# Token Costs (USD per 1K tokens)
|
||||
# gpt-5-nano-2025-08-07 pricing:
|
||||
# - Input: $0.05 per 1M tokens = $0.00005 per 1K tokens
|
||||
# - Cached input: $0.005 per 1M tokens = $0.000005 per 1K tokens
|
||||
# - Output: $0.40 per 1M tokens = $0.0004 per 1K tokens
|
||||
PROMPT_TOKEN_COST=0.00005
|
||||
CACHED_PROMPT_TOKEN_COST=0.000005
|
||||
COMPLETION_TOKEN_COST=0.0004
|
||||
|
|
|
|||
|
|
@ -0,0 +1,26 @@
|
|||
"""add_cached_tokens_to_token_usage
|
||||
|
||||
Revision ID: db52d151a2a7
|
||||
Revises: 001_initial
|
||||
Create Date: 2026-01-27 20:16:32.601549
|
||||
|
||||
"""
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = 'db52d151a2a7'
|
||||
down_revision = '001_initial'
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Apply the migration: add ``cached_tokens`` to the ``token_usage`` table.

    The column is a non-nullable integer with a server-side default of
    ``'0'``, so the database supplies a value for it when none is given.
    """
    cached_tokens_column = sa.Column(
        'cached_tokens',
        sa.Integer(),
        nullable=False,
        server_default='0',
    )
    op.add_column('token_usage', cached_tokens_column)
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Revert the migration: drop ``cached_tokens`` from ``token_usage``."""
    table_name, column_name = 'token_usage', 'cached_tokens'
    op.drop_column(table_name, column_name)
|
||||
|
|
@ -40,8 +40,10 @@ class Settings(BaseSettings):
|
|||
RATE_LIMIT_PER_DAY: int = 1000
|
||||
|
||||
# Token Costs (USD per 1K tokens)
|
||||
PROMPT_TOKEN_COST: float = 0.0001
|
||||
COMPLETION_TOKEN_COST: float = 0.0002
|
||||
# gpt-5-nano-2025-08-07 pricing
|
||||
PROMPT_TOKEN_COST: float = 0.00005
|
||||
CACHED_PROMPT_TOKEN_COST: float = 0.000005
|
||||
COMPLETION_TOKEN_COST: float = 0.0004
|
||||
|
||||
@property
|
||||
def cors_origins_list(self) -> List[str]:
|
||||
|
|
|
|||
|
|
@ -38,6 +38,7 @@ class TokenUsage(Base):
|
|||
|
||||
# Token counts
|
||||
prompt_tokens = Column(Integer, default=0, nullable=False)
|
||||
cached_tokens = Column(Integer, default=0, nullable=False) # Cached input tokens (charged at lower rate)
|
||||
completion_tokens = Column(Integer, default=0, nullable=False)
|
||||
total_tokens = Column(Integer, default=0, nullable=False)
|
||||
|
||||
|
|
|
|||
|
|
@ -32,6 +32,7 @@ class TokenUsageRepository(BaseRepository[TokenUsage]):
|
|||
model: str,
|
||||
cost_usd: Decimal,
|
||||
operation_type: str = "chat",
|
||||
cached_tokens: int = 0,
|
||||
metadata: Optional[dict] = None
|
||||
) -> TokenUsage:
|
||||
"""
|
||||
|
|
@ -47,6 +48,7 @@ class TokenUsageRepository(BaseRepository[TokenUsage]):
|
|||
model: Model name
|
||||
cost_usd: Cost in USD
|
||||
operation_type: Type of operation
|
||||
cached_tokens: Number of cached input tokens
|
||||
metadata: Additional metadata
|
||||
|
||||
Returns:
|
||||
|
|
@ -57,12 +59,13 @@ class TokenUsageRepository(BaseRepository[TokenUsage]):
|
|||
conversation_id=conversation_id,
|
||||
message_id=message_id,
|
||||
prompt_tokens=prompt_tokens,
|
||||
cached_tokens=cached_tokens,
|
||||
completion_tokens=completion_tokens,
|
||||
total_tokens=total_tokens,
|
||||
model=model,
|
||||
cost_usd=cost_usd,
|
||||
operation_type=operation_type,
|
||||
metadata=metadata or {},
|
||||
meta_data=metadata or {},
|
||||
)
|
||||
|
||||
async def get_user_total_tokens(
|
||||
|
|
|
|||
|
|
@ -219,7 +219,8 @@ class ChatService:
|
|||
# 6. Record token usage
|
||||
cost_usd = self._calculate_cost(
|
||||
prompt_tokens=openai_response["usage"]["prompt_tokens"],
|
||||
completion_tokens=openai_response["usage"]["completion_tokens"]
|
||||
completion_tokens=openai_response["usage"]["completion_tokens"],
|
||||
cached_tokens=openai_response["usage"].get("cached_tokens", 0)
|
||||
)
|
||||
|
||||
await self.token_repo.record_usage(
|
||||
|
|
@ -227,6 +228,7 @@ class ChatService:
|
|||
conversation_id=conversation_id,
|
||||
message_id=assistant_message.id,
|
||||
prompt_tokens=openai_response["usage"]["prompt_tokens"],
|
||||
cached_tokens=openai_response["usage"].get("cached_tokens", 0),
|
||||
completion_tokens=openai_response["usage"]["completion_tokens"],
|
||||
total_tokens=openai_response["usage"]["total_tokens"],
|
||||
model=settings.OPENAI_MODEL,
|
||||
|
|
@ -391,18 +393,29 @@ class ChatService:
|
|||
# Rough estimate: ~4 characters per token
|
||||
return len(text) // 4
|
||||
|
||||
def _calculate_cost(
    self,
    prompt_tokens: int,
    completion_tokens: int,
    cached_tokens: int = 0
) -> Decimal:
    """
    Calculate cost in USD

    Cached input tokens are a subset of ``prompt_tokens``: they are billed at
    ``settings.CACHED_PROMPT_TOKEN_COST`` and the remaining input tokens at
    ``settings.PROMPT_TOKEN_COST``. All configured rates are per 1K tokens.

    Args:
        prompt_tokens: Number of prompt tokens (total input tokens)
        completion_tokens: Number of completion tokens
        cached_tokens: Number of cached input tokens (charged at lower rate).
            ``None`` or a negative value is treated as 0, and the value is
            capped at ``prompt_tokens``.

    Returns:
        Total cost in USD
    """
    # The usage payload can carry a missing/None cached count; treat it as
    # "nothing cached" instead of failing on the subtraction below.
    cached = max(cached_tokens or 0, 0)
    # Never bill more cached tokens than there were input tokens, otherwise
    # the non-cached share would go negative and understate the cost.
    cached = min(cached, max(prompt_tokens, 0))
    non_cached_prompt_tokens = prompt_tokens - cached

    # Rates are floats in settings; go through str() so the Decimal holds the
    # configured literal rather than the binary float expansion.
    per_thousand = Decimal("1000")
    prompt_rate = Decimal(str(settings.PROMPT_TOKEN_COST))
    cached_rate = Decimal(str(settings.CACHED_PROMPT_TOKEN_COST))
    completion_rate = Decimal(str(settings.COMPLETION_TOKEN_COST))

    prompt_cost = Decimal(non_cached_prompt_tokens) * prompt_rate / per_thousand
    cached_cost = Decimal(cached) * cached_rate / per_thousand
    completion_cost = Decimal(completion_tokens) * completion_rate / per_thousand

    return prompt_cost + cached_cost + completion_cost
|
||||
|
|
|
|||
|
|
@ -171,11 +171,17 @@ Remember: When in doubt, DON'T answer. Say "I don't have this information in my
|
|||
output_item.results
|
||||
)
|
||||
|
||||
# Extract cached tokens if available
|
||||
cached_tokens = 0
|
||||
if hasattr(usage, 'input_tokens_details') and usage.input_tokens_details:
|
||||
cached_tokens = getattr(usage.input_tokens_details, 'cached_tokens', 0)
|
||||
|
||||
return {
|
||||
"response_id": response_id,
|
||||
"content": assistant_message or "",
|
||||
"usage": {
|
||||
"prompt_tokens": usage.input_tokens,
|
||||
"cached_tokens": cached_tokens,
|
||||
"completion_tokens": usage.output_tokens,
|
||||
"total_tokens": usage.total_tokens
|
||||
},
|
||||
|
|
|
|||
|
|
@ -19,12 +19,21 @@ const ChatInterface: React.FC = () => {
|
|||
|
||||
const [messageText, setMessageText] = useState('');
|
||||
const messagesEndRef = useRef<HTMLDivElement>(null);
|
||||
const textareaRef = useRef<HTMLTextAreaElement>(null);
|
||||
|
||||
// Auto-scroll to bottom when new messages arrive
|
||||
useEffect(() => {
|
||||
messagesEndRef.current?.scrollIntoView({ behavior: 'smooth' });
|
||||
}, [messages]);
|
||||
|
||||
// Auto-resize textarea
|
||||
useEffect(() => {
|
||||
if (textareaRef.current) {
|
||||
textareaRef.current.style.height = 'auto';
|
||||
textareaRef.current.style.height = textareaRef.current.scrollHeight + 'px';
|
||||
}
|
||||
}, [messageText]);
|
||||
|
||||
const handleSend = async () => {
|
||||
if (!messageText.trim() || isSending) return;
|
||||
|
||||
|
|
@ -41,7 +50,7 @@ const ChatInterface: React.FC = () => {
|
|||
}
|
||||
};
|
||||
|
||||
const handleKeyPress = (e: React.KeyboardEvent) => {
|
||||
const handleKeyDown = (e: React.KeyboardEvent<HTMLTextAreaElement>) => {
|
||||
if (e.key === 'Enter' && !e.shiftKey) {
|
||||
e.preventDefault();
|
||||
handleSend();
|
||||
|
|
@ -113,13 +122,15 @@ const ChatInterface: React.FC = () => {
|
|||
<div className="chat-input">
|
||||
<div className="input-wrapper">
|
||||
<textarea
|
||||
ref={textareaRef}
|
||||
id="message"
|
||||
placeholder="Ask me about operations, policies, procedures..."
|
||||
rows={1}
|
||||
value={messageText}
|
||||
onChange={(e) => setMessageText(e.target.value)}
|
||||
onKeyPress={handleKeyPress}
|
||||
onKeyDown={handleKeyDown}
|
||||
disabled={isSending}
|
||||
style={{ maxHeight: '200px', overflow: 'auto' }}
|
||||
/>
|
||||
</div>
|
||||
<button
|
||||
|
|
|
|||
|
|
@ -291,4 +291,47 @@ body {
|
|||
.header-info h1 {
|
||||
font-size: var(--font-size-xl);
|
||||
}
|
||||
|
||||
.chat-body {
|
||||
padding: var(--spacing-md);
|
||||
}
|
||||
|
||||
.chat-input {
|
||||
padding: var(--spacing-md);
|
||||
gap: var(--spacing-sm);
|
||||
}
|
||||
|
||||
.input-wrapper textarea {
|
||||
font-size: 16px; /* Prevents zoom on iOS */
|
||||
}
|
||||
|
||||
.chat-input button {
|
||||
padding: var(--spacing-md);
|
||||
min-width: 70px;
|
||||
}
|
||||
|
||||
.welcome-message {
|
||||
padding: var(--spacing-lg);
|
||||
}
|
||||
|
||||
.welcome-message h2 {
|
||||
font-size: var(--font-size-xl);
|
||||
}
|
||||
}
|
||||
|
||||
/* Extra small devices */
|
||||
@media (max-width: 480px) {
|
||||
.chat-input {
|
||||
flex-direction: row;
|
||||
align-items: flex-end;
|
||||
}
|
||||
|
||||
.input-wrapper {
|
||||
flex: 1;
|
||||
}
|
||||
|
||||
.chat-input button {
|
||||
height: 44px;
|
||||
min-height: 44px;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue