feat(fastapi): use dirtyjson, a more tolerant JSON loader that parses malformed ("dirty") JSON
This commit is contained in:
parent
6420d4638a
commit
3c5ba63309
11 changed files with 321 additions and 232 deletions
|
|
@ -29,7 +29,7 @@ RUN curl -fsSL https://ollama.com/install.sh | sh
|
|||
# Install dependencies for FastAPI
|
||||
RUN pip install aiohttp aiomysql aiosqlite asyncpg fastapi[standard] \
|
||||
pathvalidate pdfplumber chromadb sqlmodel \
|
||||
anthropic google-genai openai fastmcp
|
||||
anthropic google-genai openai fastmcp dirtyjson
|
||||
RUN pip install docling --extra-index-url https://download.pytorch.org/whl/cpu
|
||||
|
||||
# Install dependencies for Next.js
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@ RUN curl -fsSL https://ollama.com/install.sh | sh
|
|||
# Install dependencies for FastAPI
|
||||
RUN pip install aiohttp aiomysql aiosqlite asyncpg fastapi[standard] \
|
||||
pathvalidate pdfplumber chromadb sqlmodel \
|
||||
anthropic google-genai openai fastmcp
|
||||
anthropic google-genai openai fastmcp dirtyjson
|
||||
RUN pip install docling --extra-index-url https://download.pytorch.org/whl/cpu
|
||||
|
||||
# Install dependencies for Next.js
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ import asyncio
|
|||
import json
|
||||
import math
|
||||
import uuid
|
||||
import dirtyjson
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from fastapi.responses import StreamingResponse
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
|
@ -82,7 +83,7 @@ async def stream_outlines(
|
|||
presentation_outlines_text += chunk
|
||||
|
||||
try:
|
||||
presentation_outlines_json = json.loads(presentation_outlines_text)
|
||||
presentation_outlines_json = dict(dirtyjson.loads(presentation_outlines_text))
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ import math
|
|||
import os
|
||||
import random
|
||||
from typing import Annotated, List, Literal, Optional
|
||||
import dirtyjson
|
||||
from fastapi import APIRouter, Body, Depends, HTTPException
|
||||
from fastapi.responses import StreamingResponse
|
||||
from sqlalchemy import delete
|
||||
|
|
@ -486,7 +487,7 @@ async def generate_presentation_api(
|
|||
presentation_outlines_text += chunk
|
||||
|
||||
try:
|
||||
presentation_outlines_json = json.loads(presentation_outlines_text)
|
||||
presentation_outlines_json = dict(dirtyjson.loads(presentation_outlines_text))
|
||||
except Exception as e:
|
||||
print(e)
|
||||
raise HTTPException(
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -11,6 +11,7 @@ dependencies = [
|
|||
"anthropic>=0.60.0",
|
||||
"asyncpg>=0.30.0",
|
||||
"chromadb>=1.0.15",
|
||||
"dirtyjson>=1.0.8",
|
||||
"docling>=2.43.0",
|
||||
"fastapi[standard]>=0.116.1",
|
||||
"fastmcp>=2.11.0",
|
||||
|
|
|
|||
|
|
@ -96,11 +96,15 @@ class DocumentsLoader:
|
|||
return self.docling_service.parse_to_markdown(file_path)
|
||||
|
||||
@classmethod
|
||||
def get_page_images_from_pdf(cls, file_path: str, temp_dir: str):
|
||||
def get_page_images_from_pdf(cls, file_path: str, temp_dir: str) -> List[str]:
|
||||
with pdfplumber.open(file_path) as pdf:
|
||||
images = []
|
||||
for page in pdf.pages:
|
||||
img = page.to_image(resolution=150)
|
||||
img.save(os.path.join(temp_dir, f"page_{page.page_number}.png"))
|
||||
image_path = os.path.join(temp_dir, f"page_{page.page_number}.png")
|
||||
img.save(image_path)
|
||||
images.append(image_path)
|
||||
return images
|
||||
|
||||
@classmethod
|
||||
async def get_page_images_from_pdf_async(cls, file_path: str, temp_dir: str):
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
import asyncio
|
||||
import dirtyjson
|
||||
import json
|
||||
from typing import AsyncGenerator, List, Optional
|
||||
from fastapi import HTTPException
|
||||
|
|
@ -554,7 +555,7 @@ class LLMClient:
|
|||
)
|
||||
if content:
|
||||
if depth == 0:
|
||||
return json.loads(content)
|
||||
return dict(dirtyjson.loads(content))
|
||||
return content
|
||||
return None
|
||||
|
||||
|
|
@ -655,7 +656,7 @@ class LLMClient:
|
|||
)
|
||||
|
||||
if text_content:
|
||||
return json.loads(text_content)
|
||||
return dict(dirtyjson.loads(text_content))
|
||||
return None
|
||||
|
||||
async def _generate_anthropic_structured(
|
||||
|
|
|
|||
|
|
@ -39,6 +39,8 @@ def get_system_prompt(
|
|||
- Do not generate table of contents slide.
|
||||
- Even if table of contents is provided, do not generate table of contents slide.
|
||||
{"- Always make first slide a title slide." if include_title_slide else "- Do not include title slide in the presentation."}
|
||||
|
||||
**Search web to get latest information about the topic**
|
||||
"""
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -2,9 +2,11 @@ from fastapi import HTTPException
|
|||
from anthropic import APIError as AnthropicAPIError
|
||||
from openai import APIError as OpenAIAPIError
|
||||
from google.genai.errors import APIError as GoogleAPIError
|
||||
import traceback
|
||||
|
||||
|
||||
def handle_llm_client_exceptions(e: Exception) -> HTTPException:
|
||||
traceback.print_exc()
|
||||
if isinstance(e, OpenAIAPIError):
|
||||
return HTTPException(status_code=500, detail=f"OpenAI API error: {e.message}")
|
||||
if isinstance(e, GoogleAPIError):
|
||||
|
|
|
|||
11
servers/fastapi/uv.lock
generated
11
servers/fastapi/uv.lock
generated
|
|
@ -471,6 +471,15 @@ wheels = [
|
|||
{ url = "https://files.pythonhosted.org/packages/50/3d/9373ad9c56321fdab5b41197068e1d8c25883b3fea29dd361f9b55116869/dill-0.4.0-py3-none-any.whl", hash = "sha256:44f54bf6412c2c8464c14e8243eb163690a9800dbe2c367330883b19c7561049", size = 119668, upload-time = "2025-04-16T00:41:47.671Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dirtyjson"
|
||||
version = "1.0.8"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/db/04/d24f6e645ad82ba0ef092fa17d9ef7a21953781663648a01c9371d9e8e98/dirtyjson-1.0.8.tar.gz", hash = "sha256:90ca4a18f3ff30ce849d100dcf4a003953c79d3a2348ef056f1d9c22231a25fd", size = 30782, upload-time = "2022-11-28T23:32:33.319Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/68/69/1bcf70f81de1b4a9f21b3a62ec0c83bdff991c88d6cc2267d02408457e88/dirtyjson-1.0.8-py3-none-any.whl", hash = "sha256:125e27248435a58acace26d5c2c4c11a1c0de0a9c5124c5a94ba78e517d74f53", size = 25197, upload-time = "2022-11-28T23:32:31.219Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "distro"
|
||||
version = "1.9.0"
|
||||
|
|
@ -1908,6 +1917,7 @@ dependencies = [
|
|||
{ name = "anthropic" },
|
||||
{ name = "asyncpg" },
|
||||
{ name = "chromadb" },
|
||||
{ name = "dirtyjson" },
|
||||
{ name = "docling" },
|
||||
{ name = "fastapi", extra = ["standard"] },
|
||||
{ name = "fastmcp" },
|
||||
|
|
@ -1930,6 +1940,7 @@ requires-dist = [
|
|||
{ name = "anthropic", specifier = ">=0.60.0" },
|
||||
{ name = "asyncpg", specifier = ">=0.30.0" },
|
||||
{ name = "chromadb", specifier = ">=1.0.15" },
|
||||
{ name = "dirtyjson", specifier = ">=1.0.8" },
|
||||
{ name = "docling", specifier = ">=2.43.0" },
|
||||
{ name = "fastapi", extras = ["standard"], specifier = ">=0.116.1" },
|
||||
{ name = "fastmcp", specifier = ">=2.11.0" },
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue