feat(fastapi): use dirtyjson, a more lenient JSON loader that parses dirty JSON

This commit is contained in:
sauravniraula 2025-09-12 01:28:59 +05:45
parent 6420d4638a
commit 3c5ba63309
No known key found for this signature in database
GPG key ID: 60FCC1B5A5E83326
11 changed files with 321 additions and 232 deletions

View file

@ -29,7 +29,7 @@ RUN curl -fsSL https://ollama.com/install.sh | sh
# Install dependencies for FastAPI
RUN pip install aiohttp aiomysql aiosqlite asyncpg fastapi[standard] \
pathvalidate pdfplumber chromadb sqlmodel \
anthropic google-genai openai fastmcp
anthropic google-genai openai fastmcp dirtyjson
RUN pip install docling --extra-index-url https://download.pytorch.org/whl/cpu
# Install dependencies for Next.js

View file

@ -29,7 +29,7 @@ RUN curl -fsSL http://ollama.com/install.sh | sh
# Install dependencies for FastAPI
RUN pip install aiohttp aiomysql aiosqlite asyncpg fastapi[standard] \
pathvalidate pdfplumber chromadb sqlmodel \
anthropic google-genai openai fastmcp
anthropic google-genai openai fastmcp dirtyjson
RUN pip install docling --extra-index-url https://download.pytorch.org/whl/cpu
# Install dependencies for Next.js

View file

@ -2,6 +2,7 @@ import asyncio
import json
import math
import uuid
import dirtyjson
from fastapi import APIRouter, Depends, HTTPException
from fastapi.responses import StreamingResponse
from sqlalchemy.ext.asyncio import AsyncSession
@ -82,7 +83,7 @@ async def stream_outlines(
presentation_outlines_text += chunk
try:
presentation_outlines_json = json.loads(presentation_outlines_text)
presentation_outlines_json = dict(dirtyjson.loads(presentation_outlines_text))
except Exception as e:
raise HTTPException(
status_code=400,

View file

@ -4,6 +4,7 @@ import math
import os
import random
from typing import Annotated, List, Literal, Optional
import dirtyjson
from fastapi import APIRouter, Body, Depends, HTTPException
from fastapi.responses import StreamingResponse
from sqlalchemy import delete
@ -486,7 +487,7 @@ async def generate_presentation_api(
presentation_outlines_text += chunk
try:
presentation_outlines_json = json.loads(presentation_outlines_text)
presentation_outlines_json = dict(dirtyjson.loads(presentation_outlines_text))
except Exception as e:
print(e)
raise HTTPException(

File diff suppressed because it is too large Load diff

View file

@ -11,6 +11,7 @@ dependencies = [
"anthropic>=0.60.0",
"asyncpg>=0.30.0",
"chromadb>=1.0.15",
"dirtyjson>=1.0.8",
"docling>=2.43.0",
"fastapi[standard]>=0.116.1",
"fastmcp>=2.11.0",

View file

@ -96,11 +96,15 @@ class DocumentsLoader:
return self.docling_service.parse_to_markdown(file_path)
@classmethod
def get_page_images_from_pdf(cls, file_path: str, temp_dir: str):
def get_page_images_from_pdf(cls, file_path: str, temp_dir: str) -> List[str]:
with pdfplumber.open(file_path) as pdf:
images = []
for page in pdf.pages:
img = page.to_image(resolution=150)
img.save(os.path.join(temp_dir, f"page_{page.page_number}.png"))
image_path = os.path.join(temp_dir, f"page_{page.page_number}.png")
img.save(image_path)
images.append(image_path)
return images
@classmethod
async def get_page_images_from_pdf_async(cls, file_path: str, temp_dir: str):

View file

@ -1,4 +1,5 @@
import asyncio
import dirtyjson
import json
from typing import AsyncGenerator, List, Optional
from fastapi import HTTPException
@ -554,7 +555,7 @@ class LLMClient:
)
if content:
if depth == 0:
return json.loads(content)
return dict(dirtyjson.loads(content))
return content
return None
@ -655,7 +656,7 @@ class LLMClient:
)
if text_content:
return json.loads(text_content)
return dict(dirtyjson.loads(text_content))
return None
async def _generate_anthropic_structured(

View file

@ -39,6 +39,8 @@ def get_system_prompt(
- Do not generate table of contents slide.
- Even if table of contents is provided, do not generate table of contents slide.
{"- Always make first slide a title slide." if include_title_slide else "- Do not include title slide in the presentation."}
**Search web to get latest information about the topic**
"""

View file

@ -2,9 +2,11 @@ from fastapi import HTTPException
from anthropic import APIError as AnthropicAPIError
from openai import APIError as OpenAIAPIError
from google.genai.errors import APIError as GoogleAPIError
import traceback
def handle_llm_client_exceptions(e: Exception) -> HTTPException:
traceback.print_exc()
if isinstance(e, OpenAIAPIError):
return HTTPException(status_code=500, detail=f"OpenAI API error: {e.message}")
if isinstance(e, GoogleAPIError):

View file

@ -471,6 +471,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/50/3d/9373ad9c56321fdab5b41197068e1d8c25883b3fea29dd361f9b55116869/dill-0.4.0-py3-none-any.whl", hash = "sha256:44f54bf6412c2c8464c14e8243eb163690a9800dbe2c367330883b19c7561049", size = 119668, upload-time = "2025-04-16T00:41:47.671Z" },
]
[[package]]
name = "dirtyjson"
version = "1.0.8"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/db/04/d24f6e645ad82ba0ef092fa17d9ef7a21953781663648a01c9371d9e8e98/dirtyjson-1.0.8.tar.gz", hash = "sha256:90ca4a18f3ff30ce849d100dcf4a003953c79d3a2348ef056f1d9c22231a25fd", size = 30782, upload-time = "2022-11-28T23:32:33.319Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/68/69/1bcf70f81de1b4a9f21b3a62ec0c83bdff991c88d6cc2267d02408457e88/dirtyjson-1.0.8-py3-none-any.whl", hash = "sha256:125e27248435a58acace26d5c2c4c11a1c0de0a9c5124c5a94ba78e517d74f53", size = 25197, upload-time = "2022-11-28T23:32:31.219Z" },
]
[[package]]
name = "distro"
version = "1.9.0"
@ -1908,6 +1917,7 @@ dependencies = [
{ name = "anthropic" },
{ name = "asyncpg" },
{ name = "chromadb" },
{ name = "dirtyjson" },
{ name = "docling" },
{ name = "fastapi", extra = ["standard"] },
{ name = "fastmcp" },
@ -1930,6 +1940,7 @@ requires-dist = [
{ name = "anthropic", specifier = ">=0.60.0" },
{ name = "asyncpg", specifier = ">=0.30.0" },
{ name = "chromadb", specifier = ">=1.0.15" },
{ name = "dirtyjson", specifier = ">=1.0.8" },
{ name = "docling", specifier = ">=2.43.0" },
{ name = "fastapi", extras = ["standard"], specifier = ">=0.116.1" },
{ name = "fastmcp", specifier = ">=2.11.0" },