fix(fastapi): replaces chromadb with qdrant[fastembed]
This commit is contained in:
parent
eb1af33527
commit
fa895d6b94
15 changed files with 71 additions and 54 deletions
|
|
@ -22,6 +22,8 @@ from models.generate_presentation_api import (
|
|||
PresentationPathAndEditPath,
|
||||
)
|
||||
from services.get_layout_by_name import get_layout_by_name
|
||||
from services.icon_finder_service import IconFinderService
|
||||
from services.image_generation_service import ImageGenerationService
|
||||
from utils.llm_calls.generate_presentation_outlines import generate_ppt_outline
|
||||
from models.sql.slide import SlideModel
|
||||
from models.sse_response import SSECompleteResponse, SSEResponse
|
||||
|
|
@ -30,7 +32,7 @@ from services.database import get_sql_session
|
|||
from services.documents_loader import DocumentsLoader
|
||||
from models.sql.presentation import PresentationModel
|
||||
from services.pptx_presentation_creator import PptxPresentationCreator
|
||||
from utils.asset_directory_utils import get_exports_directory
|
||||
from utils.asset_directory_utils import get_exports_directory, get_images_directory
|
||||
from utils.llm_calls.generate_document_summary import generate_document_summary
|
||||
from utils.llm_calls.generate_presentation_structure import (
|
||||
generate_presentation_structure,
|
||||
|
|
@ -195,6 +197,9 @@ async def stream_presentation(presentation_id: str):
|
|||
detail="Outlines can not be empty",
|
||||
)
|
||||
|
||||
image_generation_service = ImageGenerationService(get_images_directory())
|
||||
icon_finder_service = IconFinderService()
|
||||
|
||||
async def inner():
|
||||
structure = presentation.get_structure()
|
||||
layout = presentation.get_layout()
|
||||
|
|
@ -223,7 +228,11 @@ async def stream_presentation(presentation_id: str):
|
|||
slides.append(slide)
|
||||
|
||||
# This will mutate slide
|
||||
async_assets_generation_tasks.append(process_slide_and_fetch_assets(slide))
|
||||
async_assets_generation_tasks.append(
|
||||
process_slide_and_fetch_assets(
|
||||
image_generation_service, icon_finder_service, slide
|
||||
)
|
||||
)
|
||||
|
||||
# Give control to the event loop
|
||||
await asyncio.sleep(0)
|
||||
|
|
@ -423,9 +432,13 @@ async def generate_presentation_api(
|
|||
|
||||
# Process slides to fetch assets (images, icons, etc.)
|
||||
print("Processing slides to fetch assets")
|
||||
image_generation_service = ImageGenerationService(get_images_directory())
|
||||
icon_finder_service = IconFinderService()
|
||||
for slide in slides:
|
||||
try:
|
||||
await process_slide_and_fetch_assets(slide)
|
||||
await process_slide_and_fetch_assets(
|
||||
image_generation_service, icon_finder_service, slide
|
||||
)
|
||||
print(f"Processed slide {slide.index} successfully")
|
||||
except Exception as e:
|
||||
print(f"Error processing slide {slide.index}: {e}")
|
||||
|
|
|
|||
|
|
@ -4,6 +4,9 @@ from fastapi import APIRouter, Body, HTTPException
|
|||
from models.sql.presentation import PresentationModel
|
||||
from models.sql.slide import SlideModel
|
||||
from services.database import get_sql_session
|
||||
from services.icon_finder_service import IconFinderService
|
||||
from services.image_generation_service import ImageGenerationService
|
||||
from utils.asset_directory_utils import get_images_directory
|
||||
from utils.llm_calls.edit_slide import get_edited_slide_content
|
||||
from utils.llm_calls.edit_slide_html import get_edited_slide_html
|
||||
from utils.llm_calls.select_slide_type_on_edit import get_slide_layout_from_prompt
|
||||
|
|
@ -34,9 +37,15 @@ async def edit_slide(id: Annotated[str, Body()], prompt: Annotated[str, Body()])
|
|||
prompt, slide_layout, slide, presentation.language
|
||||
)
|
||||
|
||||
image_generation_service = ImageGenerationService(get_images_directory())
|
||||
icon_finder_service = IconFinderService()
|
||||
|
||||
# This will mutate edited_slide_content
|
||||
new_assets = await process_old_and_new_slides_and_fetch_assets(
|
||||
slide.content, edited_slide_content
|
||||
image_generation_service,
|
||||
icon_finder_service,
|
||||
slide.content,
|
||||
edited_slide_content,
|
||||
)
|
||||
|
||||
# Always assign a new unique id to the slide
|
||||
|
|
|
|||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
1
servers/fastapi/qdrant/.lock
Normal file
1
servers/fastapi/qdrant/.lock
Normal file
|
|
@ -0,0 +1 @@
|
|||
tmp lock file
|
||||
BIN
servers/fastapi/qdrant/collection/icons/storage.sqlite
Normal file
BIN
servers/fastapi/qdrant/collection/icons/storage.sqlite
Normal file
Binary file not shown.
1
servers/fastapi/qdrant/meta.json
Normal file
1
servers/fastapi/qdrant/meta.json
Normal file
|
|
@ -0,0 +1 @@
|
|||
{"collections": {"icons": {"vectors": {"fast-bge-small-en": {"size": 384, "distance": "Cosine", "hnsw_config": null, "quantization_config": null, "on_disk": null, "datatype": null, "multivector_config": null}}, "shard_number": null, "sharding_method": null, "replication_factor": null, "write_consistency_factor": null, "on_disk_payload": null, "hnsw_config": null, "wal_config": null, "optimizers_config": null, "init_from": null, "quantization_config": null, "sparse_vectors": null, "strict_mode_config": null}}, "aliases": {}}
|
||||
|
|
@ -12,7 +12,6 @@ cachetools==5.5.2
|
|||
certifi==2025.7.14
|
||||
cffi==1.17.1
|
||||
charset-normalizer==3.4.2
|
||||
chromadb==1.0.15
|
||||
click==8.2.1
|
||||
coloredlogs==15.0.1
|
||||
cryptography==45.0.5
|
||||
|
|
@ -23,6 +22,7 @@ email_validator==2.2.0
|
|||
fastapi==0.116.1
|
||||
fastapi-cli==0.0.8
|
||||
fastapi-cloud-cli==0.1.4
|
||||
fastembed==0.7.1
|
||||
filelock==3.18.0
|
||||
flatbuffers==25.2.10
|
||||
frozenlist==1.7.0
|
||||
|
|
@ -33,12 +33,15 @@ googleapis-common-protos==1.70.0
|
|||
greenlet==3.2.3
|
||||
grpcio==1.74.0
|
||||
h11==0.16.0
|
||||
h2==4.2.0
|
||||
hf-xet==1.1.5
|
||||
hpack==4.1.0
|
||||
httpcore==1.0.9
|
||||
httptools==0.6.4
|
||||
httpx==0.28.1
|
||||
huggingface-hub==0.34.1
|
||||
humanfriendly==10.0
|
||||
hyperframe==6.1.0
|
||||
idna==3.10
|
||||
importlib_metadata==8.7.0
|
||||
importlib_resources==6.5.2
|
||||
|
|
@ -74,6 +77,7 @@ pdfminer.six==20250506
|
|||
pdfplumber==0.11.7
|
||||
pillow==11.3.0
|
||||
pluggy==1.6.0
|
||||
portalocker==3.2.0
|
||||
posthog==5.4.0
|
||||
propcache==0.3.2
|
||||
protobuf==6.31.1
|
||||
|
|
@ -95,6 +99,7 @@ python-dotenv==1.1.1
|
|||
python-multipart==0.0.20
|
||||
python-pptx==1.0.2
|
||||
PyYAML==6.0.2
|
||||
qdrant-client==1.15.0
|
||||
redis==6.2.0
|
||||
referencing==0.36.2
|
||||
requests==2.32.4
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
import uvicorn
|
||||
import argparse
|
||||
|
||||
from services.icon_finder_service import IconFinderService
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description="Run the FastAPI server")
|
||||
|
|
|
|||
|
|
@ -1,55 +1,45 @@
|
|||
import asyncio
|
||||
import json
|
||||
import chromadb
|
||||
from chromadb.config import Settings
|
||||
from chromadb.utils import embedding_functions
|
||||
from qdrant_client import QdrantClient
|
||||
|
||||
|
||||
class IconFinderService:
    """Look up icon image paths by free-text query using a local Qdrant store.

    The service owns a collection named ``icons`` inside the on-disk Qdrant
    folder ``qdrant``. If the collection does not exist yet it is populated
    from ``assets/icons.json``, indexing only icons whose name ends in the
    ``bold`` variant suffix.
    """

    # Embedded (path-based) Qdrant takes an exclusive lock on its storage
    # folder, so a second client opened on the same path while the first is
    # still alive fails. Callers construct this service per request, so the
    # client is created once and shared by every instance in the process.
    _shared_client = None

    def __init__(self):
        """Attach to the shared Qdrant client and ensure the collection exists."""
        self.collection_name = "icons"
        if IconFinderService._shared_client is None:
            IconFinderService._shared_client = QdrantClient(path="qdrant")
        self.client = IconFinderService._shared_client
        self._initialize_icons_collection()

    def _initialize_icons_collection(self):
        """Populate the icons collection only when it is missing.

        An explicit existence check is used instead of catching every
        exception from ``get_collection``, so unrelated storage errors
        surface instead of silently triggering a re-population.
        """
        if not self.client.collection_exists(self.collection_name):
            self._populate_icons_collection()

    @staticmethod
    def _bold_icon_entries(icons):
        """Return ``(documents, metadata)`` for the bold-variant icons.

        Args:
            icons: Iterable of dicts, each with a ``name`` and a ``tags`` key.

        Returns:
            Two parallel lists: the text to embed (``"<name> <tags>"``) and
            the payload ``{"name": <name>}`` stored alongside each vector.
        """
        documents = []
        metadata = []
        for each in icons:
            # Only the "-bold" variant is served (see the path built in
            # search_icons), so other variants are not indexed.
            if each["name"].split("-")[-1] == "bold":
                documents.append(f"{each['name']} {each['tags']}")
                metadata.append({"name": each["name"]})
        return documents, metadata

    def _populate_icons_collection(self):
        """Load ``assets/icons.json`` and add its bold icons to the collection."""
        with open("assets/icons.json", "r", encoding="utf-8") as f:
            icons = json.load(f)

        documents, metadata = self._bold_icon_entries(icons["icons"])

        # Nothing to index: skip the call rather than adding an empty batch.
        if documents:
            self.client.add(
                collection_name=self.collection_name,
                documents=documents,
                metadata=metadata,
            )

    async def search_icons(self, query: str, k: int = 1):
        """Return static paths of the ``k`` icons best matching ``query``.

        The blocking Qdrant query runs in a worker thread so the event loop
        stays responsive.

        Args:
            query: Free-text description of the wanted icon.
            k: Maximum number of results to return.

        Returns:
            A list of ``/static/icons/bold/<name>.png`` paths, one per hit,
            in the order the client returned them.
        """
        result = await asyncio.to_thread(
            self.client.query,
            collection_name=self.collection_name,
            query_text=query,
            limit=k,
        )

        return [f"/static/icons/bold/{each.metadata['name']}.png" for each in result]
|
||||
|
|
|
|||
|
|
@ -10,12 +10,10 @@ from utils.dict_utils import get_dict_at_path, get_dict_paths_with_key, set_dict
|
|||
|
||||
|
||||
async def process_slide_and_fetch_assets(
|
||||
image_generation_service: ImageGenerationService,
|
||||
icon_finder_service: IconFinderService,
|
||||
slide: SlideModel,
|
||||
) -> List[ImageAsset]:
|
||||
image_directory = get_images_directory()
|
||||
|
||||
image_generation_service = ImageGenerationService(image_directory)
|
||||
icon_finder_service = IconFinderService()
|
||||
|
||||
async_tasks = []
|
||||
|
||||
|
|
@ -61,6 +59,8 @@ async def process_slide_and_fetch_assets(
|
|||
|
||||
|
||||
async def process_old_and_new_slides_and_fetch_assets(
|
||||
image_generation_service: ImageGenerationService,
|
||||
icon_finder_service: IconFinderService,
|
||||
old_slide_content: dict,
|
||||
new_slide_content: dict,
|
||||
) -> List[ImageAsset]:
|
||||
|
|
@ -98,10 +98,6 @@ async def process_old_and_new_slides_and_fetch_assets(
|
|||
get_dict_at_path(new_slide_content, path) for path in new_icon_dict_paths
|
||||
]
|
||||
|
||||
# Creates services
|
||||
image_generation_service = ImageGenerationService(get_images_directory())
|
||||
icon_finder_service = IconFinderService()
|
||||
|
||||
# Creates async tasks for fetching new images
|
||||
async_image_fetch_tasks = []
|
||||
new_images_fetch_status = []
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue