fix(glossary): hard-delete glossary with cascade on archive

archive_glossary() now deletes terms, versions, and the glossary document
instead of soft-deleting. Prevents orphaned 34k-term datasets from consuming
embedding quota and storage after a glossary is removed.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Vadym Samoilenko 2026-05-13 18:44:51 +01:00
parent 6bf88474ee
commit e70a67718e
2 changed files with 17 additions and 5 deletions

View file

@ -252,7 +252,7 @@ async def reembed_version(
return {"status": "queued", "version_id": version_id}
# ── Archive (soft-delete) ─────────────────────────────────────────────────────
# ── Delete ───────────────────────────────────────────────────────────────────
@router.delete("/{glossary_id}", status_code=204)
async def archive_glossary(

View file

@ -334,12 +334,24 @@ async def activate_version(glossary_id: str, version_id: str) -> None:
async def archive_glossary(glossary_id: str) -> None:
    """Hard-delete the glossary and all its versions and terms.

    Despite the historical name, nothing is soft-deleted: the glossary
    document is removed, not marked as archived. The name is kept so
    existing callers continue to work.

    Deletion runs children-first (terms, then versions, then the glossary
    document). The steps are separate database operations with no
    transaction around them, so if one fails midway the glossary document
    is still present and the call can be repeated to finish the cleanup.

    Args:
        glossary_id: String form of the glossary document's ``_id``.

    Raises:
        Whatever ``ObjectId`` raises for a malformed ``glossary_id``
        (presumably ``bson.errors.InvalidId`` -- confirm against the
        file's imports). This is raised before any data is deleted.
    """
    # Convert the id up front so a malformed value fails before anything
    # has been removed, rather than after terms and versions are gone.
    glossary_oid = ObjectId(glossary_id)

    db = await get_database()

    # Only the version ids are needed to locate the dependent terms.
    versions = await db[_COLL_VERSIONS].find(
        {"glossary_id": glossary_id}, {"_id": 1}
    ).to_list(length=None)
    # Terms are matched on ``version_id`` against the stringified version _id.
    version_ids = [str(v["_id"]) for v in versions]

    if version_ids:
        terms_result = await db[_COLL_TERMS].delete_many(
            {"version_id": {"$in": version_ids}}
        )
        logger.info(f"Deleted {terms_result.deleted_count} terms for glossary {glossary_id}")

    # Report what the database actually removed, not the size of the
    # earlier snapshot, so the log stays truthful if the two differ.
    versions_result = await db[_COLL_VERSIONS].delete_many({"glossary_id": glossary_id})
    logger.info(f"Deleted {versions_result.deleted_count} versions for glossary {glossary_id}")

    # The parent document goes last; see the docstring for why.
    await db[_COLL_GLOSSARIES].delete_one({"_id": glossary_oid})
    await _invalidate_cache(glossary_id)
    logger.info(f"Deleted glossary {glossary_id}")
# ── Retrieval ─────────────────────────────────────────────────────────────────