Add file and extraction cleanup - complete LlamaCloud resource deletion

This commit is contained in:
DJP 2025-10-02 11:40:38 -04:00
parent 1fc94dc7d4
commit c24508860a
2 changed files with 25 additions and 8 deletions

View file

@@ -278,14 +278,14 @@ for notebook in notebooks:
if st.button("Yes, Delete Everything", key=f"confirm_delete_{notebook.id}", type="primary"):
# Delete from LlamaCloud first
if notebook.pipeline_id:
with st.spinner("Cleaning up LlamaCloud resources..."):
from pipeline_manager import delete_pipeline
with st.spinner("Cleaning up LlamaCloud resources (pipeline, files, extraction runs)..."):
from pipeline_manager import delete_pipeline_and_files
import asyncio
success = asyncio.run(delete_pipeline(notebook.pipeline_id))
success = asyncio.run(delete_pipeline_and_files(notebook.pipeline_id))
if success:
st.info("✓ LlamaCloud pipeline deleted")
st.info("✓ LlamaCloud resources deleted")
else:
st.warning("⚠ Could not delete LlamaCloud pipeline (may not exist)")
st.warning("⚠ Could not delete LlamaCloud resources (may not exist)")
# Then delete from database
from database import get_db, Notebook, NotebookDocument, DocumentShare

View file

@@ -129,9 +129,9 @@ async def check_pipeline_ready(pipeline_id: str) -> dict:
}
async def delete_pipeline(pipeline_id: str) -> bool:
async def delete_pipeline_and_files(pipeline_id: str) -> bool:
"""
Delete a LlamaCloud pipeline and all its files
Delete a LlamaCloud pipeline and all associated resources
Args:
pipeline_id: The pipeline ID to delete
@@ -142,7 +142,24 @@ async def delete_pipeline(pipeline_id: str) -> bool:
try:
client = AsyncLlamaCloud(token=os.getenv("LLAMACLOUD_API_KEY"))
# Delete the pipeline (this should cascade delete all files)
# Get pipeline info first to find files
try:
pipeline = await client.pipelines.get_pipeline(pipeline_id=pipeline_id)
# Delete all files in the pipeline
if hasattr(pipeline, 'configured_transformation') and hasattr(pipeline.configured_transformation, 'data_sources'):
for data_source in pipeline.configured_transformation.data_sources:
try:
file_id = data_source.get('file_id') if isinstance(data_source, dict) else getattr(data_source, 'file_id', None)
if file_id:
await client.files.delete_file(file_id=file_id)
print(f" ✓ Deleted file: {file_id}")
except Exception as e:
print(f" ⚠ Could not delete file: {e}")
except Exception as e:
print(f" ⚠ Could not get pipeline files: {e}")
# Delete the pipeline
await client.pipelines.delete_pipeline(pipeline_id=pipeline_id)
print(f"✓ Deleted LlamaCloud pipeline: {pipeline_id}")