ferrero-opentext/Python-Version/venv/lib/python3.12/site-packages/llama_cloud_services/utils.py

206 lines
7.2 KiB
Python
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import os
import importlib.metadata
from contextlib import contextmanager
from typing import Generator
import difflib
from llama_cloud.types import StatusEnum, File
import httpx
import packaging.version
from pydantic import BaseModel
from typing import Any, Dict, List, Tuple, Type, Union, Optional
from io import BufferedIOBase, TextIOWrapper
from pathlib import Path
import secrets
# Asyncio error messages
# `nest_asyncio_err` is the substring that `augment_async_errors` looks for in the
# text of a RuntimeError to recognise a nested-event-loop failure.
nest_asyncio_err = "cannot be called from a running event loop"
# `nest_asyncio_msg` is the text of the RuntimeError that `augment_async_errors`
# raises in its place; it tells the user how to work around the problem.
nest_asyncio_msg = (
"The event loop is already running. "
"Add `import nest_asyncio; nest_asyncio.apply()` to your code to fix this issue."
)
def check_extra_params(
    model_cls: Type[BaseModel], data: Dict[str, Any]
) -> Tuple[List[str], List[str]]:
    """Find keys in ``data`` that are not fields of ``model_cls``.

    Args:
        model_cls: Model class whose ``model_fields`` define the valid names.
        data: Mapping of parameter names to values supplied by the caller.

    Returns:
        A pair ``(extra_params, suggestions)``: the unknown keys in the order
        they appear in ``data``, and one human-readable message per unknown key.
    """
    known_fields = set(model_cls.model_fields.keys())
    unknown = [key for key in data.keys() if key not in known_fields]

    def _hint(key: str) -> str:
        # A sufficiently close field name is reported as a probable typo;
        # otherwise point the user at the docs / a package upgrade.
        nearest = difflib.get_close_matches(key, known_fields, n=1, cutoff=0.8)
        if nearest:
            return f"'{key}' is not a valid parameter. Did you mean '{nearest[0]}' instead of '{key}'?"
        return f"'{key}' is not a valid parameter. Please check the documentation or update the package."

    return unknown, [_hint(key) for key in unknown]
def is_terminal_status(status: StatusEnum) -> bool:
    """
    Tell whether a job status is terminal, i.e. no further updates are expected.

    Note: the set below is hard-coded and must be kept in sync with the status enum.

    Args:
        status: The status to check

    Returns:
        True if the status is terminal, False otherwise
    """
    terminal_statuses = {
        StatusEnum.SUCCESS,
        StatusEnum.ERROR,
        StatusEnum.CANCELLED,
        StatusEnum.PARTIAL_SUCCESS,
    }
    return status in terminal_statuses
async def check_for_updates(client: httpx.AsyncClient, quiet: bool = True) -> bool:
    """Check if an SDK update is available.

    Fetches the package metadata from PyPI and compares the published version
    with the locally installed one.

    Args:
        client: HTTPX client to use.
        quiet: If False, update availability will also be printed to stdout.

    Returns: True if an update is available.

    Raises:
        ValueError: Failed to get a valid release version from PyPI.
    """
    package_name = "llama-cloud-services"
    response = await client.get(f"https://pypi.org/pypi/{package_name}/json")
    info = response.json().get("info", {})
    published = info.get("version", "")
    if not published:
        raise ValueError("Failed to fetch package info from PyPI")

    latest = packaging.version.parse(published)
    current = packaging.version.parse(importlib.metadata.version(package_name))
    outdated = current < latest

    # In quiet mode only the result is reported; nothing is printed.
    if quiet:
        return outdated

    if outdated:
        lines = [
            f"\u26A0\uFE0F {package_name} is out of date",
            f"Current version: {current}|Latest: {latest}",
            "To upgrade: pip install -U --force-reinstall llama-cloud-services",
        ]
        print(os.linesep.join(lines))
    else:
        print(f"{package_name} is up to date")
    return outdated
@contextmanager
def augment_async_errors() -> Generator[None, None, None]:
    """Context manager to add helpful information for errors due to nested event loops.

    Any ``RuntimeError`` raised inside the ``with`` body whose text contains
    ``nest_asyncio_err`` is replaced by a ``RuntimeError`` carrying
    ``nest_asyncio_msg``. Every other exception propagates unchanged.

    Raises:
        RuntimeError: With the ``nest_asyncio`` hint, chained to the original error.
    """
    try:
        yield
    except RuntimeError as e:
        if nest_asyncio_err in str(e):
            # Chain explicitly so the traceback presents the original error as
            # the direct cause rather than as a failure inside this handler.
            raise RuntimeError(nest_asyncio_msg) from e
        raise
class SourceText:
    """
    A wrapper class for providing text or file input with optional filename specification.

    This class allows you to provide input in multiple ways:
    - Direct text content via text_content parameter
    - File paths as strings or Path objects
    - Raw bytes
    - File-like objects (BufferedIOBase, TextIOWrapper)
    - Already-uploaded file ID via file_id parameter

    Args:
        file: The file input (bytes, file-like object, str path, or Path).
            Mutually exclusive with text_content and file_id.
        text_content: Raw text content to process. Mutually exclusive with file and file_id.
        file_id: ID of an already-uploaded file. Mutually exclusive with file and text_content.
        filename: Optional filename. Required for bytes/file-like objects without names.
            If not provided, will be auto-generated for text_content or inferred from paths.

    Raises:
        ValueError: If zero or more than one of file, text_content and file_id is
            given, if the file type is unsupported, or if no filename is given
            for bytes / a file-like object that has no ``name``.

    Examples:
        # Direct text input
        source = SourceText(text_content="Hello world")

        # File path
        source = SourceText(file="document.pdf")

        # Bytes with filename
        source = SourceText(file=b"...", filename="document.pdf")

        # File-like object (will read from current position)
        with open("document.pdf", "rb") as f:
            source = SourceText(file=f)

        # Already-uploaded file
        source = SourceText(file_id="file_abc123")
    """

    def __init__(
        self,
        *,
        file: Union[bytes, BufferedIOBase, TextIOWrapper, str, Path, None] = None,
        text_content: Optional[str] = None,
        file_id: Optional[str] = None,
        filename: Optional[str] = None,
    ):
        self.file = file
        self.filename = filename
        self.text_content = text_content
        self.file_id = file_id
        self._validate()

    def _validate(self) -> None:
        """Check the input combination and fill in ``self.filename`` when it can be derived."""
        # Check that exactly one of file, text_content, or file_id is provided
        provided = sum(
            [
                self.file is not None,
                self.text_content is not None,
                self.file_id is not None,
            ]
        )
        if provided == 0:
            raise ValueError("One of file, text_content, or file_id must be provided.")
        elif provided > 1:
            raise ValueError(
                "Only one of file, text_content, or file_id can be provided."
            )

        # If file_id is provided, we don't need filename validation
        if self.file_id is not None:
            return

        if self.text_content is not None:
            if not self.filename:
                # Raw text has no natural name; generate a random one.
                random_hex = secrets.token_hex(4)
                self.filename = f"text_input_{random_hex}.txt"
            return

        if isinstance(self.file, (bytes, BufferedIOBase, TextIOWrapper)):
            # Treat an empty filename the same as a missing one, so that
            # filename="" cannot slip through for nameless bytes / streams.
            if not self.filename:
                if not hasattr(self.file, "name"):
                    raise ValueError(
                        "filename must be provided when file is bytes or a file-like object without a name"
                    )
                self.filename = os.path.basename(str(self.file.name))
        elif isinstance(self.file, (str, Path)):
            if not self.filename:
                self.filename = os.path.basename(str(self.file))
        else:
            raise ValueError(f"Unsupported file type: {type(self.file)}")
# Type alias for file input that can be used across services.
# Accepted forms: `str`, `Path`, `BufferedIOBase`, a `SourceText` wrapper, or a
# `File` (imported from `llama_cloud.types`). Raw `bytes` and `TextIOWrapper` are
# not part of this alias; they are only accepted through `SourceText(file=...)`.
FileInput = Union[str, Path, BufferedIOBase, SourceText, File]