presenton/servers/fastapi/utils/schema_utils.py
sudipnext c7860127f2 feat: add support for optional embedded Ollama and enhance database migration handling
- Updated docker-compose.yml to allow disabling embedded Ollama via environment variable.
- Refactored Dockerfile and Dockerfile.dev for improved dependency management and installation process.
- Enhanced FastAPI migration scripts to handle orphaned Alembic revisions and added new database migration logic.
- Improved error handling in background tasks and Codex authentication endpoints.
- Added support for font file uploads with better validation and extraction of font names.
- Introduced new image search functionality with support for Pexels and Pixabay APIs.
2026-04-15 15:39:35 +05:45

442 lines
16 KiB
Python

from copy import deepcopy
from typing import Any, List
from openai import NOT_GIVEN
from utils.dict_utils import (
get_dict_paths_with_key,
get_dict_at_path,
has_more_than_n_keys,
)
# String `format` values that ensure_strict_json_schema leaves in place;
# any other `format` on a string-typed schema node is deleted there.
supported_string_formats = [
    "date-time",
    "time",
    "date",
    "duration",
    "email",
    "hostname",
    "ipv4",
    "ipv6",
    "uuid",
]
def remove_fields_from_schema(schema: dict, fields_to_remove: List[str]):
    """Return a deep copy of ``schema`` with the named fields stripped out.

    Every dict that owns a ``properties`` mapping loses the entries whose
    names appear in ``fields_to_remove``; every dict that owns a ``required``
    list has those same names filtered out. The input schema is not modified.

    Args:
        schema: JSON schema dictionary to prune.
        fields_to_remove: Property names to drop wherever they occur.

    Returns:
        The pruned copy of the schema.
    """
    pruned = deepcopy(schema)

    # Pass 1: delete the fields from each "properties" mapping.
    # NOTE(review): the paths are collected before anything is deleted; if a
    # removed field itself contains a nested "properties" mapping, a later
    # path may no longer resolve -- confirm how get_dict_at_path handles that.
    for location in get_dict_paths_with_key(pruned, "properties"):
        holder = get_dict_at_path(pruned, location)
        if "properties" not in holder:
            continue
        declared = holder["properties"]
        if not isinstance(declared, dict):
            continue
        for name in fields_to_remove:
            declared.pop(name, None)

    # Pass 2: keep only the names that were not removed in each "required" list.
    for location in get_dict_paths_with_key(pruned, "required"):
        holder = get_dict_at_path(pruned, location)
        if "required" in holder and isinstance(holder["required"], list):
            holder["required"] = [
                name for name in holder["required"] if name not in fields_to_remove
            ]

    return pruned
def add_field_in_schema(schema: dict, field: dict, required: bool = False) -> dict:
    """Return a copy of ``schema`` with one root-level property added or replaced.

    Neither ``schema`` nor ``field`` is modified, and the returned schema does
    not share any nested object with either of them.

    Args:
        schema: JSON schema dictionary to extend.
        field: Single-entry mapping ``{name: schema_dict}`` describing the
            property to insert under the root ``properties``.
        required: When true the name is added to the root ``required`` list;
            when false the name is removed from it if present.

    Returns:
        The updated copy. The ``required`` key is dropped entirely when the
        resulting list is empty.

    Raises:
        ValueError: ``field`` is not a dict with exactly one entry.
        TypeError: the field name is not a string or its schema is not a dict.
    """
    if not isinstance(field, dict) or len(field) != 1:
        raise ValueError(
            "`field` must be a dict with exactly one entry: {name: schema_dict}"
        )
    field_name, field_schema = next(iter(field.items()))
    if not isinstance(field_name, str):
        raise TypeError("Field name must be a string")
    if not isinstance(field_schema, dict):
        raise TypeError("Field schema must be a dictionary")

    updated_schema: dict = deepcopy(schema)

    root_properties = updated_schema.get("properties")
    if not isinstance(root_properties, dict):
        # Missing or malformed "properties" is replaced by a fresh mapping.
        root_properties = {}
        updated_schema["properties"] = root_properties

    # Copy the field definition as well: other helpers in this module mutate
    # schemas in place, and the caller's `field` must not change as a side
    # effect of someone editing the returned schema.
    root_properties[field_name] = deepcopy(field_schema)

    # Update root-level required based on the flag.
    existing_required = updated_schema.get("required")
    if not isinstance(existing_required, list):
        existing_required = []
    if required:
        if field_name not in existing_required:
            existing_required.append(field_name)
    else:
        existing_required = [name for name in existing_required if name != field_name]

    if existing_required:
        updated_schema["required"] = existing_required
    else:
        updated_schema.pop("required", None)
    return updated_schema
# From OpenAI
def ensure_strict_json_schema(
    json_schema: object,
    *,
    path: tuple[str, ...],
    root: dict[str, object],
) -> dict[str, Any]:
    """Mutates the given JSON schema to ensure it conforms to the `strict` standard
    that the API expects.

    The schema is rewritten in place and the same dict is returned. Nested
    schemas under `$defs`, `definitions`, `properties`, `items`, `anyOf` and
    `allOf` are processed recursively.

    Args:
        json_schema: Schema node to normalise; must be a dict.
        path: Keys leading from the root to this node. Only used here to build
            the error message and the paths passed to recursive calls.
        root: Schema that `$ref` pointers are resolved against.

    Returns:
        The (mutated) `json_schema` dict.

    Raises:
        TypeError: `json_schema` (or a nested schema) is not a dict.
        ValueError: a `$ref` that must be inlined does not resolve to a dict,
            or `resolve_ref` rejects the pointer format.
    """
    if not isinstance(json_schema, dict):
        raise TypeError(f"Expected {json_schema} to be a dictionary; path={path}")
    # Shared definitions are normalised in place; the return values are not needed.
    defs = json_schema.get("$defs")
    if isinstance(defs, dict):
        for def_name, def_schema in defs.items():
            ensure_strict_json_schema(
                def_schema, path=(*path, "$defs", def_name), root=root
            )
    definitions = json_schema.get("definitions")
    if isinstance(definitions, dict):
        for definition_name, definition_schema in definitions.items():
            ensure_strict_json_schema(
                definition_schema,
                path=(*path, "definitions", definition_name),
                root=root,
            )
    typ = json_schema.get("type")
    # An explicit `additionalProperties` supplied by the caller is left as is.
    if typ == "object" and "additionalProperties" not in json_schema:
        json_schema["additionalProperties"] = False
    # object types
    # { 'type': 'object', 'properties': { 'a': {...} } }
    properties = json_schema.get("properties")
    if isinstance(properties, dict):
        # Every declared property is marked required, replacing any existing list.
        json_schema["required"] = [prop for prop in properties.keys()]
        json_schema["properties"] = {
            key: ensure_strict_json_schema(
                prop_schema, path=(*path, "properties", key), root=root
            )
            for key, prop_schema in properties.items()
        }
    # arrays
    # { 'type': 'array', 'items': {...} }
    # OpenAI requires array schemas to have "items". Zod tuples may emit prefixItems only.
    items = json_schema.get("items")
    if isinstance(items, dict):
        json_schema["items"] = ensure_strict_json_schema(
            items, path=(*path, "items"), root=root
        )
    elif typ == "array":
        # No usable `items`: fall back to the first tuple entry, else to strings.
        prefix_items = json_schema.get("prefixItems")
        if (
            isinstance(prefix_items, list)
            and len(prefix_items) > 0
            and isinstance(prefix_items[0], dict)
        ):
            # Only the first `prefixItems` entry is kept; the rest are discarded.
            json_schema["items"] = ensure_strict_json_schema(
                prefix_items[0], path=(*path, "items"), root=root
            )
            json_schema.pop("prefixItems", None)
        else:
            json_schema["items"] = {"type": "string"}
    # unions
    any_of = json_schema.get("anyOf")
    if isinstance(any_of, list):
        json_schema["anyOf"] = [
            ensure_strict_json_schema(variant, path=(*path, "anyOf", str(i)), root=root)
            for i, variant in enumerate(any_of)
        ]
    # intersections
    all_of = json_schema.get("allOf")
    if isinstance(all_of, list):
        if len(all_of) == 1:
            # A single-entry `allOf` is merged into this node and then removed;
            # keys from the entry overwrite same-named keys on the node.
            json_schema.update(
                ensure_strict_json_schema(
                    all_of[0], path=(*path, "allOf", "0"), root=root
                )
            )
            json_schema.pop("allOf")
        else:
            json_schema["allOf"] = [
                ensure_strict_json_schema(
                    entry, path=(*path, "allOf", str(i)), root=root
                )
                for i, entry in enumerate(all_of)
            ]
    # string
    if typ == "string":
        if "format" in json_schema:
            # Formats outside `supported_string_formats` are dropped, not rejected.
            if json_schema["format"] not in supported_string_formats:
                del json_schema["format"]
    # strip `None` defaults as there's no meaningful distinction here
    # the schema will still be `nullable` and the model will default
    # to using `None` anyway
    # The NOT_GIVEN sentinel separates "no default key" from "default is None",
    # so `pop` only runs when the key exists.
    if json_schema.get("default", NOT_GIVEN) is None:
        json_schema.pop("default")
    # we can't use `$ref`s if there are also other properties defined, e.g.
    # `{"$ref": "...", "description": "my description"}`
    #
    # so we unravel the ref
    # `{"type": "string", "description": "my description"}`
    ref = json_schema.get("$ref")
    # presumably has_more_than_n_keys(d, 1) is true when the node has keys
    # besides `$ref` -- helper is defined in utils.dict_utils; confirm there.
    if ref and has_more_than_n_keys(json_schema, 1):
        assert isinstance(ref, str), f"Received non-string $ref - {ref}"
        resolved = resolve_ref(root=root, ref=ref)
        if not isinstance(resolved, dict):
            raise ValueError(
                f"Expected `$ref: {ref}` to resolved to a dictionary but got {resolved}"
            )
        # properties from the json schema take priority over the ones on the `$ref`
        json_schema.update({**resolved, **json_schema})
        json_schema.pop("$ref")
        # Since the schema expanded from `$ref` might not have `additionalProperties: false` applied,
        # we call `ensure_strict_json_schema` again to fix the inlined schema and ensure it's valid.
        return ensure_strict_json_schema(json_schema, path=path, root=root)
    return json_schema
def resolve_ref(*, root: dict[str, object], ref: str) -> object:
    """Follow a local ``#/a/b/c`` reference through ``root`` and return its target.

    Args:
        root: Schema dictionary the reference is relative to.
        ref: Reference string; must begin with ``#/``.

    Returns:
        The dictionary found by walking each ``/``-separated key in turn.

    Raises:
        ValueError: ``ref`` does not start with ``#/``.
        KeyError: a segment is missing from the dictionary being walked.
        AssertionError: a segment resolves to something other than a dict.
    """
    if not ref.startswith("#/"):
        raise ValueError(f"Unexpected $ref format {ref!r}; Does not start with #/")

    resolved = root
    # Drop the leading "#/" and descend one key at a time.
    for segment in ref[2:].split("/"):
        candidate = resolved[segment]
        assert isinstance(
            candidate, dict
        ), f"encountered non-dictionary entry while resolving {ref} - {resolved}"
        resolved = candidate
    return resolved
# Flattens a JSON schema by inlining all $ref references and removing $defs/definitions
def flatten_json_schema(schema: dict) -> dict:
    """Return a new schema with every ``$ref`` inlined and ``$defs``/``definitions`` removed.

    A node containing ``$ref`` is replaced by a copy of the referenced schema
    overlaid with the node's other keys (the node's keys win), and the merged
    result is flattened again. The input schema is not modified.

    Note: there is no cycle detection, so a schema whose references lead back
    to themselves recurses until Python raises ``RecursionError``.

    Args:
        schema: JSON schema dictionary, used both as the tree to flatten and
            as the root that references are resolved against.

    Returns:
        The flattened schema.
    """
    # References are always resolved against an untouched copy of the input.
    lookup_root = deepcopy(schema)

    def _inline(node: Any) -> Any:
        if isinstance(node, list):
            return [_inline(entry) for entry in node]
        if not isinstance(node, dict):
            return node

        if "$ref" in node:
            pointer = node["$ref"]
            assert isinstance(
                pointer, str
            ), f"Received non-string $ref - {pointer}"
            target = resolve_ref(root=lookup_root, ref=pointer)
            assert isinstance(
                target, dict
            ), f"Expected `$ref: {pointer}` to resolve to a dictionary but got {type(target)}"
            # Referenced schema first, then the node's own keys on top.
            combined: dict[str, Any] = deepcopy(target)
            combined.update(
                (key, value) for key, value in node.items() if key != "$ref"
            )
            return _inline(combined)

        output: dict[str, Any] = {}
        for key, value in node.items():
            if key in ("$defs", "definitions"):
                # Definition tables are not carried into the output.
                continue
            if key == "properties" and isinstance(value, dict):
                # Keys here are property names, not schema keywords, so the
                # mapping itself is never treated as a schema node.
                output[key] = {
                    name: _inline(sub_schema) for name, sub_schema in value.items()
                }
            else:
                # Dicts and lists are walked; any other value passes through.
                output[key] = _inline(value)
        return output

    flat = _inline(schema)
    # Ensure top-level cleanup just in case
    if isinstance(flat, dict):
        for leftover in ("$defs", "definitions"):
            flat.pop(leftover, None)
    return flat
def ensure_array_schemas_have_items(schema: dict) -> dict[str, Any]:
    """
    Return a deep copy of ``schema`` in which every array-typed node has "items".

    A dict counts as array-typed when its "type" is the string "array" or a
    list containing "array". Such a node without an "items" key receives
    ``{"type": "string"}``. Every nested dict and list is visited.
    Codex Responses API requires array schemas to specify items.
    """
    patched = deepcopy(schema)

    def _fill(node: Any) -> None:
        # Lists are only containers here: visit each element and stop.
        if isinstance(node, list):
            for element in node:
                _fill(element)
            return
        if not isinstance(node, dict):
            return

        declared = node.get("type")
        array_typed = declared == "array" or (
            isinstance(declared, list) and "array" in declared
        )
        if array_typed and "items" not in node:
            node["items"] = {"type": "string"}

        # Children are patched in place, so nothing needs to be reassigned.
        for child in node.values():
            _fill(child)

    _fill(patched)
    return patched
def remove_titles_from_schema(schema: dict) -> dict[str, Any]:
    """Return a copy of ``schema`` without the "title" metadata keys.

    A "title" key is dropped from every dict in the tree, except that the keys
    of a "properties" mapping are treated as property names: a property that
    happens to be called "title" is kept, and only its own schema is cleaned.
    The input schema is not modified.
    """

    def _clean(node: Any) -> Any:
        if isinstance(node, list):
            return [_clean(entry) for entry in node]
        if not isinstance(node, dict):
            return node

        cleaned: dict[str, Any] = {}
        for name, child in node.items():
            if name == "properties" and isinstance(child, dict):
                # Preserve every property name; strip inside each definition.
                cleaned[name] = {
                    prop: _clean(definition) for prop, definition in child.items()
                }
            elif name != "title":
                cleaned[name] = _clean(child)
        return cleaned

    return _clean(deepcopy(schema))
# ? Not used
def generate_constraint_sentences(schema: dict) -> str:
    """
    Build human-readable constraint sentences from a JSON schema.

    String properties contribute a sentence for ``minLength``/``maxLength``
    and array properties one for ``minItems``/``maxItems``. Nested object
    properties are addressed as ``parent.child`` and properties of array
    items as ``parent[*].child``.

    Args:
        schema: JSON schema dictionary
    Returns:
        String containing constraint sentences separated by newlines
        (empty when the schema yields no constraints)
    """
    sentences = []
    # Keys the generic nested-dict pass never descends into.
    skipped_keys = (
        "properties",
        "type",
        "minLength",
        "maxLength",
        "minItems",
        "maxItems",
    )

    def walk(node, prefix=""):
        if not isinstance(node, dict):
            return

        if "properties" in node:
            for name, definition in node["properties"].items():
                path = f"{prefix}.{name}" if prefix else name
                if not isinstance(definition, dict):
                    continue
                kind = definition.get("type")

                if kind == "string":
                    # Length bounds on strings.
                    shortest = definition.get("minLength")
                    longest = definition.get("maxLength")
                    if longest is not None:
                        if shortest is not None:
                            sentences.append(
                                f" - {path} should be less than {longest} characters and greater than {shortest} characters"
                            )
                        else:
                            sentences.append(
                                f" - {path} should be less than {longest} characters"
                            )
                    elif shortest is not None:
                        sentences.append(
                            f" - {path} should be greater than {shortest} characters"
                        )
                elif kind == "array":
                    # Size bounds on arrays.
                    fewest = definition.get("minItems")
                    most = definition.get("maxItems")
                    if most is not None:
                        if fewest is not None:
                            sentences.append(
                                f" - {path} should have more than {fewest} items and less than {most} items"
                            )
                        else:
                            sentences.append(
                                f" - {path} should have less than {most} items"
                            )
                    elif fewest is not None:
                        sentences.append(
                            f" - {path} should have more than {fewest} items"
                        )

                # Nested objects extend the dotted path.
                if kind == "object" or "properties" in definition:
                    walk(definition, path)

                # Object-like array items are reported under "<path>[*]".
                if kind == "array" and "items" in definition:
                    element = definition["items"]
                    if isinstance(element, dict) and (
                        "properties" in element or element.get("type") == "object"
                    ):
                        walk(element, f"{path}[*]")

        # Any other dict-valued key is searched with the prefix unchanged.
        for key, value in node.items():
            if key not in skipped_keys and isinstance(value, dict):
                walk(value, prefix)

    # Start extraction from the root schema
    walk(schema)
    return "\n".join(sentences)