solventum-image-metadata/backend/app/api/import_api.py
SamoilenkoVadym 07c6750d9e fix(import): handle NaN values in Excel/CSV import
- Replace NaN and Inf float values with None before JSON serialization
- Fixes 500 error: 'Out of range float values are not JSON compliant'
- Clean sample data in both /file and /excel/preview endpoints
- Pandas returns NaN for empty cells, JSON cannot serialize them

Co-Authored-By: Claude Sonnet 4.5 (1M context) <noreply@anthropic.com>
2026-02-09 18:59:20 +00:00

216 lines
6.7 KiB
Python

"""
Import API Endpoints
Handles CSV/Excel/JSON import with column mapping.
"""
from fastapi import APIRouter, UploadFile, File, Depends, HTTPException, Request, status
from sqlalchemy.ext.asyncio import AsyncSession
from pathlib import Path
import secrets
from app.core.auth import get_current_user_id
from app.core.database import get_db, AuditLogRepository
from app.core.redis_client import RedisSessionStore
from app.services.file_service import get_file_service, FileService
from app.processors.metadata_importer import MetadataImporter
from app.models.file import (
ImportFileResponse,
ImportMappingConfig,
ExcelSheetPreviewRequest
)
router = APIRouter()
@router.post("/file", response_model=ImportFileResponse)
async def upload_import_file(
import_file: UploadFile = File(...),
request: Request = None,
user_id: int = Depends(get_current_user_id),
db: AsyncSession = Depends(get_db),
file_service: FileService = Depends(get_file_service)
):
"""
Upload CSV/Excel/JSON file for metadata import.
"""
# Save import file
file_info = await file_service.save_upload(import_file, user_id)
# Detect file type
file_ext = Path(file_info["filename"]).suffix.lower()
import_type = file_ext.replace('.', '') # csv, xlsx, json
# Preview file structure
importer = MetadataImporter()
try:
columns, sample_data, suggestions = importer.preview_file_structure(file_info["filepath"])
# For Excel files, get sheet names
sheet_names = None
if import_type == 'xlsx':
import openpyxl
wb = openpyxl.load_workbook(file_info["filepath"])
sheet_names = wb.sheetnames
# Create import session in Redis
redis: RedisSessionStore = request.app.state.redis
import_session_id = await redis.create_import_session(
user_id=user_id,
import_type=import_type,
filename=file_info["filename"],
filepath=file_info["filepath"]
)
# Log action
await AuditLogRepository.log_action(
db,
user_id=user_id,
action="import_upload",
details=f"Uploaded {import_type} import file: {file_info['filename']}"
)
# Clean sample data - replace NaN with None for JSON serialization
clean_sample_data = None
if sample_data:
import json
import numpy as np
clean_sample_data = []
for row in sample_data[:5]:
clean_row = {}
for key, value in row.items():
# Replace NaN/Inf with None
if isinstance(value, float) and (np.isnan(value) or np.isinf(value)):
clean_row[key] = None
else:
clean_row[key] = value
clean_sample_data.append(clean_row)
return ImportFileResponse(
success=True,
import_session_id=import_session_id,
filename=file_info["filename"],
import_type=import_type,
columns=columns,
sheet_names=sheet_names,
sample_data=clean_sample_data,
row_count=len(sample_data) if sample_data else 0
)
except Exception as e:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=f"Failed to parse import file: {str(e)}"
)
@router.post("/excel/preview")
async def preview_excel_sheet(
preview_request: ExcelSheetPreviewRequest,
request: Request,
user_id: int = Depends(get_current_user_id)
):
"""
Preview specific Excel sheet.
"""
# Get import session
redis: RedisSessionStore = request.app.state.redis
session_data = await redis.get_import_session(preview_request.excel_session_id)
if not session_data or session_data.get("user_id") != user_id:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail="Import session not found"
)
# Preview sheet
importer = MetadataImporter()
try:
import pandas as pd
import numpy as np
df = pd.read_excel(session_data["filepath"], sheet_name=preview_request.sheet_name)
# Clean sample data - replace NaN with None
sample_rows = df.head(5).to_dict('records')
clean_sample_data = []
for row in sample_rows:
clean_row = {}
for key, value in row.items():
if isinstance(value, float) and (np.isnan(value) or np.isinf(value)):
clean_row[key] = None
else:
clean_row[key] = value
clean_sample_data.append(clean_row)
return {
"success": True,
"columns": df.columns.tolist(),
"sample_data": clean_sample_data,
"row_count": len(df)
}
except Exception as e:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=f"Failed to preview sheet: {str(e)}"
)
@router.post("/configure")
async def configure_import_mapping(
mapping_config: ImportMappingConfig,
request: Request,
user_id: int = Depends(get_current_user_id),
db: AsyncSession = Depends(get_db)
):
"""
Configure column mapping for import file.
"""
# Get import session
redis: RedisSessionStore = request.app.state.redis
session_data = await redis.get_import_session(mapping_config.import_session_id)
if not session_data or session_data.get("user_id") != user_id:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail="Import session not found"
)
# Build column mapping dict
column_mapping = {
m.source_column: m.target_field
for m in mapping_config.column_mappings
}
# Import metadata with mapping
importer = MetadataImporter()
try:
metadata_map = importer.import_with_mapping(
session_data["filepath"],
column_mapping,
sheet_name=mapping_config.sheet_name
)
# Store metadata in session
await redis.update_import_metadata(
mapping_config.import_session_id,
metadata_map
)
# Log action
await AuditLogRepository.log_action(
db,
user_id=user_id,
action="import_configure",
details=f"Configured import mapping: {len(metadata_map)} records"
)
return {
"success": True,
"message": f"Import configured with {len(metadata_map)} records"
}
except Exception as e:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=f"Failed to configure import: {str(e)}"
)