Compare commits
No commits in common. "fix/duplicate-files" and "main" have entirely different histories.
fix/duplicate-files
...
main
5 changed files with 19 additions and 77 deletions
14
.env.example
14
.env.example
|
|
@@ -12,17 +12,9 @@ ROOT_PATH=/solventum-image-metadata
|
|||
# === Azure AD / SSO ===
|
||||
AZURE_TENANT_ID=e519c2e6-bc6d-4fdf-8d9c-923c2f002385
|
||||
AZURE_CLIENT_ID=9079054c-9620-4757-a256-23413042f1ef
|
||||
# AZURE_CLIENT_SECRET is REQUIRED for server-side MSAL flow (get from Azure Portal > App > Certificates & secrets)
|
||||
AZURE_CLIENT_SECRET=
|
||||
# Must match Azure AD App Registration > Authentication > Redirect URIs EXACTLY (including /auth/callback path)
|
||||
# For production: https://ai-sandbox.oliver.solutions/solventum-image-metadata/auth/callback
|
||||
# For local dev: http://localhost:5001/auth/callback
|
||||
REDIRECT_URI=https://ai-sandbox.oliver.solutions/solventum-image-metadata/auth/callback
|
||||
|
||||
# Optional: Multi-tenant support - comma-separated list of allowed tenant IDs
|
||||
# Leave empty to allow any organizational tenant (after Azure Portal configuration)
|
||||
# Example: tenant-id-1,tenant-id-2,tenant-id-3
|
||||
ALLOWED_TENANT_IDS=
|
||||
# AZURE_CLIENT_SECRET is not needed (client-side MSAL.js flow)
|
||||
# Must match Azure AD App Registration > Authentication > SPA Redirect URIs exactly
|
||||
REDIRECT_URI=https://ai-sandbox.oliver.solutions/solventum-image-metadata/
|
||||
|
||||
# === OpenAI (optional — for AI metadata generation) ===
|
||||
OPENAI_API_KEY=
|
||||
|
|
|
|||
|
|
@@ -115,15 +115,7 @@ async def upload_files(
|
|||
"filename": filename,
|
||||
"file_type": file_type,
|
||||
})
|
||||
# Deduplicate results: replace existing entry with same filename
|
||||
existing_idx = next(
|
||||
(i for i, r in enumerate(results) if r.get("filename") == filename),
|
||||
None,
|
||||
)
|
||||
if existing_idx is not None:
|
||||
results[existing_idx] = file_result
|
||||
else:
|
||||
results.append(file_result)
|
||||
results.append(file_result)
|
||||
else:
|
||||
file_result = await metadata_service.process_uploaded_file(
|
||||
filepath=filepath,
|
||||
|
|
@@ -133,15 +125,7 @@ async def upload_files(
|
|||
import_map=import_map,
|
||||
)
|
||||
store.add_file_to_session(session_id, file_result)
|
||||
# Deduplicate results: replace existing entry with same filename
|
||||
existing_idx = next(
|
||||
(i for i, r in enumerate(results) if r.get("filename") == filename),
|
||||
None,
|
||||
)
|
||||
if existing_idx is not None:
|
||||
results[existing_idx] = file_result
|
||||
else:
|
||||
results.append(file_result)
|
||||
results.append(file_result)
|
||||
|
||||
except ValueError as e:
|
||||
results.append({"filename": upload_file.filename, "error": str(e)})
|
||||
|
|
|
|||
|
|
@@ -38,9 +38,14 @@ class FileService:
|
|||
user_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
filepath = user_dir / filename
|
||||
|
||||
# Overwrite if file already exists (user re-uploads same file).
|
||||
# Preserving original filename is critical for Excel metadata lookup.
|
||||
# Handle name collisions
|
||||
if filepath.exists():
|
||||
stem = filepath.stem
|
||||
suffix = filepath.suffix
|
||||
counter = 1
|
||||
while filepath.exists():
|
||||
filepath = user_dir / f"{stem}_{counter}{suffix}"
|
||||
counter += 1
|
||||
|
||||
# Stream to disk (handles large files without loading into memory)
|
||||
with open(filepath, "wb") as f:
|
||||
|
|
|
|||
|
|
@@ -105,11 +105,7 @@ class SessionStore:
|
|||
conn.close()
|
||||
|
||||
def add_file_to_session(self, session_id: str, file_entry: Dict[str, Any]):
|
||||
"""Add a processed file entry to a session.
|
||||
|
||||
If a file with the same filename already exists in the session,
|
||||
it is replaced (deduplication for re-uploaded files).
|
||||
"""
|
||||
"""Add a processed file entry to a session."""
|
||||
conn = self._get_conn()
|
||||
try:
|
||||
row = conn.execute(
|
||||
|
|
@@ -118,16 +114,7 @@ class SessionStore:
|
|||
).fetchone()
|
||||
if row:
|
||||
files = json.loads(row["files_json"])
|
||||
# Deduplicate: replace existing entry with same filename
|
||||
filename = file_entry.get("filename", "")
|
||||
existing_idx = next(
|
||||
(i for i, f in enumerate(files) if f.get("filename") == filename),
|
||||
None,
|
||||
)
|
||||
if existing_idx is not None:
|
||||
files[existing_idx] = file_entry
|
||||
else:
|
||||
files.append(file_entry)
|
||||
files.append(file_entry)
|
||||
conn.execute(
|
||||
"UPDATE file_sessions SET files_json = ? WHERE session_id = ?",
|
||||
(json.dumps(files, ensure_ascii=False), session_id),
|
||||
|
|
|
|||
34
src/auth.py
34
src/auth.py
|
|
@@ -165,11 +165,6 @@ class MicrosoftSSO:
|
|||
self.tenant_id = os.getenv('AZURE_TENANT_ID')
|
||||
self.redirect_uri = os.getenv('REDIRECT_URI', 'http://localhost:5001/auth/callback')
|
||||
|
||||
# Optional: Comma-separated list of allowed tenant IDs for multi-tenant auth
|
||||
# Example: "tenant-id-1,tenant-id-2,tenant-id-3"
|
||||
allowed_tenants = os.getenv('ALLOWED_TENANT_IDS', '')
|
||||
self.allowed_tenant_ids = [t.strip() for t in allowed_tenants.split(',') if t.strip()]
|
||||
|
||||
# Check if SSO is configured
|
||||
if not all([self.client_id, self.client_secret, self.tenant_id]):
|
||||
self.enabled = False
|
||||
|
|
@@ -178,16 +173,14 @@ class MicrosoftSSO:
|
|||
|
||||
try:
|
||||
import msal
|
||||
# Use specific tenant_id for single-tenant, or 'organizations' for multi-tenant
|
||||
# Single-tenant is more reliable and avoids client_id issues
|
||||
self.authority = f"https://login.microsoftonline.com/{self.tenant_id}"
|
||||
self.app = msal.ConfidentialClientApplication(
|
||||
client_id=self.client_id,
|
||||
self.client_id,
|
||||
authority=self.authority,
|
||||
client_credential=self.client_secret
|
||||
)
|
||||
self.enabled = True
|
||||
logger.info(f"Microsoft SSO initialized successfully (authority: {self.authority})")
|
||||
logger.info("Microsoft SSO initialized successfully")
|
||||
except ImportError:
|
||||
self.enabled = False
|
||||
logger.warning("Microsoft SSO not available (msal library not installed)")
|
||||
|
|
@@ -209,13 +202,11 @@ class MicrosoftSSO:
|
|||
return None
|
||||
|
||||
try:
|
||||
auth_url = self.app.get_authorization_request_url(
|
||||
return self.app.get_authorization_request_url(
|
||||
scopes=["User.Read"],
|
||||
state=state,
|
||||
redirect_uri=self.redirect_uri
|
||||
)
|
||||
logger.info(f"Generated auth URL with redirect_uri: {self.redirect_uri}")
|
||||
return auth_url
|
||||
except Exception as e:
|
||||
logger.error(f"Error generating auth URL: {e}")
|
||||
return None
|
||||
|
|
@@ -234,28 +225,11 @@ class MicrosoftSSO:
|
|||
return None
|
||||
|
||||
try:
|
||||
# Explicitly pass all parameters as named arguments
|
||||
result = self.app.acquire_token_by_authorization_code(
|
||||
code=auth_code,
|
||||
auth_code,
|
||||
scopes=["User.Read"],
|
||||
redirect_uri=self.redirect_uri
|
||||
)
|
||||
|
||||
# Check for errors in the result
|
||||
if result and 'error' in result:
|
||||
logger.error(f"Token acquisition error: {result.get('error')} - {result.get('error_description')}")
|
||||
return result
|
||||
|
||||
# Validate tenant if allowed_tenant_ids is configured
|
||||
if result and self.allowed_tenant_ids:
|
||||
user_tenant = result.get('id_token_claims', {}).get('tid')
|
||||
if user_tenant and user_tenant not in self.allowed_tenant_ids:
|
||||
logger.warning(f"User from unauthorized tenant: {user_tenant}")
|
||||
return {
|
||||
'error': 'unauthorized_tenant',
|
||||
'error_description': 'Your organization is not authorized to access this application'
|
||||
}
|
||||
|
||||
return result
|
||||
except Exception as e:
|
||||
logger.error(f"Error acquiring token: {e}")
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue