From 5de2003affe2e423ce30b9b1bc0e2b34bff4d832 Mon Sep 17 00:00:00 2001 From: michael Date: Mon, 3 Nov 2025 08:41:27 -0600 Subject: [PATCH] added SSO login via MSAL and PKCE, ready for deployment (theoretically) --- .env.example | 16 ++ .gitignore | 7 + .htaccess | 12 +- .user.ini | 8 + CLAUDE.md | 423 ++++++++++++++++++++++++++++++++++++++++++++++ README.md | 96 +++++++++-- auth.php | 114 +++++++++++++ auth_config.php | 124 ++++++++++++++ check_api.php | 6 + composer.json | 15 ++ config.php | 22 ++- download.php | 17 ++ index.php | 20 +++ login.php | 183 ++++++++++++++++++++ logout.php | 28 +++ process.php | 19 ++- style.css | 58 +++++++ test_download.php | 26 ++- 18 files changed, 1163 insertions(+), 31 deletions(-) create mode 100644 .env.example create mode 100644 .user.ini create mode 100644 CLAUDE.md create mode 100644 auth.php create mode 100644 auth_config.php create mode 100644 composer.json create mode 100644 login.php create mode 100644 logout.php diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..9e7abf8 --- /dev/null +++ b/.env.example @@ -0,0 +1,16 @@ +# Development Mode (set to true to bypass authentication for local testing) +DEV_MODE=false + +# Azure AD/MSAL Configuration +AZURE_CLIENT_ID=your_client_id_here +AZURE_AUTHORITY=https://login.microsoftonline.com/your_tenant_id_here +AZURE_REDIRECT_URI=https://yourdomain.com/voice2text/ + +# DeepL API Key +DEEPL_API_KEY=your_deepl_api_key_here + +# Python API Configuration +PYTHON_API_URL=http://localhost:5010 + +# Session Configuration (in seconds, default: 8 hours) +SESSION_TIMEOUT=28800 diff --git a/.gitignore b/.gitignore index 14b5e4d..e082309 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,13 @@ __pycache__/ *.so .Python +# PHP Dependencies +vendor/ +composer.lock + +# Environment variables +.env + # Output files outputs/*.txt outputs/*.vtt diff --git a/.htaccess b/.htaccess index c2ffc94..8ee265d 100644 --- a/.htaccess +++ b/.htaccess @@ -1,5 +1,7 @@ -php_value 
upload_max_filesize 350M -php_value post_max_size 350M -php_value max_execution_time 1200 -php_value max_input_time 1200 -php_value memory_limit 512M +# PHP settings moved to .user.ini for PHP-FPM compatibility (MAMP) +# If using mod_php, uncomment these lines: +# php_value upload_max_filesize 350M +# php_value post_max_size 350M +# php_value max_execution_time 1200 +# php_value max_input_time 1200 +# php_value memory_limit 512M diff --git a/.user.ini b/.user.ini new file mode 100644 index 0000000..1a5373e --- /dev/null +++ b/.user.ini @@ -0,0 +1,8 @@ +; PHP Configuration for Voice to Text Application +; Works with PHP-FPM (MAMP) + +upload_max_filesize = 350M +post_max_size = 350M +max_execution_time = 1200 +max_input_time = 1200 +memory_limit = 512M diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..ac7fc62 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,423 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Project Overview + +Voice to Text is a two-tier web application that transcribes audio files using OpenAI Whisper and optionally translates them using DeepL API. The application consists of: +- **Python Flask API** (backend): Handles transcription and translation +- **PHP web interface** (frontend): User interface, authentication, and request handling +- **Microsoft Azure AD SSO**: OAuth2 with PKCE flow for authentication + +## Development Setup + +### Initial Setup +```bash +# 1. Configure authentication +cp .env.example .env +# Edit .env with your Azure AD credentials + +# 2. Install PHP dependencies +composer install + +# 3. Install Python dependencies and create virtual environment +./setup.sh + +# 4. Start the Python API server +./start_api.sh + +# Or manually: +source venv/bin/activate +python api.py +``` + +The API runs on http://localhost:5010 by default. 
The PHP frontend should be served via MAMP, Apache, or any PHP-enabled web server with HTTPS enabled for production. + +### Testing the Application +```bash +# Check if Python API is running +curl http://localhost:5010/health + +# Or use the PHP diagnostic page +# Visit: check_api.php in browser + +# Test downloads +# Visit: test_download.php in browser +``` + +### Python Version Compatibility +- Recommended: Python 3.10 or 3.11 +- Supported: Python 3.8+ +- Warning: Python 3.12+ may have compatibility issues with some dependencies + +## Architecture + +### Three-Layer Design +The application uses a separation of concerns: +1. **Authentication Layer**: Microsoft Azure AD SSO with OAuth2 PKCE flow +2. **Python API (api.py)**: Computation-heavy tasks (Whisper transcription, DeepL translation) +3. **PHP Frontend**: User interface, session management, file handling, and proxying requests to Python API + +### Authentication Flow +``` +User Browser + ↓ +login.php (landing page) + ↓ (clicks "Sign in with Microsoft") +auth.php + ↓ (generates PKCE code_verifier & code_challenge) +Azure AD OAuth2 Authorization Endpoint + ↓ (user authenticates) +auth.php (callback) + ↓ (exchanges code + code_verifier for token) +Microsoft Graph API (/me) + ↓ (retrieves user info) +Session initialized: + - $_SESSION['authenticated'] = true + - $_SESSION['user_id'], ['user_name'], ['user_email'] + - $_SESSION['user_files'] = [] + ↓ +index.php (main app) +``` + +### Request Flow (After Authentication) +``` +User Browser (index.php) + ↓ (jQuery AJAX + FormData) +process.php + ↓ (auth check via isAuthenticated()) + ↓ (cURL to Python API) +api.py (Flask) + ↓ (Whisper transcription) + ↓ (Optional: DeepL translation) +outputs/ directory + ↓ (files tracked in $_SESSION['user_files']) +download.php + ↓ (auth + ownership check) +User Browser (download) +``` + +### Key Components + +**Authentication & Configuration Files**: + +**auth_config.php** (Authentication & Environment Configuration): +- Loads 
environment variables from .env using vlucas/phpdotenv +- Defines Azure AD configuration constants (CLIENT_ID, AUTHORITY, REDIRECT_URI) +- Configures secure session settings (httponly, secure, samesite) +- Provides helper functions: + - `isAuthenticated()`: Check if user is logged in and session is valid + - `requireAuth()`: Redirect to login.php if not authenticated + - `getCurrentUser()`: Get current user info from session + +**login.php** (Landing Page): +- First page users see when not authenticated +- Displays "Sign in with Microsoft" button with Microsoft logo +- Matches black/gold theme of main application +- Redirects to index.php if already authenticated + +**auth.php** (OAuth2 PKCE Handler): +- Implements OAuth2 Authorization Code flow with PKCE +- Step 1: Generates code_verifier (64-char random string) and code_challenge (SHA256 hash) +- Step 2: Redirects to Azure AD with PKCE parameters +- Step 3: Handles callback, verifies state (CSRF protection) +- Step 4: Exchanges authorization code + code_verifier for access token +- Step 5: Calls Microsoft Graph API to get user info +- Step 6: Initializes session with user data and empty file list +- Step 7: Redirects to index.php + +**logout.php** (Session Destruction): +- Clears all session variables +- Destroys session cookie +- Destroys session +- Redirects to login.php + +**config.php** (Configuration Loader): +- Requires auth_config.php +- Starts session if not already started +- All configuration now loaded from .env via auth_config.php + +**API & Core Files**: + +**api.py** (Flask REST API - Port 5010): +- `/health`: Health check endpoint +- `/transcribe`: Main endpoint - accepts audio file, format (txt/vtt/srt), translation settings +- `/download/`: Serves transcribed files +- Whisper model loaded once at startup and kept in memory +- DeepL translator initialized at startup +- Generates both original and translated files when translation is enabled + +**process.php** (PHP request handler): +- **Auth 
check**: Calls isAuthenticated() - returns error if not authenticated +- Receives multipart/form-data from frontend +- Validates file size (350MB limit) +- Forwards to Python API via cURL +- **File tracking**: Adds original and translated filenames to $_SESSION['user_files'] +- Returns formatted HTML for display (truncated at 10,000 chars for preview) +- Provides download links for full files + +**index.php** (Main UI): +- **Auth required**: Calls requireAuth() at top - redirects to login.php if not authenticated +- Displays user header with name, email, and logout button +- jQuery-based AJAX file upload +- Format selector (txt/vtt/srt) +- Translation toggle with language selector (30+ languages) +- Real-time progress bar during processing +- In-page preview of transcriptions +- Download buttons for original and translated files + +**download.php** (File server): +- **Auth required**: Calls isAuthenticated() - returns 401 if not authenticated +- **Ownership check**: Verifies requested file is in $_SESSION['user_files'] +- Returns 403 Forbidden if user doesn't own the file +- Logs unauthorized download attempts +- Serves files from outputs/ directory +- Security: Uses basename() to prevent directory traversal +- Sets proper Content-Type headers based on file extension + +**.env** (Environment Variables): +- AZURE_CLIENT_ID: Azure AD application client ID +- AZURE_AUTHORITY: Azure AD authority URL with tenant ID +- AZURE_REDIRECT_URI: OAuth2 redirect URI (must match Azure AD config) +- DEEPL_API_KEY: DeepL API key for translation +- PYTHON_API_URL: Python Flask API endpoint (default: http://localhost:5010) +- SESSION_TIMEOUT: Session timeout in seconds (default: 28800 = 8 hours) + +## Output Formats + +### Text (.txt) +Plain text transcription - full text of audio + +### VTT (.vtt) +WebVTT subtitle format with timestamps: +``` +WEBVTT + +00:00:00.000 --> 00:00:05.123 +First segment text + +00:00:05.123 --> 00:00:10.456 +Second segment text +``` + +### SRT (.srt) 
+SubRip subtitle format with timestamps: +``` +1 +00:00:00,000 --> 00:00:05,123 +First segment text + +2 +00:00:05,123 --> 00:00:10,456 +Second segment text +``` + +**Key Difference**: VTT uses period (.) for milliseconds, SRT uses comma (,) + +## Whisper Models + +Available models (edit api.py line 26 to change): +- `tiny`: Fastest, least accurate +- `base`: Default - good balance +- `small`: Better accuracy, slower +- `medium`: High accuracy, much slower +- `large`: Best accuracy, very slow + +Changing the model: +```python +# In api.py line 26: +model = whisper.load_model("small") # Change from "base" to desired model +``` + +## File Size and Timeout Limits + +- **Maximum file size**: 350MB (configured in .user.ini and process.php) +- **Processing timeout**: 5 minutes (300 seconds in process.php) +- **PHP settings** (.user.ini; .htaccess only when running under mod_php): + - upload_max_filesize: 350M + - post_max_size: 350M + - max_execution_time: 1200 seconds + - memory_limit: 512M + +## Translation + +Translation is powered by DeepL API: +- Supports 30+ languages +- Translation happens after transcription +- Original language is auto-detected by Whisper +- Both original and translated files are saved with suffixes: + - `filename_original.{ext}` + - `filename_translated.{ext}` + +## File Handling + +### outputs/ Directory +All transcribed files are saved here. 
The directory: +- Created automatically by setup.sh or api.py +- Should have write permissions (777 in production) +- Files are named: `{original_filename}_original.{ext}` and `{original_filename}_translated.{ext}` +- Not tracked by git (see .gitignore) + +### Temporary Files +- Audio files are saved temporarily during processing +- Cleaned up automatically after transcription (api.py line 186-187) + +## Authentication & Security + +### Microsoft Azure AD SSO +- **OAuth2 with PKCE**: Uses Proof Key for Code Exchange (RFC 7636) +- **No client secret needed**: PKCE allows public clients to authenticate securely +- **Code verifier**: 64-character random string generated for each auth request +- **Code challenge**: SHA256 hash of code_verifier, sent to Azure AD +- **Token exchange**: Authorization code + code_verifier exchanged for access token + +### Session-Based File Access Control +- **Session tracking**: Files tracked in $_SESSION['user_files'] array +- **Upload tracking**: When user transcribes audio, both original and translated filenames added to their session +- **Download validation**: download.php checks if requested file is in user's session before serving +- **Session timeout**: Configurable (default: 8 hours) - after timeout, user loses access to their files +- **Trade-off**: Files remain in outputs/ directory but become inaccessible after session expires + +### Session Security +- **httponly**: Session cookies not accessible via JavaScript (XSS protection) +- **secure**: Session cookies only transmitted over HTTPS (production) +- **samesite**: Set to 'Lax' to prevent CSRF attacks +- **strict_mode**: Rejects uninitialized session IDs +- **Session regeneration**: Session ID regenerated after login to prevent session fixation +- **CSRF protection**: OAuth2 state parameter validates callback authenticity + +### File Security +- **basename()**: Prevents directory traversal attacks in download.php +- **File size validation**: 350MB limit enforced in both 
.user.ini and process.php +- **Ownership logging**: Unauthorized download attempts logged with user ID +- **No file type validation**: Relies on FFmpeg to handle/reject unsupported formats + +### Environment Variables +- **.env file**: All sensitive credentials stored in .env (not in git) +- **API keys**: DeepL and Azure credentials loaded from environment +- **.gitignore**: .env explicitly excluded from version control + +### Production Considerations +1. **HTTPS required**: Secure cookies require HTTPS in production +2. **File cleanup**: Old files in outputs/ should be cleaned via cron job +3. **Session storage**: Consider Redis/Memcached for multi-server deployments +4. **Rate limiting**: No rate limiting currently - consider adding for production +5. **Logging**: Unauthorized attempts logged - monitor for suspicious activity + +## Session-Only File Tracking + +### How It Works +Files are tracked in the PHP session (`$_SESSION['user_files']` array) rather than a database. This approach was chosen for simplicity. + +### File Lifecycle +1. User uploads audio → process.php transcribes → adds filenames to $_SESSION['user_files'] +2. User can download files as long as session is active +3. Session expires or user logs out → files become inaccessible +4. Files remain in outputs/ directory but cannot be downloaded + +### Trade-offs +**Pros:** +- Simple implementation - no database needed +- Automatic "expiration" via session timeout +- Works well for temporary transcription tasks + +**Cons:** +- Files inaccessible after session expires +- Can't access files across multiple devices/browsers +- Orphaned files accumulate in outputs/ directory + +### Future Upgrades +To implement persistent file ownership: +1. Add SQLite/MySQL database with `users` and `files` tables +2. Store file ownership in database instead of session +3. Modify download.php to check database ownership +4. 
Consider filename-based ownership (encode user_id in filename) + +## Common Development Tasks + +### Changing Whisper Model +Edit api.py line 26 and restart the API: +```bash +# After editing +./start_api.sh +``` + +### Adjusting File Size Limits +Edit all that apply: +1. `.user.ini` (or `.htaccess` under mod_php) - PHP upload limits +2. `process.php` line 12 - PHP validation +3. If using production Apache: `/etc/php/.../php.ini` + +### Testing Authentication Flow +1. Clear your browser cookies +2. Visit the application root +3. Should redirect to login.php +4. Click "Sign in with Microsoft" +5. Authenticate with Azure AD +6. Should redirect back to index.php with user header visible + +### Testing Transcription +**Via Web UI:** +1. Log in via login.php +2. Upload a test audio file +3. Check that files appear in test_download.php + +**Via API directly (bypasses auth):** +```bash +curl -X POST http://localhost:5010/transcribe \ + -F "audio=@test.mp3" \ + -F "format=txt" \ + -F "translate=0" +``` + +### Testing File Access Control +1. Upload a file while logged in +2. Note the filename from the download link +3. Log out +4. Try to access download.php?file=filename directly +5. Should receive 401 Unauthorized + +### Adding New Languages +Edit the language selector in index.php (lines 41-73) to add DeepL-supported languages. + +## Production Deployment + +See README.md sections: +- "Production Deployment (Apache)" for full Apache setup +- "Setup Python API as Systemd Service" for running API as a service +- "Monitoring and Maintenance" for logs and cleanup + +Key production considerations: +1. Set up systemd service for Python API (voice2text-api.service) +2. Configure Apache virtual host +3. Set proper file permissions (www-data:www-data) +4. Set up log rotation +5. Configure cron job to clean old files in outputs/ +6. Keep API keys in environment variables (.env), not in source files + +## Debugging + +### API Not Responding +1. Check if API is running: `curl http://localhost:5010/health` +2. Check process: `ps aux | grep python` +3. 
Test Python directly: `source venv/bin/activate && python api.py` +4. Visit check_api.php in browser for diagnostic info + +### Upload Fails +1. Check outputs/ directory exists and is writable +2. Verify file size is under 350MB +3. Check Apache/PHP error logs +4. Verify FFmpeg is installed: `which ffmpeg` + +### Transcription Errors +1. Check Python API logs (stdout/stderr) +2. Verify audio file format is supported by FFmpeg +3. Test with a small sample file first +4. Check available disk space in /tmp + +## Code Style Notes + +- **Python**: Uses Flask conventions, logging via Python logging module +- **PHP**: Uses procedural style, cURL for HTTP requests +- **JavaScript**: jQuery-based, uses AJAX for async file upload +- **CSS**: BEM-like naming, black/gold theme with animations diff --git a/README.md b/README.md index a917434..14318bc 100644 --- a/README.md +++ b/README.md @@ -22,10 +22,36 @@ A web application that converts audio files to text using OpenAI's Whisper model - PHP 7.4 or higher - MAMP or Apache server - FFmpeg (for audio processing) +- Composer (for PHP dependencies) +- Microsoft Azure AD application (for SSO authentication) ## Installation -### 1. Install FFmpeg +### 1. Configure Authentication + +This application uses Microsoft Azure AD for Single Sign-On (SSO) authentication with PKCE flow. + +**Step 1: Copy and configure environment file** +```bash +cp .env.example .env +``` + +**Step 2: Edit `.env` file with your Azure AD credentials:** +```env +AZURE_CLIENT_ID=your_client_id_here +AZURE_AUTHORITY=https://login.microsoftonline.com/your_tenant_id_here +AZURE_REDIRECT_URI=https://yourdomain.com/voice2text/ +DEEPL_API_KEY=your_deepl_api_key_here +PYTHON_API_URL=http://localhost:5010 +SESSION_TIMEOUT=28800 +``` + +**Step 3: Install PHP dependencies** +```bash +composer install +``` + +### 2. Install FFmpeg **macOS:** ```bash @@ -41,7 +67,7 @@ sudo apt install ffmpeg **Windows:** Download from https://ffmpeg.org/download.html -### 2. 
Setup Python Environment +### 3. Setup Python Environment Run the setup script: ```bash @@ -54,7 +80,7 @@ This will: - Install all dependencies (Flask, Whisper, etc.) - Create the outputs directory -### 3. Start the API Server +### 4. Start the API Server ```bash chmod +x start_api.sh @@ -69,19 +95,22 @@ python api.py The API will run on http://localhost:5010 -### 4. Configure Web Server +### 5. Configure Web Server Ensure your MAMP/Apache server points to this directory and PHP is enabled. ## Usage -1. Start the Python API server (see step 3 above) -2. Open the web application in your browser -3. Select output format (Text/VTT/SRT) -4. (Optional) Enable translation and select target language -5. Upload an audio file (max 350MB) -6. Wait for processing -7. Download original and/or translated transcription +1. Start the Python API server (see step 4 above) +2. Open the web application in your browser (you'll see a login page) +3. Click "Sign in with Microsoft" and authenticate with your Microsoft account +4. After authentication, you'll be redirected to the main application +5. Select output format (Text/VTT/SRT) +6. (Optional) Enable translation and select target language +7. Upload an audio file (max 350MB) +8. Wait for processing +9. Download original and/or translated transcription +10. 
Your files are associated with your session and only accessible to you ### Translation @@ -139,25 +168,58 @@ To change the model, edit `api.py` line 24: model = whisper.load_model("base") # Change to desired model ``` +## Authentication & Security + +### Microsoft Azure AD SSO +- Uses OAuth2 with PKCE (Proof Key for Code Exchange) flow +- Secure authentication without client secrets +- Session-based file access control +- Users can only download files they've uploaded in their current session + +### Session Management +- Secure session cookies (httponly, secure, samesite) +- Configurable session timeout (default: 8 hours) +- Session regeneration after login for security + +### File Access Control +- Files are tracked per-user session in `$_SESSION['user_files']` +- Download attempts are validated against user's file list +- Unauthorized access attempts are logged and blocked + +### Important Security Notes +- Ensure your `.env` file is never committed to git (it's in `.gitignore`) +- Use HTTPS in production for secure cookie transmission +- Files become inaccessible after session expires (files remain in `outputs/` but can't be downloaded) +- Consider setting up a cron job to clean old files from `outputs/` directory + ## File Structure ``` . 
├── api.py # Python Flask API with Whisper & DeepL -├── index.php # Frontend interface -├── process.php # PHP request handler -├── download.php # File download handler -├── check_api.php # API status checker -├── test_download.php # Download functionality tester -├── config.php # Configuration (API URLs, keys) +├── login.php # Landing page with Microsoft SSO +├── auth.php # OAuth2 PKCE authentication handler +├── logout.php # Session destruction handler +├── index.php # Main application interface (auth required) +├── process.php # PHP request handler (auth required) +├── download.php # File download handler (auth + ownership check) +├── check_api.php # API status checker (auth required) +├── test_download.php # Download functionality tester (auth required) +├── config.php # Configuration loader +├── auth_config.php # Authentication & environment config ├── style.css # Black/gold theme styles +├── .env # Environment variables (NOT in git) +├── .env.example # Environment variables template ├── .htaccess # PHP upload limits ├── .gitignore # Git ignore rules +├── composer.json # PHP dependencies ├── requirements.txt # Python dependencies ├── setup.sh # Setup script ├── start_api.sh # API start script ├── README.md # This file +├── CLAUDE.md # Claude Code guidance ├── outputs/ # Transcribed files directory +├── vendor/ # Composer dependencies (NOT in git) └── venv/ # Python virtual environment ``` diff --git a/auth.php b/auth.php new file mode 100644 index 0000000..6e43c4d --- /dev/null +++ b/auth.php @@ -0,0 +1,114 @@ + AZURE_CLIENT_ID, + 'redirectUri' => AZURE_REDIRECT_URI, + 'urlAuthorize' => AZURE_AUTHORITY . '/oauth2/v2.0/authorize', + 'urlAccessToken' => AZURE_AUTHORITY . 
'/oauth2/v2.0/token', + 'urlResourceOwnerDetails' => 'https://graph.microsoft.com/v1.0/me', + 'scopes' => 'openid profile email User.Read' +]); + +// Step 1: No authorization code - initiate OAuth flow +if (!isset($_GET['code'])) { + + // Generate PKCE code verifier and challenge + $codeVerifier = bin2hex(random_bytes(32)); // 64-character random string + $codeChallenge = rtrim(strtr(base64_encode(hash('sha256', $codeVerifier, true)), '+/', '-_'), '='); + + // Store code verifier in session for later use + $_SESSION['oauth2_code_verifier'] = $codeVerifier; + + // Generate authorization URL with PKCE parameters + $authorizationUrl = $provider->getAuthorizationUrl([ + 'scope' => 'openid profile email User.Read', + 'code_challenge' => $codeChallenge, + 'code_challenge_method' => 'S256', + 'response_type' => 'code', + 'response_mode' => 'query' + ]); + + // Store state for CSRF protection + $_SESSION['oauth2state'] = $provider->getState(); + + // Redirect to Azure AD + header('Location: ' . $authorizationUrl); + exit; +} + +// Step 2: Authorization code received - exchange for access token +elseif (isset($_GET['code'])) { + + // Verify state to prevent CSRF attacks + if (empty($_GET['state']) || (isset($_SESSION['oauth2state']) && $_GET['state'] !== $_SESSION['oauth2state'])) { + unset($_SESSION['oauth2state']); + unset($_SESSION['oauth2_code_verifier']); + die('Invalid state. 
Possible CSRF attack.'); + } + + try { + // Retrieve code verifier from session + if (!isset($_SESSION['oauth2_code_verifier'])) { + die('Code verifier not found in session.'); + } + + $codeVerifier = $_SESSION['oauth2_code_verifier']; + + // Exchange authorization code for access token with PKCE + $accessToken = $provider->getAccessToken('authorization_code', [ + 'code' => $_GET['code'], + 'code_verifier' => $codeVerifier + ]); + + // Get user information from Microsoft Graph API + $request = $provider->getAuthenticatedRequest( + 'GET', + 'https://graph.microsoft.com/v1.0/me', + $accessToken->getToken() + ); + + $client = new \GuzzleHttp\Client(); + $response = $client->send($request); + $userData = json_decode($response->getBody(), true); + + // Store user information in session + $_SESSION['authenticated'] = true; + $_SESSION['user_id'] = $userData['id']; + $_SESSION['user_name'] = $userData['displayName'] ?? $userData['userPrincipalName']; + $_SESSION['user_email'] = $userData['userPrincipalName'] ?? $userData['mail']; + $_SESSION['access_token'] = $accessToken->getToken(); + $_SESSION['last_activity'] = time(); + + // Initialize user files array for tracking uploads + $_SESSION['user_files'] = []; + + // Clean up temporary session variables + unset($_SESSION['oauth2state']); + unset($_SESSION['oauth2_code_verifier']); + + // Regenerate session ID for security + session_regenerate_id(true); + + // Redirect to main application + header('Location: index.php'); + exit; + + } catch (\League\OAuth2\Client\Provider\Exception\IdentityProviderException $e) { + // Handle authentication errors + die('Authentication failed: ' . htmlspecialchars($e->getMessage())); + } catch (\Exception $e) { + // Handle other errors + die('An error occurred: ' . 
htmlspecialchars($e->getMessage())); + } +} diff --git a/auth_config.php b/auth_config.php new file mode 100644 index 0000000..ad14464 --- /dev/null +++ b/auth_config.php @@ -0,0 +1,124 @@ +load(); + +// Development Mode Configuration +define('DEV_MODE', filter_var($_ENV['DEV_MODE'] ?? false, FILTER_VALIDATE_BOOLEAN)); + +// Azure AD Configuration +define('AZURE_CLIENT_ID', $_ENV['AZURE_CLIENT_ID'] ?? ''); +define('AZURE_AUTHORITY', $_ENV['AZURE_AUTHORITY'] ?? ''); +define('AZURE_REDIRECT_URI', $_ENV['AZURE_REDIRECT_URI'] ?? ''); + +// Extract tenant ID from authority URL +if (AZURE_AUTHORITY) { + preg_match('/\/([^\/]+)$/', AZURE_AUTHORITY, $matches); + define('AZURE_TENANT_ID', $matches[1] ?? 'common'); +} else { + define('AZURE_TENANT_ID', 'common'); +} + +// Python API Configuration +define('PYTHON_API_URL', $_ENV['PYTHON_API_URL'] ?? 'http://localhost:5010'); + +// DeepL API Configuration +define('DEEPL_API_KEY', $_ENV['DEEPL_API_KEY'] ?? ''); + +// Session Configuration +define('SESSION_TIMEOUT', (int)($_ENV['SESSION_TIMEOUT'] ?? 28800)); // Default: 8 hours + +// Configure secure session settings (only if session hasn't started yet) +if (session_status() === PHP_SESSION_NONE) { + ini_set('session.cookie_httponly', '1'); + // Only require secure cookies in production (not in dev mode on localhost) + ini_set('session.cookie_secure', DEV_MODE ? '0' : '1'); + ini_set('session.cookie_samesite', 'Lax'); + ini_set('session.use_strict_mode', '1'); + ini_set('session.gc_maxlifetime', SESSION_TIMEOUT); +} + +/** + * Check if user is authenticated + * @return bool + */ +function isAuthenticated() { + if (session_status() === PHP_SESSION_NONE) { + session_start(); + } + + // In dev mode, auto-authenticate with mock user + if (DEV_MODE) { + if (!isset($_SESSION['authenticated']) || !$_SESSION['authenticated']) { + // Initialize dev mode session with mock user + $_SESSION['authenticated'] = true; + $_SESSION['user_id'] = 'dev-user-' . 
uniqid(); + $_SESSION['user_name'] = 'Dev User (Local)'; + $_SESSION['user_email'] = 'dev@localhost'; + $_SESSION['last_activity'] = time(); + $_SESSION['user_files'] = []; + } + return true; + } + + // Check if user is logged in + if (!isset($_SESSION['authenticated']) || !$_SESSION['authenticated']) { + return false; + } + + // Check session timeout + if (isset($_SESSION['last_activity']) && (time() - $_SESSION['last_activity'] > SESSION_TIMEOUT)) { + session_unset(); + session_destroy(); + return false; + } + + // Update last activity time + $_SESSION['last_activity'] = time(); + + return true; +} + +/** + * Require authentication - redirect to login if not authenticated + */ +function requireAuth() { + // In dev mode, authentication is auto-handled by isAuthenticated() + if (DEV_MODE) { + isAuthenticated(); // This will auto-create the session + return; + } + + if (!isAuthenticated()) { + header('Location: login.php'); + exit; + } +} + +/** + * Get current user information + * @return array|null + */ +function getCurrentUser() { + if (session_status() === PHP_SESSION_NONE) { + session_start(); + } + + if (!isAuthenticated()) { + return null; + } + + return [ + 'id' => $_SESSION['user_id'] ?? null, + 'name' => $_SESSION['user_name'] ?? null, + 'email' => $_SESSION['user_email'] ?? null + ]; +} diff --git a/check_api.php b/check_api.php index 7d1a866..985a33f 100644 --- a/check_api.php +++ b/check_api.php @@ -4,7 +4,13 @@ */ require_once 'config.php'; +// Require authentication +requireAuth(); + +$user = getCurrentUser(); + echo "

API Status Check

"; +echo "

Logged in as: " . htmlspecialchars($user['name']) . " (" . htmlspecialchars($user['email']) . ")

"; // Check if Python API is responding $ch = curl_init(PYTHON_API_URL . '/health'); diff --git a/composer.json b/composer.json new file mode 100644 index 0000000..37e30d6 --- /dev/null +++ b/composer.json @@ -0,0 +1,15 @@ +{ + "name": "voice2text/app", + "description": "Voice to Text application with Microsoft SSO", + "type": "project", + "require": { + "php": ">=7.4", + "vlucas/phpdotenv": "^5.5", + "league/oauth2-client": "^2.7" + }, + "autoload": { + "psr-4": { + "Voice2Text\\": "src/" + } + } +} diff --git a/config.php b/config.php index eabdee6..bbf09ca 100644 --- a/config.php +++ b/config.php @@ -1,13 +1,21 @@ @@ -13,6 +19,20 @@ require_once 'config.php';
+ +
+ 🔧 DEV MODE ACTIVE - Authentication Bypassed +
+ + +
+ + Logout +
+
diff --git a/login.php b/login.php new file mode 100644 index 0000000..64f5706 --- /dev/null +++ b/login.php @@ -0,0 +1,183 @@ + + + + + + + Sign In - Voice to Text + + + + + + + diff --git a/logout.php b/logout.php new file mode 100644 index 0000000..9f954e1 --- /dev/null +++ b/logout.php @@ -0,0 +1,28 @@ + false, 'error' => 'Authentication required. Please log in.']); + exit; +} + if ($_SERVER['REQUEST_METHOD'] === 'POST' && isset($_FILES['voiceFile'])) { $file = $_FILES['voiceFile']; $outputFormat = isset($_POST['outputFormat']) ? $_POST['outputFormat'] : 'txt'; @@ -40,6 +46,14 @@ if ($_SERVER['REQUEST_METHOD'] === 'POST' && isset($_FILES['voiceFile'])) { $data = json_decode($response, true); if (isset($data['success']) && $data['success']) { + // Track file ownership in session + if (!isset($_SESSION['user_files'])) { + $_SESSION['user_files'] = []; + } + + // Add original file to user's file list + $_SESSION['user_files'][] = $data['filename']; + // Return content for display and download links $downloadUrl = 'download.php?file=' . urlencode($data['filename']); @@ -62,6 +76,9 @@ if ($_SERVER['REQUEST_METHOD'] === 'POST' && isset($_FILES['voiceFile'])) { // Add translated content if available if (isset($data['translated_filename'])) { + // Add translated file to user's file list + $_SESSION['user_files'][] = $data['translated_filename']; + $response['translatedFileUrl'] = 'download.php?file=' . 
urlencode($data['translated_filename']); $response['translatedFilename'] = $data['translated_filename']; diff --git a/style.css b/style.css index 19ed5e9..2e1edf1 100755 --- a/style.css +++ b/style.css @@ -29,6 +29,64 @@ input, button, textarea, select, label { animation: fadeIn 0.5s ease-in; } +.dev-mode-banner { + background: #ff9800; + color: #000; + padding: 10px 20px; + border-radius: 8px; + text-align: center; + font-weight: 600; + font-size: 14px; + margin-bottom: 20px; + border: 2px solid #f57c00; +} + +.user-header { + display: flex; + justify-content: space-between; + align-items: center; + padding: 15px 20px; + background: #0a0a0a; + border-radius: 12px; + border: 1px solid #333; + margin-bottom: 25px; +} + +.user-info { + display: flex; + flex-direction: column; + gap: 4px; +} + +.user-name { + color: #FFC407; + font-weight: 600; + font-size: 15px; +} + +.user-email { + color: #999; + font-size: 13px; +} + +.logout-btn { + padding: 8px 20px; + background: transparent; + color: #FFC407; + border: 2px solid #FFC407; + border-radius: 20px; + text-decoration: none; + font-weight: 600; + font-size: 14px; + transition: all 0.3s ease; +} + +.logout-btn:hover { + background: #FFC407; + color: #000; + transform: translateY(-1px); +} + @keyframes fadeIn { from { opacity: 0; diff --git a/test_download.php b/test_download.php index 7377142..d3a0c97 100644 --- a/test_download.php +++ b/test_download.php @@ -2,10 +2,34 @@ /** * Test download functionality */ +require_once 'config.php'; + +// Require authentication +requireAuth(); + +$user = getCurrentUser(); + +echo "

Download Test

"; +echo "

Logged in as: " . htmlspecialchars($user['name']) . " (" . htmlspecialchars($user['email']) . ")

"; + +// Show user's accessible files +echo "

Your Files (accessible for download):

"; +if (isset($_SESSION['user_files']) && count($_SESSION['user_files']) > 0) { + echo "
    "; + foreach ($_SESSION['user_files'] as $file) { + echo "
  • "; + echo "" . htmlspecialchars($file) . "
    "; + echo "Download"; + echo "

  • "; + } + echo "
"; +} else { + echo "

No files uploaded yet in this session.

"; +} // List all files in outputs directory $outputDir = __DIR__ . '/outputs/'; -echo "

Files in outputs directory:

"; +echo "

All Files in outputs directory:

"; echo "
    "; if (is_dir($outputDir)) {