'Invalid document_key format']); exit; } // Make sure we have the target blob name if (!isset($documentKeyData['target_blob'])) { logMessage("Missing target_blob in document_key", 'ERROR'); echo json_encode(['error' => 'Missing target blob information']); exit; } $targetBlobName = $documentKeyData['target_blob']; // Set timeout for downloads set_time_limit(60); // Variable to store file content $fileContent = false; // Initialize Azure Storage Helper $azureStorage = new AzureStorageHelper(); // Before downloading, check the status of the translation $statusUrl = MS_API_ENDPOINT . "/translator/document/batches/$document_id?api-version=" . MS_API_VERSION; $ch = curl_init(); curl_setopt($ch, CURLOPT_URL, $statusUrl); curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); curl_setopt($ch, CURLOPT_HTTPHEADER, [ 'Ocp-Apim-Subscription-Key: ' . MS_API_KEY, 'Ocp-Apim-Subscription-Region: ' . MS_API_REGION ]); $response = curl_exec($ch); $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE); curl_close($ch); if ($httpCode !== 200 || $response === false) { logMessage("Failed to check translation status. HTTP Code: $httpCode", 'ERROR'); } else { $statusData = json_decode($response, true); if (isset($statusData['status']) && strtolower($statusData['status']) !== 'succeeded') { logMessage("Translation status is not Succeeded: " . $statusData['status'] . ". Attempting download anyway.", 'WARNING'); } else { logMessage("Translation has Succeeded status. 
Proceeding with download."); } } // DIRECT APPROACH: List all files in the Azure translated container and match to our target logMessage("USING DIRECT AZURE LISTING to find files for document ID: $document_id"); // Extract the target language and other needed info $fileExtension = ''; if (preg_match('/\.([^\.]+)$/', $original_filename, $extMatches)) { $fileExtension = strtolower($extMatches[1]); } logMessage("Original filename: $original_filename, Extension: $fileExtension, Target Language: $target_lang"); // Get info from document key $documentKeyData = json_decode($document_key, true); $sourceBlobName = isset($documentKeyData['source_blob']) ? $documentKeyData['source_blob'] : ''; $targetBlobName = isset($documentKeyData['target_blob']) ? $documentKeyData['target_blob'] : ''; logMessage("Source blob: $sourceBlobName, Target blob: $targetBlobName"); // Extract the basic filename without our added prefix $baseFilename = ''; if (preg_match('/^.*?\-([A-Z]+)_(.+)$/', $targetBlobName, $matches)) { $baseFilename = $matches[2]; // This is just the original filename logMessage("Base filename extracted from target blob: $baseFilename"); } // List all files in the translated container $sasToken = defined('AZURE_STORAGE_SAS_TOKEN') ? 
AZURE_STORAGE_SAS_TOKEN : ''; $accountName = 'opticaltranslations'; // Hardcoding for simplicity $containerUrl = "https://$accountName.blob.core.windows.net/translated-documents?restype=container&comp=list&$sasToken"; logMessage("Listing all files in translated container"); $ch = curl_init(); curl_setopt($ch, CURLOPT_URL, $containerUrl); curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false); $listResponse = curl_exec($ch); $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE); curl_close($ch); $fileContent = false; $directDownloadUrl = null; if ($listResponse !== false && $httpCode < 400) { logMessage("Successfully listed container contents"); // Parse XML $xml = simplexml_load_string($listResponse); if ($xml && isset($xml->Blobs->Blob)) { $potentialMatches = []; $exactMatches = []; // Go through all blobs and find potential matches foreach ($xml->Blobs->Blob as $blob) { $name = (string)$blob->Name; $lastModified = (string)$blob->Properties->{'Last-Modified'}; $timestamp = strtotime($lastModified); $size = (int)$blob->Properties->{'Content-Length'}; logMessage("Found blob: $name (size: $size bytes, modified: $lastModified)"); // Check for exact match - this is the file we uploaded if ($name === $targetBlobName) { logMessage("EXACT MATCH FOUND: $name"); $exactMatches[$name] = $timestamp; } // Check for matching language code if ((strpos($name, strtolower($target_lang)) !== false || strpos($name, strtoupper($target_lang)) !== false) && $size > 0) { logMessage("Language match found: $name"); $potentialMatches[$name] = $timestamp; } // Check for matching original filename (ignoring language prefix) if (!empty($baseFilename) && strpos($name, $baseFilename) !== false && $size > 0) { logMessage("Filename match found: $name"); $potentialMatches[$name] = $timestamp; } // Check for matching file extension if (!empty($fileExtension) && (strpos($name, ".$fileExtension") !== false) && $size > 0) { logMessage("Extension match found: $name"); 
$potentialMatches[$name] = $timestamp; } } // Sort matches by timestamp, most recent first if (!empty($exactMatches)) { arsort($exactMatches); $matchedFile = key($exactMatches); logMessage("Using exact match file: $matchedFile"); } else if (!empty($potentialMatches)) { arsort($potentialMatches); $matchedFile = key($potentialMatches); logMessage("Using potential match file: $matchedFile"); } else { logMessage("No matching files found", 'WARNING'); $matchedFile = null; } // If we found a matching file, try to download it if ($matchedFile) { $encodedName = rawurlencode($matchedFile); $directDownloadUrl = "https://$accountName.blob.core.windows.net/translated-documents/$encodedName?$sasToken"; logMessage("Direct download URL: " . substr($directDownloadUrl, 0, 100) . "..."); // Get file content $ch = curl_init(); curl_setopt($ch, CURLOPT_URL, $directDownloadUrl); curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true); curl_setopt($ch, CURLOPT_TIMEOUT, 60); curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false); curl_setopt($ch, CURLOPT_BINARYTRANSFER, true); // Always use binary mode $fileContent = curl_exec($ch); $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE); $finalContentType = curl_getinfo($ch, CURLINFO_CONTENT_TYPE); logMessage("Download result: HTTP $httpCode, Content-Type: $finalContentType, Size: " . strlen($fileContent) . " bytes"); // Store detected content type $GLOBALS['detected_content_type'] = $finalContentType; $GLOBALS['matched_filename'] = $matchedFile; curl_close($ch); } } } else { logMessage("Failed to list container contents. HTTP: $httpCode", 'ERROR'); } if ($fileContent !== false) { logMessage("Successfully downloaded file. Size: " . strlen($fileContent) . " bytes"); } else { logMessage("Failed to download file using direct Azure listing. 
Falling back to normal method.", 'WARNING'); // Fall back to normal download method $fileContent = false; // Make document ID available globally for the fallback method $GLOBALS['document_id'] = $document_id; $GLOBALS['original_filename'] = $original_filename; // Use the original download method as fallback $fileContent = $azureStorage->downloadTranslatedFile($targetBlobName); } // We've already handled the fallback above if ($fileContent === false) { logMessage("Failed to download translated file from Azure Storage", 'ERROR'); // Try to download from source as fallback if (isset($documentKeyData['source_blob'])) { logMessage("Attempting to download source file as fallback", 'WARNING'); $sourceBlobName = $documentKeyData['source_blob']; $sourceContent = $azureStorage->downloadTranslatedFile($sourceBlobName); if ($sourceContent !== false) { logMessage("Downloaded source file as fallback"); $fileContent = "[TRANSLATION FAILED - SHOWING ORIGINAL CONTENT]\n\n" . $sourceContent; } else { echo json_encode(['error' => 'Failed to download translated file and source fallback']); exit; } } else { echo json_encode(['error' => 'Failed to download translated file']); exit; } } // If we have file content by this point, proceed with download if ($fileContent !== false) { // Create new filename with language code prefix $fileInfo = pathinfo($original_filename); $newFilename = strtoupper($target_lang) . '_' . $fileInfo['filename'] . '.' . 
$fileInfo['extension']; // Use the matched filename if available to determine content type $contentType = 'application/octet-stream'; // Default content type $extension = strtolower($fileInfo['extension']); // If we matched a file from Azure storage, use its extension if (isset($GLOBALS['matched_filename']) && !empty($GLOBALS['matched_filename'])) { $matchedFilename = $GLOBALS['matched_filename']; logMessage("Using matched filename for content type detection: $matchedFilename"); // Extract extension from matched filename if (preg_match('/\.([^\.]+)$/', $matchedFilename, $matchedExtMatches)) { $matchedExtension = strtolower($matchedExtMatches[1]); logMessage("Matched file extension: $matchedExtension"); // If the matched extension is different from requested, adjust the filename if ($matchedExtension !== $extension) { logMessage("Matched file extension ($matchedExtension) differs from requested ($extension). Adjusting filename.", 'WARNING'); $newFilename = $fileInfo['filename'] . '.' . $matchedExtension; } // Set content type based on matched extension switch ($matchedExtension) { case 'pdf': $contentType = 'application/pdf'; break; case 'docx': case 'doc': $contentType = 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'; break; case 'pptx': case 'ppt': $contentType = 'application/vnd.openxmlformats-officedocument.presentationml.presentation'; break; case 'xlsx': case 'xls': $contentType = 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'; break; case 'txt': $contentType = 'text/plain'; break; case 'html': case 'htm': $contentType = 'text/html'; break; } } } // Check if we detected a content type from the direct download else if (isset($GLOBALS['detected_content_type']) && !empty($GLOBALS['detected_content_type'])) { $contentType = $GLOBALS['detected_content_type']; logMessage("Using detected Content-Type from server: $contentType"); // If Microsoft translated to PDF, but we're requesting a different file type, // adjust the 
filename extension to match the content if ($contentType === 'application/pdf' && $extension !== 'pdf') { logMessage("Content is PDF but requested extension is $extension. Adjusting filename extension to .pdf", 'WARNING'); $newFilename = $fileInfo['filename'] . '.pdf'; } } // Fallback to the original extension else { logMessage("Fallback to original file extension: $extension"); switch ($extension) { case 'pdf': $contentType = 'application/pdf'; break; case 'docx': case 'doc': $contentType = 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'; break; case 'pptx': case 'ppt': $contentType = 'application/vnd.openxmlformats-officedocument.presentationml.presentation'; break; case 'xlsx': case 'xls': $contentType = 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'; break; case 'txt': $contentType = 'text/plain'; break; case 'html': case 'htm': $contentType = 'text/html'; break; } } // Basic content type detection from file content if ($contentType === 'application/octet-stream') { // Try to detect content type from file contents if (substr($fileContent, 0, 4) === '%PDF') { logMessage("PDF signature detected in content, setting content type to application/pdf", 'WARNING'); $contentType = 'application/pdf'; // If the file doesn't have .pdf extension, adjust it if ($extension !== 'pdf') { $newFilename = $fileInfo['filename'] . 
'.pdf'; } } else if (substr($fileContent, 0, 4) === 'PK\x03\x04') { // Office files are zip files if (strpos($original_filename, '.pptx') !== false || strpos($original_filename, '.ppt') !== false) { $contentType = 'application/vnd.openxmlformats-officedocument.presentationml.presentation'; } else if (strpos($original_filename, '.docx') !== false || strpos($original_filename, '.doc') !== false) { $contentType = 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'; } else if (strpos($original_filename, '.xlsx') !== false || strpos($original_filename, '.xls') !== false) { $contentType = 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'; } } } // For binary files like PDF and PowerPoint, ensure proper handling $isBinary = in_array($extension, ['pdf', 'pptx', 'ppt', 'docx', 'doc', 'xlsx', 'xls']); // Check for valid binary content for PDFs and PowerPoint files if ($isBinary) { // Basic binary file validation - check for file signatures if ($extension === 'pdf' && substr($fileContent, 0, 4) !== '%PDF') { logMessage("Warning: PDF file doesn't have proper signature", 'WARNING'); } else if (($extension === 'pptx' || $extension === 'docx') && substr($fileContent, 0, 4) !== 'PK\x03\x04') { logMessage("Warning: Office file doesn't have proper ZIP signature", 'WARNING'); } // Ensure we're not sending text content for binary files if (preg_match('/^\[EMERGENCY FALLBACK/', $fileContent)) { logMessage("Error: Emergency fallback content detected for binary file", 'ERROR'); // Still proceed with download, but notify in logs } // Log file size details logMessage("Binary file download: $contentType, size: " . strlen($fileContent) . " bytes"); } // Send the file to the client header("Content-Type: $contentType"); header("Content-Disposition: attachment; filename=\"$newFilename\""); header("Content-Length: " . 
strlen($fileContent));
        header("Cache-Control: no-cache, must-revalidate");
        header("Pragma: no-cache");
        echo $fileContent;
        logMessage("File download initiated: $newFilename");

        // Flush output to the client, then send webhook in the background
        // This ensures the user gets their download immediately
        if (function_exists('fastcgi_finish_request')) {
            // PHP-FPM: cleanly closes the connection to the client
            fastcgi_finish_request();
        } else {
            // Fallback for non-FPM setups (e.g. local development)
            ignore_user_abort(true);
            if (ob_get_level() > 0) {
                ob_end_flush();
            }
            flush();
        }

        // Now send the webhook data in the background (client already has their file)
        $webhookData = [
            'tool' => 'DOCUMENT_TRANSLATION',
            'date' => date('Y-m-d H:i:s'),
            'user' => 'user@example.com', // Replace with actual user information if available
            'model' => 'Microsoft Translator',
            'settings' => 'Document Translation',
            'subTool' => 'Microsoft Translator API',
            'originalFilename' => $original_filename,
            'translatedFilename' => $newFilename,
            'targetLanguage' => $target_lang,
            'documentId' => $document_id,
            'file' => base64_encode($fileContent)
        ];
        sendDataToWebhook($webhookData);
    } else {
        echo json_encode(['error' => 'Failed to retrieve file content']);
    }
} else {
    logMessage("Missing required parameters", 'ERROR');
    echo json_encode(['error' => 'Missing required parameters (document_id, document_key, original_filename, or target_lang)']);
}

/**
 * POST a record to the datastore webhook as a JSON request body.
 *
 * This is a best-effort notification: every failure (missing URL, payload that
 * cannot be encoded, transport error, non-2xx response) is reported through
 * logMessage() and the function then returns normally. Nothing is thrown and
 * nothing is returned, so a broken webhook cannot affect the caller.
 *
 * @param mixed $data Value to serialise with json_encode() and send as the body.
 *                    The caller in this file passes an associative array.
 * @return void
 */
function sendDataToWebhook($data) {
    // Same test as before (constant missing or empty), done on a local copy.
    $url = defined('DATASTORE_WEBHOOK') ? DATASTORE_WEBHOOK : '';
    if (empty($url)) {
        logMessage('Webhook URL not defined. Skipping webhook notification.', 'WARNING');
        return;
    }

    // json_encode() returns false on failure (for example malformed UTF-8 in a
    // string value). Stop here instead of POSTing an empty body.
    $payload = json_encode($data);
    if ($payload === false) {
        logMessage('Error sending data to webhook: payload could not be encoded as JSON: ' . json_last_error_msg(), 'ERROR');
        return;
    }

    $ch = curl_init($url);
    if ($ch === false) {
        logMessage('Error sending data to webhook: could not initialise cURL', 'ERROR');
        return;
    }

    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_POST, true);
    curl_setopt($ch, CURLOPT_POSTFIELDS, $payload);
    curl_setopt($ch, CURLOPT_HTTPHEADER, ['Content-Type: application/json']);
    // cURL applies no transfer time limit by default. The caller invokes this
    // after the client connection has been closed, so without a bound a stalled
    // endpoint would keep the worker busy with nobody waiting for it.
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
    curl_setopt($ch, CURLOPT_TIMEOUT, 120);

    $response = curl_exec($ch);
    $statusCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    $error = curl_error($ch);
    curl_close($ch);

    // Any 2xx answer means the endpoint accepted the request (201/202/204 are
    // as valid as 200); a false result from curl_exec() is a transport failure.
    $delivered = $response !== false && $statusCode >= 200 && $statusCode < 300;

    if ($delivered) {
        logMessage('Data sent to webhook successfully');
    } else {
        logMessage('Error sending data to webhook: HTTP ' . $statusCode . ', cURL error: ' . $error . ', payload size: ' . strlen($payload) . ' bytes', 'ERROR');
    }
}
?>