connectionString = AZURE_STORAGE_CONNECTION_STRING;
        $this->sourceContainer = AZURE_STORAGE_CONTAINER_SOURCE;
        $this->targetContainer = AZURE_STORAGE_CONTAINER_TARGET;
    }

    /**
     * Upload a file to the source container in Azure Blob Storage
     *
     * A copy is first written to local_storage/<source container>/ and the file is
     * then PUT to Azure as a BlockBlob, authenticated with a SAS token.
     *
     * @param string $filePath Path to the temporary file
     * @param string $fileName Name of the file
     * @return array|bool Returns array with container URL ('blob_url'), SAS token ('sas_token')
     *                    and blob name ('blob_name') on success, false on failure
     */
    public function uploadSourceFile($filePath, $fileName)
    {
        logMessage("Uploading file to Azure Storage: $fileName from $filePath");

        try {
            // Create a unique blob name to avoid collisions. Directory components are
            // stripped so a crafted file name cannot escape the local storage directory.
            $blobName = uniqid() . '-' . basename(str_replace('\\', '/', $fileName));
            logMessage("Generated blob name: $blobName");

            // Verify file exists and is readable
            if (!file_exists($filePath)) {
                logMessage("Error: File does not exist at path: $filePath", 'ERROR');
                return false;
            }

            if (!is_readable($filePath)) {
                logMessage("Error: File is not readable at path: $filePath", 'ERROR');
                return false;
            }

            $fileSize = filesize($filePath);
            logMessage("File size: $fileSize bytes");

            if ($fileSize <= 0) {
                logMessage("Error: File is empty or could not determine file size", 'ERROR');
                return false;
            }

            // First save a local copy for backup
            $localStorageDir = 'local_storage/' . $this->sourceContainer;
            if (!file_exists($localStorageDir) && !mkdir($localStorageDir, 0777, true)) {
                logMessage("Failed to create local storage directory: $localStorageDir", 'ERROR');
                return false;
            }

            $localFilePath = $localStorageDir . '/' . $blobName;
            if (!copy($filePath, $localFilePath)) {
                logMessage("Failed to copy file to local storage: $localFilePath", 'ERROR');
                return false;
            }
            logMessage("File saved to local storage: $localFilePath");

            // Resolve the SAS token and account once; both the upload and the returned
            // container-level access details use them.
            $sasToken = defined('AZURE_STORAGE_SAS_TOKEN')
                ? AZURE_STORAGE_SAS_TOKEN
                : $this->generateSasToken($this->sourceContainer, '', 'racwdl');
            $accountName = $this->resolveStorageAccountName();
            $containerUrl = "https://$accountName.blob.core.windows.net/{$this->sourceContainer}";

            // URL encode the blob name to handle spaces and special characters
            $blobUrl = $containerUrl . '/' . rawurlencode($blobName);
            $uploadUrl = $blobUrl . '?' . $sasToken;
            logMessage("Uploading file to Azure Blob Storage: $blobUrl");

            // Get file mime type (fall back to a generic type if detection fails)
            $finfo = finfo_open(FILEINFO_MIME_TYPE);
            $contentType = $finfo !== false ? finfo_file($finfo, $filePath) : false;
            if ($finfo !== false) {
                finfo_close($finfo);
            }
            if ($contentType === false || $contentType === '') {
                $contentType = 'application/octet-stream';
            }

            // Read file content
            $fileContent = file_get_contents($filePath);
            if ($fileContent === false) {
                logMessage("Error: Could not read file content from: $filePath", 'ERROR');
                return false;
            }

            // Upload using cURL
            $ch = curl_init();
            curl_setopt($ch, CURLOPT_URL, $uploadUrl);
            curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
            curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'PUT');
            curl_setopt($ch, CURLOPT_POSTFIELDS, $fileContent);
            curl_setopt($ch, CURLOPT_HTTPHEADER, [
                'x-ms-blob-type: BlockBlob',
                'Content-Type: ' . $contentType,
                'Content-Length: ' . strlen($fileContent)
            ]);

            $response = curl_exec($ch);
            $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
            $curlError = curl_error($ch);
            curl_close($ch);

            // A failed upload must be reported: the translation service reads the blob
            // from Azure, so continuing would only fail later with a less useful error.
            if ($response === false || $httpCode < 200 || $httpCode >= 300) {
                logMessage("Failed to upload file to Azure. HTTP: $httpCode, Error: $curlError, Response: $response", 'ERROR');
                return false;
            }
            logMessage("Successfully uploaded file to Azure Blob Storage. HTTP: $httpCode");

            logMessage("Using container-level access for translation API");
            logMessage("Container URL: $containerUrl");
            logMessage("Using Blob Name in prefix filter: $blobName");

            return [
                'blob_url' => $containerUrl,
                'sas_token' => $sasToken,
                'blob_name' => $blobName
            ];
        } catch (Exception $e) {
            logMessage("Error processing file: " . $e->getMessage() . "\nStack trace: " . $e->getTraceAsString(), 'ERROR');
            return false;
        }
    }

    /**
     * Get the target URL with SAS token for the translation result
     *
     * Does not create anything in Azure; it only generates a blob name, makes sure
     * the local backup directory exists and returns container-level access details.
     *
     * @param string $fileName Original file name
     * @param string $targetLang Target language
     * @return array|bool Returns array with blob URL and SAS token on success, false on failure
     */
    public function getTargetBlob($fileName, $targetLang)
    {
        logMessage("Setting up target blob for: $fileName, language: $targetLang");

        try {
            // Create a unique blob name for the translated file. Directory components
            // are stripped because this name is later used as a local file path.
            $safeFileName = basename(str_replace('\\', '/', $fileName));
            $blobName = uniqid() . '-' . strtoupper($targetLang) . '_' . $safeFileName;
            logMessage("Generated target blob name: $blobName");

            // Ensure local storage directory exists for backup
            logMessage("Setting up local storage directory for backup");

            $localStorageDir = 'local_storage/' . $this->targetContainer;
            if (!file_exists($localStorageDir)) {
                if (!mkdir($localStorageDir, 0777, true)) {
                    logMessage("Failed to create local target directory: $localStorageDir", 'ERROR');
                    return false;
                }
                logMessage("Created local target directory: $localStorageDir");
            }

            // Use the provided SAS token from config for target container
            $sasToken = defined('AZURE_STORAGE_SAS_TOKEN')
                ? AZURE_STORAGE_SAS_TOKEN
                : $this->generateSasToken($this->targetContainer, '', 'racwdl');

            // Set up the container URL for Document Translation service
            $accountName = $this->resolveStorageAccountName();
            $containerUrl = "https://$accountName.blob.core.windows.net/{$this->targetContainer}";

            logMessage("Using container-level access for translation API");
            logMessage("Target container URL: $containerUrl");
            logMessage("Using target blob name: $blobName");
            logMessage("Generated target SAS token (partial): " . substr($sasToken, 0, 20) . "...");
            logMessage("Target blob setup successful");

            return [
                'blob_url' => $containerUrl, // Container-level URL for Document Translation
                'sas_token' => $sasToken,
                'blob_name' => $blobName
            ];
        } catch (Exception $e) {
            logMessage("Error setting up target blob: " . $e->getMessage() . "\nStack trace: " . $e->getTraceAsString(), 'ERROR');
            return false;
        }
    }

    /**
     * Download a translated file from the target container
     *
     * Stages, in order; the first one that yields content wins:
     *  1. a previously cached copy under local_storage/<target container>/
     *  2. a direct download of the exact blob name
     *  3. a list of guessed names, extended with the names found by listing the container
     *  4. the target URLs reported by the document translation batch API
     *  5. any blob found under a "<lang>/" prefix
     *  6. a plain-text emergency placeholder
     *
     * @param string $blobName Name of the blob
     * @return string|bool Returns file content on success, false on failure
     */
    public function downloadTranslatedFile($blobName)
    {
        logMessage("Downloading translated file: $blobName");

        try {
            // The blob name becomes part of a local path below; refuse parent-directory
            // segments so a caller-supplied name cannot read or write outside local_storage.
            if (preg_match('#(^|[\\\\/])\.\.([\\\\/]|$)#', $blobName) || strpos($blobName, "\0") !== false) {
                logMessage("Rejected blob name containing a parent-directory segment: $blobName", 'ERROR');
                return false;
            }

            // Extract target language and original filename for Microsoft's naming convention
            $targetLang = '';
            $origFilename = '';
            if (preg_match('/^.*?\-([A-Z]+)_(.+)$/', $blobName, $matches)) {
                $targetLang = $matches[1];
                $origFilename = $matches[2];
            }

            // Detect file extension for specialized handling
            $fileExtension = '';
            if (preg_match('/\.([^\.]+)$/', $blobName, $extMatches)) {
                $fileExtension = strtolower($extMatches[1]);
            }
            logMessage("File extension detected: $fileExtension");

            // Log the exact blob name we're looking for first
            logMessage("HIGHEST PRIORITY: Looking for exact blob name match: $blobName");
            $possibleTargetNames = $this->buildCandidateBlobNames($blobName, $targetLang, $origFilename, $fileExtension);
            logMessage("Looking for possible translated file names: " . implode(', ', $possibleTargetNames));

            // Check if we have a local translated file first
            $localTargetPath = 'local_storage/' . $this->targetContainer . '/' . $blobName;
            if (file_exists($localTargetPath)) {
                logMessage("Found existing local translated file at: $localTargetPath");
                $fileContent = file_get_contents($localTargetPath);
                if ($fileContent === false) {
                    logMessage("Error reading local translated file: $localTargetPath", 'ERROR');
                } else {
                    logMessage("Local translated file read successfully");
                    return $fileContent;
                }
            }

            $accountName = $this->resolveStorageAccountName();
            $containerBaseUrl = "https://$accountName.blob.core.windows.net/{$this->targetContainer}";

            // DIRECT DOWNLOAD ATTEMPT FIRST - this should work if file exists with the exact same name
            logMessage("PRIORITY DIRECT DOWNLOAD ATTEMPT for: $blobName");
            $blobSasToken = defined('AZURE_STORAGE_SAS_TOKEN')
                ? AZURE_STORAGE_SAS_TOKEN
                : $this->generateSasToken($this->targetContainer, $blobName, 'r');
            $directDownloadUrl = $containerBaseUrl . '/' . rawurlencode($blobName) . '?' . $blobSasToken;
            logMessage("DIRECT DOWNLOAD URL (partial): " . substr($directDownloadUrl, 0, 60) . "...");

            $direct = $this->storageHttpGet($directDownloadUrl, 60, true);
            logMessage("DIRECT DOWNLOAD CURL INFO: " . json_encode($direct['info']));

            if ($direct['ok']) {
                logMessage("DIRECT DOWNLOAD SUCCESSFUL! Size: " . strlen($direct['body']) . " bytes");
                $this->storeLocalCopy($localTargetPath, $direct['body'], "Saved direct download to local storage");
                return $direct['body'];
            }
            logMessage("DIRECT DOWNLOAD FAILED. HTTP: {$direct['code']}, Error: {$direct['error']}", 'WARNING');

            // If direct download failed, list the target container to find what Microsoft
            // actually created and extend the candidate list with it.
            $containerSasToken = defined('AZURE_STORAGE_SAS_TOKEN')
                ? AZURE_STORAGE_SAS_TOKEN
                : $this->generateSasToken($this->targetContainer, '', 'rl');
            $possibleTargetNames = $this->addCandidatesFromContainerListing(
                $containerBaseUrl,
                $containerSasToken,
                $targetLang,
                $possibleTargetNames
            );

            // Try to download each possible file
            foreach ($possibleTargetNames as $possibleName) {
                // URL encode the possible name to handle spaces and special characters
                $downloadUrl = $containerBaseUrl . '/' . rawurlencode($possibleName) . '?' . $containerSasToken;
                logMessage("Attempting to download from Azure. URL (partial): " . substr($downloadUrl, 0, 60) . "...");

                $attempt = $this->storageHttpGet($downloadUrl, 60, true);
                if ($attempt['ok']) {
                    logMessage("Successfully downloaded from Azure. Size: " . strlen($attempt['body']) . " bytes");
                    $this->storeLocalCopy($localTargetPath, $attempt['body'], "Saved Azure file to local storage");
                    return $attempt['body'];
                }

                logMessage("Azure download failed for '$possibleName'. HTTP: {$attempt['code']}", 'WARNING');
            }

            // If all else fails - check if we can directly access Microsoft's document translation result
            try {
                $apiContent = $this->downloadViaTranslationApi($localTargetPath);
                if ($apiContent !== false) {
                    return $apiContent;
                }
            } catch (Exception $e) {
                logMessage("Failed to access Microsoft API directly: " . $e->getMessage(), 'ERROR');
            }

            // If we still can't find the file, try to download the folder structures
            try {
                $prefixedContent = $this->downloadFromLanguageFolders(
                    $containerBaseUrl,
                    $containerSasToken,
                    $targetLang,
                    $localTargetPath
                );
                if ($prefixedContent !== false) {
                    return $prefixedContent;
                }
            } catch (Exception $e) {
                logMessage("Failed to check prefixes: " . $e->getMessage(), 'ERROR');
            }

            logMessage("All download attempts failed", 'ERROR');

            // One last attempt - leave pointers in the log for a manual check
            try {
                logMessage("LAST RESORT: Trying direct Azure Portal access");

                $portalSasToken = defined('AZURE_STORAGE_SAS_TOKEN')
                    ? AZURE_STORAGE_SAS_TOKEN
                    : $this->generateSasToken($this->targetContainer, '', 'racwdl');

                logMessage("LAST RESORT: Check Azure Portal manually using the SAS token: " . substr($portalSasToken, 0, 30) . "...");
                logMessage("LAST RESORT: You may need to check storage account '$accountName' container '{$this->targetContainer}' for file named: $blobName");

                // If the document has completed successfully but we can't find the file,
                // there's likely a permission issue or naming convention issue
                if (isset($GLOBALS['document_id']) && !empty($GLOBALS['document_id'])) {
                    $documentId = $GLOBALS['document_id'];
                    logMessage("LAST RESORT: You may need to check the document status at: " . MS_API_ENDPOINT . "/translator/document/batches/$documentId");
                }
            } catch (Exception $e) {
                logMessage("Failed last resort attempt: " . $e->getMessage(), 'ERROR');
            }

            // If all else fails, create an emergency file
            logMessage("Creating emergency file as last resort", 'WARNING');
            $emergencyContent = "[EMERGENCY FALLBACK - TRANSLATION NOT AVAILABLE]\n\nThe requested translation could not be found or downloaded.\n\nPlease check Azure Storage container '{$this->targetContainer}' manually for file: $blobName\n\nTranslation may have succeeded but file access failed.";

            // The placeholder is written next to, not onto, the cache path. Writing it to
            // $localTargetPath would make every later call return the placeholder from the
            // local cache even after the real translation becomes available.
            $emergencyPath = $localTargetPath . '.emergency.txt';
            if ($this->storeLocalCopy($emergencyPath, $emergencyContent, "Saved emergency file to")) {
                return $emergencyContent;
            }

            return false;
        } catch (Exception $e) {
            logMessage("Error in download process: " . $e->getMessage() . "\nStack trace: " . $e->getTraceAsString(), 'ERROR');
            return false;
        }
    }

    /**
     * Build the ordered list of blob names the translated file might have.
     *
     * The exact blob name always comes first. Guessed variants are only added when
     * the language and original file name could be parsed out of the blob name;
     * otherwise the guesses would degenerate into names like "/" or ".".
     *
     * @param string $blobName Exact blob name requested
     * @param string $targetLang Upper-case language code parsed from the blob name ('' if none)
     * @param string $origFilename Original file name parsed from the blob name ('' if none)
     * @param string $fileExtension Lower-case extension of the blob name ('' if none)
     * @return array List of unique candidate names, highest priority first
     */
    private function buildCandidateBlobNames($blobName, $targetLang, $origFilename, $fileExtension)
    {
        if ($targetLang === '' || $origFilename === '') {
            return [$blobName];
        }

        $lowerLang = strtolower($targetLang);
        $filenameWithoutExt = preg_replace('/\.[^\.]+$/', '', $origFilename);

        $commonPatterns = [
            $lowerLang . '/' . $origFilename,      // langcode/filename
            $targetLang . '/' . $origFilename,     // LANGCODE/filename
            $origFilename . '.' . $lowerLang,      // filename.langcode
            $origFilename . '.' . $targetLang,     // filename.LANGCODE
            $lowerLang,                            // Just the language folder
            $targetLang,                           // Just the language folder (uppercase)
            $origFilename,                         // Just the original filename without language
        ];

        if ($fileExtension === 'pdf') {
            $typePatterns = [
                $origFilename . '.translated.pdf',
                $filenameWithoutExt . '.translated.pdf',
                $filenameWithoutExt . '_' . $lowerLang . '.pdf',
                $filenameWithoutExt . '_' . $targetLang . '.pdf',
                'translated_' . $origFilename,
                'translated/' . $origFilename,
                $lowerLang . '/' . $filenameWithoutExt . '.pdf',
                $targetLang . '/' . $filenameWithoutExt . '.pdf',
                // Nested folder structure
                'translated/' . $lowerLang . '/' . $origFilename,
                'translated/' . $targetLang . '/' . $origFilename,
            ];
        } elseif ($fileExtension === 'pptx' || $fileExtension === 'ppt') {
            $typePatterns = [
                'slides/' . $origFilename,
                'presentations/' . $origFilename,
                $filenameWithoutExt . '_' . $lowerLang . '.pptx',
                $filenameWithoutExt . '_' . $targetLang . '.pptx',
                $filenameWithoutExt . '_translated.pptx',
                'translated_' . $origFilename,
                // Extension changed from ppt to pptx
                $filenameWithoutExt . '.pptx',
                // Language-specific folders
                $lowerLang . '/' . $origFilename,
                $targetLang . '/' . $origFilename,
                $lowerLang . '/' . $filenameWithoutExt . '.pptx',
                $targetLang . '/' . $filenameWithoutExt . '.pptx',
            ];
        } else {
            $typePatterns = [
                $origFilename . '.txt',          // Text file output
                $origFilename . '.docx',         // Word document output
                $origFilename . '.pdf',          // PDF output
                $filenameWithoutExt . '.docx',   // Word document with different extension
                $filenameWithoutExt . '.txt',    // Text with different extension
                $filenameWithoutExt . '_' . $lowerLang . '.' . $fileExtension,
                $filenameWithoutExt . '_' . $targetLang . '.' . $fileExtension,
            ];
        }

        // array_unique keeps the first occurrence, so priority order is preserved
        return array_values(array_unique(array_merge([$blobName], $typePatterns, $commonPatterns)));
    }

    /**
     * List the target container and add the blob names found there to the candidates.
     *
     * When $GLOBALS['is_binary_file'] is true, blobs whose name matches the global
     * 'file_extension' / 'original_filename' or starts with the language folder are
     * put in front of the existing candidates (most recently modified first). All
     * other blobs are appended, also most recently modified first.
     *
     * @param string $containerBaseUrl URL of the target container without query string
     * @param string $sasToken SAS token with list permission
     * @param string $targetLang Upper-case language code ('' if unknown)
     * @param array $candidates Candidate names collected so far
     * @return array Extended candidate list
     */
    private function addCandidatesFromContainerListing($containerBaseUrl, $sasToken, $targetLang, array $candidates)
    {
        // Special handling for binary files (PDF, PPTX) which are known to cause issues
        $isBinaryFile = isset($GLOBALS['is_binary_file']) && $GLOBALS['is_binary_file'] === true;
        $originalFilename = '';
        $binaryExtension = '';
        $filenameWithoutExt = '';
        if ($isBinaryFile) {
            $originalFilename = $GLOBALS['original_filename'] ?? '';
            $binaryExtension = $GLOBALS['file_extension'] ?? '';
            logMessage("SPECIAL BINARY FILE HANDLING: For $binaryExtension file: $originalFilename");

            if (!empty($originalFilename)) {
                $filenameWithoutExt = preg_replace('/\.[^\.]+$/', '', $originalFilename);
                logMessage("Filename without extension: $filenameWithoutExt");
            }
        }

        $listUrl = $containerBaseUrl . "?restype=container&comp=list&$sasToken";
        logMessage("Listing container contents to find translated file. URL (partial): " . substr($listUrl, 0, 60) . "...");

        $listing = $this->storageHttpGet($listUrl, 30, false);
        if (!$listing['ok']) {
            logMessage("Failed to list container contents. HTTP: {$listing['code']}", 'WARNING');
            return $candidates;
        }
        logMessage("Successfully listed container contents. Response size: " . strlen($listing['body']) . " bytes");

        // Parse the XML response to find blob names. Compare against false explicitly:
        // a SimpleXMLElement without children evaluates to false in a boolean context.
        $xml = simplexml_load_string($listing['body']);
        if ($xml === false || !isset($xml->Blobs->Blob)) {
            return $candidates;
        }

        $recentFiles = [];
        $binaryFileMatches = [];

        foreach ($xml->Blobs->Blob as $blob) {
            $name = (string)$blob->Name;
            $lastModified = (string)$blob->Properties->{'Last-Modified'};
            $timestamp = strtotime($lastModified);

            $recentFiles[$name] = $timestamp;
            logMessage("Found blob in container: $name (modified: $lastModified)");

            if (!$isBinaryFile) {
                continue;
            }

            // Check for matching extension
            if (!empty($binaryExtension) &&
                (strpos($name, ".$binaryExtension") !== false ||
                 strpos($name, "." . strtoupper($binaryExtension)) !== false)) {
                logMessage("Found matching extension ($binaryExtension) in file: $name");
                $binaryFileMatches[$name] = $timestamp;
            }

            // Check for matching filename (full or partial)
            if (!empty($originalFilename) &&
                (strpos($name, $originalFilename) !== false ||
                 (!empty($filenameWithoutExt) && strpos($name, $filenameWithoutExt) !== false))) {
                logMessage("Found matching filename part in: $name");
                $binaryFileMatches[$name] = $timestamp;
            }

            // Check for target language folder/prefix
            if (!empty($targetLang) &&
                (strpos($name, strtolower($targetLang) . '/') === 0 ||
                 strpos($name, $targetLang . '/') === 0)) {
                logMessage("Found matching language prefix for $targetLang in file: $name");
                $binaryFileMatches[$name] = $timestamp;
            }
        }

        // Sort by timestamp, most recent first
        arsort($recentFiles);

        if (!empty($binaryFileMatches)) {
            arsort($binaryFileMatches);
            logMessage("Found " . count($binaryFileMatches) . " potential binary file matches");

            // Collect first and merge once so the most recent match stays at the very
            // front; unshifting one by one would reverse the order.
            $priorityNames = [];
            foreach (array_keys($binaryFileMatches) as $matchedFile) {
                // Numeric-looking names come back from array_keys as integers
                $matchedFile = (string)$matchedFile;
                if (!in_array($matchedFile, $candidates, true)) {
                    $priorityNames[] = $matchedFile;
                    logMessage("Added PRIORITY binary file match: $matchedFile");
                }
            }
            $candidates = array_merge($priorityNames, $candidates);
        }

        // Add recently created files to our possible names list
        foreach (array_keys($recentFiles) as $recentFile) {
            $recentFile = (string)$recentFile;
            if (!in_array($recentFile, $candidates, true)) {
                $candidates[] = $recentFile;
                logMessage("Added recent file to check list: $recentFile");
            }
        }

        return $candidates;
    }

    /**
     * Ask the document translation batch API for the batch's documents and try to
     * download the first reachable 'targetUrl'.
     *
     * The batch id is read from $GLOBALS['document_id'] or $_GET['document_id'];
     * without one this stage is skipped.
     *
     * @param string $localTargetPath Where to cache a successful download
     * @return string|bool File content on success, false otherwise
     */
    private function downloadViaTranslationApi($localTargetPath)
    {
        logMessage("Attempting to access translation result directly via Microsoft API");

        // Get the document ID from the global variable (set in download.php) or from $_GET
        $documentId = $GLOBALS['document_id'] ?? $_GET['document_id'] ?? '';
        if (empty($documentId)) {
            return false;
        }

        // The id may come from the query string, so encode it before placing it in a path
        $documentUrl = MS_API_ENDPOINT . "/translator/document/batches/" . rawurlencode((string)$documentId)
            . "/documents?api-version=" . MS_API_VERSION;

        $apiResponse = $this->storageHttpGet($documentUrl, 30, false, [
            'Ocp-Apim-Subscription-Key: ' . MS_API_KEY,
            'Ocp-Apim-Subscription-Region: ' . MS_API_REGION
        ]);

        if (!$apiResponse['ok']) {
            logMessage("Failed to retrieve document details from Microsoft. HTTP: {$apiResponse['code']}", 'WARNING');
            return false;
        }

        $documentData = json_decode($apiResponse['body'], true);
        if (!is_array($documentData) || !isset($documentData['value']) || !is_array($documentData['value'])) {
            logMessage("Unexpected document details response from Microsoft (no 'value' list)", 'WARNING');
            return false;
        }
        logMessage("Successfully retrieved document details from Microsoft. Found " . count($documentData['value']) . " documents");

        // Look for a document that has a targetUrl
        foreach ($documentData['value'] as $document) {
            if (empty($document['targetUrl'])) {
                continue;
            }

            $directUrl = $document['targetUrl'];
            logMessage("Found direct target URL from Microsoft: " . substr($directUrl, 0, 60) . "...");

            $download = $this->storageHttpGet($directUrl, 30, true);
            if (!$download['ok']) {
                logMessage("Failed to download from direct URL. HTTP: {$download['code']}", 'WARNING');
                continue;
            }

            $directContent = $download['body'];
            logMessage("Successfully downloaded translated file directly from Microsoft URL. Size: " . strlen($directContent) . " bytes");

            // Log beginning of file and the detected signature for debugging
            logMessage("File starts with: " . bin2hex(substr($directContent, 0, 20)));

            $contentType = '';
            $signature = substr($directContent, 0, 4);
            if ($signature === '%PDF') {
                logMessage("PDF signature detected in content");
                $contentType = 'application/pdf';
            } elseif ($signature === "PK\x03\x04") {
                // Office documents are ZIP archives
                logMessage("Office file (ZIP) signature detected");
                $globalExtension = $GLOBALS['file_extension'] ?? null;
                if ($globalExtension === 'pptx' || $globalExtension === 'ppt') {
                    $contentType = 'application/vnd.openxmlformats-officedocument.presentationml.presentation';
                } elseif ($globalExtension === 'docx' || $globalExtension === 'doc') {
                    $contentType = 'application/vnd.openxmlformats-officedocument.wordprocessingml.document';
                }
            }

            if ($contentType) {
                logMessage("Content type detected from file signature: $contentType");
            } else {
                logMessage("Could not detect content type from file signature");
            }

            $this->storeLocalCopy($localTargetPath, $directContent, "Saved direct download to local storage");
            return $directContent;
        }

        return false;
    }

    /**
     * List blobs under "<lang>/" (lower and upper case) and return the first one
     * that can be downloaded.
     *
     * @param string $containerBaseUrl URL of the target container without query string
     * @param string $sasToken SAS token with read and list permission
     * @param string $targetLang Upper-case language code ('' skips this stage)
     * @param string $localTargetPath Where to cache a successful download
     * @return string|bool File content on success, false otherwise
     */
    private function downloadFromLanguageFolders($containerBaseUrl, $sasToken, $targetLang, $localTargetPath)
    {
        if ($targetLang === '') {
            return false;
        }

        $prefixes = array_unique([strtolower($targetLang) . '/', $targetLang . '/']);
        $listUrl = $containerBaseUrl . "?restype=container&comp=list&delimiter=/&$sasToken";

        foreach ($prefixes as $prefix) {
            logMessage("Checking for prefix: $prefix");

            $listing = $this->storageHttpGet($listUrl . "&prefix=" . urlencode($prefix), 30, false);
            if (!$listing['ok']) {
                continue;
            }

            $xml = simplexml_load_string($listing['body']);
            if ($xml === false || !isset($xml->Blobs->Blob)) {
                continue;
            }

            foreach ($xml->Blobs->Blob as $blob) {
                $name = (string)$blob->Name;
                logMessage("Found prefixed blob: $name");

                // Try to download this blob
                $downloadUrl = $containerBaseUrl . '/' . rawurlencode($name) . '?' . $sasToken;
                $download = $this->storageHttpGet($downloadUrl, 30, true);

                if ($download['ok']) {
                    logMessage("Successfully downloaded prefixed file. Size: " . strlen($download['body']) . " bytes");
                    $this->storeLocalCopy($localTargetPath, $download['body'], "Saved prefixed file to local storage");
                    return $download['body'];
                }
            }
        }

        return false;
    }

    /**
     * Perform an HTTP GET with cURL.
     *
     * TLS peer verification is left at cURL's default (enabled): the URLs carry
     * SAS tokens or API keys and must not be sent to an unverified host.
     *
     * @param string $url Full URL including any query string
     * @param int $timeout Total timeout in seconds
     * @param bool $followRedirects Whether to follow Location headers
     * @param array $headers Extra request header lines
     * @return array 'ok' (bool, transfer succeeded with a 2xx status), 'body' (string|false),
     *               'code' (int), 'error' (string), 'info' (curl_getinfo() with query strings redacted)
     */
    private function storageHttpGet($url, $timeout = 30, $followRedirects = false, array $headers = [])
    {
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, $followRedirects);
        curl_setopt($ch, CURLOPT_TIMEOUT, $timeout);
        curl_setopt($ch, CURLOPT_HEADER, false); // Don't include headers in output
        if (!empty($headers)) {
            curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
        }

        $body = curl_exec($ch);
        $code = (int)curl_getinfo($ch, CURLINFO_HTTP_CODE);
        $error = curl_error($ch);
        $info = curl_getinfo($ch);
        curl_close($ch);

        return [
            'ok' => $body !== false && $code >= 200 && $code < 300,
            'body' => $body,
            'code' => $code,
            'error' => $error,
            'info' => $this->redactCurlInfo($info),
        ];
    }

    /**
     * Strip query strings from the URLs in a curl_getinfo() array so SAS tokens
     * are not written to the log.
     *
     * @param mixed $info Result of curl_getinfo()
     * @return mixed The same structure with 'url' / 'redirect_url' queries replaced
     */
    private function redactCurlInfo($info)
    {
        if (!is_array($info)) {
            return $info;
        }

        foreach (['url', 'redirect_url'] as $key) {
            if (isset($info[$key]) && is_string($info[$key]) && strpos($info[$key], '?') !== false) {
                $info[$key] = explode('?', $info[$key], 2)[0] . '?<redacted>';
            }
        }

        return $info;
    }

    /**
     * Write content to a local file, creating the directory if needed.
     *
     * Failures are logged as warnings and reported through the return value; they
     * never abort the caller, because the local copy is only a cache.
     *
     * @param string $localPath Destination path
     * @param string $content Bytes to write
     * @param string $successLog Log message prefix used on success ("<prefix>: <path>")
     * @return bool True when the file was written
     */
    private function storeLocalCopy($localPath, $content, $successLog)
    {
        $directory = dirname($localPath);
        if (!file_exists($directory) && !mkdir($directory, 0777, true)) {
            logMessage("Failed to create local storage directory: $directory", 'WARNING');
            return false;
        }

        // Compare against false: file_put_contents returns 0 for an empty file
        if (file_put_contents($localPath, $content) === false) {
            logMessage("Failed to write local copy: $localPath", 'WARNING');
            return false;
        }

        logMessage("$successLog: $localPath");
        return true;
    }

    /**
     * Extract the storage account name from the connection string.
     *
     * @return string The AccountName value, or 'opticaltranslations' when absent
     */
    private function resolveStorageAccountName()
    {
        if (preg_match('/AccountName=([^;]+)/', $this->connectionString, $matches) && !empty($matches[1])) {
            return $matches[1];
        }

        return 'opticaltranslations';
    }

    /**
     * Helper method to get a blob URL
     *
     * @param string $container Container name
     * @param string $blobName Blob name (URL-encoded by this method)
     * @return string URL of the blob, without SAS token
     */
    public function getBlobUrl($container, $blobName)
    {
        $accountName = $this->resolveStorageAccountName();

        // URL encode the blob name to handle spaces and special characters
        $encodedBlobName = rawurlencode($blobName);

        return "https://$accountName.blob.core.windows.net/$container/$encodedBlobName";
    }

    /**
     * Helper method to get the container URL
     *
     * The account is taken from BlobEndpoint when the connection string has one,
     * otherwise from AccountName, otherwise a hardcoded default is used.
     *
     * @param string $container Container name
     * @return string URL of the container, without SAS token
     */
    private function getContainerUrl($container)
    {
        // Try to get from BlobEndpoint first
        if (strpos($this->connectionString, 'BlobEndpoint=') !== false) {
            preg_match('/BlobEndpoint=https:\/\/([^\.]+)\.blob\.core\.windows\.net/', $this->connectionString, $blobMatches);
            if (!empty($blobMatches[1])) {
                return "https://{$blobMatches[1]}.blob.core.windows.net/$container";
            }
        }

        // Fallback to AccountName, then to the hardcoded default
        $accountName = $this->resolveStorageAccountName();

        return "https://$accountName.blob.core.windows.net/$container";
    }

    /**
     * Helper method to get the upload URL
     *
     * @param string $container Container name
     * @param string $blobName Blob name (URL-encoded by this method)
     * @return string Blob URL including SAS token
     */
    private function getUploadUrl($container, $blobName)
    {
        $sasToken = $this->generateSasToken($container, $blobName, 'rwc');
        $containerUrl = $this->getContainerUrl($container);

        // Encode the name like getBlobUrl() does so spaces and special characters
        // produce a valid URL
        return $containerUrl . '/' . rawurlencode($blobName) . '?' . $sasToken;
    }

    /**
     * Helper method to upload a file to blob storage
     *
     * @param string $url Blob URL including SAS token
     * @param string $filePath Local file to upload
     * @return bool True on success
     * @throws Exception When the file cannot be read or Azure answers with a non-2xx status
     */
    private function uploadToBlobStorage($url, $filePath)
    {
        // Read the file content
        if (!file_exists($filePath)) {
            logMessage("File not found: $filePath", 'ERROR');
            throw new Exception("File not found: $filePath");
        }

        $fileContent = file_get_contents($filePath);
        if ($fileContent === false) {
            logMessage("Failed to read file: $filePath", 'ERROR');
            throw new Exception("Failed to read file: $filePath");
        }

        $fileSize = strlen($fileContent);
        logMessage("Uploading file to Azure Blob Storage. File size: $fileSize bytes");

        // Get file mime type
        $finfo = new finfo(FILEINFO_MIME_TYPE);
        $contentType = $finfo->file($filePath);
        logMessage("File content type: $contentType");

        // Ensure we have a valid URL (check if it already has SAS token)
        if (strpos($url, '?') === false) {
            logMessage("Warning: URL does not contain SAS token", 'WARNING');
        }

        logMessage("Upload URL (partial): " . substr($url, 0, 100) . "...");

        // Upload directly using Azure REST API for BlockBlob
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'PUT');
        curl_setopt($ch, CURLOPT_POSTFIELDS, $fileContent);
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, true);
        curl_setopt($ch, CURLOPT_HTTPHEADER, [
            'x-ms-blob-type: BlockBlob',
            'Content-Type: ' . $contentType,
            'Content-Length: ' . $fileSize
        ]);

        $response = curl_exec($ch);
        $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
        $error = curl_error($ch);

        // Log detailed curl info for debugging; the URL's query string holds the
        // SAS token, so it is redacted first
        $curlInfo = $this->redactCurlInfo(curl_getinfo($ch));
        logMessage("CURL Info: " . json_encode($curlInfo));

        curl_close($ch);

        logMessage("Blob storage upload response: HTTP $httpCode");

        if ($httpCode < 200 || $httpCode >= 300) {
            logMessage("Failed to upload blob. HTTP code: $httpCode, Error: $error, Response: $response", 'ERROR');
            throw new Exception("Failed to upload blob. HTTP code: $httpCode, Error: $error");
        }

        logMessage("File uploaded to Azure Blob Storage successfully");
        return true;
    }

    /**
     * Generate a SAS token for blob access
     *
     * This implementation uses the Azure Storage REST API to generate SAS tokens
     * If a SAS token is already defined in the config, it will use that instead
     */
    public function generateSasToken($container, $blobName = '', $permissions = 'r')
    {
        // First check if we can use the SAS token from config
        if (defined('AZURE_STORAGE_SAS_TOKEN') && !empty(AZURE_STORAGE_SAS_TOKEN)) {
            logMessage("Using predefined SAS token from config");
            return AZURE_STORAGE_SAS_TOKEN;
        }

        $isContainerLevel = empty($blobName);
        $resourceType = $isContainerLevel ? 'container' : 'blob';

        logMessage("Generating SAS token for $resourceType: $container" . ($isContainerLevel ? '' : "/$blobName") . ", permissions: $permissions");

        // Extract account details from connection string
        // Try to extract from SharedAccessSignature if available
        if (strpos($this->connectionString, 'SharedAccessSignature=') !== false) {
            preg_match('/SharedAccessSignature=([^;]+)/', $this->connectionString, $sasMatches);
            if (!empty($sasMatches[1])) {
                $sasToken = $sasMatches[1];
                logMessage("Using SAS token from connection string (partial): " . substr($sasToken, 0, 30) . "...");
                return $sasToken;
            }
        }

        // Otherwise try to extract account name and key if available
        preg_match('/AccountName=([^;]+)/', $this->connectionString, $nameMatches);
        preg_match('/AccountKey=([^;]+)/', $this->connectionString, $keyMatches);

        // If we have valid account info, generate a new SAS token
        if (!empty($nameMatches[1]) && !empty($keyMatches[1])) {
            $accountName = $nameMatches[1];
            $accountKey = $keyMatches[1];

            // Set token expiry time - 24 hours from now
            $expiryTime = time() + 86400; // 24 hours
            $startTime = time() - 60; // Start time 1 minute ago to avoid clock skew issues

            // Format dates for SAS token
            $expiryTimeFormatted = gmdate('Y-m-d\TH:i:s\Z', $expiryTime);
            $startTimeFormatted = gmdate('Y-m-d\TH:i:s\Z', $startTime);

            // For container-level access which is required for Microsoft Document Translation API
            if ($isContainerLevel) {
                // Generate a container-level SAS token
                $resourceType = 'c'; // Container
                $canonicalResource = "/blob/$accountName/$container";
            } else {
                // Generate a blob-level SAS token
                $resourceType = 'o'; // Object/Blob
                $canonicalResource = "/blob/$accountName/$container/$blobName";
            }

            // Build components
            $sasComponents = [
                'sv' => '2022-11-02', // Storage service version
                'ss' => 'b', // Blob service
                'srt' => $resourceType, // Resource type (c=container, o=object/blob)
                'sp' => $permissions, // Permissions
                'se' => $expiryTimeFormatted,
                'st' => $startTimeFormatted,
                'spr' => 'https', // HTTPS only
            ];

            // Create authorization string to sign
            $stringToSign = implode("\n", [
                $permissions,
                $startTimeFormatted,
                $expiryTimeFormatted,
                $canonicalResource,
                '', // signed identifier (blank)
                'https', // protocol
                '2022-11-02' // storage version
            ]);

            // Generate the signature
            // Replace deprecated utf8_encode with mb_convert_encoding
            $signature = base64_encode(hash_hmac('sha256', mb_convert_encoding($stringToSign, 'UTF-8'), base64_decode($accountKey), true));

            // Build the SAS query string
            $sasString = implode('&', array_map(function($key, $value) {
                return "$key="
. urlencode($value); }, array_keys($sasComponents), $sasComponents)); // Add the signature $sasToken = $sasString . "&sig=" . urlencode($signature); logMessage("Generated SAS token for $resourceType (partial): " . substr($sasToken, 0, 30) . "..."); return $sasToken; } // If we can't extract SAS token or account details, log an error logMessage("Could not extract valid SAS token or account details from connection string", 'ERROR'); // Return empty string, which will likely cause an error later, but at least the app won't crash here return ''; } /** * Ensure the containers exist * * @return bool Returns true if containers exist or were created, false on failure */ public function ensureContainersExist() { logMessage("Verifying Azure Storage containers: {$this->sourceContainer} and {$this->targetContainer}"); try { // Extract account name from BlobEndpoint in the connection string preg_match('/BlobEndpoint=https:\/\/([^\.]+)\.blob\.core\.windows\.net/', $this->connectionString, $nameMatches); // For compatibility with the new connection string format $accountName = !empty($nameMatches[1]) ? $nameMatches[1] : 'opticaltranslations'; if (empty($accountName)) { logMessage("Failed to extract account name from connection string, using default", 'WARNING'); } // Get SAS token for container operations $sasToken = defined('AZURE_STORAGE_SAS_TOKEN') ? 
AZURE_STORAGE_SAS_TOKEN : $this->generateSasToken($this->sourceContainer, '', 'racwdl'); // Check if containers exist and create them if they don't $containersToCheck = [$this->sourceContainer, $this->targetContainer]; $containersExist = true; foreach ($containersToCheck as $container) { logMessage("Checking container: $container"); // Method 1: Check if container exists using Azure Storage REST API with SAS token $containerUrl = "https://$accountName.blob.core.windows.net/$container?restype=container&$sasToken"; $ch = curl_init(); curl_setopt($ch, CURLOPT_URL, $containerUrl); curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); curl_setopt($ch, CURLOPT_NOBODY, true); curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false); // Disable SSL verification for testing curl_exec($ch); $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE); curl_close($ch); logMessage("Container check response code: $httpCode"); if ($httpCode >= 200 && $httpCode < 300) { logMessage("Container $container exists"); continue; // Container exists, move to next container } // Method 2: Try listing the container contents $listUrl = "https://$accountName.blob.core.windows.net/$container?restype=container&comp=list&$sasToken"; $ch = curl_init(); curl_setopt($ch, CURLOPT_URL, $listUrl); curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false); // Disable SSL verification for testing $response = curl_exec($ch); $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE); curl_close($ch); logMessage("Container list response code: $httpCode"); if ($httpCode >= 200 && $httpCode < 300) { logMessage("Container $container exists (confirmed by listing)"); continue; // Container exists, move to next container } // If all checks fail, assume container doesn't exist and try to create it logMessage("Container $container doesn't exist or isn't accessible, creating..."); // Create container API call with SAS token $createContainerUrl = 
"https://$accountName.blob.core.windows.net/$container?restype=container&$sasToken"; $ch = curl_init(); curl_setopt($ch, CURLOPT_URL, $createContainerUrl); curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'PUT'); curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false); // Disable SSL verification for testing $response = curl_exec($ch); $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE); curl_close($ch); if ($httpCode >= 200 && $httpCode < 300) { logMessage("Container $container created successfully"); } else { logMessage("Failed to create container $container: HTTP $httpCode, Response: $response", 'ERROR'); $containersExist = false; } } if ($containersExist) { logMessage("Container verification completed successfully"); return true; } else { // For this application, we'll try to proceed even if container creation fails // Microsoft's service might still be able to access the containers logMessage("Container verification had issues, but we'll proceed anyway", 'WARNING'); return true; } } catch (Exception $e) { logMessage("Error verifying containers: " . $e->getMessage(), 'ERROR'); // For this application, we'll try to proceed even if verification fails // Microsoft's service might still be able to access the containers logMessage("Container verification failed with exception, but we'll proceed anyway", 'WARNING'); return true; } } } ?>