1101 lines
No EOL
56 KiB
PHP
1101 lines
No EOL
56 KiB
PHP
<?php
|
|
// Load the project configuration and the logger BEFORE declaring fallbacks.
// define() cannot replace a constant that already exists (it only emits a
// warning), so fallbacks declared ahead of these includes would silently win
// over any value config.php tries to define.
include_once 'config.php';
include_once 'logger.php';

// Fallback values, used only when the configuration did not define them.
// The connection string below is the one this file has always shipped as its
// default; it targets a blob endpoint on 127.0.0.1:10000.
if (!defined('AZURE_STORAGE_CONNECTION_STRING')) {
    define('AZURE_STORAGE_CONNECTION_STRING', 'DefaultEndpointsProtocol=https;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://127.0.0.1:10000/devstoreaccount1;');
}

if (!defined('AZURE_STORAGE_CONTAINER_SOURCE')) {
    define('AZURE_STORAGE_CONTAINER_SOURCE', 'source-documents');
}

if (!defined('AZURE_STORAGE_CONTAINER_TARGET')) {
    define('AZURE_STORAGE_CONTAINER_TARGET', 'translated-documents');
}
|
|
|
|
/**
|
|
* Azure Storage Helper Class for Document Translation
|
|
*
|
|
* This class provides helper functions to work with Azure Blob Storage,
|
|
* which is required for Microsoft's Document Translation API.
|
|
*/
|
|
class AzureStorageHelper {
|
|
private $connectionString;
|
|
private $sourceContainer;
|
|
private $targetContainer;
|
|
|
|
/**
 * Build a helper bound to the configured storage settings.
 *
 * Copies the connection string and the source/target container names from
 * the AZURE_STORAGE_* constants into instance state.
 */
public function __construct() {
    $this->targetContainer = AZURE_STORAGE_CONTAINER_TARGET;
    $this->sourceContainer = AZURE_STORAGE_CONTAINER_SOURCE;
    $this->connectionString = AZURE_STORAGE_CONNECTION_STRING;
}
|
|
|
|
/**
 * Upload a file to the source container in Azure Blob Storage.
 *
 * The file is first copied to local_storage/<source container>/ as a backup
 * and then sent to the blob endpoint with an HTTP PUT (block blob),
 * authenticated with the SAS token.
 *
 * @param string $filePath Path to the temporary file
 * @param string $fileName Name of the file; only its base name is used
 * @return array|bool Array with 'blob_url' (the container URL), 'sas_token'
 *                    and 'blob_name' on success, false on failure
 */
public function uploadSourceFile($filePath, $fileName) {
    logMessage("Uploading file to Azure Storage: $fileName from $filePath");

    try {
        // Keep only the base name: a name containing directory separators
        // would otherwise end up inside the local backup path.
        $safeName = basename(str_replace('\\', '/', (string) $fileName));
        if ($safeName === '' || $safeName === '.' || $safeName === '..') {
            logMessage("Error: Invalid file name: $fileName", 'ERROR');
            return false;
        }

        // Create a unique blob name to avoid collisions
        $blobName = uniqid() . '-' . $safeName;
        logMessage("Generated blob name: $blobName");

        // Verify file exists and is readable
        if (!file_exists($filePath)) {
            logMessage("Error: File does not exist at path: $filePath", 'ERROR');
            return false;
        }

        if (!is_readable($filePath)) {
            logMessage("Error: File is not readable at path: $filePath", 'ERROR');
            return false;
        }

        $fileSize = filesize($filePath);
        logMessage("File size: $fileSize bytes");

        if ($fileSize <= 0) {
            logMessage("Error: File is empty or could not determine file size", 'ERROR');
            return false;
        }

        // First save a local copy for backup. The trailing is_dir() check
        // tolerates another request creating the directory concurrently.
        $localStorageDir = 'local_storage/' . $this->sourceContainer;
        if (!is_dir($localStorageDir) && !mkdir($localStorageDir, 0777, true) && !is_dir($localStorageDir)) {
            logMessage("Failed to create local storage directory: $localStorageDir", 'ERROR');
            return false;
        }

        $localFilePath = $localStorageDir . '/' . $blobName;
        if (!copy($filePath, $localFilePath)) {
            logMessage("Failed to copy file to local storage: $localFilePath", 'ERROR');
            return false;
        }

        logMessage("File saved to local storage: $localFilePath");

        // Use the configured SAS token when present. Reading an undefined
        // constant throws an Error on PHP 8, which the catch below (Exception
        // only) would not intercept, hence the defined() guard.
        $sasToken = defined('AZURE_STORAGE_SAS_TOKEN')
            ? AZURE_STORAGE_SAS_TOKEN
            : $this->generateSasToken($this->sourceContainer, '', 'racwdl');

        // Extract account name from connection string
        $accountName = 'opticaltranslations';
        if (preg_match('/AccountName=([^;]+)/', $this->connectionString, $nameMatches) && !empty($nameMatches[1])) {
            $accountName = $nameMatches[1];
        }

        $containerUrl = "https://$accountName.blob.core.windows.net/{$this->sourceContainer}";

        // URL encode the blob name to handle spaces and special characters
        $blobUrl = $containerUrl . '/' . rawurlencode($blobName);
        $uploadUrl = $blobUrl . '?' . $sasToken;

        logMessage("Uploading file to Azure Blob Storage: $blobUrl");

        // Read file content
        $fileContent = file_get_contents($filePath);
        if ($fileContent === false) {
            logMessage("Error: Could not read file content from: $filePath", 'ERROR');
            return false;
        }

        // Detect the MIME type, falling back to a generic binary type when
        // the fileinfo database cannot be opened or gives no answer.
        $contentType = 'application/octet-stream';
        $finfo = finfo_open(FILEINFO_MIME_TYPE);
        if ($finfo !== false) {
            $detectedType = finfo_file($finfo, $filePath);
            finfo_close($finfo);
            if (is_string($detectedType) && $detectedType !== '') {
                $contentType = $detectedType;
            }
        }

        // Upload using cURL
        $ch = curl_init();
        curl_setopt_array($ch, [
            CURLOPT_URL => $uploadUrl,
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_CUSTOMREQUEST => 'PUT',
            CURLOPT_POSTFIELDS => $fileContent,
            CURLOPT_HTTPHEADER => [
                'x-ms-blob-type: BlockBlob',
                'Content-Type: ' . $contentType,
                'Content-Length: ' . strlen($fileContent),
            ],
        ]);

        $response = curl_exec($ch);
        $httpCode = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
        $curlError = curl_error($ch);
        curl_close($ch);

        // A failed upload is a failure of this method: the returned blob
        // name would otherwise point at a blob that does not exist.
        if ($response === false || $httpCode < 200 || $httpCode >= 300) {
            $detail = ($response === false) ? "cURL error: $curlError" : "Response: $response";
            logMessage("Failed to upload file to Azure. HTTP: $httpCode, $detail", 'ERROR');
            return false;
        }

        logMessage("Successfully uploaded file to Azure Blob Storage. HTTP: $httpCode");

        logMessage("Using container-level access for translation API");
        logMessage("Container URL: $containerUrl");
        logMessage("Using Blob Name in prefix filter: $blobName");

        return [
            'blob_url' => $containerUrl,
            'sas_token' => $sasToken,
            'blob_name' => $blobName
        ];
    } catch (Exception $e) {
        logMessage("Error processing file: " . $e->getMessage() . "\nStack trace: " . $e->getTraceAsString(), 'ERROR');
        return false;
    }
}
|
|
|
|
/**
 * Get the target container URL and SAS token for the translation result.
 *
 * Generates a unique target blob name of the form
 * "<uniqid>-<UPPERCASE LANG>_<fileName>", makes sure the local backup
 * directory for the target container exists, and returns the
 * container-level URL together with the SAS token.
 *
 * @param string $fileName Original file name
 * @param string $targetLang Target language
 * @return array|bool Array with 'blob_url' (the container URL), 'sas_token'
 *                    and 'blob_name' on success, false on failure
 */
public function getTargetBlob($fileName, $targetLang) {
    logMessage("Setting up target blob for: $fileName, language: $targetLang");

    try {
        // Create a unique blob name for the translated file
        $blobName = uniqid() . '-' . strtoupper($targetLang) . '_' . $fileName;
        logMessage("Generated target blob name: $blobName");

        // Ensure local storage directory exists for backup
        logMessage("Setting up local storage directory for backup");

        $localStorageDir = 'local_storage/' . $this->targetContainer;
        if (!is_dir($localStorageDir)) {
            // is_dir() (not file_exists()) so a stray regular file with this
            // name is not mistaken for the directory; the second is_dir()
            // tolerates a concurrent request creating it first.
            if (!mkdir($localStorageDir, 0777, true) && !is_dir($localStorageDir)) {
                logMessage("Failed to create local target directory: $localStorageDir", 'ERROR');
                return false;
            }
            logMessage("Created local target directory: $localStorageDir");
        }

        // Use the provided SAS token from config for target container
        $sasToken = defined('AZURE_STORAGE_SAS_TOKEN')
            ? AZURE_STORAGE_SAS_TOKEN
            : $this->generateSasToken($this->targetContainer, '', 'racwdl');

        // Extract account name from connection string
        preg_match('/AccountName=([^;]+)/', $this->connectionString, $nameMatches);
        $accountName = !empty($nameMatches[1]) ? $nameMatches[1] : 'opticaltranslations';

        // Container-level URL, as used by the Document Translation service
        $containerUrl = "https://$accountName.blob.core.windows.net/{$this->targetContainer}";

        logMessage("Using container-level access for translation API");
        logMessage("Target container URL: $containerUrl");
        logMessage("Using target blob name: $blobName");
        logMessage("Generated target SAS token (partial): " . substr($sasToken, 0, 20) . "...");

        logMessage("Target blob setup successful");
        return [
            'blob_url' => $containerUrl, // Container-level URL for Document Translation
            'sas_token' => $sasToken,
            'blob_name' => $blobName
        ];
    } catch (Exception $e) {
        logMessage("Error setting up target blob: " . $e->getMessage() . "\nStack trace: " . $e->getTraceAsString(), 'ERROR');
        return false;
    }
}
|
|
|
|
/**
 * Download a translated file from the target container.
 *
 * Attempts, in order:
 *  1. the local copy under local_storage/<target container>/;
 *  2. a direct download of the blob with exactly the given name;
 *  3. a list of guessed alternative names, extended with the names found by
 *     listing the container;
 *  4. the target URLs reported by the translation batch identified by
 *     $GLOBALS['document_id'] or $_GET['document_id'];
 *  5. blobs stored under a "<lang>/" prefix;
 *  6. an emergency placeholder text.
 *
 * Also reads $GLOBALS['is_binary_file'], $GLOBALS['original_filename'] and
 * $GLOBALS['file_extension'] as matching hints for step 3.
 *
 * @param string $blobName Name of the blob
 * @return string|bool File content (or the emergency placeholder text) on
 *                     success, false on failure
 */
public function downloadTranslatedFile($blobName) {
    logMessage("Downloading translated file: $blobName");

    try {
        // The blob name becomes part of a local filesystem path below, so
        // refuse names that could climb out of the storage directory.
        if ($blobName === '' || strpos($blobName, "\0") !== false
            || in_array('..', preg_split('#[/\\\\]#', $blobName), true)) {
            logMessage("Refusing unsafe blob name", 'ERROR');
            return false;
        }

        $emergencyMarker = '[EMERGENCY FALLBACK - TRANSLATION NOT AVAILABLE]';

        // Split "<prefix>-<LANG>_<original filename>". The language part may
        // itself contain hyphens (e.g. ZH-HANS, PT-BR).
        $targetLang = '';
        $origFilename = '';
        if (preg_match('/^.*?-([A-Z][A-Z0-9]*(?:-[A-Z0-9]+)*)_(.+)$/', $blobName, $matches)) {
            $targetLang = $matches[1];
            $origFilename = $matches[2];
        }

        // Detect file extension for specialized handling
        $fileExtension = '';
        if (preg_match('/\.([^\.]+)$/', $blobName, $extMatches)) {
            $fileExtension = strtolower($extMatches[1]);
        }
        logMessage("File extension detected: $fileExtension");

        logMessage("HIGHEST PRIORITY: Looking for exact blob name match: $blobName");
        $possibleTargetNames = $this->buildTranslatedNameCandidates($blobName, $targetLang, $origFilename, $fileExtension);
        logMessage("Looking for possible translated file names: " . implode(', ', $possibleTargetNames));

        // Check if we have a local translated file first
        $localTargetPath = 'local_storage/' . $this->targetContainer . '/' . $blobName;

        if (is_file($localTargetPath)) {
            logMessage("Found existing local translated file at: $localTargetPath");
            $cachedContent = file_get_contents($localTargetPath);

            if ($cachedContent === false) {
                logMessage("Error reading local translated file: $localTargetPath", 'ERROR');
            } elseif (strncmp($cachedContent, $emergencyMarker, strlen($emergencyMarker)) === 0) {
                // A previous failed call left its placeholder here; it must
                // not be served as if it were the translation.
                logMessage("Ignoring cached emergency placeholder at: $localTargetPath", 'WARNING');
            } else {
                logMessage("Local translated file read successfully");
                return $cachedContent;
            }
        }

        // Extract account name from connection string
        preg_match('/AccountName=([^;]+)/', $this->connectionString, $nameMatches);
        $accountName = !empty($nameMatches[1]) ? $nameMatches[1] : 'opticaltranslations';
        $containerBase = "https://$accountName.blob.core.windows.net/{$this->targetContainer}";

        // DIRECT DOWNLOAD ATTEMPT FIRST - works if the blob has the exact name
        logMessage("PRIORITY DIRECT DOWNLOAD ATTEMPT for: $blobName");

        $readSas = defined('AZURE_STORAGE_SAS_TOKEN')
            ? AZURE_STORAGE_SAS_TOKEN
            : $this->generateSasToken($this->targetContainer, $blobName, 'r');

        $directDownloadUrl = $containerBase . '/' . rawurlencode($blobName) . '?' . $readSas;
        logMessage("DIRECT DOWNLOAD URL (partial): " . substr($directDownloadUrl, 0, 60) . "...");

        $direct = $this->fetchTranslatedUrl($directDownloadUrl, 60);
        logMessage("DIRECT DOWNLOAD CURL INFO: " . json_encode($direct['info']));

        if ($direct['ok']) {
            logMessage("DIRECT DOWNLOAD SUCCESSFUL! Size: " . strlen($direct['body']) . " bytes");
            $this->cacheTranslatedCopy($localTargetPath, $direct['body'], "Saved direct download to local storage");
            return $direct['body'];
        }
        logMessage("DIRECT DOWNLOAD FAILED. HTTP: {$direct['status']}, Error: {$direct['error']}", 'WARNING');

        // Hints for binary files (PDF, PPTX), supplied by the caller via globals
        $binaryHints = null;
        if (isset($GLOBALS['is_binary_file']) && $GLOBALS['is_binary_file'] === true) {
            $hintFilename = isset($GLOBALS['original_filename']) ? $GLOBALS['original_filename'] : '';
            $hintExtension = isset($GLOBALS['file_extension']) ? $GLOBALS['file_extension'] : '';
            logMessage("SPECIAL BINARY FILE HANDLING: For $hintExtension file: $hintFilename");

            $hintStem = '';
            if (!empty($hintFilename)) {
                $hintStem = preg_replace('/\.[^\.]+$/', '', $hintFilename);
                logMessage("Filename without extension: $hintStem");
            }
            $binaryHints = ['filename' => $hintFilename, 'extension' => $hintExtension, 'stem' => $hintStem];
        }

        // List all files in the target container to find what was actually created
        $listSas = defined('AZURE_STORAGE_SAS_TOKEN')
            ? AZURE_STORAGE_SAS_TOKEN
            : $this->generateSasToken($this->targetContainer, '', 'rl');

        $containerUrl = $containerBase . "?restype=container&comp=list&$listSas";
        logMessage("Listing container contents to find translated file. URL (partial): " . substr($containerUrl, 0, 60) . "...");

        $listing = $this->fetchTranslatedUrl($containerUrl, 30, [], false);
        if ($listing['ok']) {
            logMessage("Successfully listed container contents. Response size: " . strlen($listing['body']) . " bytes");
            $possibleTargetNames = $this->mergeListedBlobCandidates($listing['body'], $possibleTargetNames, $targetLang, $binaryHints);
        } else {
            logMessage("Failed to list container contents. HTTP: {$listing['status']}", 'WARNING');
        }

        // Try to download each possible file. The exact name is skipped when
        // the request would be identical to the direct attempt above.
        $attempted = ($listSas === $readSas) ? [$blobName => true] : [];
        foreach ($possibleTargetNames as $possibleName) {
            $possibleName = (string) $possibleName;
            if (isset($attempted[$possibleName])) {
                continue;
            }
            $attempted[$possibleName] = true;

            $downloadUrl = $containerBase . '/' . rawurlencode($possibleName) . '?' . $listSas;
            logMessage("Attempting to download from Azure. URL (partial): " . substr($downloadUrl, 0, 60) . "...");

            $attempt = $this->fetchTranslatedUrl($downloadUrl, 60);
            if ($attempt['ok']) {
                logMessage("Successfully downloaded from Azure. Size: " . strlen($attempt['body']) . " bytes");
                $this->cacheTranslatedCopy($localTargetPath, $attempt['body'], "Saved Azure file to local storage");
                return $attempt['body'];
            }

            logMessage("Azure download failed for '$possibleName'. HTTP: {$attempt['status']}", 'WARNING');
        }

        // If all else fails - ask the translation API where it wrote the result
        try {
            $fromBatch = $this->fetchViaTranslatorBatch($localTargetPath);
            if ($fromBatch !== null) {
                return $fromBatch;
            }
        } catch (Exception $e) {
            logMessage("Failed to access Microsoft API directly: " . $e->getMessage(), 'ERROR');
        }

        // If we still can't find the file, look inside the language folders
        try {
            $fromFolder = $this->fetchFromLanguageFolders($containerBase, $listSas, $targetLang, $localTargetPath);
            if ($fromFolder !== null) {
                return $fromFolder;
            }
        } catch (Exception $e) {
            logMessage("Failed to check prefixes: " . $e->getMessage(), 'ERROR');
        }

        logMessage("All download attempts failed", 'ERROR');

        // Leave pointers for manual investigation in the log
        try {
            logMessage("LAST RESORT: Trying direct Azure Portal access");
            logMessage("LAST RESORT: You may need to check storage account '$accountName' container '{$this->targetContainer}' for file named: $blobName");

            if (isset($GLOBALS['document_id']) && !empty($GLOBALS['document_id'])) {
                $documentId = $GLOBALS['document_id'];
                logMessage("LAST RESORT: You may need to check the document status at: " . MS_API_ENDPOINT . "/translator/document/batches/$documentId");
            }
        } catch (Exception $e) {
            logMessage("Failed last resort attempt: " . $e->getMessage(), 'ERROR');
        }

        // If all else fails, create an emergency file
        logMessage("Creating emergency file as last resort", 'WARNING');
        $emergencyContent = $emergencyMarker . "\n\nThe requested translation could not be found or downloaded.\n\nPlease check Azure Storage container '{$this->targetContainer}' manually for file: $blobName\n\nTranslation may have succeeded but file access failed.";

        if ($this->cacheTranslatedCopy($localTargetPath, $emergencyContent, "Saved emergency file to")) {
            return $emergencyContent;
        }

        return false;
    } catch (Exception $e) {
        logMessage("Error in download process: " . $e->getMessage() . "\nStack trace: " . $e->getTraceAsString(), 'ERROR');
        return false;
    }
}

/**
 * Build the ordered, de-duplicated list of blob names a translation might
 * have been stored under: the exact name first, then file-type specific
 * guesses, then generic guesses.
 *
 * @param string $blobName Exact requested blob name
 * @param string $targetLang Upper-case language code parsed from the name, or ''
 * @param string $origFilename Original file name parsed from the name, or ''
 * @param string $fileExtension Lower-case extension of the blob name, or ''
 * @return array List of candidate blob names
 */
private function buildTranslatedNameCandidates($blobName, $targetLang, $origFilename, $fileExtension) {
    // Without a parsed language and file name every derived guess would be
    // a meaningless fragment such as "/" or ".translated.pdf".
    if ($targetLang === '' || $origFilename === '') {
        return [$blobName];
    }

    $lang = strtolower($targetLang);
    $stem = preg_replace('/\.[^\.]+$/', '', $origFilename);

    if ($fileExtension === 'pdf') {
        $specific = [
            $origFilename . '.translated.pdf',
            $stem . '.translated.pdf',
            $stem . '_' . $lang . '.pdf',
            $stem . '_' . $targetLang . '.pdf',
            'translated_' . $origFilename,
            'translated/' . $origFilename,
            $lang . '/' . $stem . '.pdf',
            $targetLang . '/' . $stem . '.pdf',
            'translated/' . $lang . '/' . $origFilename,
            'translated/' . $targetLang . '/' . $origFilename,
        ];
    } elseif ($fileExtension === 'pptx' || $fileExtension === 'ppt') {
        $specific = [
            'slides/' . $origFilename,
            'presentations/' . $origFilename,
            $stem . '_' . $lang . '.pptx',
            $stem . '_' . $targetLang . '.pptx',
            $stem . '_translated.pptx',
            'translated_' . $origFilename,
            $stem . '.pptx',
            $lang . '/' . $origFilename,
            $targetLang . '/' . $origFilename,
            $lang . '/' . $stem . '.pptx',
            $targetLang . '/' . $stem . '.pptx',
        ];
    } else {
        $specific = [
            $origFilename . '.txt',
            $origFilename . '.docx',
            $origFilename . '.pdf',
            $stem . '.docx',
            $stem . '.txt',
            $stem . '_' . $lang . '.' . $fileExtension,
            $stem . '_' . $targetLang . '.' . $fileExtension,
        ];
    }

    $generic = [
        $lang . '/' . $origFilename,        // langcode/filename
        $targetLang . '/' . $origFilename,  // LANGCODE/filename
        $origFilename . '.' . $lang,        // filename.langcode
        $origFilename . '.' . $targetLang,  // filename.LANGCODE
        $lang,                              // just the language folder
        $targetLang,                        // just the language folder (uppercase)
        $origFilename,                      // original filename without language
    ];

    return array_values(array_unique(array_merge([$blobName], $specific, $generic)));
}

/**
 * Perform an HTTP GET with cURL and report the outcome.
 *
 * TLS certificate verification is left at cURL's default (enabled): the
 * URLs carry SAS tokens or API keys.
 *
 * @param string $url URL to fetch
 * @param int $timeoutSeconds Overall timeout for the transfer
 * @param array $headers Extra request headers
 * @param bool $followRedirects Whether to follow Location headers
 * @return array 'ok' (bool, true only for a 2xx response), 'body'
 *               (string|false), 'status' (int), 'error' (string) and 'info'
 *               (transfer details with the URL removed, safe to log)
 */
private function fetchTranslatedUrl($url, $timeoutSeconds, array $headers = [], $followRedirects = true) {
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, $followRedirects);
    curl_setopt($ch, CURLOPT_TIMEOUT, $timeoutSeconds);
    curl_setopt($ch, CURLOPT_HEADER, false);
    if (!empty($headers)) {
        curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
    }

    $body = curl_exec($ch);
    $status = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
    $error = curl_error($ch);
    $fullInfo = curl_getinfo($ch);
    curl_close($ch);

    // Only whitelisted fields are exposed: the full info array contains the
    // request URL, which includes the SAS token.
    $loggable = ['http_code' => 0, 'content_type' => 0, 'total_time' => 0, 'size_download' => 0, 'redirect_count' => 0];
    $info = is_array($fullInfo) ? array_intersect_key($fullInfo, $loggable) : [];

    return [
        'ok' => $body !== false && $status >= 200 && $status < 300,
        'body' => $body,
        'status' => $status,
        'error' => $error,
        'info' => $info,
    ];
}

/**
 * Write downloaded content to the local storage path, creating the parent
 * directory when needed, and log "<message>: <path>" on success.
 *
 * @param string $localTargetPath Destination path
 * @param string $content Bytes to store
 * @param string $successMessage Log message prefix used on success
 * @return bool True when the content was written
 */
private function cacheTranslatedCopy($localTargetPath, $content, $successMessage) {
    $directory = dirname($localTargetPath);
    if (!is_dir($directory) && !mkdir($directory, 0777, true) && !is_dir($directory)) {
        logMessage("Failed to create local storage directory: $directory", 'WARNING');
        return false;
    }

    if (file_put_contents($localTargetPath, $content) === false) {
        logMessage("Failed to write local copy: $localTargetPath", 'WARNING');
        return false;
    }

    logMessage("$successMessage: $localTargetPath");
    return true;
}

/**
 * Extend the candidate list with the blob names found in a container
 * listing (XML body of a "comp=list" response).
 *
 * Names matching the binary-file hints are placed in front, most recently
 * modified first; every other listed blob is appended, most recent first.
 *
 * NOTE(review): appending every blob in the container means the caller may
 * end up downloading a blob that belongs to a different request. This
 * pre-existing behaviour is kept; confirm it is acceptable.
 *
 * @param string $listResponse XML listing returned by the blob service
 * @param array $candidates Current candidate names
 * @param string $targetLang Upper-case language code, or ''
 * @param array|null $binaryHints 'filename', 'extension', 'stem', or null
 * @return array Updated candidate names
 */
private function mergeListedBlobCandidates($listResponse, array $candidates, $targetLang, $binaryHints) {
    $xml = simplexml_load_string($listResponse);
    if (!$xml || !isset($xml->Blobs->Blob)) {
        return $candidates;
    }

    $recentFiles = [];
    $binaryFileMatches = [];

    foreach ($xml->Blobs->Blob as $blob) {
        $name = (string) $blob->Name;
        $lastModified = (string) $blob->Properties->{'Last-Modified'};
        $timestamp = strtotime($lastModified);

        $recentFiles[$name] = $timestamp;
        logMessage("Found blob in container: $name (modified: $lastModified)");

        if ($binaryHints === null) {
            continue;
        }

        $extension = $binaryHints['extension'];
        if (!empty($extension) &&
            (strpos($name, ".$extension") !== false ||
             strpos($name, "." . strtoupper($extension)) !== false)) {
            logMessage("Found matching extension ($extension) in file: $name");
            $binaryFileMatches[$name] = $timestamp;
        }

        $filename = $binaryHints['filename'];
        $stem = $binaryHints['stem'];
        if (!empty($filename) &&
            (strpos($name, $filename) !== false ||
             (!empty($stem) && strpos($name, $stem) !== false))) {
            logMessage("Found matching filename part in: $name");
            $binaryFileMatches[$name] = $timestamp;
        }

        if (!empty($targetLang) &&
            (strpos($name, strtolower($targetLang) . '/') === 0 ||
             strpos($name, $targetLang . '/') === 0)) {
            logMessage("Found matching language prefix for $targetLang in file: $name");
            $binaryFileMatches[$name] = $timestamp;
        }
    }

    if (!empty($binaryFileMatches)) {
        arsort($binaryFileMatches);
        logMessage("Found " . count($binaryFileMatches) . " potential binary file matches");

        // Collect first, then prepend as a group: unshifting one at a time
        // would reverse the order and put the oldest match in front.
        $priority = [];
        foreach (array_keys($binaryFileMatches) as $matchedFile) {
            $matchedFile = (string) $matchedFile;
            if (!in_array($matchedFile, $candidates, true)) {
                $priority[] = $matchedFile;
                logMessage("Added PRIORITY binary file match: $matchedFile");
            }
        }
        $candidates = array_merge($priority, $candidates);
    }

    arsort($recentFiles);
    foreach (array_keys($recentFiles) as $recentFile) {
        $recentFile = (string) $recentFile;
        if (!in_array($recentFile, $candidates, true)) {
            $candidates[] = $recentFile;
            logMessage("Added recent file to check list: $recentFile");
        }
    }

    return $candidates;
}

/**
 * Ask the translation API for the documents of the batch identified by
 * $GLOBALS['document_id'] / $_GET['document_id'] and download the first
 * reported 'targetUrl' that can be fetched.
 *
 * @param string $localTargetPath Where to keep a local copy of the result
 * @return string|null Downloaded content, or null when nothing was found
 */
private function fetchViaTranslatorBatch($localTargetPath) {
    logMessage("Attempting to access translation result directly via Microsoft API");

    $documentId = $GLOBALS['document_id'] ?? $_GET['document_id'] ?? '';
    if (!is_scalar($documentId) || empty($documentId)) {
        return null;
    }

    // The id may come from the query string, so it is encoded before being
    // placed in the request path.
    $documentUrl = MS_API_ENDPOINT . '/translator/document/batches/' . rawurlencode((string) $documentId)
        . '/documents?api-version=' . MS_API_VERSION;

    $details = $this->fetchTranslatedUrl($documentUrl, 30, [
        'Ocp-Apim-Subscription-Key: ' . MS_API_KEY,
        'Ocp-Apim-Subscription-Region: ' . MS_API_REGION,
    ], false);

    if (!$details['ok']) {
        logMessage("Failed to retrieve document details from Microsoft. HTTP: {$details['status']}", 'WARNING');
        return null;
    }

    // json_decode() yields null for a non-JSON body; count(null) would throw
    // a TypeError on PHP 8, so fall back to an empty list.
    $documentData = json_decode($details['body'], true);
    $documents = (is_array($documentData) && isset($documentData['value']) && is_array($documentData['value']))
        ? $documentData['value']
        : [];
    logMessage("Successfully retrieved document details from Microsoft. Found " . count($documents) . " documents");

    foreach ($documents as $document) {
        if (!is_array($document) || empty($document['targetUrl']) || !is_string($document['targetUrl'])) {
            continue;
        }

        $directUrl = $document['targetUrl'];
        logMessage("Found direct target URL from Microsoft: " . substr($directUrl, 0, 60) . "...");

        $download = $this->fetchTranslatedUrl($directUrl, 30);
        if (!$download['ok']) {
            logMessage("Failed to download from direct URL. HTTP: {$download['status']}", 'WARNING');
            continue;
        }

        $directContent = $download['body'];
        logMessage("Successfully downloaded translated file directly from Microsoft URL. Size: " . strlen($directContent) . " bytes");
        logMessage("File starts with: " . bin2hex(substr($directContent, 0, 20)));

        // Identify the content from its leading bytes (for the log only)
        $contentType = '';
        if (substr($directContent, 0, 4) === '%PDF') {
            logMessage("PDF signature detected in content");
            $contentType = 'application/pdf';
        } elseif (substr($directContent, 0, 4) === 'PK' . chr(3) . chr(4)) {
            logMessage("Office file (ZIP) signature detected");
            $hintExtension = isset($GLOBALS['file_extension']) ? $GLOBALS['file_extension'] : null;
            if ($hintExtension === 'pptx' || $hintExtension === 'ppt') {
                $contentType = 'application/vnd.openxmlformats-officedocument.presentationml.presentation';
            } elseif ($hintExtension === 'docx' || $hintExtension === 'doc') {
                $contentType = 'application/vnd.openxmlformats-officedocument.wordprocessingml.document';
            }
        }

        if ($contentType) {
            logMessage("Content type detected from file signature: $contentType");
        } else {
            logMessage("Could not detect content type from file signature");
        }

        $this->cacheTranslatedCopy($localTargetPath, $directContent, "Saved direct download to local storage");
        return $directContent;
    }

    return null;
}

/**
 * List the blobs stored under "<lang>/" and "<LANG>/" in the target
 * container and return the first one that can be downloaded.
 *
 * @param string $containerBase Container URL without query string
 * @param string $sasToken SAS token used for listing and downloading
 * @param string $targetLang Upper-case language code, or ''
 * @param string $localTargetPath Where to keep a local copy of the result
 * @return string|null Downloaded content, or null when nothing was found
 */
private function fetchFromLanguageFolders($containerBase, $sasToken, $targetLang, $localTargetPath) {
    if ($targetLang === '') {
        // No language known: the prefix would be "/", which matches nothing useful.
        return null;
    }

    $listUrl = $containerBase . "?restype=container&comp=list&delimiter=/&$sasToken";
    $prefixes = array_unique([strtolower($targetLang) . '/', $targetLang . '/']);

    foreach ($prefixes as $prefix) {
        logMessage("Checking for prefix: $prefix");

        $listing = $this->fetchTranslatedUrl($listUrl . "&prefix=" . urlencode($prefix), 30, [], false);
        if (!$listing['ok']) {
            continue;
        }

        $xml = simplexml_load_string($listing['body']);
        if (!$xml || !isset($xml->Blobs->Blob)) {
            continue;
        }

        foreach ($xml->Blobs->Blob as $blob) {
            $name = (string) $blob->Name;
            logMessage("Found prefixed blob: $name");

            $downloadUrl = $containerBase . '/' . rawurlencode($name) . '?' . $sasToken;
            $download = $this->fetchTranslatedUrl($downloadUrl, 30);
            if ($download['ok']) {
                logMessage("Successfully downloaded prefixed file. Size: " . strlen($download['body']) . " bytes");
                $this->cacheTranslatedCopy($localTargetPath, $download['body'], "Saved prefixed file to local storage");
                return $download['body'];
            }
        }
    }

    return null;
}
|
|
|
|
/**
 * Build the URL of a blob (without any SAS token).
 *
 * The storage account is read from the AccountName field of the connection
 * string; 'opticaltranslations' is used when that field is missing.
 *
 * @param string $container Container name, inserted as-is
 * @param string $blobName Blob name; percent-encoded with rawurlencode()
 * @return string Blob URL on the blob.core.windows.net host
 */
public function getBlobUrl($container, $blobName) {
    $found = [];
    preg_match('/AccountName=([^;]+)/', $this->connectionString, $found);

    $account = 'opticaltranslations';
    if (!empty($found[1])) {
        $account = $found[1];
    }

    // Encode the blob name so spaces and special characters survive in the URL
    return 'https://' . $account . '.blob.core.windows.net/' . $container . '/' . rawurlencode($blobName);
}
|
|
|
|
/**
 * Build the HTTPS URL of a container.
 *
 * The storage account is resolved in this order:
 *   1. the host of a BlobEndpoint=https://<account>.blob.core.windows.net entry,
 *   2. the AccountName entry,
 *   3. the hard-coded account 'opticaltranslations'.
 *
 * @param string $container Container name (inserted as given).
 * @return string Container URL without a trailing slash or SAS token.
 */
private function getContainerUrl($container) {
    // Patterns are tried in priority order; the first one that captures a
    // non-empty account name wins.
    $accountPatterns = [
        '/BlobEndpoint=https:\/\/([^\.]+)\.blob\.core\.windows\.net/',
        '/AccountName=([^;]+)/',
    ];

    foreach ($accountPatterns as $pattern) {
        $hit = [];
        preg_match($pattern, $this->connectionString, $hit);
        if (!empty($hit[1])) {
            return 'https://' . $hit[1] . '.blob.core.windows.net/' . $container;
        }
    }

    // Nothing usable in the connection string: last resort hard-coded account.
    return 'https://opticaltranslations.blob.core.windows.net/' . $container;
}
|
|
|
|
/**
 * Build the SAS-authenticated URL used to upload (PUT) a single blob.
 *
 * The token is requested from generateSasToken() for the given blob with
 * read/create/write permissions. Note that generateSasToken() returns a
 * preconfigured token (config constant or connection string) when one is
 * available, in which case the scope and permissions are whatever that
 * token grants.
 *
 * @param string $container Container name.
 * @param string $blobName  Blob name (unencoded; '/' separators are kept).
 * @return string Upload URL; carries no query string when no SAS token
 *                could be obtained.
 */
private function getUploadUrl($container, $blobName) {
    // Azure requires permission letters in canonical order (r, a, c, w, d, ...),
    // so "read + create + write" must be spelled 'rcw', not 'rwc'.
    $sasToken = (string) $this->generateSasToken($container, $blobName, 'rcw');

    // Tokens copied from the portal may start with '?'; strip it so the URL
    // does not end up with '??'.
    $sasToken = ltrim($sasToken, '?');

    // Percent-encode each path segment so spaces, '#', '?' etc. in the file
    // name cannot corrupt the URL, while keeping virtual-directory slashes.
    $encodedBlobName = implode('/', array_map('rawurlencode', explode('/', (string) $blobName)));

    $uploadUrl = $this->getContainerUrl($container) . '/' . $encodedBlobName;

    // Without a token, return the bare URL so the uploader's
    // "URL does not contain SAS token" check can actually fire.
    if ($sasToken === '') {
        return $uploadUrl;
    }

    return $uploadUrl . '?' . $sasToken;
}
|
|
|
|
/**
 * Upload a local file to Azure Blob Storage as a BlockBlob with a single PUT.
 *
 * The whole file is read into memory and sent in one request, so this is
 * only suitable for files that fit comfortably in memory.
 *
 * @param string $url      Full blob URL, normally including a SAS query string.
 * @param string $filePath Path of the local file to upload.
 * @return bool True when the service answered with a 2xx status.
 * @throws Exception When the file is missing or unreadable, or when the
 *                   upload does not return a 2xx status.
 */
private function uploadToBlobStorage($url, $filePath) {
    // Read the file content
    if (!file_exists($filePath)) {
        logMessage("File not found: $filePath", 'ERROR');
        throw new Exception("File not found: $filePath");
    }

    $fileContent = file_get_contents($filePath);
    if ($fileContent === false) {
        logMessage("Failed to read file: $filePath", 'ERROR');
        throw new Exception("Failed to read file: $filePath");
    }

    $fileSize = strlen($fileContent);
    logMessage("Uploading file to Azure Blob Storage. File size: $fileSize bytes");

    // Detect the MIME type; finfo::file() returns false on failure, which
    // would otherwise produce an empty Content-Type header.
    $finfo = new finfo(FILEINFO_MIME_TYPE);
    $contentType = $finfo->file($filePath);
    if ($contentType === false || $contentType === '') {
        $contentType = 'application/octet-stream';
    }
    logMessage("File content type: $contentType");

    // A URL without a query string carries no SAS token and will most
    // likely be rejected; warn but still attempt the upload.
    if (strpos($url, '?') === false) {
        logMessage("Warning: URL does not contain SAS token", 'WARNING');
    }

    logMessage("Upload URL (partial): " . substr($url, 0, 100) . "...");

    // Upload directly using the Azure REST API (Put Blob, BlockBlob)
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'PUT');
    curl_setopt($ch, CURLOPT_POSTFIELDS, $fileContent);
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, true);
    curl_setopt($ch, CURLOPT_HTTPHEADER, [
        'x-ms-blob-type: BlockBlob',
        'Content-Type: ' . $contentType,
        'Content-Length: ' . $fileSize
    ]);

    $response = curl_exec($ch);
    $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    $error = curl_error($ch);

    // Log detailed curl info for debugging, but never the SAS token: the
    // URL entries contain the full signature, so their query is redacted.
    $curlInfo = curl_getinfo($ch);
    if (is_array($curlInfo)) {
        foreach (['url', 'redirect_url'] as $urlKey) {
            if (isset($curlInfo[$urlKey]) && is_string($curlInfo[$urlKey])) {
                $queryStart = strpos($curlInfo[$urlKey], '?');
                if ($queryStart !== false) {
                    $curlInfo[$urlKey] = substr($curlInfo[$urlKey], 0, $queryStart) . '?[REDACTED]';
                }
            }
        }
    }
    logMessage("CURL Info: " . json_encode($curlInfo));

    curl_close($ch);

    logMessage("Blob storage upload response: HTTP $httpCode");

    // A transport failure leaves the HTTP code at 0, which is also caught here.
    if ($httpCode < 200 || $httpCode >= 300) {
        logMessage("Failed to upload blob. HTTP code: $httpCode, Error: $error, Response: $response", 'ERROR');
        throw new Exception("Failed to upload blob. HTTP code: $httpCode, Error: $error");
    }

    logMessage("File uploaded to Azure Blob Storage successfully");
    return true;
}
|
|
|
|
/**
 * Obtain a SAS token for a container or a single blob.
 *
 * Resolution order:
 *   1. AZURE_STORAGE_SAS_TOKEN from config, when defined and non-empty
 *      (returned verbatim; $container, $blobName and $permissions are ignored).
 *   2. The SharedAccessSignature entry of the connection string, when present
 *      (returned verbatim).
 *   3. A service SAS signed locally with the AccountKey from the connection
 *      string. No network call is made. The token is valid from one minute
 *      ago (clock-skew allowance) until 24 hours from now, HTTPS only.
 *
 * @param string $container   Container name.
 * @param string $blobName    Blob name; leave empty for a container-level token.
 * @param string $permissions Permission letters (e.g. 'r', 'rcw', 'racwdl').
 *                            They are re-ordered into the canonical order
 *                            Azure requires; unknown letters are dropped.
 * @return string SAS query string, or '' when no token could be obtained
 *                (the failure is logged, no exception is thrown).
 */
public function generateSasToken($container, $blobName = '', $permissions = 'r') {
    // First check if we can use the SAS token from config
    if (defined('AZURE_STORAGE_SAS_TOKEN') && !empty(AZURE_STORAGE_SAS_TOKEN)) {
        logMessage("Using predefined SAS token from config");
        return AZURE_STORAGE_SAS_TOKEN;
    }

    $isContainerLevel = empty($blobName);
    $resourceLabel = $isContainerLevel ? 'container' : 'blob';

    logMessage("Generating SAS token for $resourceLabel: $container" . ($isContainerLevel ? '' : "/$blobName") . ", permissions: $permissions");

    // A connection string may already embed a ready-made SAS token
    if (strpos($this->connectionString, 'SharedAccessSignature=') !== false) {
        preg_match('/SharedAccessSignature=([^;]+)/', $this->connectionString, $sasMatches);
        if (!empty($sasMatches[1])) {
            $sasToken = $sasMatches[1];
            logMessage("Using SAS token from connection string (partial): " . substr($sasToken, 0, 30) . "...");
            return $sasToken;
        }
    }

    // Otherwise sign a token ourselves from the account name and key
    preg_match('/AccountName=([^;]+)/', $this->connectionString, $nameMatches);
    preg_match('/AccountKey=([^;]+)/', $this->connectionString, $keyMatches);

    if (empty($nameMatches[1]) || empty($keyMatches[1])) {
        logMessage("Could not extract valid SAS token or account details from connection string", 'ERROR');

        // Return empty string, which will likely cause an error later, but at least the app won't crash here
        return '';
    }

    $accountName = $nameMatches[1];

    // The account key is base64; the raw bytes are the HMAC key.
    $accountKey = base64_decode($keyMatches[1], true);
    if ($accountKey === false || $accountKey === '') {
        logMessage("Account key in connection string is not valid base64; cannot sign SAS token", 'ERROR');
        return '';
    }

    // Azure rejects permission strings that are not in this canonical order
    // (e.g. 'rw' is valid, 'wr' is not) or that repeat a letter.
    $signedPermissions = '';
    foreach (str_split('racwdxyltfmeopi') as $flag) {
        if (strpos((string) $permissions, $flag) !== false) {
            $signedPermissions .= $flag;
        }
    }
    if ($signedPermissions === '') {
        logMessage("No valid SAS permissions in '$permissions'; cannot generate SAS token", 'ERROR');
        return '';
    }

    $signedVersion = '2022-11-02';   // Storage service version
    $signedProtocol = 'https';       // HTTPS only

    // Validity window: starts 1 minute ago to tolerate clock skew, lasts 24 hours
    $now = time();
    $startTimeFormatted = gmdate('Y-m-d\TH:i:s\Z', $now - 60);
    $expiryTimeFormatted = gmdate('Y-m-d\TH:i:s\Z', $now + 86400);

    // Service SAS scope: 'c' = container, 'b' = blob. The canonicalized
    // resource uses the unencoded names.
    if ($isContainerLevel) {
        $signedResource = 'c';
        $canonicalResource = "/blob/$accountName/$container";
    } else {
        $signedResource = 'b';
        $canonicalResource = "/blob/$accountName/$container/$blobName";
    }

    // String-to-sign for a Blob service SAS, service version 2020-12-06 and
    // later. Every field must be present, in this order; unused ones are
    // empty strings.
    $stringToSign = implode("\n", [
        $signedPermissions,     // sp
        $startTimeFormatted,    // st
        $expiryTimeFormatted,   // se
        $canonicalResource,
        '',                     // signed identifier (si)
        '',                     // signed IP (sip)
        $signedProtocol,        // spr
        $signedVersion,         // sv
        $signedResource,        // sr
        '',                     // snapshot time
        '',                     // encryption scope (ses)
        '',                     // rscc (Cache-Control)
        '',                     // rscd (Content-Disposition)
        '',                     // rsce (Content-Encoding)
        '',                     // rscl (Content-Language)
        ''                      // rsct (Content-Type)
    ]);

    // HMAC-SHA256 over the UTF-8 string-to-sign, base64-encoded
    $signature = base64_encode(hash_hmac('sha256', mb_convert_encoding($stringToSign, 'UTF-8'), $accountKey, true));

    // Query parameters must mirror exactly what was signed above
    $sasComponents = [
        'sv'  => $signedVersion,
        'sr'  => $signedResource,
        'sp'  => $signedPermissions,
        'st'  => $startTimeFormatted,
        'se'  => $expiryTimeFormatted,
        'spr' => $signedProtocol,
        'sig' => $signature,
    ];

    $pairs = [];
    foreach ($sasComponents as $key => $value) {
        $pairs[] = $key . '=' . urlencode($value);
    }
    $sasToken = implode('&', $pairs);

    // Only the non-secret head of the token is logged (the signature is last)
    logMessage("Generated SAS token for $resourceLabel (partial): " . substr($sasToken, 0, 30) . "...");

    return $sasToken;
}
|
|
|
|
/**
 * Verify that the source and target containers exist, trying to create
 * any that cannot be confirmed.
 *
 * For each container this probes container properties (HEAD), then a blob
 * listing (GET), and finally attempts to create the container (PUT).
 * This is deliberately best-effort: problems are logged, but the method
 * still reports success so the translation flow can proceed.
 *
 * NOTE(review): creating a container normally needs account-level
 * authorization; with a container- or blob-scoped SAS the create call is
 * expected to be rejected and only the probes are meaningful - confirm
 * against the token type actually configured.
 *
 * @return bool Always true; failures are only logged.
 */
public function ensureContainersExist() {
    logMessage("Verifying Azure Storage containers: {$this->sourceContainer} and {$this->targetContainer}");

    try {
        // Single place for the three HTTP calls below. TLS peer verification
        // stays enabled: these URLs carry a SAS credential.
        $sendRequest = static function ($url, $method) {
            $ch = curl_init();
            curl_setopt($ch, CURLOPT_URL, $url);
            curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
            curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, true);

            if ($method === 'HEAD') {
                curl_setopt($ch, CURLOPT_NOBODY, true);
            } elseif ($method === 'PUT') {
                curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'PUT');
                // Body-less PUT: state the length explicitly so the service
                // does not answer 411 Length Required.
                curl_setopt($ch, CURLOPT_HTTPHEADER, ['Content-Length: 0']);
            }

            $body = curl_exec($ch);
            $status = curl_getinfo($ch, CURLINFO_HTTP_CODE);
            curl_close($ch);

            return ['status' => $status, 'body' => $body];
        };

        $containersToCheck = [$this->sourceContainer, $this->targetContainer];
        $containersExist = true;

        foreach ($containersToCheck as $container) {
            logMessage("Checking container: $container");

            // Resolve the endpoint the same way as the rest of the class
            // (BlobEndpoint, then AccountName, then the built-in default).
            $baseUrl = $this->getContainerUrl($container);

            // A token is requested per container because a generated token is
            // scoped to one container; generateSasToken() itself prefers a
            // configured token when one is set and non-empty.
            $sasToken = ltrim((string) $this->generateSasToken($container, '', 'racwdl'), '?');
            $sasSuffix = ($sasToken !== '') ? '&' . $sasToken : '';

            // Method 1: container properties
            $result = $sendRequest($baseUrl . '?restype=container' . $sasSuffix, 'HEAD');
            $httpCode = $result['status'];
            logMessage("Container check response code: $httpCode");

            if ($httpCode >= 200 && $httpCode < 300) {
                logMessage("Container $container exists");
                continue; // Container exists, move to next container
            }

            // Method 2: try listing the container contents
            $result = $sendRequest($baseUrl . '?restype=container&comp=list' . $sasSuffix, 'GET');
            $httpCode = $result['status'];
            logMessage("Container list response code: $httpCode");

            if ($httpCode >= 200 && $httpCode < 300) {
                logMessage("Container $container exists (confirmed by listing)");
                continue; // Container exists, move to next container
            }

            // If all checks fail, assume container doesn't exist and try to create it
            logMessage("Container $container doesn't exist or isn't accessible, creating...");

            $result = $sendRequest($baseUrl . '?restype=container' . $sasSuffix, 'PUT');
            $httpCode = $result['status'];
            $response = $result['body'];

            if ($httpCode >= 200 && $httpCode < 300) {
                logMessage("Container $container created successfully");
            } else {
                logMessage("Failed to create container $container: HTTP $httpCode, Response: $response", 'ERROR');
                $containersExist = false;
            }
        }

        if ($containersExist) {
            logMessage("Container verification completed successfully");
        } else {
            // For this application, we'll try to proceed even if container creation fails
            // Microsoft's service might still be able to access the containers
            logMessage("Container verification had issues, but we'll proceed anyway", 'WARNING');
        }

        return true;
    } catch (Exception $e) {
        logMessage("Error verifying containers: " . $e->getMessage(), 'ERROR');
        // For this application, we'll try to proceed even if verification fails
        // Microsoft's service might still be able to access the containers
        logMessage("Container verification failed with exception, but we'll proceed anyway", 'WARNING');
        return true;
    }
}
|
|
}
|
|
?>
|