msft-trns/process.php
2026-03-02 17:21:57 +00:00

265 lines
No EOL
12 KiB
PHP

<?php
// Report every error class, but send diagnostics to the PHP error log rather
// than the response body: this endpoint answers with JSON, and a notice or
// warning printed into the output would corrupt the payload, could be emitted
// before header() runs, and would expose server paths to the client.
error_reporting(E_ALL);
ini_set('display_errors', '0');
ini_set('display_startup_errors', '0');
ini_set('log_errors', '1');
include 'config.php';
include 'logger.php';
include 'azure_storage.php';
// Bootstrap the application log: when it is missing, seed it with a
// timestamped banner line and then set its mode to 0666.
$logFile = 'app.log';
$logAlreadyPresent = file_exists($logFile);
if ($logAlreadyPresent === false) {
    $banner = "Log started at " . date('Y-m-d H:i:s') . PHP_EOL;
    file_put_contents($logFile, $banner);
    // Read/write for owner, group and others, so the web server can append.
    chmod($logFile, 0666);
}
// Every outcome of this endpoint, success or failure, is reported as JSON.
header('Content-Type: application/json');
logMessage("Received request to process.php");

/**
 * Aborts the request with a JSON error payload.
 *
 * @param string      $message Text returned to the client under the "error" key.
 * @param string|null $logText Text logged at ERROR level; defaults to $message.
 */
$fail = static function ($message, $logText = null) {
    logMessage($logText === null ? $message : $logText, 'ERROR');
    echo json_encode(['error' => $message]);
    exit;
};

/**
 * Reads a POST field as a trimmed string. Falls back to $default when the
 * field is missing or was submitted as an array (e.g. "target_lang[]").
 */
$postField = static function ($key, $default) {
    $value = $_POST[$key] ?? $default;
    return is_string($value) ? trim($value) : $default;
};

if ($_SERVER['REQUEST_METHOD'] === 'POST') {
    try {
        logMessage("Received POST request");

        // ---- 1. Validate the upload -------------------------------------
        if (!isset($_FILES['file'])) {
            $fail('No file received');
        }
        $file = $_FILES['file'];

        // A field posted as "file[]" produces arrays for every entry; this
        // endpoint translates exactly one document per request.
        if (is_array($file['name']) || is_array($file['error'])) {
            $fail('Only a single file upload is supported');
        }
        logMessage("File received: " . $file['name']);

        if ($file['error'] !== UPLOAD_ERR_OK) {
            $uploadErrors = [
                UPLOAD_ERR_INI_SIZE   => 'The uploaded file exceeds the upload_max_filesize directive in php.ini',
                UPLOAD_ERR_FORM_SIZE  => 'The uploaded file exceeds the MAX_FILE_SIZE directive that was specified in the HTML form',
                UPLOAD_ERR_PARTIAL    => 'The uploaded file was only partially uploaded',
                UPLOAD_ERR_NO_FILE    => 'No file was uploaded',
                UPLOAD_ERR_NO_TMP_DIR => 'Missing a temporary folder',
                UPLOAD_ERR_CANT_WRITE => 'Failed to write file to disk',
                UPLOAD_ERR_EXTENSION  => 'A PHP extension stopped the file upload',
            ];
            $errorMessage = $uploadErrors[$file['error']] ?? 'Unknown upload error';
            $fail('File upload error: ' . $errorMessage);
        }

        // is_uploaded_file() also guarantees the path really came from this
        // request's HTTP upload, not merely that some file exists there.
        if (!is_uploaded_file($file['tmp_name']) || !is_readable($file['tmp_name'])) {
            $fail(
                'Temporary file does not exist or is not readable',
                "Temporary file does not exist or is not readable: " . $file['tmp_name']
            );
        }

        // ---- 2. Read and validate the translation options ---------------
        $source_lang = $postField('source_lang', '');
        $target_lang = $postField('target_lang', '');
        $formality = $postField('formality', 'default');
        logMessage("Source Language: $source_lang");
        logMessage("Target Language: $target_lang");
        logMessage("Formality: $formality");

        // Reject a missing target before anything is uploaded to storage;
        // the translation request could not succeed without it.
        if ($target_lang === '') {
            $fail('Target language is required');
        }

        // ---- 3. Stage the document in Azure Blob Storage ----------------
        logMessage("Initializing Azure Storage Helper");
        $azureStorage = new AzureStorageHelper();

        logMessage("Ensuring Azure Storage containers exist");
        if (!$azureStorage->ensureContainersExist()) {
            $fail('Failed to verify Azure Storage containers');
        }

        logMessage("Uploading file to Azure Blob Storage: " . $file['name']);
        $sourceBlob = $azureStorage->uploadSourceFile($file['tmp_name'], $file['name']);
        if (!$sourceBlob) {
            $fail('Failed to upload file to Azure Storage');
        }

        logMessage("Setting up target blob for translated file");
        $targetBlob = $azureStorage->getTargetBlob($file['name'], $target_lang);
        if (!$targetBlob) {
            $fail('Failed to set up target blob');
        }

        // ---- 4. Build the Document Translation batch request ------------
        $batchTranslationUrl = MS_API_ENDPOINT . '/translator/document/batches?api-version=' . MS_API_VERSION;
        logMessage("Prepared translation API URL: $batchTranslationUrl");

        // "AUTO" (any casing) or an empty value means: let the service detect
        // the source language, which is requested by omitting the field.
        $formattedSourceLang = strcasecmp($source_lang, 'AUTO') === 0 ? '' : strtolower($source_lang);

        // Regional variants are sent with an upper-case region subtag.
        $regionalCodes = [
            'en-gb' => 'en-GB',
            'en-us' => 'en-US',
            'pt-pt' => 'pt-PT',
            'pt-br' => 'pt-BR',
        ];
        $formattedTargetLang = strtolower($target_lang);
        $formattedTargetLang = $regionalCodes[$formattedTargetLang] ?? $formattedTargetLang;

        // The request addresses whole containers and selects the uploaded
        // blob through the "filter.prefix" field.
        preg_match('/AccountName=([^;]+)/', AZURE_STORAGE_CONNECTION_STRING, $nameMatches);
        $accountName = !empty($nameMatches[1]) ? $nameMatches[1] : 'opticaltranslations';
        $sasToken = defined('AZURE_STORAGE_SAS_TOKEN') ? AZURE_STORAGE_SAS_TOKEN : '';

        $blobEndpoint = "https://$accountName.blob.core.windows.net/";
        $sourceContainerBase = $blobEndpoint . AZURE_STORAGE_CONTAINER_SOURCE;
        $targetContainerBase = $blobEndpoint . AZURE_STORAGE_CONTAINER_TARGET;
        $sourceContainerUrl = "$sourceContainerBase?$sasToken";
        $targetContainerUrl = "$targetContainerBase?$sasToken";
        // Only the token-free part of each URL is written to the log.
        logMessage("Source container URL: $sourceContainerBase");
        logMessage("Target container URL: $targetContainerBase");

        $source = [
            'sourceUrl' => $sourceContainerUrl,
            'storageSource' => 'AzureBlob',
            'filter' => [
                'prefix' => $sourceBlob['blob_name'],
                'includeSubfolders' => false,
            ],
        ];
        if ($formattedSourceLang !== '') {
            $source['language'] = $formattedSourceLang;
        }

        $target = [
            'targetUrl' => $targetContainerUrl,
            'storageSource' => 'AzureBlob',
            'language' => $formattedTargetLang,
            'category' => 'general',
        ];
        if ($formality === 'more' || $formality === 'less') {
            $target['formality'] = ($formality === 'more') ? 'Formal' : 'Informal';
        }

        $requestBody = [
            'inputs' => [
                [
                    'source' => $source,
                    'targets' => [$target],
                ],
            ],
        ];

        // Log a copy with the SAS tokens masked so storage credentials never
        // end up in the log file.
        $loggableBody = $requestBody;
        $loggableBody['inputs'][0]['source']['sourceUrl'] = $sourceContainerBase . '?<redacted>';
        $loggableBody['inputs'][0]['targets'][0]['targetUrl'] = $targetContainerBase . '?<redacted>';
        logMessage("Prepared request body: " . json_encode($loggableBody));

        $jsonBody = json_encode($requestBody);
        if ($jsonBody === false) {
            $fail(
                'Failed to encode translation request',
                'Failed to encode translation request: ' . json_last_error_msg()
            );
        }

        // ---- 5. Submit the job ------------------------------------------
        $requestHeaders = [
            'Ocp-Apim-Subscription-Key: ' . MS_API_KEY,
            'Ocp-Apim-Subscription-Region: ' . MS_API_REGION,
            'Content-Type: application/json',
        ];

        // Response headers are collected because the service reports the id
        // of an accepted job in the "Operation-Location" header.
        $responseHeaders = [];
        $ch = curl_init();
        curl_setopt_array($ch, [
            CURLOPT_URL => $batchTranslationUrl,
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_POST => true,
            CURLOPT_HTTPHEADER => $requestHeaders,
            CURLOPT_POSTFIELDS => $jsonBody,
            CURLOPT_HEADERFUNCTION => static function ($handle, $headerLine) use (&$responseHeaders) {
                $parts = explode(':', $headerLine, 2);
                if (count($parts) === 2) {
                    $responseHeaders[strtolower(trim($parts[0]))] = trim($parts[1]);
                }
                // cURL requires the number of bytes consumed.
                return strlen($headerLine);
            },
        ]);

        logMessage("Sending request to Microsoft Translator API");
        $response = curl_exec($ch);
        $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
        $curlError = curl_error($ch);
        logMessage("CURL Info: " . json_encode(curl_getinfo($ch)));
        curl_close($ch);

        // The subscription key is masked before the call is logged.
        $loggableHeaders = $requestHeaders;
        $loggableHeaders[0] = 'Ocp-Apim-Subscription-Key: <redacted>';
        logApiCall($batchTranslationUrl, 'POST', $loggableHeaders, $loggableBody, $response, $httpCode);

        // ---- 6. Translate the API outcome into the frontend payload -----
        if ($response === false) {
            logMessage("cURL Error: " . $curlError, 'ERROR');
            $payload = ['error' => 'cURL error: ' . $curlError];
        } else {
            logMessage("API Response Code: $httpCode");
            logMessage("API Response: $response");

            if ($httpCode >= 400) {
                logMessage("HTTP Error Response: $httpCode - $response", 'ERROR');
                $payload = ['error' => "HTTP Error: $httpCode", 'details' => $response];
            } else {
                // The body may legitimately be empty on success, so it is only
                // decoded (and only counted as malformed) when present.
                $body = trim($response);
                $result = null;
                $decodeError = null;
                if ($body !== '') {
                    $result = json_decode($body, true);
                    if (json_last_error() !== JSON_ERROR_NONE) {
                        $decodeError = json_last_error_msg();
                    }
                }

                // Prefer an id in the body; otherwise take the last path
                // segment of the Operation-Location status URL.
                $jobId = null;
                if ($decodeError === null && is_array($result) && isset($result['id'])) {
                    $jobId = $result['id'];
                } elseif (!empty($responseHeaders['operation-location'])) {
                    $statusPath = parse_url($responseHeaders['operation-location'], PHP_URL_PATH);
                    $lastSegment = is_string($statusPath) ? basename($statusPath) : '';
                    if ($lastSegment !== '') {
                        $jobId = $lastSegment;
                    }
                }

                if ($jobId !== null) {
                    // Shape expected by the frontend: the batch id plus an
                    // opaque key carrying the blob names for later lookups.
                    $payload = [
                        'document_id' => $jobId,
                        'document_key' => json_encode([
                            'source_blob' => $sourceBlob['blob_name'],
                            'target_blob' => $targetBlob['blob_name'],
                            'ms_response' => is_array($result) ? $result : ['id' => $jobId],
                        ]),
                    ];
                    logMessage("Translation job submitted successfully with ID: " . $jobId);
                } elseif ($decodeError !== null) {
                    logMessage("Invalid JSON response: " . $decodeError, 'ERROR');
                    $payload = ['error' => 'Invalid JSON response: ' . $decodeError, 'raw_response' => $response];
                } else {
                    logMessage("Microsoft Translator API error: " . json_encode($result), 'ERROR');
                    $payload = ['error' => 'Microsoft Translator API response missing ID', 'details' => $result];
                }
            }
        }
        echo json_encode($payload);
    } catch (Throwable $e) {
        // Throwable (not just Exception) so engine errors such as TypeError
        // are also answered with JSON instead of a raw PHP failure.
        logMessage("Exception: " . $e->getMessage() . "\nStack trace: " . $e->getTraceAsString(), 'ERROR');
        echo json_encode(['error' => 'Server error: ' . $e->getMessage()]);
    }
} else {
    logMessage("Received non-POST request", 'ERROR');
    echo json_encode(['error' => 'Invalid request method']);
}