volt-newsroom-scraper-report/web/generate.php
DJP 63459a15f6 Fix apache_setenv error for non-mod_php environments
- Commented out apache_setenv() which isn't available in all PHP configs
- Function was optional for output buffering control
- Other buffering controls still in place
2026-01-07 13:24:38 -05:00

130 lines
3.2 KiB
PHP

<?php
/**
 * Backend script to generate newsroom reports.
 *
 * Expects a JSON request body containing a "date" string such as
 * "Monday, January 6", runs the configured Python script with that date as
 * its argument, and streams the script's combined stdout/stderr back to the
 * client as plain text while it runs. The response ends with a JSON status
 * line reporting success (with the generated PDF filename) or failure.
 */
// Presumably defines PROJECT_ROOT, PYTHON_VENV and PYTHON_SCRIPT, which are
// used further down in this script - confirm in config.php.
require_once __DIR__ . '/config.php';
require_once __DIR__ . '/AuthMiddleware.php';
// Authenticate before any request data is read or any output is produced.
// NOTE(review): requireAuth() presumably aborts the request itself when the
// caller is not authenticated - verify in AuthMiddleware.
$auth = new AuthMiddleware();
$user = $auth->requireAuth();
// Read and decode the JSON request body.
$input = json_decode(file_get_contents('php://input'), true);

// The body must decode to a JSON object that carries a "date" entry.
if (!is_array($input) || !isset($input['date'])) {
    http_response_code(400);
    echo json_encode(['success' => false, 'error' => 'Date is required']);
    exit;
}

// Only strings can be trimmed: a payload such as {"date": []} would make
// trim() throw an uncaught TypeError on PHP 8. Map any non-string value to
// an empty string so it is rejected by the format check below with a 400.
$date = is_string($input['date']) ? trim($input['date']) : '';

// Validate date format (basic check): "<Weekday>, <word> <digits>".
if (!preg_match('/^(Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday),\s+\w+\s+\d+$/', $date)) {
    http_response_code(400);
    echo json_encode(['success' => false, 'error' => 'Invalid date format. Use: "Monday, January 6"']);
    exit;
}
// Disable ALL output buffering for real-time streaming.
// apache_setenv('no-gzip', 1) is intentionally not called here: the function
// is not available in all PHP configs (e.g. outside mod_php).
// The ini_set() calls are best effort; failures are deliberately ignored.
@ini_set('zlib.output_compression', '0');
@ini_set('implicit_flush', '1');
@ini_set('output_buffering', '0');
ob_implicit_flush(true);

// Discard every active output buffer. ob_end_clean() returns false when the
// current buffer cannot be removed; in that case the nesting level never
// drops, so stop instead of looping forever. The notice it raises on failure
// is suppressed so that nothing is emitted before the headers below.
while (ob_get_level() > 0) {
    if (!@ob_end_clean()) {
        break;
    }
}

// Plain-text streaming response; ask intermediaries not to buffer or cache.
header('Content-Type: text/plain; charset=utf-8');
header('X-Accel-Buffering: no');
header('Cache-Control: no-cache');

// Send the headers to the client right away.
flush();
// Work from the project directory for the rest of the request.
chdir(PROJECT_ROOT);

// Assemble the shell command: interpreter, script, then the shell-quoted
// date as the only argument. "2>&1" merges stderr into the streamed output.
$command = sprintf('%s %s %s 2>&1', PYTHON_VENV, PYTHON_SCRIPT, escapeshellarg($date));

// Announce the run to the client before the process is started.
echo "Starting report generation for: {$date}\n", str_repeat('=', 60), "\n\n";
flush();
// Execute the command and stream its output to the client line by line.
$process = popen($command, 'r');
if (!$process) {
    echo "\nERROR: Failed to start Python process\n";
    echo json_encode(['success' => false, 'error' => 'Failed to start process']);
    exit;
}

$output = '';    // Full transcript, searched afterwards for the report filename.
$lineCount = 0;  // Number of lines forwarded so far.

while (!feof($process)) {
    $line = fgets($process);

    if ($line === false) {
        // Nothing was read (typically end of stream). Back off briefly so a
        // failing read cannot spin the CPU, then re-check feof().
        // fgets() blocks while waiting for data, so no delay is needed after
        // a successful read; sleeping there only throttled the stream.
        usleep(10000); // 10ms
        continue;
    }

    echo $line;

    // Force flush output to browser
    if (ob_get_level() > 0) {
        ob_flush();
    }
    flush();

    $output .= $line;
    $lineCount++;

    // Every 10 lines, emit a line of 4096 spaces as a keep-alive
    // (presumably to push data through buffering intermediaries).
    if ($lineCount % 10 === 0) {
        echo str_pad('', 4096) . "\n";
        flush();
    }
}

// Close the pipe and collect the process termination status.
$returnCode = pclose($process);
// Look for the "Report saved to: .../Newsroom_Report_<digits-and-dashes>.pdf"
// line in the captured output and keep just the file name.
$filename = preg_match('/Report saved to:.*\/(Newsroom_Report_[\d-]+\.pdf)/', $output, $matches)
    ? $matches[1]
    : null;

// Separator between the streamed log and the final status.
echo "\n" . str_repeat('=', 60) . "\n";

// The run counts as successful only when the process exited with 0 and a
// report filename was found in its output.
$succeeded = ($returnCode === 0 && $filename);

$payload = $succeeded
    ? [
        'success' => true,
        'filename' => $filename,
        'message' => 'Report generated successfully'
    ]
    : [
        'success' => false,
        'error' => 'Report generation failed (exit code: ' . $returnCode . ')',
        'returnCode' => $returnCode
    ];

// Send final status as a single JSON line.
echo "\n" . json_encode($payload) . "\n";