175 lines
No EOL
5.5 KiB
Bash
Executable file
175 lines
No EOL
5.5 KiB
Bash
Executable file
#!/bin/bash
|
|
|
|
# Enhanced script to extract user emails and prompts from veo-video-generator systemd logs
|
|
# Usage: ./extract_user_logs_robust.sh [output_file.csv] [service_name] [date_range]
|
|
# Examples:
|
|
# ./extract_user_logs_robust.sh usage_report.csv
|
|
# ./extract_user_logs_robust.sh usage_report.csv veo-video-generator "--since=2024-06-01"
|
|
|
|
# Set defaults
# Positional arguments; each falls back to its default when omitted or empty.
OUTPUT_FILE="${1:-video_generation_usage.csv}" # path of the CSV report
SERVICE_NAME="${2:-veo-video-generator}"       # systemd unit whose journal is read
DATE_RANGE="${3:-}"                            # extra journalctl options, e.g. --since=2024-06-01

# Colors for output
# Stored as backslash escapes; the print_* helpers expand them when a message
# is written. They are left empty when stdout is not a terminal (so piped or
# redirected output is not littered with raw escape sequences) or when the
# NO_COLOR environment variable is set.
if [ -t 1 ] && [ -z "${NO_COLOR:-}" ]; then
  RED='\033[0;31m'
  GREEN='\033[0;32m'
  YELLOW='\033[1;33m'
  NC='\033[0m' # No Color
else
  RED=''
  GREEN=''
  YELLOW=''
  NC=''
fi
|
|
|
|
# Function to print colored output
|
|
# Print an informational message to stdout, prefixed with a "[INFO]" tag.
# Arguments:
#   $1 - message text; written verbatim, so backslash sequences inside it
#        (file paths, JSON fragments) are not interpreted
# Globals:
#   GREEN, NC - colour escape strings, expanded through printf's %b
print_status() {
  printf '%b[INFO]%b %s\n' "${GREEN}" "${NC}" "$1"
}
|
|
|
|
# Print a warning to stderr, prefixed with a "[WARN]" tag.
# stderr is used so a warning can never end up in data captured from stdout:
# this script appends the stdout of extract_json_fields, which calls this
# helper, to the CSV report.
# Arguments:
#   $1 - message text; written verbatim (no backslash interpretation)
# Globals:
#   YELLOW, NC - colour escape strings, expanded through printf's %b
print_warning() {
  printf '%b[WARN]%b %s\n' "${YELLOW}" "${NC}" "$1" >&2
}
|
|
|
|
# Print an error message to stderr, prefixed with an "[ERROR]" tag.
# Arguments:
#   $1 - message text; written verbatim (no backslash interpretation)
# Globals:
#   RED, NC - colour escape strings, expanded through printf's %b
print_error() {
  printf '%b[ERROR]%b %s\n' "${RED}" "${NC}" "$1" >&2
}
|
|
|
|
# Check if required tools are installed
|
|
# Verify that the external commands this script needs are on PATH.
# Reports every missing tool through print_error and then terminates the
# script with status 1; returns normally when nothing is missing.
check_dependencies() {
  local -a missing_deps=()
  local spec tool label

  # Each entry is "<command to look up>:<name shown to the user>".
  for spec in "journalctl:systemd (journalctl)" "jq:jq"; do
    tool=${spec%%:*}
    label=${spec#*:}
    if ! command -v "$tool" >/dev/null 2>&1; then
      missing_deps+=("$label")
    fi
  done

  if ((${#missing_deps[@]} > 0)); then
    print_error "Missing required dependencies: ${missing_deps[*]}"
    print_error "Please install missing dependencies and try again"
    exit 1
  fi
}
|
|
|
|
# Function to validate JSON and extract fields safely
|
|
# Convert one logged JSON request into a single CSV row.
# Arguments:
#   $1 - JSON text taken from the log line
#   $2 - timestamp to place in the first column
# Outputs:
#   stdout - one row with the columns timestamp, user_email, prompt,
#            video_length_sec, aspect_ratio, person_generation; every field is
#            double-quoted and embedded quotes are doubled ("")
#   stderr - a warning when $1 is not valid JSON
# Returns:
#   0 when a row was written; 1 when the JSON is invalid or carries no usable
#   user_email (missing, empty, or the literal string "null")
extract_json_fields() {
  local json_string="$1"
  local timestamp="$2"
  local row

  # A single jq run both validates the input (non-zero exit on a parse error)
  # and builds the row, instead of one jq process per field.
  #   txt  - missing/null/false values become "", everything else a string
  #   flat - line breaks become spaces so one record is always one line
  #          (the rest of this script counts and splits records per line)
  #   csvq - wrap in double quotes, doubling any embedded quote
  # Commas in the prompt are still replaced by ";" so that consumers which
  # split rows naively on "," keep seeing six columns.
  if ! row=$(jq -r --arg ts "$timestamp" '
      def txt: (. // "") | tostring;
      def flat:
        split("\r\n") | join(" ") | split("\n") | join(" ") | split("\r") | join(" ");
      def csvq: "\"" + (split("\"") | join("\"\"")) + "\"";
      select(type == "object")
      | (.user_email | txt) as $email
      | select($email != "" and $email != "null")
      | [ $ts,
          $email,
          (.prompt | txt | split(",") | join(";")),
          (.video_length_sec | txt),
          (.aspect_ratio | txt),
          (.person_generation | txt) ]
      | map(flat | csvq)
      | join(",")
    ' <<<"$json_string" 2>/dev/null); then
    # Forced to stderr: the caller appends this function's stdout to the CSV.
    print_warning "Invalid JSON found at $timestamp: $json_string" >&2
    return 1
  fi

  # Valid JSON without the essential field: nothing to write.
  [ -n "$row" ] || return 1

  printf '%s\n' "$row"
  return 0
}
|
|
|
|
# Announce the effective settings.
print_status "Starting log extraction..."
print_status "Service: $SERVICE_NAME"
print_status "Output file: $OUTPUT_FILE"
if [ -n "$DATE_RANGE" ]; then
  print_status "Date range: $DATE_RANGE"
fi

# Check dependencies
check_dependencies

# Check if service exists.
# The unit name is matched as a fixed string (a "." in it is not a regex
# wildcard), and a SERVICE_NAME that already ends in ".service" does not get
# the suffix a second time. A miss only warns, because logs may exist for a
# unit that is not currently loaded.
unit_name="${SERVICE_NAME%.service}.service"
if ! systemctl list-units --full -a | grep -qF -- "$unit_name"; then
  print_warning "Service '$SERVICE_NAME' not found in systemctl list-units"
  print_warning "This might be normal if the service is not currently loaded"
fi

# Create CSV header (truncates any previous report). Stop right away if the
# file cannot be written instead of failing once per record later on.
if ! echo "timestamp,user_email,prompt,video_length_sec,aspect_ratio,person_generation" > "$OUTPUT_FILE"; then
  print_error "Cannot write to output file: $OUTPUT_FILE"
  exit 1
fi
|
|
|
|
# Build journalctl command as an argument array, so the service name is passed
# as exactly one argument and nothing taken from the command line is ever
# evaluated as shell code.
journal_args=(-u "$SERVICE_NAME" --no-pager --output=short-iso)
if [ -n "$DATE_RANGE" ]; then
  # DATE_RANGE may hold several options, some with quoted values
  # (e.g. --since "2024-06-01 10:00"). xargs splits it into words honouring
  # quotes and backslashes but performs no expansion or command execution.
  if ! date_words=$(printf '%s\n' "$DATE_RANGE" | xargs printf '%s\n'); then
    print_error "Could not parse date range: $DATE_RANGE"
    exit 1
  fi
  while IFS= read -r date_word; do
    # A whitespace-only DATE_RANGE yields one empty word; skip it.
    if [ -n "$date_word" ]; then
      journal_args+=("$date_word")
    fi
  done <<<"$date_words"
fi

print_status "Extracting logs... (this may take a while for large log files)"

# Counter for processing
processed_lines=0
valid_records=0

# Text that precedes the JSON payload in a matching log line.
json_marker="Raw JSON data received: "

# Process logs.
# The loop reads from a process substitution instead of sitting at the end of
# a pipeline, so it runs in this shell and both counters keep their values
# after the loop.
while IFS= read -r line; do
  processed_lines=$((processed_lines + 1))

  # Show progress every 100 lines
  if ((processed_lines % 100 == 0)); then
    print_status "Processed $processed_lines log lines..."
  fi

  # Extract timestamp (first whitespace-separated field)
  read -r timestamp _ <<<"$line"

  # Lines where nothing follows the marker carry no payload.
  [[ "$line" == *"$json_marker"* ]] || continue

  # Everything after the first occurrence of the marker is the JSON part.
  json_part=${line#*"$json_marker"}

  # Process if we found JSON
  if [ -n "$json_part" ]; then
    if extract_json_fields "$json_part" "$timestamp" >> "$OUTPUT_FILE"; then
      valid_records=$((valid_records + 1))
    fi
  fi
done < <(journalctl "${journal_args[@]}" | grep -F -- "Raw JSON data received:")
|
|
|
|
# Get final counts from the file itself (every line after the header), so the
# figure reflects what was actually written to the report.
record_count=$(tail -n +2 -- "$OUTPUT_FILE" | wc -l)
# Normalise to a plain integer; some wc implementations pad the number.
record_count=$((record_count))

print_status "Processing complete!"
print_status "Total valid records extracted: $record_count"
print_status "Output saved to: $OUTPUT_FILE"

# An empty report is treated as a failure, with hints about likely causes.
if ((record_count == 0)); then
  print_warning "No records found. This could mean:"
  print_warning " - No logs exist for the specified service/date range"
  print_warning " - The log format has changed"
  print_warning " - The service name is incorrect"
  exit 1
fi
|
|
|
|
# Show summary statistics
echo ""
print_status "=== SUMMARY REPORT ==="

# Each column is extracted and sorted once and then reused below.
# NOTE(review): the split on "," assumes the first two columns (timestamp and
# user_email) never contain a comma themselves - confirm for unusual addresses.
user_counts=$(tail -n +2 -- "$OUTPUT_FILE" | cut -d',' -f2 | sed 's/"//g' | sort | uniq -c | sort -nr)
sorted_timestamps=$(tail -n +2 -- "$OUTPUT_FILE" | cut -d',' -f1 | sed 's/"//g' | sort)

# Unique users (request count per user, busiest first)
echo "Unique users found:"
printf '%s\n' "$user_counts"

# Date range: first and last entry of the sorted timestamp column
echo ""
echo "Date range of requests:"
printf 'First: %s\n' "$(head -n 1 <<<"$sorted_timestamps")"
printf 'Last: %s\n' "$(tail -n 1 <<<"$sorted_timestamps")"

# Most active users
echo ""
echo "Top 5 most active users:"
head -n 5 <<<"$user_counts"

print_status "Report generation complete!"