This commit is contained in:
Manish Tanwar 2025-11-13 18:45:59 +05:30
parent f3186276c4
commit 56fe6437b6
4 changed files with 159 additions and 95 deletions

View file

@ -47,7 +47,10 @@
"Read(//tmp/**)",
"WebFetch(domain:docs.cloud.google.com)",
"Bash(journalctl:*)",
"Bash(sudo systemctl status:*)"
"Bash(sudo systemctl status:*)",
"WebFetch(domain:github.com)",
"WebFetch(domain:pypi.org)",
"Bash(lsof:*)"
],
"deny": []
}

View file

@ -16,7 +16,7 @@ google-api-core==2.25.0rc0
google-api-python-client==2.169.0
google-auth==2.40.0
google-auth-httplib2==0.2.0
google-genai>=1.44.0
google-genai>=1.49.0
googleapis-common-protos==1.70.0
grpcio==1.71.0
grpcio-status==1.71.0

View file

@ -228,96 +228,71 @@ class VideoProcessor:
except Exception as val_err:
logger.warning(f"Could not validate video file: {str(val_err)} - proceeding anyway")
# Use different approach based on file size
# Small files (< 10MB): use inline base64 data (faster, no upload wait)
# Large files (>= 10MB): use file upload API (handles larger files)
# Note: Base64 adds ~37% overhead, so 10MB file = ~13.7MB base64
SIZE_THRESHOLD_MB = 10
uploaded_file = None
# SIMPLIFIED APPROACH: Use inline base64 for all files
# File Upload API has known issues (KeyError: 'file' in SDK 1.45.0-1.49.0)
# Gemini API REQUEST limit: 1GB (1073741824 bytes) - confirmed by testing
# Base64 expands data by 4/3 (~33%); we budget a conservative ~37%, so we validate encoded size
if file_size_mb < SIZE_THRESHOLD_MB:
# Small file: Use base64 encoding for inline data
logger.info(f"File < {SIZE_THRESHOLD_MB}MB, using inline base64 data")
with open(video_path, "rb") as video_file_obj:
video_data = video_file_obj.read()
video_base64 = base64.b64encode(video_data).decode('utf-8')
# Calculate estimated encoded size (base64 overhead ~37%)
BASE64_OVERHEAD = 1.37
estimated_encoded_mb = file_size_mb * BASE64_OVERHEAD
API_LIMIT_MB = 1000 # 1GB API request limit
logger.info(f"Base64 encoding complete. Size: {len(video_base64)} characters")
# Create the content parts using inline data
prompt_parts = [
{"text": prompt},
{"inline_data": {
"mime_type": mime_type,
"data": video_base64
}}
]
else:
# Large file: Use file upload API
logger.info(f"File >= {SIZE_THRESHOLD_MB}MB, using file upload API")
upload_start = time.time()
uploaded_file = self.client.files.upload(
file=video_path
# Check if encoded size would exceed API limit
if estimated_encoded_mb > API_LIMIT_MB:
error_msg = (
f"Video chunk is too large: {file_size_mb:.2f}MB raw, "
f"~{estimated_encoded_mb:.1f}MB after base64 encoding. "
f"This exceeds the {API_LIMIT_MB}MB (1GB) API limit. "
f"The video needs to be split into smaller chunks. "
f"Target chunk size: 500MB (accounting for variable bitrate)."
)
logger.info(f"Upload complete in {time.time() - upload_start:.1f}s. File URI: {uploaded_file.uri}")
logger.info(f"Initial file state: {uploaded_file.state}")
logger.error(error_msg)
result["message"] = error_msg
return result
# Wait for file to be processed with timeout
max_wait_time = 300 # 5 minutes timeout
wait_start = time.time()
check_count = 0
# Warn if approaching API limit (>900MB encoded)
if estimated_encoded_mb > 900:
logger.warning(
f"Chunk is large: {file_size_mb:.1f}MB raw, "
f"~{estimated_encoded_mb:.1f}MB encoded. "
f"Approaching {API_LIMIT_MB}MB API limit. Processing may be slower."
)
while uploaded_file.state == "PROCESSING":
check_count += 1
elapsed = time.time() - wait_start
# Use base64 encoding for all files (reliable and works consistently)
logger.info(f"Encoding video as base64 (file size: {file_size_mb:.2f}MB)")
encode_start = time.time()
if elapsed > max_wait_time:
error_msg = f"File processing timeout after {max_wait_time}s. File may be too large or corrupted."
logger.error(error_msg)
# Try to delete the file
try:
self.client.files.delete(name=uploaded_file.name)
except:
pass
result["message"] = error_msg
return result
with open(video_path, "rb") as video_file_obj:
video_data = video_file_obj.read()
video_base64 = base64.b64encode(video_data).decode('utf-8')
logger.info(f"File is still processing (check #{check_count}, {elapsed:.0f}s elapsed), waiting...")
time.sleep(3)
encode_time = time.time() - encode_start
encoded_size_mb = len(video_base64) / (1024 * 1024)
logger.info(f"Base64 encoding complete in {encode_time:.2f}s. Encoded size: {encoded_size_mb:.2f}MB ({len(video_base64)} chars)")
try:
uploaded_file = self.client.files.get(name=uploaded_file.name)
logger.info(f"Updated file state: {uploaded_file.state}")
except Exception as status_err:
logger.error(f"Error checking file status: {str(status_err)}")
# Wait a bit longer and try again
time.sleep(5)
try:
uploaded_file = self.client.files.get(name=uploaded_file.name)
except Exception as retry_err:
error_msg = f"Failed to check file upload status: {str(retry_err)}"
logger.error(error_msg)
result["message"] = error_msg
return result
# Verify actual encoded size is within API limits (1GB)
if encoded_size_mb > API_LIMIT_MB:
error_msg = (
f"Encoded video size ({encoded_size_mb:.2f}MB) exceeds Gemini API limit of {API_LIMIT_MB}MB (1GB). "
f"Original file: {file_size_mb:.2f}MB. "
f"This chunk needs further splitting. Target: 500MB raw chunks. "
f"Variable bitrate caused larger-than-expected chunk size."
)
logger.error(error_msg)
result["message"] = error_msg
return result
if uploaded_file.state != "ACTIVE":
error_msg = f"File upload failed. State: {uploaded_file.state}"
logger.error(error_msg)
logger.error("This may indicate the video is corrupted, unsupported format, or contains invalid data")
result["message"] = error_msg
return result
# Create the content parts using inline data
prompt_parts = [
{"text": prompt},
{"inline_data": {
"mime_type": mime_type,
"data": video_base64
}}
]
logger.info("File is ACTIVE and ready for processing")
# Create content parts using file reference
prompt_parts = [
{"text": prompt},
{"file_data": {
"file_uri": uploaded_file.uri,
"mime_type": mime_type
}}
]
uploaded_file = None # Not using File Upload API
# Rate limiting: Wait to avoid hitting API limits
# Free tier: 5 RPM, so minimum 12 seconds between requests

View file

@ -72,32 +72,102 @@ class VideoSplitter:
logger.error(f"Error detecting video duration: {str(e)}")
return None
def needs_splitting(self, video_path: str) -> bool:
def needs_splitting(self, video_path: str, max_chunk_size_mb: float = 500) -> bool:
"""
Check if a video needs to be split based on its duration.
Check if a video needs to be split based on duration OR file size.
A video needs splitting if:
1. Duration > 54 minutes (Gemini API time limit), OR
2. File size > max_chunk_size_mb (default 500MB, a conservative target to handle variable bitrate)
With 30% variance: 500MB × 1.3 = 650MB max
After base64 encoding: 650MB × 1.37 = 891MB (well under 1GB API limit)
Args:
video_path: Path to the video file
max_chunk_size_mb: Maximum chunk size in MB (default: 500MB)
Returns:
True if video duration exceeds chunk duration, False otherwise
True if video needs splitting based on duration or size, False otherwise
"""
duration = self.get_video_duration(video_path)
if duration is None:
logger.warning("Could not determine if video needs splitting")
logger.warning("Could not determine video duration for splitting check")
return False
needs_split = duration > self.chunk_duration_seconds
# Check duration
needs_split_duration = duration > self.chunk_duration_seconds
# Check file size
file_size_bytes = os.path.getsize(video_path)
file_size_mb = file_size_bytes / (1024 * 1024)
needs_split_size = file_size_mb > max_chunk_size_mb
needs_split = needs_split_duration or needs_split_size
if needs_split:
logger.info(f"Video needs splitting: {duration/60:.2f} min > {self.chunk_duration_minutes} min")
reasons = []
if needs_split_duration:
reasons.append(f"duration {duration/60:.2f} min > {self.chunk_duration_minutes} min")
if needs_split_size:
reasons.append(f"file size {file_size_mb:.1f}MB > {max_chunk_size_mb}MB")
logger.info(f"Video needs splitting: {' AND '.join(reasons)}")
else:
logger.info(f"Video does not need splitting: {duration/60:.2f} min <= {self.chunk_duration_minutes} min")
logger.info(f"Video does not need splitting: duration {duration/60:.2f} min <= {self.chunk_duration_minutes} min, size {file_size_mb:.1f}MB <= {max_chunk_size_mb:.0f}MB")
return needs_split
def calculate_optimal_chunk_duration(self, video_path: str, max_chunk_size_mb: float = 500) -> int:
    """
    Calculate a chunk duration that keeps each chunk near a target file size.

    The duration is derived from the file's average bitrate
    (file size / video duration). It is capped at the configured
    ``self.chunk_duration_seconds`` and floored at 5 minutes, or at the
    configured duration when that is shorter than 5 minutes.

    IMPORTANT: Gemini API has a 1GB request payload limit.
    Conservative target of 500MB accounts for variable bitrate (VBR).
    With 30% VBR variance: 500MB x 1.3 = 650MB max
    After base64 encoding: 650MB x 1.37 = 891MB (under 1GB limit)

    Args:
        video_path: Path to the video file
        max_chunk_size_mb: Maximum desired chunk size in MB (default: 500MB)

    Returns:
        Optimal chunk duration in seconds. Falls back to
        ``self.chunk_duration_seconds`` when the duration is unknown or
        when no average bitrate can be computed (zero duration or empty file).

    Raises:
        OSError: If the size of ``video_path`` cannot be read.
    """
    default_duration = self.chunk_duration_seconds

    duration = self.get_video_duration(video_path)
    if duration is None:
        logger.warning("Could not determine duration, using default chunk duration")
        return default_duration

    # Get file size
    file_size_bytes = os.path.getsize(video_path)

    # A zero duration or an empty file would make the bitrate math below
    # divide by zero; there is nothing to optimise, so use the default.
    if duration <= 0 or file_size_bytes <= 0:
        logger.warning(
            f"Cannot compute average bitrate (duration: {duration}s, "
            f"size: {file_size_bytes} bytes), using default chunk duration"
        )
        return default_duration

    file_size_mb = file_size_bytes / (1024 * 1024)
    file_size_gb = file_size_bytes / (1024 * 1024 * 1024)

    # Calculate average bitrate (bytes per second)
    avg_bitrate = file_size_bytes / duration

    # Calculate chunk duration that would result in max_chunk_size_mb
    max_chunk_size_bytes = max_chunk_size_mb * 1024 * 1024
    optimal_duration = max_chunk_size_bytes / avg_bitrate

    # Use the smaller of optimal duration or default chunk duration
    final_duration = min(optimal_duration, default_duration)

    # Ensure a minimum chunk duration of 5 minutes (300 seconds), but never
    # let that floor exceed the configured maximum chunk duration.
    # NOTE: for very high bitrates this floor can produce chunks larger than
    # max_chunk_size_mb; callers still validate the actual chunk size.
    min_duration = min(300, default_duration)
    final_duration = max(final_duration, min_duration)

    logger.info(
        f"Calculated optimal chunk duration: {final_duration:.0f}s ({final_duration/60:.1f} min) "
        f"based on file size {file_size_mb:.1f}MB ({file_size_gb:.2f}GB) and duration {duration/60:.1f} min. "
        f"Target chunk size: {max_chunk_size_mb}MB"
    )

    return int(final_duration)
def split_video(self, video_path: str, output_dir: Optional[str] = None) -> List[str]:
"""
Split a video into multiple chunks based on the configured chunk duration.
Automatically shortens the chunk duration when the file's average bitrate would push chunks past the 500MB target size.
Args:
video_path: Path to the video file to split
@ -110,6 +180,9 @@ class VideoSplitter:
if duration is None:
raise ValueError("Could not determine video duration")
# Calculate optimal chunk duration based on file size
chunk_duration = self.calculate_optimal_chunk_duration(video_path)
# Use temp directory if none specified
if output_dir is None:
output_dir = tempfile.mkdtemp(prefix="video_chunks_")
@ -118,17 +191,17 @@ class VideoSplitter:
os.makedirs(output_dir, exist_ok=True)
# Calculate number of chunks needed
num_chunks = int(duration / self.chunk_duration_seconds) + (
1 if duration % self.chunk_duration_seconds > 0 else 0
num_chunks = int(duration / chunk_duration) + (
1 if duration % chunk_duration > 0 else 0
)
logger.info(f"Splitting video into {num_chunks} chunks")
logger.info(f"Splitting video into {num_chunks} chunks (chunk duration: {chunk_duration/60:.1f} min)")
chunk_paths = []
video_basename = os.path.splitext(os.path.basename(video_path))[0]
video_extension = os.path.splitext(video_path)[1]
for i in range(num_chunks):
start_time = i * self.chunk_duration_seconds
start_time = i * chunk_duration
chunk_output = os.path.join(
output_dir,
f"{video_basename}_chunk_{i+1:02d}{video_extension}"
@ -140,7 +213,7 @@ class VideoSplitter:
# Split the video using ffmpeg
# Using -t to specify duration of this chunk
# Using -c copy for fast processing (no re-encoding)
stream = ffmpeg.input(video_path, ss=start_time, t=self.chunk_duration_seconds)
stream = ffmpeg.input(video_path, ss=start_time, t=chunk_duration)
stream = ffmpeg.output(
stream,
chunk_output,
@ -151,7 +224,20 @@ class VideoSplitter:
ffmpeg.run(stream, capture_stdout=True, capture_stderr=True, overwrite_output=True)
chunk_paths.append(chunk_output)
logger.info(f"Successfully created chunk {i+1}/{num_chunks}")
# Log chunk size for monitoring
chunk_size_bytes = os.path.getsize(chunk_output)
chunk_size_mb = chunk_size_bytes / (1024 * 1024)
chunk_size_gb = chunk_size_bytes / (1024 * 1024 * 1024)
logger.info(f"Successfully created chunk {i+1}/{num_chunks} (size: {chunk_size_mb:.1f}MB / {chunk_size_gb:.2f}GB)")
# Warn if chunk is approaching size limits (500MB target due to VBR variance)
if chunk_size_mb > 550:
logger.warning(
f"Chunk {i+1} is {chunk_size_mb:.1f}MB ({chunk_size_gb:.2f}GB), exceeding the 500MB target. "
f"After base64 encoding (~37% overhead), this will be ~{chunk_size_mb * 1.37:.1f}MB. "
f"API limit is 1000MB (1GB). If close to limit, consider reducing video quality."
)
except ffmpeg.Error as e:
error_msg = e.stderr.decode() if e.stderr else str(e)