diff --git a/.claude/settings.local.json b/.claude/settings.local.json index 2c8da8e..fd8899b 100644 --- a/.claude/settings.local.json +++ b/.claude/settings.local.json @@ -47,7 +47,10 @@ "Read(//tmp/**)", "WebFetch(domain:docs.cloud.google.com)", "Bash(journalctl:*)", - "Bash(sudo systemctl status:*)" + "Bash(sudo systemctl status:*)", + "WebFetch(domain:github.com)", + "WebFetch(domain:pypi.org)", + "Bash(lsof:*)" ], "deny": [] } diff --git a/backend/requirements.txt b/backend/requirements.txt index 3c5a1fa..fd12556 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -16,7 +16,7 @@ google-api-core==2.25.0rc0 google-api-python-client==2.169.0 google-auth==2.40.0 google-auth-httplib2==0.2.0 -google-genai>=1.44.0 +google-genai>=1.49.0 googleapis-common-protos==1.70.0 grpcio==1.71.0 grpcio-status==1.71.0 diff --git a/backend/video_processor.py b/backend/video_processor.py index d18d074..f22c173 100644 --- a/backend/video_processor.py +++ b/backend/video_processor.py @@ -228,96 +228,71 @@ class VideoProcessor: except Exception as val_err: logger.warning(f"Could not validate video file: {str(val_err)} - proceeding anyway") - # Use different approach based on file size - # Small files (< 10MB): use inline base64 data (faster, no upload wait) - # Large files (>= 10MB): use file upload API (handles larger files) - # Note: Base64 adds ~37% overhead, so 10MB file = ~13.7MB base64 - SIZE_THRESHOLD_MB = 10 - uploaded_file = None + # SIMPLIFIED APPROACH: Use inline base64 for all files + # File Upload API has known issues (KeyError: 'file' in SDK 1.45.0-1.49.0) + # Gemini API REQUEST limit: 1GB (1073741824 bytes) - confirmed by testing + # Base64 adds ~37% overhead, so we validate encoded size - if file_size_mb < SIZE_THRESHOLD_MB: - # Small file: Use base64 encoding for inline data - logger.info(f"File < {SIZE_THRESHOLD_MB}MB, using inline base64 data") - with open(video_path, "rb") as video_file_obj: - video_data = video_file_obj.read() - video_base64 = 
base64.b64encode(video_data).decode('utf-8') + # Calculate estimated encoded size (base64 overhead ~37%) + BASE64_OVERHEAD = 1.37 + estimated_encoded_mb = file_size_mb * BASE64_OVERHEAD + API_LIMIT_MB = 1000 # 1GB API request limit - logger.info(f"Base64 encoding complete. Size: {len(video_base64)} characters") - - # Create the content parts using inline data - prompt_parts = [ - {"text": prompt}, - {"inline_data": { - "mime_type": mime_type, - "data": video_base64 - }} - ] - else: - # Large file: Use file upload API - logger.info(f"File >= {SIZE_THRESHOLD_MB}MB, using file upload API") - upload_start = time.time() - - uploaded_file = self.client.files.upload( - file=video_path + # Check if encoded size would exceed API limit + if estimated_encoded_mb > API_LIMIT_MB: + error_msg = ( + f"Video chunk is too large: {file_size_mb:.2f}MB raw, " + f"~{estimated_encoded_mb:.1f}MB after base64 encoding. " + f"This exceeds the {API_LIMIT_MB}MB (1GB) API limit. " + f"The video needs to be split into smaller chunks. " + f"Target chunk size: 500MB (accounting for variable bitrate)." ) - logger.info(f"Upload complete in {time.time() - upload_start:.1f}s. File URI: {uploaded_file.uri}") - logger.info(f"Initial file state: {uploaded_file.state}") + logger.error(error_msg) + result["message"] = error_msg + return result - # Wait for file to be processed with timeout - max_wait_time = 300 # 5 minutes timeout - wait_start = time.time() - check_count = 0 + # Warn if approaching API limit (>900MB encoded) + if estimated_encoded_mb > 900: + logger.warning( + f"Chunk is large: {file_size_mb:.1f}MB raw, " + f"~{estimated_encoded_mb:.1f}MB encoded. " + f"Approaching {API_LIMIT_MB}MB API limit. Processing may be slower." 
+ ) - while uploaded_file.state == "PROCESSING": - check_count += 1 - elapsed = time.time() - wait_start + # Use base64 encoding for all files (reliable and works consistently) + logger.info(f"Encoding video as base64 (file size: {file_size_mb:.2f}MB)") + encode_start = time.time() - if elapsed > max_wait_time: - error_msg = f"File processing timeout after {max_wait_time}s. File may be too large or corrupted." - logger.error(error_msg) - # Try to delete the file - try: - self.client.files.delete(name=uploaded_file.name) - except: - pass - result["message"] = error_msg - return result + with open(video_path, "rb") as video_file_obj: + video_data = video_file_obj.read() + video_base64 = base64.b64encode(video_data).decode('utf-8') - logger.info(f"File is still processing (check #{check_count}, {elapsed:.0f}s elapsed), waiting...") - time.sleep(3) + encode_time = time.time() - encode_start + encoded_size_mb = len(video_base64) / (1024 * 1024) + logger.info(f"Base64 encoding complete in {encode_time:.2f}s. Encoded size: {encoded_size_mb:.2f}MB ({len(video_base64)} chars)") - try: - uploaded_file = self.client.files.get(name=uploaded_file.name) - logger.info(f"Updated file state: {uploaded_file.state}") - except Exception as status_err: - logger.error(f"Error checking file status: {str(status_err)}") - # Wait a bit longer and try again - time.sleep(5) - try: - uploaded_file = self.client.files.get(name=uploaded_file.name) - except Exception as retry_err: - error_msg = f"Failed to check file upload status: {str(retry_err)}" - logger.error(error_msg) - result["message"] = error_msg - return result + # Verify actual encoded size is within API limits (1GB) + if encoded_size_mb > API_LIMIT_MB: + error_msg = ( + f"Encoded video size ({encoded_size_mb:.2f}MB) exceeds Gemini API limit of {API_LIMIT_MB}MB (1GB). " + f"Original file: {file_size_mb:.2f}MB. " + f"This chunk needs further splitting. Target: 500MB raw chunks. 
" + f"Variable bitrate caused larger-than-expected chunk size." + ) + logger.error(error_msg) + result["message"] = error_msg + return result - if uploaded_file.state != "ACTIVE": - error_msg = f"File upload failed. State: {uploaded_file.state}" - logger.error(error_msg) - logger.error("This may indicate the video is corrupted, unsupported format, or contains invalid data") - result["message"] = error_msg - return result + # Create the content parts using inline data + prompt_parts = [ + {"text": prompt}, + {"inline_data": { + "mime_type": mime_type, + "data": video_base64 + }} + ] - logger.info("File is ACTIVE and ready for processing") - - # Create content parts using file reference - prompt_parts = [ - {"text": prompt}, - {"file_data": { - "file_uri": uploaded_file.uri, - "mime_type": mime_type - }} - ] + uploaded_file = None # Not using File Upload API # Rate limiting: Wait to avoid hitting API limits # Free tier: 5 RPM, so minimum 12 seconds between requests diff --git a/backend/video_splitter.py b/backend/video_splitter.py index 848d060..abc8cc9 100644 --- a/backend/video_splitter.py +++ b/backend/video_splitter.py @@ -72,32 +72,102 @@ class VideoSplitter: logger.error(f"Error detecting video duration: {str(e)}") return None - def needs_splitting(self, video_path: str) -> bool: + def needs_splitting(self, video_path: str, max_chunk_size_mb: float = 500) -> bool: """ - Check if a video needs to be split based on its duration. + Check if a video needs to be split based on duration OR file size. + + A video needs splitting if: + 1. Duration > 54 minutes (Gemini API time limit), OR + 2. 
File size > 500MB (conservative target to handle variable bitrate) + With 30% variance: 500MB × 1.3 = 650MB max + After base64 encoding: 650MB × 1.37 = 891MB (well under 1GB API limit) Args: video_path: Path to the video file + max_chunk_size_mb: Maximum chunk size in MB (default: 500MB) Returns: - True if video duration exceeds chunk duration, False otherwise + True if video needs splitting based on duration or size, False otherwise """ duration = self.get_video_duration(video_path) if duration is None: - logger.warning("Could not determine if video needs splitting") + logger.warning("Could not determine video duration for splitting check") return False - needs_split = duration > self.chunk_duration_seconds + # Check duration + needs_split_duration = duration > self.chunk_duration_seconds + + # Check file size + file_size_bytes = os.path.getsize(video_path) + file_size_mb = file_size_bytes / (1024 * 1024) + needs_split_size = file_size_mb > max_chunk_size_mb + + needs_split = needs_split_duration or needs_split_size + if needs_split: - logger.info(f"Video needs splitting: {duration/60:.2f} min > {self.chunk_duration_minutes} min") + reasons = [] + if needs_split_duration: + reasons.append(f"duration {duration/60:.2f} min > {self.chunk_duration_minutes} min") + if needs_split_size: + reasons.append(f"file size {file_size_mb:.1f}MB > {max_chunk_size_mb}MB") + logger.info(f"Video needs splitting: {' AND '.join(reasons)}") else: - logger.info(f"Video does not need splitting: {duration/60:.2f} min <= {self.chunk_duration_minutes} min") + logger.info(f"Video does not need splitting: duration {duration/60:.2f} min <= {self.chunk_duration_minutes} min, size {file_size_mb:.1f}MB <= {max_chunk_size_mb:.0f}MB") return needs_split + def calculate_optimal_chunk_duration(self, video_path: str, max_chunk_size_mb: float = 500) -> int: + """ + Calculate optimal chunk duration based on file size and video duration + to ensure chunks don't exceed a maximum file size. 
+ + IMPORTANT: Gemini API has a 1GB request payload limit. + Conservative target of 500MB accounts for variable bitrate (VBR). + With 30% VBR variance: 500MB × 1.3 = 650MB max + After base64 encoding: 650MB × 1.37 = 891MB (under 1GB limit) + + Args: + video_path: Path to the video file + max_chunk_size_mb: Maximum desired chunk size in MB (default: 500MB) + + Returns: + Optimal chunk duration in seconds + """ + duration = self.get_video_duration(video_path) + if duration is None: + logger.warning("Could not determine duration, using default chunk duration") + return self.chunk_duration_seconds + + # Get file size + file_size_bytes = os.path.getsize(video_path) + file_size_mb = file_size_bytes / (1024 * 1024) + file_size_gb = file_size_bytes / (1024 * 1024 * 1024) + + # Calculate average bitrate (bytes per second) + avg_bitrate = file_size_bytes / duration + + # Calculate chunk duration that would result in max_chunk_size_mb + max_chunk_size_bytes = max_chunk_size_mb * 1024 * 1024 + optimal_duration = max_chunk_size_bytes / avg_bitrate + + # Use the smaller of optimal duration or default chunk duration + final_duration = min(optimal_duration, self.chunk_duration_seconds) + + # Ensure minimum chunk duration of 5 minutes (300 seconds) + final_duration = max(final_duration, 300) + + logger.info( + f"Calculated optimal chunk duration: {final_duration:.0f}s ({final_duration/60:.1f} min) " + f"based on file size {file_size_mb:.1f}MB ({file_size_gb:.2f}GB) and duration {duration/60:.1f} min. " + f"Target chunk size: {max_chunk_size_mb}MB" + ) + + return int(final_duration) + def split_video(self, video_path: str, output_dir: Optional[str] = None) -> List[str]: """ Split a video into multiple chunks based on the configured chunk duration. + Automatically adjusts chunk duration if file size would result in chunks > 500MB.
Args: video_path: Path to the video file to split @@ -110,6 +180,9 @@ class VideoSplitter: if duration is None: raise ValueError("Could not determine video duration") + # Calculate optimal chunk duration based on file size + chunk_duration = self.calculate_optimal_chunk_duration(video_path) + # Use temp directory if none specified if output_dir is None: output_dir = tempfile.mkdtemp(prefix="video_chunks_") @@ -118,17 +191,17 @@ class VideoSplitter: os.makedirs(output_dir, exist_ok=True) # Calculate number of chunks needed - num_chunks = int(duration / self.chunk_duration_seconds) + ( - 1 if duration % self.chunk_duration_seconds > 0 else 0 + num_chunks = int(duration / chunk_duration) + ( + 1 if duration % chunk_duration > 0 else 0 ) - logger.info(f"Splitting video into {num_chunks} chunks") + logger.info(f"Splitting video into {num_chunks} chunks (chunk duration: {chunk_duration/60:.1f} min)") chunk_paths = [] video_basename = os.path.splitext(os.path.basename(video_path))[0] video_extension = os.path.splitext(video_path)[1] for i in range(num_chunks): - start_time = i * self.chunk_duration_seconds + start_time = i * chunk_duration chunk_output = os.path.join( output_dir, f"{video_basename}_chunk_{i+1:02d}{video_extension}" @@ -140,7 +213,7 @@ class VideoSplitter: # Split the video using ffmpeg # Using -t to specify duration of this chunk # Using -c copy for fast processing (no re-encoding) - stream = ffmpeg.input(video_path, ss=start_time, t=self.chunk_duration_seconds) + stream = ffmpeg.input(video_path, ss=start_time, t=chunk_duration) stream = ffmpeg.output( stream, chunk_output, @@ -151,7 +224,20 @@ class VideoSplitter: ffmpeg.run(stream, capture_stdout=True, capture_stderr=True, overwrite_output=True) chunk_paths.append(chunk_output) - logger.info(f"Successfully created chunk {i+1}/{num_chunks}") + + # Log chunk size for monitoring + chunk_size_bytes = os.path.getsize(chunk_output) + chunk_size_mb = chunk_size_bytes / (1024 * 1024) + chunk_size_gb = 
chunk_size_bytes / (1024 * 1024 * 1024) + logger.info(f"Successfully created chunk {i+1}/{num_chunks} (size: {chunk_size_mb:.1f}MB / {chunk_size_gb:.2f}GB)") + + # Warn if chunk is approaching size limits (500MB target due to VBR variance) + if chunk_size_mb > 550: + logger.warning( + f"Chunk {i+1} is {chunk_size_mb:.1f}MB ({chunk_size_gb:.2f}GB), exceeding the 500MB target. " + f"After base64 encoding (~37% overhead), this will be ~{chunk_size_mb * 1.37:.1f}MB. " + f"API limit is 1000MB (1GB). If close to limit, consider reducing video quality." + ) except ffmpeg.Error as e: error_msg = e.stderr.decode() if e.stderr else str(e)