Use inline base64 for all Gemini video uploads; split videos by file size as well as duration

2025-11-13 18:45:59 +05:30 · 2025-11-13 18:45:59 +05:30 · 56fe6437b6
commit 56fe6437b6
parent f3186276c4
4 changed files with 159 additions and 95 deletions
--- a/.claude/settings.local.json
+++ b/.claude/settings.local.json
@ -47,7 +47,10 @@
      "Read(//tmp/**)",
      "WebFetch(domain:docs.cloud.google.com)",
      "Bash(journalctl:*)",
-      "Bash(sudo systemctl status:*)"
+      "Bash(sudo systemctl status:*)",
+      "WebFetch(domain:github.com)",
+      "WebFetch(domain:pypi.org)",
+      "Bash(lsof:*)"
    ],
    "deny": []
  }
--- a/backend/requirements.txt
+++ b/backend/requirements.txt
@ -16,7 +16,7 @@ google-api-core==2.25.0rc0
 google-api-python-client==2.169.0
 google-auth==2.40.0
 google-auth-httplib2==0.2.0
-google-genai>=1.44.0
+google-genai>=1.49.0
 googleapis-common-protos==1.70.0
 grpcio==1.71.0
 grpcio-status==1.71.0
--- a/backend/video_processor.py
+++ b/backend/video_processor.py
@ -228,96 +228,71 @@ class VideoProcessor:
            except Exception as val_err:
                logger.warning(f"Could not validate video file: {str(val_err)} - proceeding anyway")

-            # Use different approach based on file size
-            # Small files (< 10MB): use inline base64 data (faster, no upload wait)
-            # Large files (>= 10MB): use file upload API (handles larger files)
-            # Note: Base64 adds ~37% overhead, so 10MB file = ~13.7MB base64
-            SIZE_THRESHOLD_MB = 10
-            uploaded_file = None
+            # SIMPLIFIED APPROACH: Use inline base64 for all files
+            # File Upload API has known issues (KeyError: 'file' in SDK 1.45.0-1.49.0)
+            # Gemini API REQUEST limit: 1GB (1073741824 bytes) - confirmed by testing
+            # Base64 expands data by ~33% (4/3 ratio); the 1.37 factor used below is a conservative estimate

-            if file_size_mb < SIZE_THRESHOLD_MB:
-                # Small file: Use base64 encoding for inline data
-                logger.info(f"File < {SIZE_THRESHOLD_MB}MB, using inline base64 data")
-                with open(video_path, "rb") as video_file_obj:
-                    video_data = video_file_obj.read()
-                    video_base64 = base64.b64encode(video_data).decode('utf-8')
+            # Calculate estimated encoded size (base64 overhead ~37%)
+            BASE64_OVERHEAD = 1.37
+            estimated_encoded_mb = file_size_mb * BASE64_OVERHEAD
+            API_LIMIT_MB = 1000  # 1GB API request limit

-                logger.info(f"Base64 encoding complete. Size: {len(video_base64)} characters")
-
-                # Create the content parts using inline data
-                prompt_parts = [
-                    {"text": prompt},
-                    {"inline_data": {
-                        "mime_type": mime_type,
-                        "data": video_base64
-                    }}
-                ]
-            else:
-                # Large file: Use file upload API
-                logger.info(f"File >= {SIZE_THRESHOLD_MB}MB, using file upload API")
-                upload_start = time.time()
-
-                uploaded_file = self.client.files.upload(
-                    file=video_path
+            # Check if encoded size would exceed API limit
+            if estimated_encoded_mb > API_LIMIT_MB:
+                error_msg = (
+                    f"Video chunk is too large: {file_size_mb:.2f}MB raw, "
+                    f"~{estimated_encoded_mb:.1f}MB after base64 encoding. "
+                    f"This exceeds the {API_LIMIT_MB}MB (1GB) API limit. "
+                    f"The video needs to be split into smaller chunks. "
+                    f"Target chunk size: 500MB (accounting for variable bitrate)."
                )
-                logger.info(f"Upload complete in {time.time() - upload_start:.1f}s. File URI: {uploaded_file.uri}")
-                logger.info(f"Initial file state: {uploaded_file.state}")
+                logger.error(error_msg)
+                result["message"] = error_msg
+                return result

-                # Wait for file to be processed with timeout
-                max_wait_time = 300  # 5 minutes timeout
-                wait_start = time.time()
-                check_count = 0
+            # Warn if approaching API limit (>900MB encoded)
+            if estimated_encoded_mb > 900:
+                logger.warning(
+                    f"Chunk is large: {file_size_mb:.1f}MB raw, "
+                    f"~{estimated_encoded_mb:.1f}MB encoded. "
+                    f"Approaching {API_LIMIT_MB}MB API limit. Processing may be slower."
+                )

-                while uploaded_file.state == "PROCESSING":
-                    check_count += 1
-                    elapsed = time.time() - wait_start
+            # Use base64 encoding for all files (reliable and works consistently)
+            logger.info(f"Encoding video as base64 (file size: {file_size_mb:.2f}MB)")
+            encode_start = time.time()

-                    if elapsed > max_wait_time:
-                        error_msg = f"File processing timeout after {max_wait_time}s. File may be too large or corrupted."
-                        logger.error(error_msg)
-                        # Try to delete the file
-                        try:
-                            self.client.files.delete(name=uploaded_file.name)
-                        except:
-                            pass
-                        result["message"] = error_msg
-                        return result
+            with open(video_path, "rb") as video_file_obj:
+                video_data = video_file_obj.read()
+                video_base64 = base64.b64encode(video_data).decode('utf-8')

-                    logger.info(f"File is still processing (check #{check_count}, {elapsed:.0f}s elapsed), waiting...")
-                    time.sleep(3)
+            encode_time = time.time() - encode_start
+            encoded_size_mb = len(video_base64) / (1024 * 1024)
+            logger.info(f"Base64 encoding complete in {encode_time:.2f}s. Encoded size: {encoded_size_mb:.2f}MB ({len(video_base64)} chars)")

-                    try:
-                        uploaded_file = self.client.files.get(name=uploaded_file.name)
-                        logger.info(f"Updated file state: {uploaded_file.state}")
-                    except Exception as status_err:
-                        logger.error(f"Error checking file status: {str(status_err)}")
-                        # Wait a bit longer and try again
-                        time.sleep(5)
-                        try:
-                            uploaded_file = self.client.files.get(name=uploaded_file.name)
-                        except Exception as retry_err:
-                            error_msg = f"Failed to check file upload status: {str(retry_err)}"
-                            logger.error(error_msg)
-                            result["message"] = error_msg
-                            return result
+            # Verify actual encoded size is within API limits (1GB)
+            if encoded_size_mb > API_LIMIT_MB:
+                error_msg = (
+                    f"Encoded video size ({encoded_size_mb:.2f}MB) exceeds Gemini API limit of {API_LIMIT_MB}MB (1GB). "
+                    f"Original file: {file_size_mb:.2f}MB. "
+                    f"This chunk needs further splitting. Target: 500MB raw chunks. "
+                    f"Variable bitrate caused larger-than-expected chunk size."
+                )
+                logger.error(error_msg)
+                result["message"] = error_msg
+                return result

-                if uploaded_file.state != "ACTIVE":
-                    error_msg = f"File upload failed. State: {uploaded_file.state}"
-                    logger.error(error_msg)
-                    logger.error("This may indicate the video is corrupted, unsupported format, or contains invalid data")
-                    result["message"] = error_msg
-                    return result
+            # Create the content parts using inline data
+            prompt_parts = [
+                {"text": prompt},
+                {"inline_data": {
+                    "mime_type": mime_type,
+                    "data": video_base64
+                }}
+            ]

-                logger.info("File is ACTIVE and ready for processing")
-
-                # Create content parts using file reference
-                prompt_parts = [
-                    {"text": prompt},
-                    {"file_data": {
-                        "file_uri": uploaded_file.uri,
-                        "mime_type": mime_type
-                    }}
-                ]
+            uploaded_file = None  # Not using File Upload API

            # Rate limiting: Wait to avoid hitting API limits
            # Free tier: 5 RPM, so minimum 12 seconds between requests
--- a/backend/video_splitter.py
+++ b/backend/video_splitter.py
@ -72,32 +72,102 @@ class VideoSplitter:
            logger.error(f"Error detecting video duration: {str(e)}")
            return None

-    def needs_splitting(self, video_path: str) -> bool:
+    def needs_splitting(self, video_path: str, max_chunk_size_mb: float = 500) -> bool:
        """
-        Check if a video needs to be split based on its duration.
+        Check if a video needs to be split based on duration OR file size.
+
+        A video needs splitting if:
+        1. Duration > 54 minutes (Gemini API time limit), OR
+        2. File size > max_chunk_size_mb (default 500MB, a conservative target to handle variable bitrate)
+           With 30% variance: 500MB × 1.3 = 650MB max
+           After base64 encoding: 650MB × 1.37 = 891MB (well under 1GB API limit)

        Args:
            video_path: Path to the video file
+            max_chunk_size_mb: Maximum chunk size in MB (default: 500MB)

        Returns:
-            True if video duration exceeds chunk duration, False otherwise
+            True if video needs splitting based on duration or size, False otherwise
        """
        duration = self.get_video_duration(video_path)
        if duration is None:
-            logger.warning("Could not determine if video needs splitting")
+            logger.warning("Could not determine video duration for splitting check")
            return False

-        needs_split = duration > self.chunk_duration_seconds
+        # Check duration
+        needs_split_duration = duration > self.chunk_duration_seconds
+
+        # Check file size
+        file_size_bytes = os.path.getsize(video_path)
+        file_size_mb = file_size_bytes / (1024 * 1024)
+        needs_split_size = file_size_mb > max_chunk_size_mb
+
+        needs_split = needs_split_duration or needs_split_size
+
        if needs_split:
-            logger.info(f"Video needs splitting: {duration/60:.2f} min > {self.chunk_duration_minutes} min")
+            reasons = []
+            if needs_split_duration:
+                reasons.append(f"duration {duration/60:.2f} min > {self.chunk_duration_minutes} min")
+            if needs_split_size:
+                reasons.append(f"file size {file_size_mb:.1f}MB > {max_chunk_size_mb}MB")
+            logger.info(f"Video needs splitting: {' AND '.join(reasons)}")
        else:
-            logger.info(f"Video does not need splitting: {duration/60:.2f} min <= {self.chunk_duration_minutes} min")
+            logger.info(f"Video does not need splitting: duration {duration/60:.2f} min <= {self.chunk_duration_minutes} min, size {file_size_mb:.1f}MB <= {max_chunk_size_mb:.0f}MB")

        return needs_split

+    def calculate_optimal_chunk_duration(self, video_path: str, max_chunk_size_mb: float = 500) -> int:
+        """
+        Calculate optimal chunk duration based on file size and video duration
+        to ensure chunks don't exceed a maximum file size.
+
+        IMPORTANT: Gemini API has a 1GB request payload limit.
+        Conservative target of 500MB accounts for variable bitrate (VBR).
+        With 30% VBR variance: 500MB × 1.3 = 650MB max
+        After base64 encoding: 650MB × 1.37 = 891MB (under 1GB limit)
+
+        Args:
+            video_path: Path to the video file
+            max_chunk_size_mb: Maximum desired chunk size in MB (default: 500MB)
+
+        Returns:
+            Optimal chunk duration in seconds
+        """
+        duration = self.get_video_duration(video_path)
+        if duration is None:
+            logger.warning("Could not determine duration, using default chunk duration")
+            return self.chunk_duration_seconds
+
+        # Get file size
+        file_size_bytes = os.path.getsize(video_path)
+        file_size_mb = file_size_bytes / (1024 * 1024)
+        file_size_gb = file_size_bytes / (1024 * 1024 * 1024)
+
+        # Calculate average bitrate (bytes per second)
+        avg_bitrate = file_size_bytes / duration
+
+        # Calculate chunk duration that would result in max_chunk_size_mb
+        max_chunk_size_bytes = max_chunk_size_mb * 1024 * 1024
+        optimal_duration = max_chunk_size_bytes / avg_bitrate
+
+        # Use the smaller of optimal duration or default chunk duration
+        final_duration = min(optimal_duration, self.chunk_duration_seconds)
+
+        # Enforce a 5-minute (300s) floor; NOTE: for very high bitrates this floor can push chunks above max_chunk_size_mb
+        final_duration = max(final_duration, 300)
+
+        logger.info(
+            f"Calculated optimal chunk duration: {final_duration:.0f}s ({final_duration/60:.1f} min) "
+            f"based on file size {file_size_mb:.1f}MB ({file_size_gb:.2f}GB) and duration {duration/60:.1f} min. "
+            f"Target chunk size: {max_chunk_size_mb}MB"
+        )
+
+        return int(final_duration)
+
    def split_video(self, video_path: str, output_dir: Optional[str] = None) -> List[str]:
        """
        Split a video into multiple chunks based on the configured chunk duration.
+        Automatically shortens the chunk duration when the average bitrate would produce chunks larger than the 500MB target.

        Args:
            video_path: Path to the video file to split
@ -110,6 +180,9 @@ class VideoSplitter:
        if duration is None:
            raise ValueError("Could not determine video duration")

+        # Calculate optimal chunk duration based on file size
+        chunk_duration = self.calculate_optimal_chunk_duration(video_path)
+
        # Use temp directory if none specified
        if output_dir is None:
            output_dir = tempfile.mkdtemp(prefix="video_chunks_")
@ -118,17 +191,17 @@ class VideoSplitter:
            os.makedirs(output_dir, exist_ok=True)

        # Calculate number of chunks needed
-        num_chunks = int(duration / self.chunk_duration_seconds) + (
-            1 if duration % self.chunk_duration_seconds > 0 else 0
+        num_chunks = int(duration / chunk_duration) + (
+            1 if duration % chunk_duration > 0 else 0
        )
-        logger.info(f"Splitting video into {num_chunks} chunks")
+        logger.info(f"Splitting video into {num_chunks} chunks (chunk duration: {chunk_duration/60:.1f} min)")

        chunk_paths = []
        video_basename = os.path.splitext(os.path.basename(video_path))[0]
        video_extension = os.path.splitext(video_path)[1]

        for i in range(num_chunks):
-            start_time = i * self.chunk_duration_seconds
+            start_time = i * chunk_duration
            chunk_output = os.path.join(
                output_dir,
                f"{video_basename}_chunk_{i+1:02d}{video_extension}"
@ -140,7 +213,7 @@ class VideoSplitter:
                # Split the video using ffmpeg
                # Using -t to specify duration of this chunk
                # Using -c copy for fast processing (no re-encoding)
-                stream = ffmpeg.input(video_path, ss=start_time, t=self.chunk_duration_seconds)
+                stream = ffmpeg.input(video_path, ss=start_time, t=chunk_duration)
                stream = ffmpeg.output(
                    stream,
                    chunk_output,
@ -151,7 +224,20 @@ class VideoSplitter:
                ffmpeg.run(stream, capture_stdout=True, capture_stderr=True, overwrite_output=True)

                chunk_paths.append(chunk_output)
-                logger.info(f"Successfully created chunk {i+1}/{num_chunks}")
+
+                # Log chunk size for monitoring
+                chunk_size_bytes = os.path.getsize(chunk_output)
+                chunk_size_mb = chunk_size_bytes / (1024 * 1024)
+                chunk_size_gb = chunk_size_bytes / (1024 * 1024 * 1024)
+                logger.info(f"Successfully created chunk {i+1}/{num_chunks} (size: {chunk_size_mb:.1f}MB / {chunk_size_gb:.2f}GB)")
+
+                # Warn if chunk overshoots the 500MB target by more than 10% (>550MB), e.g. due to VBR variance
+                if chunk_size_mb > 550:
+                    logger.warning(
+                        f"Chunk {i+1} is {chunk_size_mb:.1f}MB ({chunk_size_gb:.2f}GB), exceeding the 500MB target. "
+                        f"After base64 encoding (~37% overhead), this will be ~{chunk_size_mb * 1.37:.1f}MB. "
+                        f"API limit is 1000MB (1GB). If close to limit, consider reducing video quality."
+                    )

            except ffmpeg.Error as e:
                error_msg = e.stderr.decode() if e.stderr else str(e)