diff --git a/.claude/settings.local.json b/.claude/settings.local.json index d4c97c9..c5dd28e 100644 --- a/.claude/settings.local.json +++ b/.claude/settings.local.json @@ -52,7 +52,8 @@ "WebFetch(domain:pypi.org)", "Bash(lsof:*)", "Bash(wkhtmltopdf:*)", - "Bash(/usr/bin/wkhtmltopdf:*)" + "Bash(/usr/bin/wkhtmltopdf:*)", + "Bash(git check-ignore:*)" ], "deny": [] } diff --git a/backend/video_processor.py b/backend/video_processor.py index 45e8aa1..c8f8695 100644 --- a/backend/video_processor.py +++ b/backend/video_processor.py @@ -522,31 +522,75 @@ class VideoProcessor: return result def combine_chunk_responses(self, responses: List[str], prompt: str, - num_chunks: int) -> str: + num_chunks: int, video_name: str = "") -> str: """ - Combine responses from multiple video chunks using simple concatenation. - For single-video chunks split due to duration. + Combine responses from multiple chunks of a single video using AI synthesis. + Uses universal synthesis approach - no type detection. Args: responses: List of response texts from each chunk - prompt: Original prompt used for processing - num_chunks: Total number of chunks processed + prompt: Original user prompt + num_chunks: Total number of chunks + video_name: Name of the video file Returns: Combined response text """ - logger.info(f"Combining {len(responses)} chunk responses using simple concatenation") + logger.info(f"Combining {len(responses)} chunk responses using AI synthesis") - combined = [] - combined.append(f"# Complete Video Analysis\n") - combined.append(f"*This video was processed in {num_chunks} segments.*\n\n") + # Try AI synthesis first + try: + # Prepare chunk responses + summaries_text = "" + for i, response in enumerate(responses, 1): + summaries_text += f"\n--- Segment {i} ---\n{response.strip()}\n" - for i, response in enumerate(responses, 1): - combined.append(f"## Segment {i} of {num_chunks}\n\n") - combined.append(response.strip()) - combined.append(f"\n\n") + # Universal synthesis prompt for single video + synthesis_prompt = f"""You are creating a final unified response by combining multiple segment analyses from one video. - return "".join(combined) +Context: +- One video was split into {num_chunks} segments for processing +- Each segment was analyzed separately +- Below are the responses from each segment + +Original user request: +"{prompt}" + +Segment responses: +{summaries_text} + +Your task: +1. Combine these segment responses into ONE cohesive response that fulfills the user's request above +2. Do not reference segments, chunks, or parts in your output +3. Present as a unified analysis of the complete video + +Provide your unified response: +""" + + logger.info("[Chunk Synthesis] Sending synthesis request to Gemini") + response = self._make_api_request_with_retry( + model=self.synthesis_model, + contents=[{"text": synthesis_prompt}], + context="[Chunk Combination]" + ) + + if response.parts: + synthesized_content = "" + for part in response.parts: + if hasattr(part, 'text'): + synthesized_content += part.text + + if synthesized_content: + logger.info("[Chunk Synthesis] Successfully synthesized chunk responses") + return synthesized_content + + logger.warning("[Chunk Synthesis] Synthesis returned empty, falling back to concatenation") + + except Exception as e: + logger.warning(f"[Chunk Synthesis] Synthesis failed: {e}, falling back to concatenation") + + # Fallback: simple concatenation + return self._fallback_concatenation_single_video(responses, num_chunks, video_name) def _process_single_chunk(self, chunk_info: Tuple[int, str, str, int, str]) -> Tuple[int, Dict[str, Any]]: """ @@ -711,7 +755,8 @@ class VideoProcessor: combined_content = self.combine_chunk_responses( chunk_responses, prompt, - len(chunk_paths) + len(chunk_paths), + video_name=os.path.basename(video_path) ) result["success"] = True @@ -1137,16 +1182,14 @@ Your response will be combined with responses from other segments to create the def _create_chunk_summary_prompt(self, original_prompt: str, chunk_number: int, total_chunks: int, video_name: str) -> str: """ - Create a prompt that asks for a summary suitable for later synthesis. + Create prompt for individual video/chunk in batch processing. + User prompt is the primary instruction with minimal synthesis hint. """ - summary_prompt = f"""You are analyzing segment {chunk_number} of {total_chunks} from video "{video_name}". + prompt = f"""You are analyzing video {chunk_number} of {total_chunks}: "{video_name}". +Your response will be synthesized with responses from other videos to create a unified final result. -Original user request: -{original_prompt} - -Provide a concise summary of this segment. Your summary will be combined with other summaries to create the final result. -""" - return summary_prompt +{original_prompt}""" + return prompt def _synthesize_final_result(self, summaries: List[str], chunk_metadata: List[Dict], original_prompt: str, user_email: str) -> str: @@ -1240,3 +1283,31 @@ Your task: Create ONE cohesive response that fulfills the user's request. Integr result += summary.strip() + "\n\n" return result + + def _fallback_concatenation_single_video(self, responses: List[str], num_chunks: int, + video_name: str = "") -> str: + """ + Fallback method when AI synthesis fails for single video chunks. + Simple concatenation with minimal formatting. + + Args: + responses: List of chunk responses + num_chunks: Number of chunks + video_name: Video filename + + Returns: + Concatenated response + """ + logger.info("Using fallback concatenation for single video") + + result = f"# Complete Video Analysis" + if video_name: + result += f": {video_name}" + result += "\n\n" + result += f"*Note: This video was analyzed in {num_chunks} segments.*\n\n" + + for i, response in enumerate(responses, 1): + result += f"## Segment {i} of {num_chunks}\n\n" + result += response.strip() + "\n\n" + + return result