# video-query/video_query.py

from google import genai
import mimetypes
import time
import os
import argparse

# --- CONFIGURATION ---
# Placeholder only -- never commit a real API key to source control.
# Supply the real key through the GOOGLE_API_KEY environment variable, e.g.
#   export GOOGLE_API_KEY="YOUR_API_KEY"
# The placeholder value below is exactly the one upload_video_and_query()
# rejects, so a missing key is reported instead of being sent to the API.
API_KEY = "YOUR_GEMINI_API_KEY"

# Default prompt if none is provided
DEFAULT_PROMPT = "What is happening in this video? Describe the main objects and actions."

# Size threshold in bytes (10 MiB). It is only used to choose an informational
# log message; every upload goes through the same client call.
CHUNKED_UPLOAD_CUTOFF = 10 * 1024 * 1024

# --- END CONFIGURATION ---

def _wait_while_processing(client, video_file, poll_seconds=5):
    """Poll the uploaded file until it is no longer in the PROCESSING state.

    Returns the most recently fetched file object so the caller can inspect
    its final state.
    """
    while video_file.state.name == "PROCESSING":
        print("File is still processing. Waiting...")
        time.sleep(poll_seconds)
        video_file = client.files.get(name=video_file.name)  # Re-fetch file state
        print(f"File state: {video_file.state.name}")
    return video_file


def _print_response(response):
    """Print the text parts of a model response, or the raw response if empty."""
    print("\n--- Gemini Response ---")
    if response.parts:
        for part in response.parts:
            # A part can carry a `text` attribute whose value is None, so
            # check the value rather than mere attribute presence; otherwise
            # the literal string "None" would be printed.
            text = getattr(part, "text", None)
            if text is not None:
                print(text)
        return

    print("No content parts in the response. Full response:")
    print(response)
    if getattr(response, "prompt_feedback", None):
        print(f"\nPrompt Feedback: {response.prompt_feedback}")


def upload_video_and_query(api_key: str, video_path: str, prompt: str) -> None:
    """Upload a video to Gemini, query it with a prompt, and print the response.

    All results and errors are reported on stdout (tracebacks on stderr);
    nothing is returned and no exception is propagated to the caller.

    Args:
        api_key: Gemini API key. The placeholder "YOUR_GEMINI_API_KEY" is
            rejected before any network call is made.
        video_path: Path to a local video file. Must be a regular file.
        prompt: Text prompt sent to the model together with the video.
    """
    if api_key == "YOUR_GEMINI_API_KEY":
        print("ERROR: Please replace 'YOUR_GEMINI_API_KEY' with your actual API key.")
        return
    # isfile (not exists) so that a directory path is rejected up front
    # instead of failing later inside the upload call.
    if not os.path.isfile(video_path):
        print(f"ERROR: Video file not found at '{video_path}'")
        return

    try:
        # Initialize the Gemini client
        client = genai.Client(api_key=api_key)

        print(f"Attempting to upload '{video_path}'...")

        # 1. Upload the video file
        file_size = os.path.getsize(video_path)

        # Log file size relative to threshold (for informational purposes only)
        if file_size > CHUNKED_UPLOAD_CUTOFF:
            print(f"File size ({file_size} bytes) exceeds {CHUNKED_UPLOAD_CUTOFF} bytes threshold...")
        else:
            print(f"File size ({file_size} bytes) is below threshold...")

        # All uploads use the same call regardless of size.
        video_file = client.files.upload(file=video_path)

        print(f"Successfully uploaded file: {os.path.basename(video_path)} as {video_file.uri}")
        print(f"File state: {video_file.state.name}")  # Should ideally be ACTIVE

        # 2. Wait until the service has finished processing the file
        video_file = _wait_while_processing(client, video_file)

        if video_file.state.name != "ACTIVE":
            print(f"Error: File '{video_file.name}' did not become active. Current state: {video_file.state.name}")
            print("It might still be processing, or an error occurred during upload/processing.")
            print("Please check the file status in Google AI Studio or try again later.")
            return

        # 3. Prepare the prompt: the request needs the video's MIME type
        mime_type, _ = mimetypes.guess_type(video_path)
        if not mime_type:
            print(f"Could not determine MIME type for {video_path}. Assuming 'video/mp4'.")
            mime_type = "video/mp4"  # Fallback, adjust if needed

        # Content parts in dictionary form: the text prompt and a reference
        # to the uploaded video file.
        prompt_parts = [
            {"text": prompt},
            {"file_data": {
                "file_uri": video_file.uri,
                "mime_type": mime_type
            }}
        ]

        print("\nSending prompt to Gemini 2.5 Pro model...")
        # 4. Generate content using the client
        response = client.models.generate_content(
            model='gemini-2.5-pro',
            contents=prompt_parts
        )

        # 5. Print the response
        _print_response(response)

        # NOTE: the uploaded file is intentionally left in Gemini storage.
        # To remove it, call client.files.delete(name=video_file.name).

    except Exception as e:
        # Top-level boundary for a CLI script: report the failure with a
        # traceback rather than letting it propagate.
        print(f"\nAn error occurred: {e}")
        import traceback
        traceback.print_exc()


if __name__ == "__main__":
    # Command-line interface: one positional video path plus an optional prompt.
    cli = argparse.ArgumentParser(description="Upload a video to Gemini and query it")
    cli.add_argument("video_path", help="Path to the video file")
    cli.add_argument(
        "--prompt",
        "-p",
        default=DEFAULT_PROMPT,
        help="Query prompt for the video",
    )
    options = cli.parse_args()

    # A non-empty GOOGLE_API_KEY in the environment takes precedence;
    # otherwise fall back to the API_KEY constant defined in this script.
    key_from_env = os.environ.get("GOOGLE_API_KEY")
    if not key_from_env:
        selected_key = API_KEY
    else:
        print("Using API key from GOOGLE_API_KEY environment variable.")
        selected_key = key_from_env

    upload_video_and_query(selected_key, options.video_path, options.prompt)