# video-query/video_query.py
#
# 139 lines
# 5.6 KiB
# Python

from google import genai
import mimetypes
import time
import os
import argparse
# --- CONFIGURATION ---
# Never commit a real API key to source control. Supply it through the
# environment instead, e.g.:
#     export GOOGLE_API_KEY="YOUR_API_KEY"
# The value below is only a placeholder used when GOOGLE_API_KEY is unset;
# upload_video_and_query() refuses to run while it is still in place.
# NOTE: any real key that was ever committed in this file must be treated as
# leaked and revoked.
API_KEY = "YOUR_GEMINI_API_KEY"
# Default prompt if none is provided
DEFAULT_PROMPT = "What is happening in this video? Describe the main objects and actions."
# Size threshold in bytes (10 MiB). It is only used to log whether a file is
# above or below the threshold; every upload goes through the same call.
CHUNKED_UPLOAD_CUTOFF = 10 * 1024 * 1024
# --- END CONFIGURATION ---
def upload_video_and_query(api_key, video_path, prompt, poll_interval=5, max_wait=600):
    """Upload a video to Gemini, query it with a prompt, and print the response.

    Args:
        api_key: Gemini API key used to create the client.
        video_path: Path to a local video file.
        prompt: Text prompt sent to the model together with the video.
        poll_interval: Seconds to sleep between state checks while the
            uploaded file reports the PROCESSING state.
        max_wait: Maximum number of seconds to keep polling before giving up.

    Returns:
        None. Results and errors are reported on stdout; exceptions raised by
        the upload or the model call are caught and printed with a traceback.
    """
    if not api_key:
        print("ERROR: No API key provided. Set the GOOGLE_API_KEY environment variable.")
        return
    if api_key == "YOUR_GEMINI_API_KEY":
        print("ERROR: Please replace 'YOUR_GEMINI_API_KEY' with your actual API key.")
        return
    # isfile rather than exists: a directory path exists but cannot be uploaded.
    if not os.path.isfile(video_path):
        print(f"ERROR: Video file not found at '{video_path}'")
        return
    try:
        # Initialize the Gemini client
        client = genai.Client(api_key=api_key)
        print(f"Attempting to upload '{video_path}'...")

        # 1. Upload the video file
        file_size = os.path.getsize(video_path)
        # The threshold is informational only; all uploads use the same call.
        if file_size > CHUNKED_UPLOAD_CUTOFF:
            print(f"File size ({file_size} bytes) exceeds {CHUNKED_UPLOAD_CUTOFF} bytes threshold...")
        else:
            print(f"File size ({file_size} bytes) is below threshold...")
        video_file = client.files.upload(file=video_path)
        print(f"Successfully uploaded file: {os.path.basename(video_path)} as {video_file.uri}")
        print(f"File state: {video_file.state.name}")  # Should ideally be ACTIVE

        # 2. Wait until the file leaves the PROCESSING state, bounded by
        #    max_wait so a stuck file cannot hang the script forever.
        deadline = time.monotonic() + max_wait
        while video_file.state.name == "PROCESSING":
            if time.monotonic() >= deadline:
                print(f"Timed out after {max_wait} seconds waiting for the file to finish processing.")
                break
            print("File is still processing. Waiting...")
            time.sleep(poll_interval)
            video_file = client.files.get(name=video_file.name)  # Re-fetch file state
            print(f"File state: {video_file.state.name}")
        if video_file.state.name != "ACTIVE":
            print(f"Error: File '{video_file.name}' did not become active. Current state: {video_file.state.name}")
            print("It might still be processing, or an error occurred during upload/processing.")
            print("Please check the file status in Google AI Studio or try again later.")
            return

        # 3. Build the request: the text prompt plus a reference to the upload.
        mime_type, _ = mimetypes.guess_type(video_path)
        if not mime_type:
            print(f"Could not determine MIME type for {video_path}. Assuming 'video/mp4'.")
            mime_type = "video/mp4"  # Fallback, adjust if needed
        prompt_parts = [
            {"text": prompt},
            {"file_data": {
                "file_uri": video_file.uri,
                "mime_type": mime_type,
            }},
        ]
        print("\nSending prompt to Gemini 2.5 Pro model...")

        # 4. Generate content using the client
        response = client.models.generate_content(
            model='gemini-2.5-pro',
            contents=prompt_parts,
        )

        # 5. Print the response
        print("\n--- Gemini Response ---")
        parts = response.parts
        if parts:
            for part in parts:
                # A part may carry no text (its text attribute is then None);
                # checking the value avoids printing a literal "None".
                text = getattr(part, 'text', None)
                if text:
                    print(text)
        else:
            print("No content parts in the response. Full response:")
            print(response)
        feedback = getattr(response, 'prompt_feedback', None)
        if feedback:
            print(f"\nPrompt Feedback: {feedback}")
    except Exception as e:
        # Top-level boundary for this script: report the failure in full
        # instead of letting it propagate to the caller.
        print(f"\nAn error occurred: {e}")
        import traceback
        traceback.print_exc()
    # NOTE: the uploaded file is intentionally left in Gemini storage; call
    # client.files.delete(name=video_file.name) if it is no longer needed.
if __name__ == "__main__":
    # Command-line interface: one positional video path, one optional prompt.
    cli = argparse.ArgumentParser(description="Upload a video to Gemini and query it")
    cli.add_argument("video_path", help="Path to the video file")
    cli.add_argument(
        "--prompt",
        "-p",
        help="Query prompt for the video",
        default=DEFAULT_PROMPT,
    )
    options = cli.parse_args()

    # The environment variable wins; the module-level API_KEY is the fallback.
    key_from_env = os.environ.get("GOOGLE_API_KEY")
    if not key_from_env:
        resolved_key = API_KEY
    else:
        print("Using API key from GOOGLE_API_KEY environment variable.")
        resolved_key = key_from_env

    upload_video_and_query(resolved_key, options.video_path, options.prompt)