139 lines
5.6 KiB
Python
139 lines
5.6 KiB
Python
from google import genai
|
|
import mimetypes
|
|
import time
|
|
import os
|
|
import argparse
|
|
|
|
# --- CONFIGURATION ---

# SECURITY: never commit a real API key to source control. Supply the key
# through the environment instead, e.g.:
#     export GOOGLE_API_KEY="YOUR_API_KEY"
# A real-looking key was previously hard-coded on this line; treat that key as
# leaked and revoke/rotate it.
#
# The value below is only a placeholder sentinel: upload_video_and_query()
# refuses to run while the key it receives still equals this string.
API_KEY = "YOUR_GEMINI_API_KEY"

# Default prompt if none is provided on the command line.
DEFAULT_PROMPT = "What is happening in this video? Describe the main objects and actions."

# Size threshold in bytes (10 MiB). It is used only to choose the informational
# log message printed before the upload; every file is uploaded the same way.
CHUNKED_UPLOAD_CUTOFF = 10 * 1024 * 1024

# --- END CONFIGURATION ---
|
|
|
|
def upload_video_and_query(api_key, video_path, prompt, *, poll_interval=5, processing_timeout=600):
    """Upload a video to Gemini, query it with a prompt, and print the response.

    Progress, errors, and the model's reply are all reported with ``print``.
    Exceptions derived from ``Exception`` that are raised while uploading or
    generating are caught and printed together with a traceback.

    Args:
        api_key: Gemini API key. An empty value or the placeholder
            ``"YOUR_GEMINI_API_KEY"`` is rejected before any client is created.
        video_path: Path to a local video file.
        prompt: Text prompt sent alongside the video.
        poll_interval: Seconds to sleep between state checks while the
            uploaded file reports the ``PROCESSING`` state.
        processing_timeout: Maximum number of seconds to wait for the file to
            leave ``PROCESSING``. ``None`` waits indefinitely.

    Returns:
        None.
    """
    # Reject a missing key as well as the untouched placeholder.
    if not api_key or api_key == "YOUR_GEMINI_API_KEY":
        print("ERROR: Please replace 'YOUR_GEMINI_API_KEY' with your actual API key.")
        return
    # isfile (not exists): a directory path cannot be uploaded as a video.
    if not os.path.isfile(video_path):
        print(f"ERROR: Video file not found at '{video_path}'")
        return

    try:
        # Initialize the Gemini client
        client = genai.Client(api_key=api_key)

        print(f"Attempting to upload '{video_path}'...")

        # 1. Upload the video file
        file_size = os.path.getsize(video_path)

        # The threshold only selects the log message; the upload call below is
        # the same for every file size.
        if file_size > CHUNKED_UPLOAD_CUTOFF:
            print(f"File size ({file_size} bytes) exceeds {CHUNKED_UPLOAD_CUTOFF} bytes threshold...")
        else:
            print(f"File size ({file_size} bytes) is below threshold...")

        video_file = client.files.upload(file=video_path)

        print(f"Successfully uploaded file: {os.path.basename(video_path)} as {video_file.uri}")
        print(f"File state: {_file_state_name(video_file)}")  # Should ideally be ACTIVE

        # Wait (bounded by processing_timeout) for the file to become usable.
        video_file = _wait_while_processing(client, video_file, poll_interval, processing_timeout)

        final_state = _file_state_name(video_file)
        if final_state != "ACTIVE":
            print(f"Error: File '{video_file.name}' did not become active. Current state: {final_state}")
            print("It might still be processing, or an error occurred during upload/processing.")
            print("Please check the file status in Google AI Studio or try again later.")
            return

        # 2. Prepare the prompt: the request needs the video's MIME type.
        mime_type, _ = mimetypes.guess_type(video_path)
        if not mime_type:
            print(f"Could not determine MIME type for {video_path}. Assuming 'video/mp4'.")
            mime_type = "video/mp4"  # Fallback, adjust if needed

        # Content parts: the text prompt plus a reference to the uploaded file.
        prompt_parts = [
            {"text": prompt},
            {"file_data": {
                "file_uri": video_file.uri,
                "mime_type": mime_type,
            }},
        ]

        print("\nSending prompt to Gemini 2.5 Pro model...")
        # 3. Generate content using the client
        response = client.models.generate_content(
            model='gemini-2.5-pro',
            contents=prompt_parts,
        )

        # 4. Print the response
        _print_response(response)

        # NOTE: the uploaded file is left in Gemini storage. To remove it, call
        # client.files.delete(name=video_file.name) once it is no longer needed.

    except Exception as e:
        # Top-level boundary of the script: report the failure instead of crashing.
        print(f"\nAn error occurred: {e}")
        import traceback
        traceback.print_exc()


def _file_state_name(video_file):
    """Return the file's state name as a string, tolerating a missing state."""
    state = getattr(video_file, "state", None)
    if state is None:
        return "STATE_UNSPECIFIED"
    # Enum members expose .name; fall back to str() for anything else.
    return getattr(state, "name", str(state))


def _wait_while_processing(client, video_file, poll_interval, processing_timeout):
    """Poll until the file leaves PROCESSING or the timeout elapses; return the latest file."""
    # A monotonic clock keeps the deadline immune to wall-clock adjustments.
    deadline = None if processing_timeout is None else time.monotonic() + processing_timeout
    while _file_state_name(video_file) == "PROCESSING":
        if deadline is not None and time.monotonic() >= deadline:
            print(f"Timed out after {processing_timeout} seconds waiting for file processing.")
            break
        print("File is still processing. Waiting...")
        time.sleep(poll_interval)
        video_file = client.files.get(name=video_file.name)  # Re-fetch file state
        print(f"File state: {_file_state_name(video_file)}")
    return video_file


def _print_response(response):
    """Print the text parts of a generation response, or the raw response if it has none."""
    print("\n--- Gemini Response ---")
    # getattr: do not crash if this response object exposes no `parts`.
    parts = getattr(response, "parts", None)
    if parts:
        for part in parts:
            text = getattr(part, "text", None)
            # Skip parts that carry no text so a literal "None" is never printed.
            if text is not None:
                print(text)
    else:
        print("No content parts in the response. Full response:")
        print(response)
        if getattr(response, "prompt_feedback", None):
            print(f"\nPrompt Feedback: {response.prompt_feedback}")
|
|
|
|
|
|
if __name__ == "__main__":
    # Command-line interface: one positional video path plus an optional prompt.
    cli = argparse.ArgumentParser(description="Upload a video to Gemini and query it")
    cli.add_argument("video_path", help="Path to the video file")
    cli.add_argument("--prompt", "-p", help="Query prompt for the video", default=DEFAULT_PROMPT)
    options = cli.parse_args()

    # The GOOGLE_API_KEY environment variable takes precedence; the constant
    # defined in this script is used only when the variable is unset or empty.
    key_from_env = os.getenv("GOOGLE_API_KEY")
    if not key_from_env:
        selected_key = API_KEY
    else:
        print("Using API key from GOOGLE_API_KEY environment variable.")
        selected_key = key_from_env

    upload_video_and_query(selected_key, options.video_path, options.prompt)
|