# contract-query/backend/app/services/contract_summary_service.py
# 2025-08-14 15:03:33 -05:00
#
# 238 lines
# No EOL
# 17 KiB
# Python

import json
import asyncio
from typing import Dict, Any, Optional
from datetime import datetime
from openai import OpenAI
from ..config.settings import settings
from ..models.contract_summary import ContractSummary
class ContractSummaryService:
    """Service for extracting structured contract summaries using OpenAI GPT-4o.

    The service sends the full document text plus a JSON schema prompt to the
    chat completions API, parses the reply into a dict, and can validate that
    dict against the ``ContractSummary`` model.
    """

    # Placeholder the schema prompt tells the model to use for missing fields;
    # validation substitutes the same text for None / empty values.
    _NOT_FOUND = "N/A (Not found in Doc)"

    def __init__(self):
        """Build the OpenAI client and read the document size limit from settings."""
        self.client = OpenAI(api_key=settings.openai_api_key)
        self.max_chars = settings.max_document_chars

    async def extract_contract_summary(self, document_text: str, filename: str) -> Dict[str, Any]:
        """
        Extract structured contract summary using OpenAI GPT-4o

        Failures (oversized document, API error, unparseable reply) are
        reported through the returned dict rather than raised.

        Args:
            document_text (str): Full text of the document
            filename (str): Name of the file being processed

        Returns:
            dict: ``{"success": True, "summary": <dict>, "filename": ...}`` on
            success, or ``{"success": False, "error": <str>, "filename": ...}``
            on failure.
        """
        try:
            print(f"Extracting contract summary from: {filename}")

            # Reject oversized documents up front instead of truncating them.
            if len(document_text) > self.max_chars:
                error_msg = f"Document too large: {len(document_text)} characters exceeds maximum of {self.max_chars} characters"
                print(f"Error: {error_msg}")
                return {
                    "success": False,
                    "error": error_msg,
                    "filename": filename
                }

            contract_schema = self._get_contract_schema()
            prompt = (
                f"\nDocument filename: {filename}\n"
                f"Document content:\n"
                f"{document_text}\n"
                f"{contract_schema}\n"
            )

            # The OpenAI client is synchronous; run it in a worker thread so
            # the event loop is not blocked while waiting for the response.
            response = await asyncio.to_thread(
                self.client.chat.completions.create,
                model="gpt-4o",
                messages=[
                    {
                        "role": "system",
                        "content": "You are a contract analysis expert. Extract contract information accurately and return only valid JSON."
                    },
                    {
                        "role": "user",
                        "content": prompt
                    }
                ],
                max_tokens=4000,
                temperature=0.1
            )

            # message.content may be None; treat that as an empty reply so it
            # is reported as a parse failure, not as an API error.
            content = (response.choices[0].message.content or "").strip()

            try:
                summary_json = self._parse_summary_json(content, filename)
            except ValueError as e:
                # json.JSONDecodeError is a ValueError subclass, so this covers
                # both malformed JSON and JSON that is not an object.
                return {
                    "success": False,
                    "error": f"Failed to parse JSON response: {e}. Response length: {len(content)} characters",
                    "filename": filename
                }

            return {
                "success": True,
                "summary": summary_json,
                "filename": filename
            }
        except Exception as e:
            # Boundary handler: callers receive an error dict, never an exception.
            print(f"Error calling OpenAI API: {e}")
            return {
                "success": False,
                "error": f"OpenAI API error: {str(e)}",
                "filename": filename
            }

    @staticmethod
    def _parse_summary_json(content: str, filename: str) -> Dict[str, Any]:
        """
        Decode the model reply into a JSON object (dict)

        The whole reply is tried first. If that fails, the outermost
        ``{...}`` span is tried, which recovers JSON wrapped in prose or in a
        Markdown code fence.

        Args:
            content (str): Stripped text returned by the model
            filename (str): File name, used only in log messages

        Returns:
            dict: The decoded JSON object

        Raises:
            ValueError: If no JSON object can be decoded. The error describes
                the first (whole-reply) failure; malformed JSON raises the
                ``json.JSONDecodeError`` subclass.
        """
        import re  # needed only by the fallback path

        def load_object(text: str) -> Dict[str, Any]:
            parsed = json.loads(text)
            # A bare list/string/number is valid JSON but not a usable summary.
            if not isinstance(parsed, dict):
                raise ValueError(f"expected a JSON object, got {type(parsed).__name__}")
            return parsed

        try:
            summary_json = load_object(content)
        except ValueError as first_error:
            print(f"JSON parsing error: {first_error}")
            print(f"Raw response length: {len(content)} characters")

            # Greedy match from the first "{" to the last "}" in the reply.
            json_match = re.search(r'\{.*\}', content, re.DOTALL)
            if json_match is None:
                raise
            try:
                summary_json = load_object(json_match.group())
            except ValueError:
                # Report the original failure; it describes the full reply.
                raise first_error from None
            print(f"Successfully extracted JSON from wrapped response for {filename}")
            return summary_json

        print(f"Successfully extracted summary for {filename}")
        return summary_json

    @classmethod
    def _fill_missing_values(cls, obj: Any) -> Any:
        """Return a copy of *obj* with None / "" replaced by the not-found placeholder.

        Recurses through both dicts and lists so values nested inside arrays
        are normalised as well.
        """
        if isinstance(obj, dict):
            return {key: cls._fill_missing_values(value) for key, value in obj.items()}
        if isinstance(obj, list):
            return [cls._fill_missing_values(item) for item in obj]
        if obj is None or obj == "":
            return cls._NOT_FOUND
        return obj

    def validate_contract_summary(self, summary_json: Dict[str, Any]) -> "ContractSummary":
        """
        Validate and convert raw JSON to structured ContractSummary model

        Validation errors are not raised: a ContractSummary describing the
        error is returned instead.

        Args:
            summary_json (dict): Raw JSON from OpenAI

        Returns:
            ContractSummary: Validated summary object, or a summary whose
            ``contract_type`` is "Error in processing" when validation fails
        """
        try:
            # Convert any None / empty values to "N/A (Not found in Doc)" for consistency
            cleaned_summary = self._fill_missing_values(summary_json)
            # Validate using Pydantic model
            return ContractSummary(**cleaned_summary)
        except Exception as e:
            print(f"Error validating contract summary: {e}")
            # Return empty summary with error indication
            return ContractSummary(
                contract_type="Error in processing",
                overview_purpose=f"Error validating summary: {str(e)}"
            )

    def _get_contract_schema(self) -> str:
        """Get the contract analysis schema prompt

        The text is sent to the model verbatim, so it is kept flush-left
        inside the string literal.
        """
        return """
Please extract the following information from this contract document and return it in JSON format.
If any information is not found in the document, use "N/A (Not found in Doc)" as the value.
Required fields:
{
"contract_type": "Type of contract (MSA, SOW, Supplier Contract, Vendor Contract, Licensing Agreement, NDA, etc.)",
"overview_purpose": "Brief overview and purpose of the contract",
"relevant_account": "Client account name or relevant account",
"in_studio_name": "In-Studio Name (e.g., The Mix)",
"client_sender_name": "Client/Sender Name",
"client_sender_address": "Client/Sender Address",
"agency_name": "Agency Name (OLM, IIG, AYS, BTG, etc.)",
"agency_address": "Agency Address",
"dates_signed": "Date(s) when contract was signed",
"terms": "Contract terms/duration",
"date_expired": "Contract expiration date",
"pricing_payment_terms": "Pricing and payment terms overview",
"scope_of_work": {
"summary_tasks_deliverables": "Summary of tasks and deliverables",
"key_dates": "Key dates and milestones",
"key_kpis": "Key KPIs or performance indicators"
},
"terms_and_termination": {
"duration": "Look for contract duration, term length, effective period, validity period, or how long this agreement remains in force. Search for phrases like 'term of', 'duration', 'effective for', 'valid until', 'expires on', or specific time periods",
"termination_conditions": "Find termination clauses, conditions under which either party can end the agreement, notice periods required for termination, breach conditions, or circumstances that allow contract cancellation. Look for sections titled 'Termination', 'End of Agreement', or phrases like 'may be terminated', 'notice of termination'",
"penalties": "Search for financial penalties, liquidated damages, fees, or costs associated with early termination, breach of contract, or cancellation. Look for monetary amounts, penalty clauses, or consequences of termination"
},
"payment_terms": {
"payment_method": "Search for how payments are processed - check for bank transfer, wire transfer, check, ACH, credit card, electronic payment, or specific payment platforms. Look for banking details, payment processing instructions, or remittance information",
"payment_schedule": "Find when payments are due - look for payment frequency (monthly, quarterly, annually), due dates, billing cycles, invoice terms, or payment timing. Search for phrases like 'payable within', 'due on', 'payment schedule', or specific dates",
"pricing_details": "Look for detailed pricing structure including rates, fees, hourly rates, project costs, retainer amounts, or cost breakdowns. Search for currency amounts, pricing tables, rate cards, or cost schedules",
"mark_ups": "Find any markup percentages, additional fees, surcharges, or percentage-based charges applied to costs. Look for percentage symbols, markup clauses, or additional charges",
"payment_schedules": "Look for detailed payment timing including milestone payments, installment schedules, advance payments, or progress-based payment structures. Search for payment phases or staged payment plans",
"late_payment_penalties": "Search for late payment fees, interest charges, penalty rates, or consequences of delayed payment. Look for percentage rates for late fees, daily charges, or penalty clauses",
"discounts": "Find any available discounts, early payment incentives, volume discounts, or reduced rates for specific conditions. Look for percentage discounts or preferential pricing terms"
},
"liability_indemnification": {
"responsibilities_liabilities": "Search for sections defining each party's responsibilities, obligations, duties, or liabilities. Look for phrases like 'responsible for', 'liable for', 'obligations include', 'duties of', or specific responsibility assignments",
"damages_losses": "Find who bears responsibility for damages, losses, claims, or financial harm. Look for liability caps, exclusions, limitations of liability, or damage allocation clauses. Search for monetary limits or damage responsibility",
"indemnification_clauses": "Look for indemnification provisions, hold harmless clauses, or protection from third-party claims. Search for phrases like 'indemnify', 'hold harmless', 'defend against', or protection from lawsuits and claims"
},
"confidentiality": {
"scope": "Look for what information is considered confidential - proprietary data, trade secrets, business information, client data, technical information, or specific categories of protected information. Search for definitions of confidential information",
"duration": "Find how long confidentiality obligations last - look for time periods, survival clauses, or duration of non-disclosure obligations. Search for phrases like 'perpetual', 'for X years', 'survives termination', or confidentiality periods",
"exceptions": "Search for exceptions to confidentiality - publicly available information, independently developed information, or legally required disclosures. Look for carve-outs or situations where confidentiality doesn't apply",
"disclosures_by_law": "Find circumstances where confidential information may be disclosed due to legal requirements, court orders, regulatory demands, or government requests. Look for legal disclosure provisions",
"breach_consequences": "Search for penalties, damages, or consequences for violating confidentiality obligations. Look for monetary damages, injunctive relief, or specific penalties for breach of non-disclosure"
},
"intellectual_property": {
"licensor": "Find who is granting intellectual property rights - the party providing licenses, copyrights, trademarks, or other IP rights. Look for the entity or person licensing their intellectual property",
"licensee": "Identify who is receiving intellectual property rights - the party getting permission to use copyrights, trademarks, patents, or other IP. Look for the recipient of IP licensing",
"terms_renewal": "Search for intellectual property renewal terms, license extension conditions, or how IP rights can be renewed or continued. Look for renewal clauses, automatic extensions, or renewal procedures",
"pricing": "Find costs associated with intellectual property use - licensing fees, royalties, IP-related payments, or costs for using copyrighted or trademarked materials. Look for IP pricing structures",
"definitions": "Look for definitions of intellectual property terms, what constitutes IP in this agreement, or specific IP categories covered. Search for IP definitions and scope of protected materials",
"scope": "Find what intellectual property rights are included - copyrights, trademarks, patents, trade secrets, proprietary information, or specific IP assets covered by the agreement",
"duration": "Search for how long intellectual property rights last - license duration, IP protection periods, or time limits on IP usage. Look for IP term lengths or expiration dates",
"territory": "Find geographical limitations on IP rights - specific countries, regions, or territories where IP rights apply. Look for geographic restrictions or worldwide rights",
"use_ownership_rights": "Search for permitted uses of intellectual property, ownership transfers, usage restrictions, or what can be done with the licensed IP. Look for usage rights and ownership provisions"
},
"dispute_resolution": {
"methods": "Search for how disputes will be resolved - negotiation, mediation, arbitration, litigation, or alternative dispute resolution methods. Look for dispute resolution procedures or escalation processes",
"mediation_options": "Find if mediation is required or available for resolving disputes - look for mediation clauses, mediator selection, or mediation procedures. Search for mediation requirements or options",
"arbitration_options": "Look for arbitration clauses, arbitration requirements, arbitrator selection procedures, or binding arbitration provisions. Search for arbitration rules or arbitration organization references",
"litigation_options": "Find court jurisdiction, governing law, or litigation procedures if disputes go to court. Look for jurisdiction clauses, court selection, or legal venue specifications"
},
"warranties_representations": {
"service_standards": "Look for quality standards, performance expectations, service level agreements, or specific standards that services must meet. Search for performance metrics, quality requirements, or service benchmarks",
"service_assurances": "Find warranties, guarantees, representations, or assurances about service quality, performance, or outcomes. Look for warranty clauses, service guarantees, or performance assurances"
},
"compliance_with_laws": {
"relevant_laws": "Search for specific laws, regulations, statutes, or legal requirements that parties must comply with. Look for regulatory compliance, legal standards, or specific legislation mentioned in the contract",
"owner_obligations": "Find legal obligations, compliance responsibilities, or regulatory duties that each party must fulfill. Look for compliance requirements, legal duties, or regulatory obligations"
},
"amendments_versions": {
"change_management": "Look for how contract changes are managed - amendment procedures, modification processes, or change control mechanisms. Search for how the contract can be updated, modified, or amended",
"written_consent": "Find requirements for written consent, signatures, or formal approval needed for contract changes. Look for amendment approval processes or consent requirements for modifications"
},
"assignment_subcontracting": {
"delegation_assignment": "Search for rules about assigning contract rights, delegating obligations, or transferring responsibilities to third parties. Look for assignment clauses, subcontracting permissions, or restrictions on transferring contract duties. Find phrases like 'may not assign', 'assignment requires consent', or subcontracting limitations"
}
}
IMPORTANT: Return ONLY valid JSON. Do not include any explanatory text before or after the JSON.
"""
# Global service instance.
# NOTE: this is constructed when the module is imported, so
# ContractSummaryService.__init__ runs at import time: it builds the OpenAI
# client from settings.openai_api_key and reads settings.max_document_chars.
contract_summary_service = ContractSummaryService()