ford_qc/v1-broken/bnpQC.py
2025-09-03 07:03:21 -05:00

183 lines
6.5 KiB
Python

import json
import os
import zipfile
import shutil
import time
from collections import defaultdict
from datetime import datetime
from boxsdk import OAuth2, Client
import fcntl
import sys
import errno
# Box.com configuration.
#
# SECURITY NOTE(review): a Box access token is committed here in plain text.
# Supply it through the BOX_APP_TOKEN environment variable instead and rotate /
# remove the literal below; it is kept only as a fallback so that existing
# deployments keep working unchanged.
BOX_APP_TOKEN = os.environ.get('BOX_APP_TOKEN') or 'pm8PQR3AMIeSVy9sW3nZoeEQnbT4GdAt'
# ID of the Box folder that reports and error files are uploaded to
# (overridable through the BOX_FOLDER_ID environment variable).
BOX_FOLDER_ID = os.environ.get('BOX_FOLDER_ID') or '174746045260'
def file_is_locked(lockfile):
    """Report whether an exclusive lock on *lockfile* is currently unavailable.

    The file is opened (and created if it does not exist), a non-blocking
    exclusive ``fcntl.lockf`` is attempted, and the file is closed again,
    which releases the probe lock.

    Args:
        lockfile: Path of the file to probe.

    Returns:
        True if opening or locking failed with ``EACCES`` or ``EAGAIN``,
        False if the lock could be acquired.

    Raises:
        OSError: For any other failure while opening or locking the file.
    """
    try:
        # Append mode still creates a missing file but, unlike 'w', never
        # truncates existing content merely to test for a lock.
        with open(lockfile, 'a') as lock_file:
            fcntl.lockf(lock_file, fcntl.LOCK_EX | fcntl.LOCK_NB)
            # Leaving the ``with`` block closes the descriptor, which also
            # drops the lock taken above, so no handle is leaked.
    except OSError as e:
        if e.errno in (errno.EACCES, errno.EAGAIN):
            return True
        raise
    return False
def wait_for_file_to_settle(file_path, timeout=300, check_interval=5):
    """Wait for a file to settle (finish copying) before processing.

    The file size is sampled every *check_interval* seconds; the file counts
    as settled as soon as two consecutive samples are equal.

    Args:
        file_path: Path of the file to watch.
        timeout: Maximum number of seconds to keep polling.
        check_interval: Seconds to sleep between size samples.

    Returns:
        True once two consecutive samples match; False if the timeout
        elapses first or the file cannot be stat'ed (e.g. it was removed).
    """
    # A monotonic clock keeps the timeout immune to wall-clock adjustments.
    deadline = time.monotonic() + timeout
    last_size = -1  # sentinel: no real size is negative, so the first sample never matches
    while time.monotonic() < deadline:
        try:
            current_size = os.path.getsize(file_path)
        except OSError:
            # The file vanished or became unreadable while we were waiting.
            # It cannot settle, so report failure instead of raising into
            # the caller's batch loop.
            return False
        if current_size == last_size:
            return True
        last_size = current_size
        time.sleep(check_interval)
    return False
def load_json_data(json_file_path):
    """Parse the JSON document stored at *json_file_path*.

    Args:
        json_file_path: Path of the JSON file to read.

    Returns:
        The decoded JSON value (whatever ``json.load`` produces).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # Decode explicitly instead of relying on the locale default. The
    # "utf-8-sig" codec reads plain UTF-8 and also strips a leading BOM,
    # which json.load would otherwise reject.
    with open(json_file_path, "r", encoding="utf-8-sig") as json_file:
        return json.load(json_file)
def list_files_recursively(directory):
    """Yield the joined path of every file ``os.walk`` reports under *directory*.

    Paths are built as ``os.path.join(<walked dir>, <file name>)``, so they
    carry *directory* as their prefix. Directories themselves are not yielded.
    """
    for dirpath, _subdirs, filenames in os.walk(directory):
        yield from (os.path.join(dirpath, name) for name in filenames)
def find_missing_files(data, base_path):
    """Collect the asset files named in *data* and those absent on disk.

    For every item, each asset ``filename`` of each record is joined onto
    *base_path*. Paths that do not exist are filed under every
    ``(condition key, condition value)`` pair of the owning item.

    Args:
        data: Decoded linking record; read as
            ``data["items"][i]["conditions"]`` (a mapping) and
            ``data["items"][i]["records"][j]`` with ``"features"`` and
            ``"assets"`` entries, each asset carrying a ``"filename"``.
        base_path: Directory the asset filenames are resolved against.

    Returns:
        A ``(missing_files, json_files)`` tuple: ``missing_files`` maps
        condition key -> condition value -> set of missing paths, and
        ``json_files`` is the set of all resolved asset paths.
    """
    missing_files = defaultdict(lambda: defaultdict(set))
    json_files = set()
    for item in data["items"]:
        conditions = item["conditions"]
        if not conditions:
            # With no conditions the item contributes nothing at all.
            continue

        # The asset scan does not depend on the condition, so resolve the
        # paths (and hit the filesystem) once per item instead of once per
        # condition and per feature.
        referenced = set()
        for record in item["records"]:
            # NOTE(review): records whose "features" is empty are skipped,
            # so their assets are neither listed nor checked. This mirrors
            # the previous per-feature loop; confirm it is intended.
            if not record["features"]:
                continue
            for asset in record["assets"]:
                referenced.add(os.path.join(base_path, asset['filename']))

        json_files |= referenced
        absent = {path for path in referenced if not os.path.exists(path)}
        if absent:
            for condition_key, condition_value in conditions.items():
                missing_files[condition_key][condition_value] |= absent
    return missing_files, json_files
def report_missing_files(missing_files, output_file):
    """Write one section per (condition key, condition value) to *output_file*.

    Each section consists of a ``key: value`` heading, the number of missing
    files, and the missing paths in sorted order, one per line. No newline is
    written after the last path of a section.
    """
    emit = output_file.write
    sections = (
        (key, value, paths)
        for key, by_value in missing_files.items()
        for value, paths in by_value.items()
    )
    for key, value, paths in sections:
        emit(f"\n\n{key}: {value}\n")
        emit(f" Number of Missing Files: {len(paths)}\n\nMissing Files:\n\n")
        emit("\n".join(sorted(paths)))
def find_orphan_files(json_files, directory):
    """Return the files under *directory* that are not in *json_files*.

    Args:
        json_files: Set of paths referenced by the linking record.
        directory: Root directory whose files are enumerated recursively.

    Returns:
        The set of paths found on disk minus *json_files*.
    """
    on_disk = set()
    on_disk.update(list_files_recursively(directory))
    return on_disk - json_files
def report_orphan_files(orphan_files, output_file):
    """Append the orphan-file section to *output_file*.

    Writes the section heading followed by every path in *orphan_files*,
    sorted, one per line.
    """
    output_file.write("\n\nExtra files in Filesystem NOT in JSON:\n\n")
    output_file.writelines(f"{path}\n" for path in sorted(orphan_files))
def upload_to_box(client, file_path, folder_id):
    """Upload *file_path* into the Box folder *folder_id*.

    Args:
        client: Box client; ``client.folder(folder_id).get()`` must return an
            object offering ``upload(file_path, file_name)``.
        file_path: Local path of the file to upload.
        folder_id: ID of the destination Box folder.

    Returns:
        True if the upload succeeded, False if the folder lookup or the
        upload raised. Failures are printed and never propagated.
    """
    file_name = os.path.basename(file_path)
    try:
        # The folder lookup is a remote call as well, so it sits inside the
        # try: an auth/network failure must yield False like a failed upload
        # rather than escape to the caller.
        folder = client.folder(folder_id).get()
        folder.upload(file_path, file_name)
    except Exception as e:
        print(f"Error uploading {file_name} to Box.com: {str(e)}")
        return False
    print(f"Successfully uploaded {file_name} to Box.com")
    return True
def process_zip_file(zip_path, output_dir, error_log_path, box_client):
    """QC one zip archive and upload the resulting report to Box.

    The archive is extracted into a temporary directory below *output_dir*
    and searched for ``linkingrecord.json``. A text report is then written
    that lists files referenced by the JSON but absent from the extraction,
    followed by extracted files the JSON does not reference, and the report
    is uploaded. If any step fails, an ``.error`` file holding the exception
    text is written and uploaded instead.

    The temporary directory and the input zip are removed in all cases,
    whether or not processing succeeded.

    Args:
        zip_path: Path of the zip archive to process; deleted before return.
        output_dir: Directory that receives the report or error file.
        error_log_path: Accepted for interface compatibility; not used here.
        box_client: Box client handed to ``upload_to_box``.
    """
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    zip_name = os.path.splitext(os.path.basename(zip_path))[0]
    output_file_path = os.path.join(output_dir, f"{zip_name}_{timestamp}_report.txt")
    error_file_path = os.path.join(output_dir, f"{zip_name}_{timestamp}_report.error")
    temp_dir = os.path.join(output_dir, f"temp_{zip_name}_{timestamp}")
    os.makedirs(temp_dir, exist_ok=True)
    try:
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(temp_dir)

        # Use the first linkingrecord.json found while walking the extraction.
        json_file_path = None
        for root, _, files in os.walk(temp_dir):
            if "linkingrecord.json" in files:
                json_file_path = os.path.join(root, "linkingrecord.json")
                break
        if json_file_path is None:
            raise FileNotFoundError("linkingrecord.json not found in the zip file")

        data = load_json_data(json_file_path)
        missing_files, json_files = find_missing_files(data, temp_dir)

        # Explicit UTF-8 so asset names with non-ASCII characters cannot
        # raise UnicodeEncodeError under a non-UTF-8 locale.
        with open(output_file_path, 'w', encoding='utf-8') as output_file:
            output_file.write(f"Report for {zip_path}\n\n")
            report_missing_files(missing_files, output_file)
            orphan_files = find_orphan_files(json_files, temp_dir)
            report_orphan_files(orphan_files, output_file)

        # Upload the report to Box.com only after the file has been closed,
        # so that everything written above is flushed to disk.
        if upload_to_box(box_client, output_file_path, BOX_FOLDER_ID):
            print(f"Successfully processed and uploaded report for: {zip_path}")
        else:
            print(f"Failed to upload report for {zip_path}")
    except Exception as e:
        error_message = f"Error processing {zip_path}: {str(e)}\n"
        try:
            with open(error_file_path, 'w', encoding='utf-8') as error_file:
                error_file.write(str(e))
            # Upload the error file to Box.com
            if upload_to_box(box_client, error_file_path, BOX_FOLDER_ID):
                print(f"Error file uploaded for {zip_path}")
            else:
                print(f"Failed to upload error file for {zip_path}")
        except Exception as report_error:
            # A failure while recording the error must not mask the original
            # problem or abort the remaining archives of the batch.
            print(f"Failed to record error file for {zip_path}: {report_error}")
        print(error_message.strip())
    finally:
        # Clean up: remove the temporary directory and the zip file
        shutil.rmtree(temp_dir, ignore_errors=True)
        if os.path.exists(zip_path):
            os.remove(zip_path)
            print(f"Deleted zip file: {zip_path}")
def main():
    """Process every ``.zip`` in the input directory and report when done.

    Creates the output directory, builds a Box client from ``BOX_APP_TOKEN``,
    then hands each settled zip archive to ``process_zip_file``. Archives
    that do not settle in time are skipped with a message. "script finished"
    is printed on the way out, even if an exception propagates.
    """
    try:
        input_dir = "/home/box-cli/FORD_SCRIPTS/FORD_ASSET_PACK_FINAL_QC/FORD_QC_V2/input_zips"
        output_dir = "/home/box-cli/FORD_SCRIPTS/FORD_ASSET_PACK_FINAL_QC/FORD_QC_V2/output_reports"
        error_log_path = "/home/box-cli/FORD_SCRIPTS/FORD_ASSET_PACK_FINAL_QC/FORD_QC_V2/error_log.txt"
        os.makedirs(output_dir, exist_ok=True)

        # Initialize Box client with app token
        credentials = OAuth2(
            client_id='',
            client_secret='',
            access_token=BOX_APP_TOKEN,
        )
        box_client = Client(credentials)

        for entry in os.listdir(input_dir):
            if not entry.endswith(".zip"):
                continue
            archive_path = os.path.join(input_dir, entry)
            if not wait_for_file_to_settle(archive_path):
                print(f"File {entry} did not settle within the timeout period. Skipping.")
                continue
            process_zip_file(archive_path, output_dir, error_log_path, box_client)
    finally:
        print("script finished")
# Run the batch only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()