183 lines
6.5 KiB
Python
183 lines
6.5 KiB
Python
import json
|
|
import os
|
|
import zipfile
|
|
import shutil
|
|
import time
|
|
from collections import defaultdict
|
|
from datetime import datetime
|
|
from boxsdk import OAuth2, Client
|
|
import fcntl
|
|
import sys
|
|
import errno
|
|
|
|
|
|
# Box.com configuration
# Each value can be overridden through an environment variable of the same
# name; the literals are kept as fallbacks so existing deployments keep working.
# NOTE(review): a credential committed to source should be rotated and supplied
# only through the environment.
BOX_APP_TOKEN = os.environ.get('BOX_APP_TOKEN') or 'pm8PQR3AMIeSVy9sW3nZoeEQnbT4GdAt'
BOX_FOLDER_ID = os.environ.get('BOX_FOLDER_ID') or '174746045260'
|
|
|
|
def file_is_locked(lockfile):
    """Probe whether an exclusive lock on *lockfile* can be taken right now.

    The probe uses a non-blocking ``fcntl.lockf`` exclusive lock. The file
    handle is closed before returning, so the probe does not keep the file
    open afterwards.

    Args:
        lockfile: Path of the file to probe. It is created if it is missing.

    Returns:
        False if the lock could be taken; True if opening or locking the file
        failed with ``EACCES`` or ``EAGAIN``.

    Raises:
        OSError: For any other failure while opening or locking the file.
    """
    try:
        # Append mode gives the write access LOCK_EX needs without truncating
        # the file being probed (mode 'w' would wipe its contents).
        with open(lockfile, 'a') as lock_file:
            fcntl.lockf(lock_file, fcntl.LOCK_EX | fcntl.LOCK_NB)
            return False
    except IOError as e:
        if e.errno in (errno.EACCES, errno.EAGAIN):
            return True
        raise
|
|
|
|
|
|
def wait_for_file_to_settle(file_path, timeout=300, check_interval=5):
    """Wait for a file to settle (finish copying) before processing.

    The file counts as settled once two consecutive size checks, taken
    ``check_interval`` seconds apart, report the same size.

    Args:
        file_path: Path of the file to watch.
        timeout: Maximum number of seconds to keep checking.
        check_interval: Seconds to sleep between two size checks.

    Returns:
        True if the size stopped changing before the timeout; False if the
        timeout elapsed or the file no longer exists.
    """
    # A monotonic clock keeps the timeout correct if the wall clock is adjusted.
    deadline = time.monotonic() + timeout
    last_size = -1  # Sentinel: no real file has a negative size.
    while time.monotonic() < deadline:
        try:
            current_size = os.path.getsize(file_path)
        except FileNotFoundError:
            # The file vanished while we were waiting; nothing left to process.
            return False
        if current_size == last_size:
            return True
        last_size = current_size
        time.sleep(check_interval)
    return False
|
|
|
|
|
|
def load_json_data(json_file_path):
    """Parse the JSON document stored at *json_file_path*.

    Args:
        json_file_path: Path of the JSON file to read.

    Returns:
        The decoded JSON value.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # "utf-8-sig" reads plain UTF-8 and also strips a leading byte-order mark,
    # which json.load would otherwise reject; it also removes the dependency
    # on the platform's default encoding.
    with open(json_file_path, "r", encoding="utf-8-sig") as json_file:
        return json.load(json_file)
|
|
|
|
def list_files_recursively(directory):
    """Yield the path of every file found below *directory*.

    Paths are produced by joining each directory visited by ``os.walk`` with
    the file names it contains; directories themselves are not yielded.
    """
    for parent, _subdirs, names in os.walk(directory):
        yield from (os.path.join(parent, name) for name in names)
|
|
|
|
def find_missing_files(data, base_path):
    """Collect the asset files referenced by *data* and those absent on disk.

    Args:
        data: Mapping with an "items" list. Each item provides a "conditions"
            mapping and a "records" list; each record provides "features" and
            an "assets" list whose entries carry a "filename".
        base_path: Directory that asset filenames are joined onto.

    Returns:
        A ``(missing_files, json_files)`` tuple. ``missing_files`` maps
        condition key -> condition value -> set of referenced paths that do
        not exist; ``json_files`` is the set of all referenced paths.

    Raises:
        KeyError: If one of the keys named above is absent.
    """
    missing_files = defaultdict(lambda: defaultdict(set))
    json_files = set()

    for item in data["items"]:
        condition_pairs = list(item["conditions"].items())
        if not condition_pairs:
            # Without conditions there is nothing to file the assets under;
            # such items contribute no referenced paths either.
            continue

        # Resolve each record's assets once per item instead of once per
        # (condition, feature) pair: the paths do not depend on either.
        # NOTE(review): a record's assets only count when its "features" is
        # non-empty; the feature values themselves are never read. Confirm
        # this gate is intended (vs. e.g. per-feature assets).
        referenced = {
            os.path.join(base_path, asset['filename'])
            for record in item["records"]
            if record["features"]
            for asset in record["assets"]
        }
        json_files.update(referenced)

        absent = {path for path in referenced if not os.path.exists(path)}
        if not absent:
            # Only create condition entries when there is something to report.
            continue
        for condition_key, condition_value in condition_pairs:
            missing_files[condition_key][condition_value].update(absent)

    return missing_files, json_files
|
|
|
|
def report_missing_files(missing_files, output_file):
    """Write one section per (condition key, condition value) pair.

    Each section consists of a heading, the number of missing files and the
    sorted file names joined by newlines (no trailing newline after the last
    name).

    Args:
        missing_files: Mapping of condition key -> condition value ->
            collection of file names.
        output_file: Writable text stream receiving the report.
    """
    emit = output_file.write
    # Flatten the two-level mapping into one stream of sections.
    sections = (
        (condition_key, condition_value, filenames)
        for condition_key, condition_values in missing_files.items()
        for condition_value, filenames in condition_values.items()
    )
    for condition_key, condition_value, filenames in sections:
        emit(f"\n\n{condition_key}: {condition_value}\n")
        emit(f" Number of Missing Files: {len(filenames)}\n\nMissing Files:\n\n")
        ordered_names = sorted(filenames)
        emit("\n".join(ordered_names))
|
|
|
|
def find_orphan_files(json_files, directory):
    """Return the files below *directory* that are not listed in *json_files*.

    Args:
        json_files: Set of file paths to exclude from the result.
        directory: Root of the tree to scan.

    Returns:
        A set of paths found on disk that do not appear in *json_files*.
    """
    found_on_disk = set()
    found_on_disk.update(list_files_recursively(directory))
    return found_on_disk - json_files
|
|
|
|
def report_orphan_files(orphan_files, output_file):
    """Write the orphan-file section: a heading, then one sorted path per line.

    Args:
        orphan_files: Collection of file paths to list.
        output_file: Writable text stream receiving the section.
    """
    emit = output_file.write
    emit("\n\nExtra files in Filesystem NOT in JSON:\n\n")
    ordered_paths = sorted(orphan_files)
    for path in ordered_paths:
        emit(f"{path}\n")
|
|
|
|
def upload_to_box(client, file_path, folder_id):
    """Upload *file_path* into a Box folder and report success as a boolean.

    Args:
        client: Box client object providing ``folder(folder_id)``.
        file_path: Local path of the file to upload; its base name is used as
            the name of the uploaded file.
        folder_id: Identifier of the destination folder.

    Returns:
        True if the upload call completed; False if the folder lookup or the
        upload raised. Failures are printed, never propagated.
    """
    file_name = os.path.basename(file_path)

    try:
        # The folder lookup lives inside the try block as well, so a failure
        # there is reported through the return value like any upload failure
        # instead of escaping to the caller.
        folder = client.folder(folder_id).get()
        folder.upload(file_path, file_name)
    except Exception as e:
        print(f"Error uploading {file_name} to Box.com: {str(e)}")
        return False
    else:
        print(f"Successfully uploaded {file_name} to Box.com")
        return True
|
|
|
|
def _find_linking_record(search_root):
    """Return the path of the first ``linkingrecord.json`` below *search_root*, or None."""
    for root, _, files in os.walk(search_root):
        if "linkingrecord.json" in files:
            return os.path.join(root, "linkingrecord.json")
    return None


def process_zip_file(zip_path, output_dir, error_log_path, box_client):
    """Check one asset zip against its linking record and upload the result.

    The zip is extracted into a temporary directory under *output_dir*, its
    ``linkingrecord.json`` is loaded, and a text report listing missing and
    orphan files is written and uploaded to Box. If any step fails, an
    ``.error`` file holding the exception text is written and uploaded
    instead. The temporary directory and the zip itself are removed in every
    case.

    Args:
        zip_path: Path of the zip archive to process; deleted afterwards.
        output_dir: Directory receiving the report / error file and the
            temporary extraction directory.
        error_log_path: Accepted for interface compatibility; not used by
            this function.
        box_client: Box client handed to ``upload_to_box``.
    """
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    zip_name = os.path.splitext(os.path.basename(zip_path))[0]
    output_file_path = os.path.join(output_dir, f"{zip_name}_{timestamp}_report.txt")
    error_file_path = os.path.join(output_dir, f"{zip_name}_{timestamp}_report.error")
    temp_dir = os.path.join(output_dir, f"temp_{zip_name}_{timestamp}")
    os.makedirs(temp_dir, exist_ok=True)

    try:
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(temp_dir)

        json_file_path = _find_linking_record(temp_dir)
        if json_file_path is None:
            raise FileNotFoundError("linkingrecord.json not found in the zip file")

        data = load_json_data(json_file_path)
        missing_files, json_files = find_missing_files(data, temp_dir)

        # Explicit UTF-8 so file names in the report do not depend on the
        # platform's default encoding.
        with open(output_file_path, 'w', encoding='utf-8') as output_file:
            output_file.write(f"Report for {zip_path}\n\n")
            report_missing_files(missing_files, output_file)

            orphan_files = find_orphan_files(json_files, temp_dir)
            report_orphan_files(orphan_files, output_file)

        # Upload the report to Box.com (after the file has been closed)
        if upload_to_box(box_client, output_file_path, BOX_FOLDER_ID):
            print(f"Successfully processed and uploaded report for: {zip_path}")
        else:
            print(f"Failed to upload report for {zip_path}")

    except Exception as e:
        error_message = f"Error processing {zip_path}: {str(e)}\n"

        # Recording the error is best effort: a failure here must not escape,
        # otherwise it would abort the caller's loop over the remaining zips.
        try:
            with open(error_file_path, 'w', encoding='utf-8') as error_file:
                error_file.write(str(e))

            # Upload the error file to Box.com
            if upload_to_box(box_client, error_file_path, BOX_FOLDER_ID):
                print(f"Error file uploaded for {zip_path}")
            else:
                print(f"Failed to upload error file for {zip_path}")
        except Exception as report_error:
            print(f"Failed to record error file for {zip_path}: {report_error}")

        print(error_message.strip())

    finally:
        # Clean up: remove the temporary directory and the zip file
        shutil.rmtree(temp_dir, ignore_errors=True)
        if os.path.exists(zip_path):
            os.remove(zip_path)
            print(f"Deleted zip file: {zip_path}")
|
|
|
|
|
|
def main():
    """Process every ``.zip`` found in the input directory, one after another.

    Creates the output directory if needed, builds a Box client from
    ``BOX_APP_TOKEN``, and hands each zip whose size has settled to
    ``process_zip_file``. Zips that do not settle are skipped with a message.
    "script finished" is printed on the way out, whether or not an error
    occurred.
    """
    try:
        input_dir = "/home/box-cli/FORD_SCRIPTS/FORD_ASSET_PACK_FINAL_QC/FORD_QC_V2/input_zips"
        output_dir = "/home/box-cli/FORD_SCRIPTS/FORD_ASSET_PACK_FINAL_QC/FORD_QC_V2/output_reports"
        error_log_path = "/home/box-cli/FORD_SCRIPTS/FORD_ASSET_PACK_FINAL_QC/FORD_QC_V2/error_log.txt"

        os.makedirs(output_dir, exist_ok=True)

        # Initialize Box client with app token
        box_client = Client(
            OAuth2(client_id='', client_secret='', access_token=BOX_APP_TOKEN)
        )

        for filename in os.listdir(input_dir):
            # Anything that is not a zip archive is ignored.
            if not filename.endswith(".zip"):
                continue

            zip_path = os.path.join(input_dir, filename)
            if not wait_for_file_to_settle(zip_path):
                print(f"File {filename} did not settle within the timeout period. Skipping.")
                continue

            process_zip_file(zip_path, output_dir, error_log_path, box_client)

    finally:
        print("script finished")
|
|
|
|
# Run the batch only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|