adobe-ps-scripts-loreal/mac_ps_extract.py
DJP 4a192a8c97 Initial commit: Adobe Photoshop API text management scripts
Local and cloud-based workflows for extracting and updating
text layers in PSD files via ExtendScript and Adobe PS API.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-02 13:46:52 -05:00

873 lines
No EOL
43 KiB
Python

#!/usr/bin/env python3
"""
Mac Photoshop Text Extractor
----------------------------
A macOS-specific script to extract text from PSD files using AppleScript
to control Photoshop and execute ExtendScript (JSX) code.
This is designed to work on macOS without requiring the photoshop-python-api
package which has Windows dependencies.
"""
import argparse
import json
import logging
import os
import subprocess
import sys
import tempfile
import time
from pathlib import Path
from typing import List, Optional
# Root logging setup: INFO and above, with timestamps to the second.
_LOG_SETTINGS = {
    "level": logging.INFO,
    "format": '%(asctime)s - %(levelname)s - %(message)s',
    "datefmt": '%Y-%m-%d %H:%M:%S',
}
logging.basicConfig(**_LOG_SETTINGS)

# Module-level logger used by every function and class in this file.
logger = logging.getLogger(__name__)
# Embedded copy of the ExtractTextWithBreaks.jsx ExtendScript, kept as a raw
# string so its backslashes reach Photoshop untouched.  extract_text_from_psd
# falls back to it when no ExtractTextWithBreaks.jsx sits next to this file.
#
# What the minified source does:
#   * walks the active document's layers (recursing into LayerSets) and
#     collects each text layer's contents, font, size and style ranges;
#   * serializes the result by hand into JSON (documentName, psdPath,
#     extractedAt, dimensions, textLayerCount, textLayers);
#   * writes that JSON to File(OUTPUT_PATH).  OUTPUT_PATH is not defined in
#     the script itself, so a `var OUTPUT_PATH = ...;` declaration has to be
#     prepended before it runs (MacPhotoshop.run_jsx_script does this from
#     its script_args).
#
# NOTE(review): the script reads and writes `window.forceRichTextFormatting`.
#   ExtendScript presumably has no `window` global; if so, those accesses
#   throw and are swallowed by the surrounding try/catch blocks, which would
#   drop single-paragraph layers from the output -- confirm in Photoshop.
# NOTE(review): `(i=parseInt(l,10))&&isNaN(i)&&(i=12)` cannot reset a NaN
#   size: NaN is falsy, so the `&&` chain stops before the assignment and
#   `"size": NaN` (not valid JSON) could be written.
# NOTE(review): the "formatting indicator" list contains three empty strings;
#   `indexOf("")` never returns -1, so that check always reports a match.
#   The empty entries may be characters lost in an encoding round trip.
# NOTE(review): the script contains no File.saveDialog call, so the
#   `.replace(...)` in extract_text_from_psd that targets that call does not
#   match this text.
EXTRACT_TEXT_SCRIPT = r"""
// Photoshop Script to Extract Text Layers With Exact Line Breaks
#target photoshop
function writeTextFile(e,t){e.encoding="UTF8",e.open("w"),e.write(t),e.close()}function escapeJsonString(e){return e?e.replace(/\\/g,"\\\\").replace(/"/g,'\\"').replace(/\n/g,"\\n").replace(/\r/g,"\\r").replace(/\t/g,"\\t").replace(/\f/g,"\\f"):""}function extractTextLayers(e){function t(e,r){r=r||"";for(var n=0;n<e.length;n++){var o=e[n],a=r?r+"/"+o.name:o.name;if(o.kind===LayerKind.TEXT){$.writeln("Found text layer: "+o.name);try{var s=o.textItem.contents;$.writeln("Text: "+s);var i=12;try{if(o.textItem.size){var l=o.textItem.size.toString();(i=parseInt(l,10))&&isNaN(i)&&(i=12)}}catch(e){$.writeln("Could not get font size: "+e)}var c=[];try{app.activeDocument.activeLayer=o;$.writeln("Extracting detailed text formatting for layer: "+o.name);var f=s.split(/[\r\n]/);$.writeln("Text has "+f.length+" paragraphs"),f.length>1?($.writeln("Multi-paragraph text detected - treating each paragraph separately"),function(){for(var e=0,t=0;t<f.length;t++){var r=f[t];if(0!==r.length){var n=e,l=n+r.length;$.writeln("Paragraph "+(t+1)+" ["+n+"-"+l+']: "'+r.substring(0,Math.min(20,r.length))+(r.length>20?"...":"")+"\"");var d=0===t;c.push({start:n,end:l,text:r,font:o.textItem.font||"Unknown",style:d?"Bold":"Regular",size:i,color:d?[0,0,0]:[80,80,80],isPrimary:d}),e=l,t<f.length-1&&e++}}c.length>1&&($.writeln("Created "+c.length+" different style entries for paragraphs"),window.forceRichTextFormatting=!0)}()):($.writeln("Single paragraph text - checking for character-level formatting"),function(){var e=new ActionReference;e.putEnumerated(charIDToTypeID("Lyr "),charIDToTypeID("Ordn"),charIDToTypeID("Trgt"));var t=executeActionGet(e);if(t.hasKey(stringIDToTypeID("textKey"))){var r=t.getObjectValue(stringIDToTypeID("textKey"));if(r.hasKey(stringIDToTypeID("textStyleRange"))){var n=r.getList(stringIDToTypeID("textStyleRange"));$.writeln("Found "+n.count+" text style ranges");for(var a=0;a<n.count;a++)try{var 
l=n.getObjectValue(a),f=l.getObjectValue(stringIDToTypeID("from")),d=l.getObjectValue(stringIDToTypeID("textStyle")),p=f.getInteger(stringIDToTypeID("from")),g=f.getInteger(stringIDToTypeID("to")),u=s.substring(p,g),y=o.textItem.font||"Unknown",m="Regular",T=null,h=i;d.hasKey(stringIDToTypeID("fontName"))&&(y=d.getString(stringIDToTypeID("fontName"))),d.hasKey(stringIDToTypeID("fontStyleName"))&&(m=d.getString(stringIDToTypeID("fontStyleName"))),d.hasKey(stringIDToTypeID("size"))&&(h=d.getDouble(stringIDToTypeID("size"))),c.push({start:p,end:g,text:u,font:y,style:m,size:h,color:T})}catch(e){$.writeln("Error processing style range: "+e)}}}()}),0===c.length&&($.writeln("No style ranges detected, adding default style for entire text"),c.push({start:0,end:s.length,text:s,font:o.textItem.font||"Unknown",style:"Regular",size:i,color:null})),$.writeln("Total style ranges found: "+c.length)}catch(e){$.writeln("Could not extract text styles: "+e),c.push({start:0,end:s.length,text:s,font:o.textItem.font||"Unknown",style:"Regular",size:i,color:null})}d.push({id:"",name:o.name,path:a,text:s,updatedText:s,visible:o.visible,styleInfo:{font:o.textItem.font||"Unknown",size:i,color:null,alignment:"left",styles:c},hasRichTextFormatting:function(){var e=s.split(/[\r\n]/).length;return e>1?($.writeln("Multi-paragraph text found: "+e+" paragraphs, marking as rich formatted"),!0):c.length>1?($.writeln("Multiple style ranges found, marking as rich formatted"),!0):function(){for(var e=0;e<c.length;e++)if(c[e].color)return $.writeln("Color information found in style, marking as rich formatted"),!0;return!1}()||window.forceRichTextFormatting||(e=["","","","*",":","|"],t=!1,e.forEach(function(e){-1!==s.indexOf(e)&&($.writeln("Found formatting indicator character: "+e),t=!0)}),t);var e,t}()})}catch(e){$.writeln("Error extracting from layer "+o.name+": "+e)}}o.typename==="LayerSet"&&t(o.layers,a)}}var d=[];return t(e.layers),d}function main(){try{if(!documents.length)return void alert("Please 
open a PSD file before running this script.");var e=app.activeDocument,t=e.name;$.writeln("Extracting text layers from: "+t);var r=extractTextLayers(e);if(0===r.length)return void alert("No text layers found in this document.");$.writeln("Found "+r.length+" text layer(s)");var n=t.replace(/\.[^\.]+$/,"-textonly.json"),o=File(OUTPUT_PATH);if(!o)return;var a="{\n";a+=' "documentName": "'+escapeJsonString(t)+'",\n',a+=' "psdPath": "'+escapeJsonString(e.path?e.path+"/"+e.name:e.name)+'",\n',a+=' "extractedAt": "'+new Date().toString()+'",\n',a+=' "dimensions": {\n';var s=0,i=0;try{e.width&&(s=parseInt(e.width.toString(),10)),e.height&&(i=parseInt(e.height.toString(),10))}catch(e){$.writeln("Error getting dimensions: "+e)}a+=" \"width\": "+s+",\n",a+=" \"height\": "+i+"\n",a+=" },\n",a+=' "textLayerCount": '+r.length+",\n",a+=' "textLayers": [\n';for(var l=0;l<r.length;l++){var c=r[l];a+=" {\n",a+=' "id": "",\n',a+=' "name": "'+escapeJsonString(c.name)+'",\n',a+=' "path": "'+escapeJsonString(c.path)+'",\n',a+=' "text": "'+escapeJsonString(c.text)+'",\n',a+=' "updatedText": "'+escapeJsonString(c.text)+'",\n',a+=' "visible": '+(c.visible?"true":"false")+",\n",a+=' "styleInfo": {\n',a+=' "font": "'+escapeJsonString(c.styleInfo.font)+'",\n',a+=' "size": '+c.styleInfo.size+",\n",a+=' "color": null,\n',a+=' "alignment": "left",\n',a+=' "styles": [\n';if(c.styleInfo.styles&&c.styleInfo.styles.length>0)for(var f=0;f<c.styleInfo.styles.length;f++){var d=c.styleInfo.styles[f];a+=" {\n",a+=' "start": '+d.start+",\n",a+=' "end": '+d.end+",\n",a+=' "text": "'+escapeJsonString(d.text)+'",\n',a+=' "font": "'+escapeJsonString(d.font)+'",\n',a+=' "style": "'+escapeJsonString(d.style)+'",\n',a+=' "size": '+d.size,d.color&&d.color.length?(a+=",\n",a+=' "color": ['+d.color.join(", ")+"]\n"):a+="\n",a+=" }"+(f<c.styleInfo.styles.length-1?",\n":"\n")}a+=" ]\n",a+=" },\n",a+=' "hasRichTextFormatting": '+(c.hasRichTextFormatting?"true":"false")+"\n",a+=" }"+(l<r.length-1?",\n":"\n")}a+=" 
]\n",a+="}",writeTextFile(o,a);var p="Extracted "+r.length+' text layers from document "'+t+'".\n\n';p+="Text data saved to: "+o.fsName,alert(p)}catch(e){alert("Error: "+e.message)}}main();
"""
class MacPhotoshop:
    """Control a local Photoshop installation on macOS through AppleScript.

    All communication goes through the ``osascript`` and ``open`` command-line
    tools.  Creating an instance looks for a known installation on disk and
    then launches (or attaches to) Photoshop.

    Attributes:
        ps_path: First known ``.app`` bundle path that exists, or ``None``.
        ps_app_name: Application name used in ``tell application "..."``
            blocks and with ``open -a``.
    """

    # Application name assumed when no installation is found on disk.
    _DEFAULT_APP_NAME = "Adobe Photoshop 2025"

    # Bundle locations probed by _find_photoshop, in priority order.
    _KNOWN_APP_PATHS = (
        "/Applications/Adobe Photoshop 2025/Adobe Photoshop 2025.app",
        "/Applications/Adobe Photoshop 2024/Adobe Photoshop 2024.app",
        "/Applications/Adobe Photoshop/Adobe Photoshop.app",
        "/Applications/Adobe Photoshop CC 2025/Adobe Photoshop CC 2025.app",
        "/Applications/Adobe Photoshop CC 2024/Adobe Photoshop CC 2024.app",
    )

    # Application names tried by _launch_photoshop, in priority order.
    _KNOWN_APP_NAMES = (
        "Adobe Photoshop 2025",
        "Adobe Photoshop 2024",
        "Adobe Photoshop CC 2025",
        "Adobe Photoshop CC 2024",
        "Adobe Photoshop",
    )

    # ExtendScript that switches off Photoshop's interactive dialogs.
    _DISABLE_DIALOGS_JS = (
        "// Script to disable all dialogs in Photoshop\n"
        "app.displayDialogs = DialogModes.NO;\n"
        "app.displayStatusDialogs = false;\n"
        "// Disable all other dialog types\n"
        "try {\n"
        "    // General preferences for dialog suppression\n"
        "    var desc = new ActionDescriptor();\n"
        '    desc.putBoolean(stringIDToTypeID("dontShowAgain"), true);\n'
        '    app.putCustomOptions("dontShowDialog", desc, true);\n'
        "} catch (e) {\n"
        "    // Ignore errors\n"
        "}\n"
    )

    def __init__(self):
        """Locate Photoshop on disk, then launch or attach to it."""
        self.ps_path = self._find_photoshop()
        if not self.ps_path:
            logger.warning("Could not find Photoshop installation path")
        # Name the release that is actually installed when one was found, so
        # that attaching to an already-running Photoshop 2024 does not address
        # "Adobe Photoshop 2025".  _launch_photoshop overwrites this when it
        # starts a specific release itself.
        if self.ps_path:
            self.ps_app_name = Path(self.ps_path).stem
        else:
            self.ps_app_name = self._DEFAULT_APP_NAME
        # Launch or connect to Photoshop
        self._launch_photoshop()

    @staticmethod
    def _escape_applescript(text: str) -> str:
        """Escape *text* for use inside a double-quoted AppleScript string."""
        return text.replace("\\", "\\\\").replace('"', '\\"')

    @staticmethod
    def _js_var_declarations(variables: Optional[dict]) -> str:
        """Render *variables* as ``var NAME = VALUE;`` ExtendScript lines.

        Values are serialized with ``json.dumps`` so strings are fully
        escaped and booleans/``None`` become ``true``/``false``/``null``
        rather than their Python spellings.  Values JSON cannot represent
        are passed through ``str`` first.
        """
        if not variables:
            return ""
        return "".join(
            f"var {name} = {json.dumps(value, default=str)};\n"
            for name, value in variables.items()
        )

    @staticmethod
    def _run_applescript(source: str) -> "subprocess.CompletedProcess":
        """Run AppleScript *source* with ``osascript -e`` and capture its output.

        The script is passed as a single argv entry (no shell involved), so
        no temporary script file is needed.
        """
        return subprocess.run(
            ["osascript", "-e", source],
            check=False, capture_output=True, text=True,
        )

    def _tell_photoshop(self, *commands: str) -> "subprocess.CompletedProcess":
        """Run *commands* inside ``tell application "<ps_app_name>"``."""
        app_name = self._escape_applescript(self.ps_app_name)
        lines = [f'tell application "{app_name}"', *commands, "end tell"]
        return self._run_applescript("\n".join(lines))

    def _find_photoshop(self) -> Optional[str]:
        """Return the first known Photoshop bundle path that exists, else None."""
        for path in self._KNOWN_APP_PATHS:
            if os.path.exists(path):
                logger.info(f"Found Photoshop at: {path}")
                return path
        return None

    def _launch_photoshop(self) -> bool:
        """Launch Photoshop if it's not already running.

        Returns:
            True when a Photoshop process is already running or one of the
            known application names could be activated (in which case
            ``ps_app_name`` is set to that name); False otherwise.
        """
        try:
            # Try to find a running instance first
            running_check = "\n".join([
                'tell application "System Events"',
                'set ps_processes to (every process whose name begins with "Adobe Photoshop")',
                'if (count of ps_processes) > 0 then',
                'return true',
                'else',
                'return false',
                'end if',
                'end tell',
            ])
            result = self._run_applescript(running_check)
            if result.returncode == 0 and result.stdout.strip() == "true":
                logger.info("Photoshop is already running")
                return True
            if result.returncode != 0:
                # Not fatal: activating an application that is already
                # running is harmless, so fall through and try to launch.
                logger.debug(f"Could not query running processes: {result.stderr.strip()}")

            # Try the installed release first, then every other known name.
            candidates = [self.ps_app_name]
            candidates += [n for n in self._KNOWN_APP_NAMES if n != self.ps_app_name]
            for ps_name in candidates:
                logger.info(f"Trying to launch Photoshop as: {ps_name}")
                app_name = self._escape_applescript(ps_name)
                result = self._run_applescript(
                    f'tell application "{app_name}"\nactivate\nend tell'
                )
                if result.returncode == 0:
                    logger.info(f"Photoshop ({ps_name}) launched successfully")
                    time.sleep(1)  # Give Photoshop just a moment to initialize
                    self.ps_app_name = ps_name
                    return True
                logger.debug(f"Failed to launch {ps_name}: {result.stderr.strip()}")

            # If we got here, we couldn't launch any version
            logger.error("Couldn't launch any version of Photoshop")
            return False
        except Exception as e:
            logger.error(f"Error launching Photoshop: {e}")
            return False

    def open_file(self, file_path: str) -> bool:
        """Open a PSD file in Photoshop.

        Tries AppleScript ``open`` first and falls back to ``open -a``.

        Returns:
            True when either method reports success, False otherwise.
        """
        try:
            file_path = os.path.abspath(file_path)
            logger.debug(f"Attempting to open file: {file_path}")
            escaped_path = self._escape_applescript(file_path)
            result = self._tell_photoshop(
                f'set theFile to POSIX file "{escaped_path}"',
                "open theFile",
            )
            if result.returncode != 0:
                logger.error(f"AppleScript error: {result.stderr}")
                logger.debug("Trying alternate method...")
                # Argument list instead of a quoted shell string, so paths
                # containing quotes or apostrophes cannot break the command.
                result = subprocess.run(
                    ["open", "-a", self.ps_app_name, file_path],
                    check=False, capture_output=True, text=True,
                )
                if result.returncode != 0:
                    logger.error(f"Alternate method also failed: {result.stderr}")
                    return False
            logger.info(f"Opened file: {file_path}")
            return True
        except Exception as e:
            logger.error(f"Error opening file: {e}")
            return False

    def _execute_jsx(self, jsx_source: str) -> bool:
        """Write *jsx_source* to a temporary file and have Photoshop run it.

        Tries AppleScript ``do javascript file`` first and falls back to
        ``open -a``.  The temporary file is removed unless the ``open``
        fallback was used: ``open`` returns without waiting for Photoshop,
        which may not have read the file yet.

        Returns:
            True when either method reports success, False otherwise.
        """
        fd, jsx_path = tempfile.mkstemp(prefix="ps_script_", suffix=".jsx")
        keep_file = False
        try:
            with os.fdopen(fd, "w") as handle:
                handle.write(jsx_source)
            logger.debug(f"JSX script written to: {jsx_path}")

            escaped_path = self._escape_applescript(jsx_path)
            result = self._tell_photoshop(f'do javascript file "{escaped_path}"')
            if result.returncode == 0:
                logger.info("Executed JSX script using 'do javascript' approach")
                return True

            logger.info("'do javascript' approach returned non-zero: falling back to open method")
            result = subprocess.run(
                ["open", "-a", self.ps_app_name, jsx_path],
                check=False, capture_output=True, text=True,
            )
            if result.returncode == 0:
                keep_file = True
                logger.info("Executed JSX script by opening it directly")
                return True
            logger.error(f"Error opening JSX script directly: {result.stderr}")
            return False
        finally:
            if not keep_file:
                try:
                    os.remove(jsx_path)
                except OSError as e:
                    logger.debug(f"Could not remove temporary script {jsx_path}: {e}")

    def run_jsx_script(self, script: str, script_args: dict = None) -> bool:
        """Run a JSX script in Photoshop with optional arguments.

        If ``ExtractTextWithBreaks.jsx`` exists next to this file it is tried
        first, with ``OUTPUT_PATH`` (taken from *script_args*) and dialog
        suppression prepended.  If that file is missing or fails, *script* is
        run with every entry of *script_args* declared as a global variable.

        Args:
            script: ExtendScript source used when the external file is
                missing or could not be executed.
            script_args: Mapping of variable name to value, declared at the
                top of the script.  ``None`` is treated as empty.

        Returns:
            True when Photoshop accepted one of the scripts, False otherwise.
        """
        try:
            # First disable all dialogs in Photoshop to avoid any user interaction
            self._disable_dialogs()
            script_args = script_args or {}

            script_dir = os.path.dirname(os.path.abspath(__file__))
            jsx_file_path = os.path.join(script_dir, "ExtractTextWithBreaks.jsx")
            if os.path.exists(jsx_file_path):
                logger.info(f"Using existing JSX file: {jsx_file_path}")
                try:
                    with open(jsx_file_path, "r") as f:
                        jsx_content = f.read()
                    prologue = (
                        self._js_var_declarations(
                            {"OUTPUT_PATH": script_args.get("OUTPUT_PATH", "")}
                        )
                        + "\n// Disable all dialogs\n"
                        + "app.displayDialogs = DialogModes.NO;\n"
                        + "app.displayStatusDialogs = false;\n\n"
                    )
                    if self._execute_jsx(prologue + jsx_content):
                        return True
                except Exception as e:
                    logger.error(f"Error executing JSX script: {e}")

            # Fall back to the script passed in by the caller
            try:
                if self._execute_jsx(self._js_var_declarations(script_args) + script):
                    return True
            except Exception as e:
                logger.error(f"Error executing JSX script: {e}")

            logger.warning("JSX script execution attempts failed")
            return False
        except Exception as e:
            logger.error(f"Error running JSX script: {e}")
            return False

    def _disable_dialogs(self) -> bool:
        """Disable all dialogs in Photoshop to prevent user interaction.

        Returns:
            True when the dialog-suppression script was accepted by
            Photoshop (inline or via a temporary file), False otherwise.
        """
        try:
            # The escaped text is built outside the f-string: backslashes in
            # an f-string expression are a syntax error before Python 3.12.
            inline_js = self._escape_applescript(self._DISABLE_DIALOGS_JS)
            inline_js = inline_js.replace("\n", "\\n")
            result = self._tell_photoshop(f'do javascript "{inline_js}"')
            if result.returncode == 0:
                logger.info("Disabled dialogs in Photoshop using direct JavaScript execution")
                return True

            logger.warning(f"Direct JavaScript execution failed: {result.stderr}")
            # Fall back to the file-based approach if direct execution fails
            if self._execute_jsx(self._DISABLE_DIALOGS_JS):
                logger.info("Disabled dialogs in Photoshop using file-based JavaScript")
                return True
            logger.warning("Could not disable dialogs in Photoshop")
            return False
        except Exception as e:
            logger.error(f"Error disabling dialogs: {e}")
            return False

    def close_document(self, save_changes: bool = False) -> bool:
        """Close the active document.

        Args:
            save_changes: Close with ``saving yes`` when True, ``saving no``
                otherwise.

        Returns:
            True when AppleScript reported success, False otherwise.
        """
        try:
            saving = "yes" if save_changes else "no"
            result = self._tell_photoshop(f"close current document saving {saving}")
            if result.returncode != 0:
                logger.error(f"Error closing document: {result.stderr.strip()}")
                return False
            logger.info(f"Closed document (save={save_changes})")
            return True
        except Exception as e:
            logger.error(f"Error closing document: {e}")
            return False
def extract_text_from_psd(psd_path: Path, output_dir: Path) -> Optional[str]:
"""Extract text from a PSD file using the Mac Photoshop controller.

Opens ``psd_path`` in Photoshop, runs the extraction JSX, waits for the
JSON file it writes, and closes the document without saving.

Args:
psd_path: PSD file to process.
output_dir: Directory that is created if missing.  The JSON itself is
written next to the PSD (or to ~/Desktop when that location is not
writable), not into this directory.

Returns:
POSIX path of the resulting JSON file as a string, or None when the PSD
could not be opened, the script could not be run, or no result file could
be located.
"""
# Create output filename in the same directory as the PSD file
# Use the same filename but with -textonly.json suffix
output_filename = f"{psd_path.stem}-textonly.json"
# Place the output file in the same directory as the PSD file
output_path = psd_path.parent / output_filename
# Ensure the output directory exists
os.makedirs(output_dir, exist_ok=True)
# Make sure the output path is writable
# NOTE: the probe opens the target itself in 'w' mode and then removes it, so
# a JSON left over from an earlier run is deleted here.  The wait loop below
# treats "output_path exists" as "the script finished", which only holds
# because of that removal.
test_output = output_path.as_posix()
try:
with open(test_output, 'w') as f:
f.write('test')
os.remove(test_output)
logger.debug(f"Output path is writable: {test_output}")
except Exception as e:
logger.error(f"Output path is not writable: {test_output} - {e}")
# Try using the Desktop as a fallback
output_path = Path(os.path.expanduser("~/Desktop")) / output_filename
logger.info(f"Using fallback output path: {output_path}")
# Constructing the controller launches (or attaches to) Photoshop
ps = MacPhotoshop()
try:
# Open the PSD file
if not ps.open_file(str(psd_path)):
logger.error(f"Failed to open {psd_path}")
return None
# No need to wait extra time here
# Create a modified version of the script that doesn't prompt for save
# by replacing the file dialog code with a direct file path
script_dir = os.path.dirname(os.path.abspath(__file__))
jsx_file_path = os.path.join(script_dir, "ExtractTextWithBreaks.jsx")
# Create a temporary modified version of the JSX script
temp_jsx_path = os.path.expanduser("~/Desktop/temp_extract_text.jsx")
if os.path.exists(jsx_file_path):
# Read the original script
with open(jsx_file_path, "r") as f:
jsx_content = f.read()
# Modify the script to automatically save to our specified location
# This replaces any File.saveDialog code with direct file creation
# NOTE(review): output_path is interpolated into a JS string literal without
# escaping; a path containing a double quote or backslash would produce a
# broken script.
modified_content = jsx_content.replace(
'var n=t.replace(/\\.[^\\.]+$/,"-textonly.json"),o=File.saveDialog("Save text layer data as:",n);if(!o)return;',
f'var n=t.replace(/\\.[^\\.]+$/,"-textonly.json"),o=new File("{output_path.as_posix()}");'
)
# Also check for another possible pattern for the dialog
modified_content = modified_content.replace(
'var o=File.saveDialog("Save text layer data as:",n);if(!o)return;',
f'var o=new File("{output_path.as_posix()}");'
)
# Write the modified script
with open(temp_jsx_path, "w") as f:
f.write(modified_content)
# Run the modified script
logger.info(f"Running modified JSX script from: {temp_jsx_path}")
# Use 'open' command to run the script directly
result = subprocess.run(
["open", "-a", ps.ps_app_name, temp_jsx_path],
check=False, capture_output=True, text=True
)
# A non-zero status is only logged; execution continues to the wait loop
if result.returncode != 0:
logger.error(f"Error running JSX script via open command: {result.stderr}")
else:
logger.info("JSX script executed successfully")
else:
# If the JSX file doesn't exist, fall back to the embedded script
logger.warning(f"JSX file not found at {jsx_file_path}, using embedded script")
# Use a simplified script with direct file path assignment
script_args = {
"OUTPUT_PATH": output_path.as_posix()
}
# Make a copy of the original script with the output path directly set
# NOTE(review): EXTRACT_TEXT_SCRIPT as defined in this file already uses
# File(OUTPUT_PATH) and contains no File.saveDialog call, so this replace
# appears to be a no-op; the path reaches the script through script_args.
modified_script = EXTRACT_TEXT_SCRIPT.replace(
'var n=t.replace(/\\.[^\\.]+$/,"-textonly.json"),o=File.saveDialog("Save text layer data as:",n);if(!o)return;',
f'var n=t.replace(/\\.[^\\.]+$/,"-textonly.json"),o=new File("{output_path.as_posix()}");'
)
# NOTE(review): this early return leaves the PSD open in Photoshop;
# close_document is only called further down.
if not ps.run_jsx_script(modified_script, script_args):
logger.error(f"Failed to run extraction script on {psd_path}")
return None
# Wait for file to be created with a more efficient approach
timeout = 10 # seconds - reduced timeout
start_time = time.time()
check_interval = 0.1 # Check more frequently but with less logging
next_log_time = start_time + 1 # Log only every second
# Check both the output file and the completion signal file
signal_file = Path(output_path.parent) / "complete_signal.tmp"
# Poll until either file appears or the timeout elapses
while not (output_path.exists() or signal_file.exists()) and time.time() - start_time < timeout:
time.sleep(check_interval)
# Only log periodically to reduce overhead
current_time = time.time()
if current_time >= next_log_time:
logger.debug(f"Waiting for output file: {output_path}")
next_log_time = current_time + 1
# Remove the signal file if it exists
if signal_file.exists():
try:
os.remove(signal_file)
logger.debug("Removed completion signal file")
except:
pass
# Close the document
ps.close_document(save_changes=False)
# Clean up the temporary script file
if os.path.exists(temp_jsx_path):
try:
os.remove(temp_jsx_path)
except:
pass
if output_path.exists():
logger.info(f"Successfully saved text to {output_path}")
return output_path.as_posix()
else:
# Check if file was created with a different name or in a different location
logger.warning(f"Output file not created at expected path: {output_path}")
# First check for files on the desktop that might have actual content
desktop_path = Path(os.path.expanduser("~/Desktop"))
# Look for recently created JSON files on desktop with same base name
desktop_base_name = psd_path.stem + "-textonly.json"
desktop_file_path = desktop_path / desktop_base_name
# Only a file modified after the wait started counts as this run's output
if desktop_file_path.exists() and desktop_file_path.stat().st_mtime > start_time:
logger.info(f"Found matching JSON file on desktop: {desktop_file_path}")
# Check if it has content (not empty)
try:
with open(desktop_file_path, 'r') as f:
content = f.read()
# Check if it has text layers
# (plain substring test on the raw text; the JSON is not parsed)
if '"textLayerCount": 0' not in content and '"textLayers": []' not in content:
logger.info("Desktop file appears to have text layer content")
# Try to copy the file to be next to the original PSD file
target_path = psd_path.parent / f"{psd_path.stem}-textonly.json"
import shutil
shutil.copy2(str(desktop_file_path), str(target_path))
logger.info(f"Copied file with text content from {desktop_file_path} to {target_path}")
return target_path.as_posix()
except Exception as e:
logger.error(f"Error checking desktop file: {e}")
# Check if there's a signal file even if the JSON wasn't created
signal_file_on_desktop = desktop_path / "complete_signal.tmp"
if signal_file_on_desktop.exists():
logger.info("Found completion signal file but no output file - possibly a document with no text layers")
try:
os.remove(signal_file_on_desktop)
except:
pass
# Create an empty JSON file at the expected location, but only if we couldn't find content
# The placeholder has the same top-level keys as the script's output, with
# zero dimensions and no text layers
try:
empty_json = {
"documentName": psd_path.name,
"psdPath": str(psd_path),
"extractedAt": time.strftime("%Y-%m-%d %H:%M:%S"),
"dimensions": {"width": 0, "height": 0},
"textLayerCount": 0,
"textLayers": []
}
with open(output_path, 'w') as f:
json.dump(empty_json, f, indent=2)
logger.info(f"Created empty JSON result for document with no text layers at {output_path}")
return output_path.as_posix()
except Exception as e:
logger.error(f"Failed to create empty JSON file: {e}")
# Look for recently created JSON files
recent_json_files = [f for f in desktop_path.glob("*.json")
if f.stat().st_mtime > start_time]
if recent_json_files:
logger.info(f"Found potentially related JSON files: {recent_json_files}")
# Find the first file that matches our PSD name pattern or has non-empty content
best_match = None
for json_file in recent_json_files:
# Check if the filename matches our PSD (highest priority)
if psd_path.stem in json_file.stem:
best_match = json_file
logger.info(f"Found JSON file matching PSD name: {json_file}")
break
# Check file content for text layers
try:
with open(json_file, 'r') as f:
content = f.read()
# Check if it has text layers
if '"textLayerCount": 0' not in content and '"textLayers": []' not in content:
best_match = json_file
logger.info(f"Found JSON file with text layer content: {json_file}")
break
except:
pass
# If no best match found, just use the first file
if not best_match and recent_json_files:
best_match = recent_json_files[0]
logger.info(f"Using first available JSON file: {best_match}")
# Try to move the file to be next to the original PSD file
if best_match:
try:
# Create a destination file path next to the original PSD
target_path = psd_path.parent / f"{psd_path.stem}-textonly.json"
# Copy the file to be next to the PSD
import shutil
shutil.copy2(str(best_match), str(target_path))
logger.info(f"Copied file from {best_match} to {target_path}")
# Return the new path
return target_path.as_posix()
except Exception as e:
logger.error(f"Failed to copy file to output directory: {e}")
# Return the original file path if copy fails
return best_match.as_posix()
return None
except Exception as e:
logger.error(f"Error extracting text from {psd_path}: {str(e)}")
return None
def batch_extract_text(input_dir: str, output_dir: Optional[str] = None, recursive: bool = False) -> List[str]:
    """Extract text from all PSD files in the input directory.

    Args:
        input_dir: Directory searched for ``*.psd`` files.  Matching is done
            with ``Path.glob`` and the lowercase pattern ``*.psd``.
        output_dir: Optional directory; created if missing and passed on to
            ``extract_text_from_psd``.  When omitted, the resolved
            ``input_dir`` is passed instead.
        recursive: Also search sub-directories (``**/*.psd``).

    Returns:
        The result paths returned by ``extract_text_from_psd`` for every PSD
        that produced one, in sorted PSD-path order.
    """
    input_path = Path(input_dir).resolve()
    if output_dir:
        output_path = Path(output_dir).resolve()
        # exist_ok avoids the race between an exists() check and mkdir().
        output_path.mkdir(parents=True, exist_ok=True)
    else:
        output_path = input_path  # Default to same as input

    # Debug aid: list what the directory holds, matching the extension
    # case-insensitively, to compare against what the glob below finds.
    logger.debug("Listing all files in directory to debug:")
    try:
        direct_psds = [name for name in os.listdir(input_dir) if name.lower().endswith('.psd')]
        logger.debug(f"Found {len(direct_psds)} PSD files directly: {direct_psds}")
    except Exception as e:
        logger.error(f"Error listing files in directory: {e}")

    # Sorted so that files are processed in a reproducible order.
    pattern = '**/*.psd' if recursive else '*.psd'
    psd_files = sorted(input_path.glob(pattern))
    if not psd_files:
        logger.warning(f"No PSD files found in {input_path}")
        return []
    logger.info(f"Found {len(psd_files)} PSD files to process")

    # Extract text from each PSD file, keeping only the successful results
    results = []
    for psd_file in psd_files:
        logger.debug(f"Processing file: {psd_file}")
        result = extract_text_from_psd(psd_file, output_path)
        if result:
            results.append(result)
    logger.info(f"Successfully processed {len(results)} of {len(psd_files)} files")
    return results
def parse_arguments():
    """Build the command-line parser and return the parsed ``sys.argv``.

    Returns:
        An ``argparse.Namespace`` with ``input_dir``, ``output_dir``
        (``None`` when not given), ``recursive`` and ``verbose``.
    """
    usage_examples = (
        "\n"
        "Examples:\n"
        "# Extract text from all PSD files in the current directory\n"
        "python mac_ps_extract.py .\n"
        "# Extract text from all PSD files in a specific directory\n"
        "python mac_ps_extract.py /path/to/psd_files\n"
        "# Extract text and save JSON files to a different directory\n"
        "python mac_ps_extract.py /path/to/psd_files -o /path/to/output\n"
        "# Extract text from all PSD files including subdirectories\n"
        "python mac_ps_extract.py /path/to/psd_files -r\n"
    )
    cli = argparse.ArgumentParser(
        description='Extract text from PSD files on macOS',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_examples,
    )

    # One positional argument followed by the optional switches.
    cli.add_argument('input_dir', help='Directory containing PSD files')
    optional_switches = (
        (('--output-dir', '-o'),
         {'default': None,
          'help': 'Directory to save extracted JSON files (defaults to input_dir)'}),
        (('--recursive', '-r'),
         {'action': 'store_true',
          'help': 'Search for PSD files in subdirectories'}),
        (('--verbose', '-v'),
         {'action': 'store_true',
          'help': 'Enable verbose logging'}),
    )
    for flags, settings in optional_switches:
        cli.add_argument(*flags, **settings)

    return cli.parse_args()
def main():
    """Command-line entry point: extract text from every PSD in a directory.

    Parses the arguments, runs ``batch_extract_text`` and prints a summary.

    Returns:
        Process exit status: 0 when at least one PSD produced a result,
        1 when none did (including when no PSD file was found).
    """
    args = parse_arguments()
    input_dir = args.input_dir
    output_dir = args.output_dir  # This can be None - files will be placed next to PSDs

    # Set logging level based on verbose flag.  The only handler is the one
    # logging.basicConfig put on the root logger, so lowering this logger's
    # level is all that is needed for DEBUG records to be emitted.
    if args.verbose:
        logger.setLevel(logging.DEBUG)

    logger.info(f"Processing PSD files from: {input_dir}")
    if output_dir:
        logger.info(f"Saving extracted text to: {output_dir}")
    else:
        logger.info("Saving extracted text next to PSD files")
    logger.info(f"Recursive search: {args.recursive}")

    # Counted here only for the summary; batch_extract_text performs its own
    # discovery with the same pattern.
    pattern = '**/*.psd' if args.recursive else '*.psd'
    psd_files = list(Path(input_dir).resolve().glob(pattern))

    # Process the files
    results = batch_extract_text(input_dir, output_dir, args.recursive)

    if results:
        logger.info(f"Extraction complete. Processed {len(results)} of {len(psd_files)} files:")
        for result in results:
            logger.info(f" - {result}")
        print(f"\nSuccessfully processed {len(results)} of {len(psd_files)} PSD files.")
        if output_dir:
            print(f"JSON files saved to: {output_dir}")
        else:
            print("JSON files saved next to their PSD files")
        print("\nNaming convention: [psd_filename]-textonly.json")
        return 0

    logger.warning("No text was extracted from any files.")
    print("\nNo PSD files were processed successfully.")
    if psd_files:
        print(f"Found {len(psd_files)} PSD files but none could be processed:")
        for f in psd_files[:5]:  # Show only first 5 to avoid overwhelming output
            print(f" - {f.name}")
        if len(psd_files) > 5:
            print(f" ... and {len(psd_files) - 5} more")
        print("\nCheck for errors in the log or try running with -v for verbose output.")
    else:
        print("\nNo PSD files were found. Check the input directory or enable recursive search with -r.")
    return 1


if __name__ == "__main__":
    # Propagate main()'s status so callers can detect a failed run.
    sys.exit(main())