adobe-ps-scripts-loreal/batch_extract_text.py
DJP 4a192a8c97 Initial commit: Adobe Photoshop API text management scripts
Local and cloud-based workflows for extracting and updating
text layers in PSD files via ExtendScript and Adobe PS API.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-02 13:46:52 -05:00

511 lines
No EOL
25 KiB
Python

#!/usr/bin/env python3
"""
Batch Text Extractor for Photoshop
----------------------------------
This script automates extracting text layers from multiple PSD files in a folder.
It uses the ExtractTextWithBreaks.jsx script to get text with formatting preserved.
Requirements:
- Python 3.6+
- Adobe Photoshop installed
- photoshop_python_api package (install with: pip install photoshop_python_api)
"""
import argparse
import json
import logging
import os
import platform
import sys
import time
from pathlib import Path
from typing import Any, Dict, List, Optional
# Check platform
is_windows = platform.system() == "Windows"
is_mac = platform.system() == "Darwin"
if is_mac:
# On macOS, we need to ensure we're using the right Python environment
try:
# Try to fix Mac-specific PATH issues
if "PYTHONPATH" not in os.environ:
os.environ["PYTHONPATH"] = ""
# Add the site-packages directory to path - this helps with venv environments
import site
site_packages = site.getsitepackages()
for site_path in site_packages:
if site_path not in sys.path:
sys.path.append(site_path)
print(f"Added {site_path} to Python path")
except Exception as e:
print(f"Warning: Could not set paths: {e}")
# Print some debug info on Mac
print(f"Python: {sys.version}")
print(f"System: {platform.system()} {platform.release()}")
print(f"Site packages: {', '.join(site.getsitepackages())}")
# First try to import the photoshop module - the import might be in different places
# depending on how the package was installed
found_ps_module = False
# Try to directly import the photoshop module - work around importing issues by adding to sys.modules
import importlib
import inspect
# Print path information to help debug
print("Python module search paths:")
for path in sys.path:
if 'site-packages' in path:
print(f" - {path}")
# Try to directly locate the module file
found_ps_path = None
for path in sys.path:
ps_module_path = os.path.join(path, 'photoshop')
if os.path.exists(ps_module_path):
found_ps_path = ps_module_path
print(f"Found photoshop module at: {ps_module_path}")
break
if found_ps_path:
# Check for specific module structure
init_path = os.path.join(found_ps_path, '__init__.py')
if os.path.exists(init_path):
with open(init_path, 'r') as f:
init_content = f.read()
print(f"Found __init__.py, size: {len(init_content)} bytes")
# Attempt various import strategies, trying to be very flexible
try_imports = [
# Standard import
lambda: exec('from photoshop import Session'),
# Alternative path
lambda: exec('from photoshop.api import Session'),
# Manual import construction
lambda: exec('import importlib; photoshop = importlib.import_module("photoshop"); Session = photoshop.Session'),
# Import for photoshop-python-api (with hyphen)
lambda: exec('import photoshop_python_api as photoshop; Session = photoshop.Session')
]
found_ps_module = False
last_error = None
for importer in try_imports:
try:
importer()
found_ps_module = True
break
except Exception as e:
last_error = str(e)
print(f"Import attempt failed: {e}")
continue
# Last resort - try to manually find and load the module
if not found_ps_module:
print("Attempting manual module discovery...")
try:
import subprocess
# Find where the module is installed
result = subprocess.run(
[sys.executable, "-m", "pip", "show", "photoshop-python-api"],
capture_output=True, text=True
)
if result.returncode == 0:
location_line = [line for line in result.stdout.split('\n') if line.startswith('Location:')]
if location_line:
location = location_line[0].split('Location:')[1].strip()
print(f"Package location: {location}")
# Add to Python path
if location not in sys.path:
sys.path.append(location)
print(f"Added {location} to Python path")
# Try importing again after adjusting the path
try:
from photoshop import Session
found_ps_module = True
print("Successfully imported after path adjustment")
except ImportError as e:
print(f"Still cannot import after path adjustment: {e}")
except Exception as e:
print(f"Manual module discovery failed: {e}")
# If the import failed because of 'winreg' on macOS, try creating a compatibility layer
if not found_ps_module and is_mac and "No module named 'winreg'" in str(last_error):
print("Detected 'winreg' compatibility issue on macOS.")
print("Creating a compatibility layer for Windows-specific modules...")
# Create a mock winreg module to satisfy the import
try:
import types
# Create a fake winreg module
mock_winreg = types.ModuleType("winreg")
# Add necessary constants and functions
mock_winreg.HKEY_CURRENT_USER = 0
mock_winreg.HKEY_LOCAL_MACHINE = 1
mock_winreg.KEY_ALL_ACCESS = 2
# Add mock functions
def mock_open_key(*args, **kwargs):
return None
def mock_query_value(*args, **kwargs):
# Return default Photoshop path for macOS
return "/Applications/Adobe Photoshop 2025/Adobe Photoshop 2025.app"
def mock_close_key(*args, **kwargs):
pass
# Attach functions to the mock module
mock_winreg.OpenKey = mock_open_key
mock_winreg.QueryValueEx = mock_query_value
mock_winreg.CloseKey = mock_close_key
# Add the mock module to sys.modules
sys.modules["winreg"] = mock_winreg
print("Mock winreg module created. Trying to import photoshop again...")
# Try importing again
try:
from photoshop import Session
found_ps_module = True
print("Successfully imported after adding compatibility layer")
except Exception as e:
print(f"Still cannot import after adding compatibility layer: {e}")
except Exception as e:
print(f"Failed to create compatibility layer: {e}")
# Create a custom Session class for macOS if needed
if not found_ps_module and is_mac:
print("Attempting to create a custom Photoshop Session class for macOS...")
try:
# Define a basic Session class that will work on macOS
class Session:
def __init__(self, ps_version=None):
self.app = None
self.version = ps_version or "2023"
# Try to locate Photoshop on macOS
ps_paths = [
f"/Applications/Adobe Photoshop {self.version}/Adobe Photoshop {self.version}.app",
f"/Applications/Adobe Photoshop {self.version}/Adobe Photoshop.app",
f"/Applications/Adobe Photoshop/Adobe Photoshop.app",
"/Applications/Adobe Photoshop CC 2025/Adobe Photoshop CC 2025.app",
"/Applications/Adobe Photoshop 2025/Adobe Photoshop 2025.app",
"/Applications/Adobe Photoshop 2024/Adobe Photoshop 2024.app"
]
self.ps_path = None
for path in ps_paths:
if os.path.exists(path):
self.ps_path = path
print(f"Found Photoshop at: {path}")
break
if not self.ps_path:
print("Warning: Couldn't find Photoshop application path")
# Initialize the app through AppleScript
self._initialize_app()
def _initialize_app(self):
try:
import subprocess
# Check if Photoshop is running
ps_running_script = """
tell application "System Events"
set isRunning to (count of (every process whose name is "Adobe Photoshop")) > 0
end tell
"""
# Launch Photoshop if needed
launch_script = f"""
tell application "Adobe Photoshop"
activate
end tell
"""
# Execute AppleScript to launch Photoshop
subprocess.run(["osascript", "-e", launch_script], check=True)
print("Photoshop launched successfully")
# Create a simple app object with the required methods
class PhotoshopApp:
def __init__(self):
self.activeDocument = None
def Open(self, file_path):
print(f"Opening file: {file_path}")
open_script = f"""
tell application "Adobe Photoshop"
open POSIX file "{file_path}"
end tell
"""
subprocess.run(["osascript", "-e", open_script], check=True)
self.activeDocument = self.ActiveDocument()
return True
def DoJavaScript(self, script):
# Save the script to a temporary file
script_path = os.path.expanduser("~/Desktop/temp_ps_script.jsx")
with open(script_path, "w") as f:
f.write(script)
# Run the script
run_script = f"""
tell application "Adobe Photoshop"
do javascript POSIX file "{script_path}"
end tell
"""
subprocess.run(["osascript", "-e", run_script], check=True)
# Clean up the temporary file
os.remove(script_path)
return True
class ActiveDocument:
def __init__(self):
pass
def Close(self, save_option):
close_script = f"""
tell application "Adobe Photoshop"
close current document saving {'yes' if save_option == 3 else 'no'}
end tell
"""
subprocess.run(["osascript", "-e", close_script], check=True)
self.app = PhotoshopApp()
except Exception as e:
print(f"Error initializing Photoshop on macOS: {e}")
self.app = None
def __enter__(self):
return self
def __exit__(self, exc_type, exc_val, exc_tb):
pass # We'll let AppleScript handle the cleanup
# Set the Session class
found_ps_module = True
print("Created custom macOS Session class for Photoshop")
except Exception as e:
print(f"Failed to create custom Session class: {e}")
# If all attempts fail, notify the user
if not found_ps_module:
print("Error: Could not import the Photoshop Python API.")
print(f"Last error: {last_error}")
print("\nTroubleshooting steps:")
print("1. Verify the package is installed: pip list | grep photoshop")
print("2. Try reinstalling: pip uninstall photoshop-python-api; pip install photoshop-python-api")
print("3. Check if you're using the right version of Python")
print("4. On macOS, the photoshop-python-api package might not be compatible")
print(" - The package was designed for Windows and uses Windows-specific modules")
sys.exit(1)
else:
print("Successfully imported or created Photoshop API session handler")
# Configure logging
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s',
datefmt='%Y-%m-%d %H:%M:%S'
)
logger = logging.getLogger(__name__)
# The ExtractTextWithBreaks.jsx script as a string (minified version)
EXTRACT_TEXT_SCRIPT = r"""
// Photoshop Script to Extract Text Layers With Exact Line Breaks
#target photoshop
function writeTextFile(e,t){e.encoding="UTF8",e.open("w"),e.write(t),e.close()}function escapeJsonString(e){return e?e.replace(/\\/g,"\\\\").replace(/"/g,'\\"').replace(/\n/g,"\\n").replace(/\r/g,"\\r").replace(/\t/g,"\\t").replace(/\f/g,"\\f"):""}function extractTextLayers(e){function t(e,r){r=r||"";for(var n=0;n<e.length;n++){var o=e[n],a=r?r+"/"+o.name:o.name;if(o.kind===LayerKind.TEXT){$.writeln("Found text layer: "+o.name);try{var s=o.textItem.contents;$.writeln("Text: "+s);var i=12;try{if(o.textItem.size){var l=o.textItem.size.toString();(i=parseInt(l,10))&&isNaN(i)&&(i=12)}}catch(e){$.writeln("Could not get font size: "+e)}var c=[];try{app.activeDocument.activeLayer=o;$.writeln("Extracting detailed text formatting for layer: "+o.name);var f=s.split(/[\r\n]/);$.writeln("Text has "+f.length+" paragraphs"),f.length>1?($.writeln("Multi-paragraph text detected - treating each paragraph separately"),function(){for(var e=0,t=0;t<f.length;t++){var r=f[t];if(0!==r.length){var n=e,l=n+r.length;$.writeln("Paragraph "+(t+1)+" ["+n+"-"+l+']: "'+r.substring(0,Math.min(20,r.length))+(r.length>20?"...":"")+"\"");var d=0===t;c.push({start:n,end:l,text:r,font:o.textItem.font||"Unknown",style:d?"Bold":"Regular",size:i,color:d?[0,0,0]:[80,80,80],isPrimary:d}),e=l,t<f.length-1&&e++}}c.length>1&&($.writeln("Created "+c.length+" different style entries for paragraphs"),window.forceRichTextFormatting=!0)}()):($.writeln("Single paragraph text - checking for character-level formatting"),function(){var e=new ActionReference;e.putEnumerated(charIDToTypeID("Lyr "),charIDToTypeID("Ordn"),charIDToTypeID("Trgt"));var t=executeActionGet(e);if(t.hasKey(stringIDToTypeID("textKey"))){var r=t.getObjectValue(stringIDToTypeID("textKey"));if(r.hasKey(stringIDToTypeID("textStyleRange"))){var n=r.getList(stringIDToTypeID("textStyleRange"));$.writeln("Found "+n.count+" text style ranges");for(var a=0;a<n.count;a++)try{var 
l=n.getObjectValue(a),f=l.getObjectValue(stringIDToTypeID("from")),d=l.getObjectValue(stringIDToTypeID("textStyle")),p=f.getInteger(stringIDToTypeID("from")),g=f.getInteger(stringIDToTypeID("to")),u=s.substring(p,g),y=o.textItem.font||"Unknown",m="Regular",T=null,h=i;d.hasKey(stringIDToTypeID("fontName"))&&(y=d.getString(stringIDToTypeID("fontName"))),d.hasKey(stringIDToTypeID("fontStyleName"))&&(m=d.getString(stringIDToTypeID("fontStyleName"))),d.hasKey(stringIDToTypeID("size"))&&(h=d.getDouble(stringIDToTypeID("size"))),c.push({start:p,end:g,text:u,font:y,style:m,size:h,color:T})}catch(e){$.writeln("Error processing style range: "+e)}}}()}),0===c.length&&($.writeln("No style ranges detected, adding default style for entire text"),c.push({start:0,end:s.length,text:s,font:o.textItem.font||"Unknown",style:"Regular",size:i,color:null})),$.writeln("Total style ranges found: "+c.length)}catch(e){$.writeln("Could not extract text styles: "+e),c.push({start:0,end:s.length,text:s,font:o.textItem.font||"Unknown",style:"Regular",size:i,color:null})}d.push({id:"",name:o.name,path:a,text:s,updatedText:s,visible:o.visible,styleInfo:{font:o.textItem.font||"Unknown",size:i,color:null,alignment:"left",styles:c},hasRichTextFormatting:function(){var e=s.split(/[\r\n]/).length;return e>1?($.writeln("Multi-paragraph text found: "+e+" paragraphs, marking as rich formatted"),!0):c.length>1?($.writeln("Multiple style ranges found, marking as rich formatted"),!0):function(){for(var e=0;e<c.length;e++)if(c[e].color)return $.writeln("Color information found in style, marking as rich formatted"),!0;return!1}()||window.forceRichTextFormatting||(e=["","","","*",":","|"],t=!1,e.forEach(function(e){-1!==s.indexOf(e)&&($.writeln("Found formatting indicator character: "+e),t=!0)}),t);var e,t}()})}catch(e){$.writeln("Error extracting from layer "+o.name+": "+e)}}o.typename==="LayerSet"&&t(o.layers,a)}}var d=[];return t(e.layers),d}function main(){try{if(!documents.length)return void alert("Please 
open a PSD file before running this script.");var e=app.activeDocument,t=e.name;$.writeln("Extracting text layers from: "+t);var r=extractTextLayers(e);if(0===r.length)return void alert("No text layers found in this document.");$.writeln("Found "+r.length+" text layer(s)");var n=t.replace(/\.[^\.]+$/,"-text.json"),o=File.saveDialog("Save text layer data as:",n);if(!o)return;var a="{\n";a+=' "documentName": "'+escapeJsonString(t)+'",\n',a+=' "psdPath": "'+escapeJsonString(e.path?e.path+"/"+e.name:e.name)+'",\n',a+=' "extractedAt": "'+new Date.toString()+'",\n',a+=' "dimensions": {\n';var s=0,i=0;try{e.width&&(s=parseInt(e.width.toString(),10)),e.height&&(i=parseInt(e.height.toString(),10))}catch(e){$.writeln("Error getting dimensions: "+e)}a+=" \"width\": "+s+",\n",a+=" \"height\": "+i+"\n",a+=" },\n",a+=' "textLayerCount": '+r.length+",\n",a+=' "textLayers": [\n';for(var l=0;l<r.length;l++){var c=r[l];a+=" {\n",a+=' "id": "",\n',a+=' "name": "'+escapeJsonString(c.name)+'",\n',a+=' "path": "'+escapeJsonString(c.path)+'",\n',a+=' "text": "'+escapeJsonString(c.text)+'",\n',a+=' "updatedText": "'+escapeJsonString(c.text)+'",\n',a+=' "visible": '+(c.visible?"true":"false")+",\n",a+=' "styleInfo": {\n',a+=' "font": "'+escapeJsonString(c.styleInfo.font)+'",\n',a+=' "size": '+c.styleInfo.size+",\n",a+=' "color": null,\n',a+=' "alignment": "left",\n',a+=' "styles": [\n';if(c.styleInfo.styles&&c.styleInfo.styles.length>0)for(var f=0;f<c.styleInfo.styles.length;f++){var d=c.styleInfo.styles[f];a+=" {\n",a+=' "start": '+d.start+",\n",a+=' "end": '+d.end+",\n",a+=' "text": "'+escapeJsonString(d.text)+'",\n',a+=' "font": "'+escapeJsonString(d.font)+'",\n',a+=' "style": "'+escapeJsonString(d.style)+'",\n',a+=' "size": '+d.size,d.color&&d.color.length?(a+=",\n",a+=' "color": ['+d.color.join(", ")+"]\n"):a+="\n",a+=" }"+(f<c.styleInfo.styles.length-1?",\n":"\n")}a+=" ]\n",a+=" },\n",a+=' "hasRichTextFormatting": '+(c.hasRichTextFormatting?"true":"false")+"\n",a+=" 
}"+(l<r.length-1?",\n":"\n")}a+=" ]\n",a+="}",writeTextFile(o,a);var p="Extracted "+r.length+' text layers from document "'+t+'".\n\n';p+="Text data saved to: "+o.fsName,alert(p)}catch(e){alert("Error: "+e.message)}}main();
"""
def extract_text_from_psd(ps_app, psd_path: Path, output_dir: Path) -> str:
"""
Opens a PSD file in Photoshop and extracts text layers using the JSX script.
Args:
ps_app: The Photoshop application instance
psd_path: Path to the PSD file
output_dir: Directory to save the extracted JSON
Returns:
Path to the saved JSON file or None if extraction failed
"""
# Create output filename (same as PSD but with -textonly.json suffix)
output_filename = f"{psd_path.stem}-textonly.json"
output_path = output_dir / output_filename
try:
# Open the PSD file
logger.info(f"Opening {psd_path}")
ps_app.Open(psd_path.as_posix())
# Modify the script to automatically save to our output path with -textonly.json suffix
modified_script = EXTRACT_TEXT_SCRIPT.replace(
'var n=t.replace(/\\.[^\\.]+$/, "-text.json");',
f'var n=t.replace(/\\.[^\\.]+$/, "-textonly.json");'
)
# Also replace the file dialog with direct file saving
modified_script = modified_script.replace(
'var o=File.saveDialog("Save text layer data as:",n);if(!o)return;',
f'var o=new File("{output_path.as_posix().replace("\\", "\\\\")}");'
)
# Execute the script
logger.info(f"Extracting text from {psd_path.name}")
ps_app.DoJavaScript(modified_script)
# Wait for file to be created
timeout = 10 # seconds
start_time = time.time()
while not output_path.exists() and time.time() - start_time < timeout:
time.sleep(0.5)
if output_path.exists():
logger.info(f"Successfully saved text to {output_path}")
return output_path.as_posix()
else:
logger.warning(f"Failed to extract text from {psd_path.name} (timeout)")
return None
except Exception as e:
logger.error(f"Error extracting text from {psd_path.name}: {str(e)}")
return None
finally:
# Close the document without saving
try:
ps_app.ActiveDocument.Close(2) # 2 = Don't save changes
except:
pass
def batch_extract_text(input_dir: str, output_dir: str, recursive: bool = False) -> List[str]:
"""
Processes all PSD files in the input directory and extracts text layers.
Args:
input_dir: Directory containing PSD files
output_dir: Directory to save extracted JSON files
recursive: Whether to search for PSD files in subdirectories
Returns:
List of paths to the saved JSON files
"""
input_path = Path(input_dir).resolve()
output_path = Path(output_dir).resolve()
# Create output directory if it doesn't exist
if not output_path.exists():
output_path.mkdir(parents=True)
# Find all PSD files
pattern = '**/*.psd' if recursive else '*.psd'
psd_files = list(input_path.glob(pattern))
if not psd_files:
logger.warning(f"No PSD files found in {input_path}")
return []
logger.info(f"Found {len(psd_files)} PSD files to process")
# Extract text from each PSD file
results = []
with Session() as ps:
app = ps.app
for psd_file in psd_files:
result = extract_text_from_psd(app, psd_file, output_path)
if result:
results.append(result)
logger.info(f"Successfully processed {len(results)} of {len(psd_files)} files")
return results
def parse_arguments():
"""Parse command line arguments"""
parser = argparse.ArgumentParser(
description='Batch extract text from PSD files',
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog="""
Examples:
# Extract text from all PSD files in the current directory
python batch_extract_text.py .
# Extract text from all PSD files in a specific directory
python batch_extract_text.py /path/to/psd_files
# Extract text and save JSON files to a different directory
python batch_extract_text.py /path/to/psd_files -o /path/to/output
# Extract text from all PSD files including subdirectories
python batch_extract_text.py /path/to/psd_files -r
"""
)
parser.add_argument('input_dir',
help='Directory containing PSD files')
parser.add_argument('--output-dir', '-o', default=None,
help='Directory to save extracted JSON files (defaults to input_dir)')
parser.add_argument('--recursive', '-r', action='store_true',
help='Search for PSD files in subdirectories')
parser.add_argument('--verbose', '-v', action='store_true',
help='Enable verbose logging')
return parser.parse_args()
def main():
"""Main function"""
args = parse_arguments()
input_dir = args.input_dir
output_dir = args.output_dir or input_dir
# Set logging level based on verbose flag
if args.verbose:
logger.setLevel(logging.DEBUG)
# Set log format to include more details
for handler in logger.handlers:
handler.setFormatter(logging.Formatter(
'%(asctime)s - %(levelname)s - %(message)s',
'%Y-%m-%d %H:%M:%S'
))
logger.info(f"Processing PSD files from: {input_dir}")
logger.info(f"Saving extracted text to: {output_dir}")
logger.info(f"Recursive search: {args.recursive}")
results = batch_extract_text(input_dir, output_dir, args.recursive)
if results:
logger.info(f"Extraction complete. Processed {len(results)} files:")
for result in results:
logger.info(f" - {result}")
print(f"\nSuccessfully extracted text from {len(results)} PSD files.")
print(f"JSON files saved to: {output_dir}")
print("\nNaming convention: [psd_filename]-textonly.json")
else:
logger.warning("No text was extracted from any files.")
print("\nNo PSD files were processed. Check the input directory or enable recursive search with -r.")
if __name__ == "__main__":
main()