#!/usr/bin/env python3 """ Batch Text Extractor for Photoshop ---------------------------------- This script automates extracting text layers from multiple PSD files in a folder. It uses the ExtractTextWithBreaks.jsx script to get text with formatting preserved. Requirements: - Python 3.6+ - Adobe Photoshop installed - photoshop_python_api package (install with: pip install photoshop_python_api) """ import os import sys import time import json import argparse import platform from pathlib import Path import logging from typing import List, Dict, Any # Check platform is_windows = platform.system() == "Windows" is_mac = platform.system() == "Darwin" if is_mac: # On macOS, we need to ensure we're using the right Python environment try: # Try to fix Mac-specific PATH issues if "PYTHONPATH" not in os.environ: os.environ["PYTHONPATH"] = "" # Add the site-packages directory to path - this helps with venv environments import site site_packages = site.getsitepackages() for site_path in site_packages: if site_path not in sys.path: sys.path.append(site_path) print(f"Added {site_path} to Python path") except Exception as e: print(f"Warning: Could not set paths: {e}") # Print some debug info on Mac print(f"Python: {sys.version}") print(f"System: {platform.system()} {platform.release()}") print(f"Site packages: {', '.join(site.getsitepackages())}") # First try to import the photoshop module - the import might be in different places # depending on how the package was installed found_ps_module = False # Try to directly import the photoshop module - work around importing issues by adding to sys.modules import importlib import inspect # Print path information to help debug print("Python module search paths:") for path in sys.path: if 'site-packages' in path: print(f" - {path}") # Try to directly locate the module file found_ps_path = None for path in sys.path: ps_module_path = os.path.join(path, 'photoshop') if os.path.exists(ps_module_path): found_ps_path = ps_module_path print(f"Found photoshop module at: {ps_module_path}") break if found_ps_path: # Check for specific module structure init_path = os.path.join(found_ps_path, '__init__.py') if os.path.exists(init_path): with open(init_path, 'r') as f: init_content = f.read() print(f"Found __init__.py, size: {len(init_content)} bytes") # Attempt various import strategies, trying to be very flexible try_imports = [ # Standard import lambda: exec('from photoshop import Session'), # Alternative path lambda: exec('from photoshop.api import Session'), # Manual import construction lambda: exec('import importlib; photoshop = importlib.import_module("photoshop"); Session = photoshop.Session'), # Import for photoshop-python-api (with hyphen) lambda: exec('import photoshop_python_api as photoshop; Session = photoshop.Session') ] found_ps_module = False last_error = None for importer in try_imports: try: importer() found_ps_module = True break except Exception as e: last_error = str(e) print(f"Import attempt failed: {e}") continue # Last resort - try to manually find and load the module if not found_ps_module: print("Attempting manual module discovery...") try: import subprocess # Find where the module is installed result = subprocess.run( [sys.executable, "-m", "pip", "show", "photoshop-python-api"], capture_output=True, text=True ) if result.returncode == 0: location_line = [line for line in result.stdout.split('\n') if line.startswith('Location:')] if location_line: location = location_line[0].split('Location:')[1].strip() print(f"Package location: {location}") # Add to Python path if location not in sys.path: sys.path.append(location) print(f"Added {location} to Python path") # Try importing again after adjusting the path try: from photoshop import Session found_ps_module = True print("Successfully imported after path adjustment") except ImportError as e: print(f"Still cannot import after path adjustment: {e}") except Exception as e: print(f"Manual module discovery failed: {e}") # If the import failed because of 'winreg' on macOS, try creating a compatibility layer if not found_ps_module and is_mac and "No module named 'winreg'" in str(last_error): print("Detected 'winreg' compatibility issue on macOS.") print("Creating a compatibility layer for Windows-specific modules...") # Create a mock winreg module to satisfy the import try: import types # Create a fake winreg module mock_winreg = types.ModuleType("winreg") # Add necessary constants and functions mock_winreg.HKEY_CURRENT_USER = 0 mock_winreg.HKEY_LOCAL_MACHINE = 1 mock_winreg.KEY_ALL_ACCESS = 2 # Add mock functions def mock_open_key(*args, **kwargs): return None def mock_query_value(*args, **kwargs): # Return default Photoshop path for macOS return "/Applications/Adobe Photoshop 2025/Adobe Photoshop 2025.app" def mock_close_key(*args, **kwargs): pass # Attach functions to the mock module mock_winreg.OpenKey = mock_open_key mock_winreg.QueryValueEx = mock_query_value mock_winreg.CloseKey = mock_close_key # Add the mock module to sys.modules sys.modules["winreg"] = mock_winreg print("Mock winreg module created. Trying to import photoshop again...") # Try importing again try: from photoshop import Session found_ps_module = True print("Successfully imported after adding compatibility layer") except Exception as e: print(f"Still cannot import after adding compatibility layer: {e}") except Exception as e: print(f"Failed to create compatibility layer: {e}") # Create a custom Session class for macOS if needed if not found_ps_module and is_mac: print("Attempting to create a custom Photoshop Session class for macOS...") try: # Define a basic Session class that will work on macOS class Session: def __init__(self, ps_version=None): self.app = None self.version = ps_version or "2023" # Try to locate Photoshop on macOS ps_paths = [ f"/Applications/Adobe Photoshop {self.version}/Adobe Photoshop {self.version}.app", f"/Applications/Adobe Photoshop {self.version}/Adobe Photoshop.app", f"/Applications/Adobe Photoshop/Adobe Photoshop.app", "/Applications/Adobe Photoshop CC 2025/Adobe Photoshop CC 2025.app", "/Applications/Adobe Photoshop 2025/Adobe Photoshop 2025.app", "/Applications/Adobe Photoshop 2024/Adobe Photoshop 2024.app" ] self.ps_path = None for path in ps_paths: if os.path.exists(path): self.ps_path = path print(f"Found Photoshop at: {path}") break if not self.ps_path: print("Warning: Couldn't find Photoshop application path") # Initialize the app through AppleScript self._initialize_app() def _initialize_app(self): try: import subprocess # Check if Photoshop is running ps_running_script = """ tell application "System Events" set isRunning to (count of (every process whose name is "Adobe Photoshop")) > 0 end tell """ # Launch Photoshop if needed launch_script = f""" tell application "Adobe Photoshop" activate end tell """ # Execute AppleScript to launch Photoshop subprocess.run(["osascript", "-e", launch_script], check=True) print("Photoshop launched successfully") # Create a simple app object with the required methods class PhotoshopApp: def __init__(self): self.activeDocument = None def Open(self, file_path): print(f"Opening file: {file_path}") open_script = f""" tell application "Adobe Photoshop" open POSIX file "{file_path}" end tell """ subprocess.run(["osascript", "-e", open_script], check=True) self.activeDocument = self.ActiveDocument() return True def DoJavaScript(self, script): # Save the script to a temporary file script_path = os.path.expanduser("~/Desktop/temp_ps_script.jsx") with open(script_path, "w") as f: f.write(script) # Run the script run_script = f""" tell application "Adobe Photoshop" do javascript POSIX file "{script_path}" end tell """ subprocess.run(["osascript", "-e", run_script], check=True) # Clean up the temporary file os.remove(script_path) return True class ActiveDocument: def __init__(self): pass def Close(self, save_option): close_script = f""" tell application "Adobe Photoshop" close current document saving {'yes' if save_option == 3 else 'no'} end tell """ subprocess.run(["osascript", "-e", close_script], check=True) self.app = PhotoshopApp() except Exception as e: print(f"Error initializing Photoshop on macOS: {e}") self.app = None def __enter__(self): return self def __exit__(self, exc_type, exc_val, exc_tb): pass # We'll let AppleScript handle the cleanup # Set the Session class found_ps_module = True print("Created custom macOS Session class for Photoshop") except Exception as e: print(f"Failed to create custom Session class: {e}") # If all attempts fail, notify the user if not found_ps_module: print("Error: Could not import the Photoshop Python API.") print(f"Last error: {last_error}") print("\nTroubleshooting steps:") print("1. Verify the package is installed: pip list | grep photoshop") print("2. Try reinstalling: pip uninstall photoshop-python-api; pip install photoshop-python-api") print("3. Check if you're using the right version of Python") print("4. On macOS, the photoshop-python-api package might not be compatible") print(" - The package was designed for Windows and uses Windows-specific modules") sys.exit(1) else: print("Successfully imported or created Photoshop API session handler") # Configure logging logging.basicConfig( level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', datefmt='%Y-%m-%d %H:%M:%S' ) logger = logging.getLogger(__name__) # The ExtractTextWithBreaks.jsx script as a string (minified version) EXTRACT_TEXT_SCRIPT = r""" // Photoshop Script to Extract Text Layers With Exact Line Breaks #target photoshop function writeTextFile(e,t){e.encoding="UTF8",e.open("w"),e.write(t),e.close()}function escapeJsonString(e){return e?e.replace(/\\/g,"\\\\").replace(/"/g,'\\"').replace(/\n/g,"\\n").replace(/\r/g,"\\r").replace(/\t/g,"\\t").replace(/\f/g,"\\f"):""}function extractTextLayers(e){function t(e,r){r=r||"";for(var n=0;n1?($.writeln("Multi-paragraph text detected - treating each paragraph separately"),function(){for(var e=0,t=0;t20?"...":"")+"\"");var d=0===t;c.push({start:n,end:l,text:r,font:o.textItem.font||"Unknown",style:d?"Bold":"Regular",size:i,color:d?[0,0,0]:[80,80,80],isPrimary:d}),e=l,t1&&($.writeln("Created "+c.length+" different style entries for paragraphs"),window.forceRichTextFormatting=!0)}()):($.writeln("Single paragraph text - checking for character-level formatting"),function(){var e=new ActionReference;e.putEnumerated(charIDToTypeID("Lyr "),charIDToTypeID("Ordn"),charIDToTypeID("Trgt"));var t=executeActionGet(e);if(t.hasKey(stringIDToTypeID("textKey"))){var r=t.getObjectValue(stringIDToTypeID("textKey"));if(r.hasKey(stringIDToTypeID("textStyleRange"))){var n=r.getList(stringIDToTypeID("textStyleRange"));$.writeln("Found "+n.count+" text style ranges");for(var a=0;a1?($.writeln("Multi-paragraph text found: "+e+" paragraphs, marking as rich formatted"),!0):c.length>1?($.writeln("Multiple style ranges found, marking as rich formatted"),!0):function(){for(var e=0;e0)for(var f=0;f str: """ Opens a PSD file in Photoshop and extracts text layers using the JSX script. Args: ps_app: The Photoshop application instance psd_path: Path to the PSD file output_dir: Directory to save the extracted JSON Returns: Path to the saved JSON file or None if extraction failed """ # Create output filename (same as PSD but with -textonly.json suffix) output_filename = f"{psd_path.stem}-textonly.json" output_path = output_dir / output_filename try: # Open the PSD file logger.info(f"Opening {psd_path}") ps_app.Open(psd_path.as_posix()) # Modify the script to automatically save to our output path with -textonly.json suffix modified_script = EXTRACT_TEXT_SCRIPT.replace( 'var n=t.replace(/\\.[^\\.]+$/, "-text.json");', f'var n=t.replace(/\\.[^\\.]+$/, "-textonly.json");' ) # Also replace the file dialog with direct file saving modified_script = modified_script.replace( 'var o=File.saveDialog("Save text layer data as:",n);if(!o)return;', f'var o=new File("{output_path.as_posix().replace("\\", "\\\\")}");' ) # Execute the script logger.info(f"Extracting text from {psd_path.name}") ps_app.DoJavaScript(modified_script) # Wait for file to be created timeout = 10 # seconds start_time = time.time() while not output_path.exists() and time.time() - start_time < timeout: time.sleep(0.5) if output_path.exists(): logger.info(f"Successfully saved text to {output_path}") return output_path.as_posix() else: logger.warning(f"Failed to extract text from {psd_path.name} (timeout)") return None except Exception as e: logger.error(f"Error extracting text from {psd_path.name}: {str(e)}") return None finally: # Close the document without saving try: ps_app.ActiveDocument.Close(2) # 2 = Don't save changes except: pass def batch_extract_text(input_dir: str, output_dir: str, recursive: bool = False) -> List[str]: """ Processes all PSD files in the input directory and extracts text layers. Args: input_dir: Directory containing PSD files output_dir: Directory to save extracted JSON files recursive: Whether to search for PSD files in subdirectories Returns: List of paths to the saved JSON files """ input_path = Path(input_dir).resolve() output_path = Path(output_dir).resolve() # Create output directory if it doesn't exist if not output_path.exists(): output_path.mkdir(parents=True) # Find all PSD files pattern = '**/*.psd' if recursive else '*.psd' psd_files = list(input_path.glob(pattern)) if not psd_files: logger.warning(f"No PSD files found in {input_path}") return [] logger.info(f"Found {len(psd_files)} PSD files to process") # Extract text from each PSD file results = [] with Session() as ps: app = ps.app for psd_file in psd_files: result = extract_text_from_psd(app, psd_file, output_path) if result: results.append(result) logger.info(f"Successfully processed {len(results)} of {len(psd_files)} files") return results def parse_arguments(): """Parse command line arguments""" parser = argparse.ArgumentParser( description='Batch extract text from PSD files', formatter_class=argparse.RawDescriptionHelpFormatter, epilog=""" Examples: # Extract text from all PSD files in the current directory python batch_extract_text.py . # Extract text from all PSD files in a specific directory python batch_extract_text.py /path/to/psd_files # Extract text and save JSON files to a different directory python batch_extract_text.py /path/to/psd_files -o /path/to/output # Extract text from all PSD files including subdirectories python batch_extract_text.py /path/to/psd_files -r """ ) parser.add_argument('input_dir', help='Directory containing PSD files') parser.add_argument('--output-dir', '-o', default=None, help='Directory to save extracted JSON files (defaults to input_dir)') parser.add_argument('--recursive', '-r', action='store_true', help='Search for PSD files in subdirectories') parser.add_argument('--verbose', '-v', action='store_true', help='Enable verbose logging') return parser.parse_args() def main(): """Main function""" args = parse_arguments() input_dir = args.input_dir output_dir = args.output_dir or input_dir # Set logging level based on verbose flag if args.verbose: logger.setLevel(logging.DEBUG) # Set log format to include more details for handler in logger.handlers: handler.setFormatter(logging.Formatter( '%(asctime)s - %(levelname)s - %(message)s', '%Y-%m-%d %H:%M:%S' )) logger.info(f"Processing PSD files from: {input_dir}") logger.info(f"Saving extracted text to: {output_dir}") logger.info(f"Recursive search: {args.recursive}") results = batch_extract_text(input_dir, output_dir, args.recursive) if results: logger.info(f"Extraction complete. Processed {len(results)} files:") for result in results: logger.info(f" - {result}") print(f"\nSuccessfully extracted text from {len(results)} PSD files.") print(f"JSON files saved to: {output_dir}") print("\nNaming convention: [psd_filename]-textonly.json") else: logger.warning("No text was extracted from any files.") print("\nNo PSD files were processed. Check the input directory or enable recursive search with -r.") if __name__ == "__main__": main()