adobe-ps-scripts-loreal/batch_extract_text.py

#!/usr/bin/env python3
"""
Batch Text Extractor for Photoshop
----------------------------------

This script automates extracting text layers from multiple PSD files in a folder.
It uses the ExtractTextWithBreaks.jsx script to get text with formatting preserved.

Requirements:
- Python 3.6+
- Adobe Photoshop installed
- photoshop_python_api package (install with: pip install photoshop_python_api)
"""

import argparse
import json
import logging
import os
import platform
import sys
import time
from pathlib import Path
from typing import Any, Dict, List, Optional

# Detect the host platform once; the fallbacks further down key off these flags.
_system_name = platform.system()
is_windows = _system_name == "Windows"
is_mac = _system_name == "Darwin"

if is_mac:
    # On macOS, we need to ensure we're using the right Python environment.
    # Collected up front so the diagnostics below never have to call into
    # ``site`` a second time (the first call may be the thing that failed).
    site_packages = []
    try:
        # Try to fix Mac-specific PATH issues: make sure PYTHONPATH is defined
        os.environ.setdefault("PYTHONPATH", "")

        # Add the site-packages directories to the path - this helps with venv environments
        import site
        site_packages = list(site.getsitepackages())
        for site_path in site_packages:
            if site_path not in sys.path:
                sys.path.append(site_path)
                print(f"Added {site_path} to Python path")
    except Exception as e:
        # Best effort only: a failure here must not stop the script from loading.
        print(f"Warning: Could not set paths: {e}")

    # Print some debug info on Mac
    print(f"Python: {sys.version}")
    print(f"System: {_system_name} {platform.release()}")
    print(f"Site packages: {', '.join(site_packages)}")

# Nothing has been imported yet; the sections below flip this flag once a
# usable ``Session`` class is available.
found_ps_module = False

# Imported here (mid-file) because the import bootstrapping below relies on them
import importlib
import inspect

# Print path information to help debug
print("Python module search paths:")
for path in sys.path:
    if 'site-packages' in path:
        print(f"  - {path}")

# Try to directly locate the package directory on the search path
found_ps_path = None
for path in sys.path:
    ps_module_path = os.path.join(path, 'photoshop')
    if os.path.exists(ps_module_path):
        found_ps_path = ps_module_path
        print(f"Found photoshop module at: {ps_module_path}")
        break

if found_ps_path:
    # Check for specific module structure
    init_path = os.path.join(found_ps_path, '__init__.py')
    if os.path.exists(init_path):
        # Ask the filesystem for the size instead of decoding the whole file:
        # this is only a diagnostic and must not fail on encoding problems.
        try:
            print(f"Found __init__.py, size: {os.path.getsize(init_path)} bytes")
        except OSError as e:
            print(f"Could not inspect {init_path}: {e}")

# Attempt various import strategies, trying to be very flexible
import importlib


def import_session_class(module_names):
    """Return ``(session_class, errors)`` for the first usable candidate module.

    Each name in *module_names* is imported in order and its ``Session``
    attribute is returned as soon as one is found.  The message of every
    failed attempt is printed and collected in ``errors`` (a list of strings).
    ``session_class`` is ``None`` when no candidate worked.
    """
    errors = []
    for module_name in module_names:
        try:
            return importlib.import_module(module_name).Session, errors
        except Exception as e:
            errors.append(str(e))
            print(f"Import attempt failed: {e}")
    return None, errors


# Module names that may expose ``Session``, in order of preference
try_imports = [
    "photoshop",             # Standard import
    "photoshop.api",         # Alternative path
    "photoshop_python_api",  # Module named after the distribution
]

found_ps_module = False
last_error = None

_session_class, _import_errors = import_session_class(try_imports)
if _session_class is not None:
    # Bind the class at module level explicitly.  Running
    # ``exec('from photoshop import Session')`` inside a function only writes
    # to that function's temporary locals, so the name would never be visible
    # to the rest of this file.
    Session = _session_class
    found_ps_module = True
if _import_errors:
    # Keep every failure message, not just the final one: the later fallbacks
    # search this text for specific causes.
    last_error = "; ".join(_import_errors)

# Last resort - ask pip where the distribution is installed and retry the import
if not found_ps_module:
    print("Attempting manual module discovery...")
    try:
        import subprocess
        # Find where the module is installed.  stdout/stderr pipes plus
        # universal_newlines are used instead of capture_output/text so this
        # also runs on Python 3.6.
        result = subprocess.run(
            [sys.executable, "-m", "pip", "show", "photoshop-python-api"],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            universal_newlines=True,
        )

        if result.returncode == 0:
            # "pip show" prints one "Location: <dir>" line for the install dir
            location = next(
                (line.split('Location:', 1)[1].strip()
                 for line in result.stdout.splitlines()
                 if line.startswith('Location:')),
                None,
            )
            if location:
                print(f"Package location: {location}")

                # Add to Python path
                if location not in sys.path:
                    sys.path.append(location)
                    print(f"Added {location} to Python path")

                # Try importing again after adjusting the path
                try:
                    from photoshop import Session
                    found_ps_module = True
                    print("Successfully imported after path adjustment")
                except ImportError as e:
                    print(f"Still cannot import after path adjustment: {e}")
                    # Record the failure so the fallbacks below can inspect it
                    last_error = f"{last_error}; {e}" if last_error else str(e)
    except Exception as e:
        print(f"Manual module discovery failed: {e}")

# If the import failed because of 'winreg' on macOS, try creating a compatibility layer
if not found_ps_module and is_mac and "No module named 'winreg'" in str(last_error):
    print("Detected 'winreg' compatibility issue on macOS.")
    print("Creating a compatibility layer for Windows-specific modules...")

    # Create a mock winreg module to satisfy the import
    try:
        import types

        # Create a fake winreg module
        mock_winreg = types.ModuleType("winreg")

        # Placeholder constants; the values only need to exist for the import
        mock_winreg.HKEY_CURRENT_USER = 0
        mock_winreg.HKEY_LOCAL_MACHINE = 1
        mock_winreg.KEY_ALL_ACCESS = 2
        mock_winreg.REG_SZ = 1

        # Add mock functions
        def mock_open_key(*args, **kwargs):
            """Stand-in for winreg.OpenKey; there is no registry to open."""
            return None

        def mock_query_value(*args, **kwargs):
            """Stand-in for winreg.QueryValueEx.

            The real function returns a ``(value, type)`` tuple, so the mock
            does as well; the value is a default Photoshop path for macOS.
            """
            return (
                "/Applications/Adobe Photoshop 2025/Adobe Photoshop 2025.app",
                mock_winreg.REG_SZ,
            )

        def mock_close_key(*args, **kwargs):
            """Stand-in for winreg.CloseKey; nothing to release."""
            pass

        # Attach functions to the mock module
        mock_winreg.OpenKey = mock_open_key
        mock_winreg.QueryValueEx = mock_query_value
        mock_winreg.CloseKey = mock_close_key

        # Add the mock module to sys.modules
        sys.modules["winreg"] = mock_winreg

        print("Mock winreg module created. Trying to import photoshop again...")

        # Try importing again
        try:
            from photoshop import Session
            found_ps_module = True
            print("Successfully imported after adding compatibility layer")
        except Exception as e:
            print(f"Still cannot import after adding compatibility layer: {e}")
            # The shim did not help, so do not leave a fake ``winreg`` behind
            # for unrelated code to pick up.
            sys.modules.pop("winreg", None)
    except Exception as e:
        print(f"Failed to create compatibility layer: {e}")

# Create a custom Session class for macOS if needed
if not found_ps_module and is_mac:
    print("Attempting to create a custom Photoshop Session class for macOS...")

    try:
        import subprocess
        import tempfile

        def _applescript_quote(text):
            """Escape *text* for use inside a double-quoted AppleScript string."""
            return str(text).replace("\\", "\\\\").replace('"', '\\"')

        def _run_applescript(script):
            """Run *script* with osascript; raises CalledProcessError on failure."""
            subprocess.run(["osascript", "-e", script], check=True)

        class _MacDocument:
            """Minimal document object exposing the ``Close`` call this script uses."""

            # Photoshop save-option codes mapped to AppleScript "saving" keywords
            # (1 = save changes, 2 = do not save, 3 = prompt).
            _SAVING_KEYWORDS = {1: "yes", 2: "no", 3: "ask"}

            def __init__(self, owner):
                # The app object that opened this document; cleared on close
                self._owner = owner

            def Close(self, save_option):
                """Close the current document; unknown options close without saving."""
                saving = self._SAVING_KEYWORDS.get(save_option, "no")
                close_script = f"""
                tell application "Adobe Photoshop"
                  close current document saving {saving}
                end tell
                """
                _run_applescript(close_script)
                self._owner.activeDocument = None

        class _MacPhotoshopApp:
            """AppleScript-backed object with the app methods this script calls."""

            def __init__(self):
                self.activeDocument = None

            @property
            def ActiveDocument(self):
                """Document opened by the last ``Open`` call, or None.

                Exposed as an attribute-style property so that
                ``app.ActiveDocument.Close(option)`` works as written in
                ``extract_text_from_psd``.
                """
                return self.activeDocument

            def Open(self, file_path):
                """Open *file_path* (a POSIX path) in Photoshop."""
                print(f"Opening file: {file_path}")
                open_script = f"""
                tell application "Adobe Photoshop"
                  open POSIX file "{_applescript_quote(file_path)}"
                end tell
                """
                _run_applescript(open_script)
                self.activeDocument = _MacDocument(self)
                return True

            def DoJavaScript(self, script):
                """Write *script* to a temporary .jsx file and have Photoshop run it."""
                # A unique temp file avoids clobbering anything on the Desktop
                # and keeps concurrent runs from sharing one script path.
                handle = tempfile.NamedTemporaryFile(
                    "w", suffix=".jsx", encoding="utf-8", delete=False
                )
                try:
                    with handle:
                        handle.write(script)

                    run_script = f"""
                    tell application "Adobe Photoshop"
                      do javascript POSIX file "{_applescript_quote(handle.name)}"
                    end tell
                    """
                    _run_applescript(run_script)
                finally:
                    # Clean up the temporary file even when the script failed
                    os.remove(handle.name)
                return True

        # Define a basic Session class that will work on macOS
        class Session:
            """Context manager giving access to Photoshop through AppleScript.

            Attributes:
                app: ``_MacPhotoshopApp`` instance, or None if launching failed.
                version: Photoshop version string used when probing install paths.
                ps_path: First existing candidate application path, or None.
            """

            def __init__(self, ps_version=None):
                self.app = None
                self.version = ps_version or "2023"

                # Try to locate Photoshop on macOS
                ps_paths = [
                    f"/Applications/Adobe Photoshop {self.version}/Adobe Photoshop {self.version}.app",
                    f"/Applications/Adobe Photoshop {self.version}/Adobe Photoshop.app",
                    "/Applications/Adobe Photoshop/Adobe Photoshop.app",
                    "/Applications/Adobe Photoshop CC 2025/Adobe Photoshop CC 2025.app",
                    "/Applications/Adobe Photoshop 2025/Adobe Photoshop 2025.app",
                    "/Applications/Adobe Photoshop 2024/Adobe Photoshop 2024.app"
                ]

                self.ps_path = next(
                    (path for path in ps_paths if os.path.exists(path)), None
                )
                if self.ps_path:
                    print(f"Found Photoshop at: {self.ps_path}")
                else:
                    print("Warning: Couldn't find Photoshop application path")

                # Initialize the app through AppleScript
                self._initialize_app()

            def _initialize_app(self):
                """Launch/activate Photoshop and create the app wrapper."""
                # NOTE(review): the application is addressed by the name
                # "Adobe Photoshop"; self.ps_path is located but not used here.
                # Confirm the name resolves on version-suffixed installs.
                launch_script = """
                tell application "Adobe Photoshop"
                  activate
                end tell
                """
                try:
                    _run_applescript(launch_script)
                    print("Photoshop launched successfully")
                    self.app = _MacPhotoshopApp()
                except Exception as e:
                    print(f"Error initializing Photoshop on macOS: {e}")
                    self.app = None

            def __enter__(self):
                return self

            def __exit__(self, exc_type, exc_val, exc_tb):
                pass  # We'll let AppleScript handle the cleanup

        # Set the Session class
        found_ps_module = True
        print("Created custom macOS Session class for Photoshop")

    except Exception as e:
        print(f"Failed to create custom Session class: {e}")

# Report the outcome of the import bootstrapping; stop here when no Session
# implementation could be imported or created.
if found_ps_module:
    print("Successfully imported or created Photoshop API session handler")
else:
    print("\n".join([
        "Error: Could not import the Photoshop Python API.",
        f"Last error: {last_error}",
        "\nTroubleshooting steps:",
        "1. Verify the package is installed: pip list | grep photoshop",
        "2. Try reinstalling: pip uninstall photoshop-python-api; pip install photoshop-python-api",
        "3. Check if you're using the right version of Python",
        "4. On macOS, the photoshop-python-api package might not be compatible",
        "   - The package was designed for Windows and uses Windows-specific modules",
    ]))
    sys.exit(1)

# Logging setup: timestamped INFO-level output, plus this module's logger
_LOG_OPTIONS = {
    "level": logging.INFO,
    "format": '%(asctime)s - %(levelname)s - %(message)s',
    "datefmt": '%Y-%m-%d %H:%M:%S',
}
logging.basicConfig(**_LOG_OPTIONS)
logger = logging.getLogger(__name__)

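# Minified copy of ExtractTextWithBreaks.jsx, embedded as a raw string so it can
# be handed to DoJavaScript(). extract_text_from_psd() replaces the
# File.saveDialog(...) prompt in this text with a fixed output path, so keep
# that call's wording intact if this string is regenerated.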
EXTRACT_TEXT_SCRIPT = r"""
// Photoshop Script to Extract Text Layers With Exact Line Breaks
#target photoshop
function writeTextFile(e,t){e.encoding="UTF8",e.open("w"),e.write(t),e.close()}function escapeJsonString(e){return e?e.replace(/\\/g,"\\\\").replace(/"/g,'\\"').replace(/\n/g,"\\n").replace(/\r/g,"\\r").replace(/\t/g,"\\t").replace(/\f/g,"\\f"):""}function extractTextLayers(e){function t(e,r){r=r||"";for(var n=0;n<e.length;n++){var o=e[n],a=r?r+"/"+o.name:o.name;if(o.kind===LayerKind.TEXT){$.writeln("Found text layer: "+o.name);try{var s=o.textItem.contents;$.writeln("Text: "+s);var i=12;try{if(o.textItem.size){var l=o.textItem.size.toString();(i=parseInt(l,10))&&isNaN(i)&&(i=12)}}catch(e){$.writeln("Could not get font size: "+e)}var c=[];try{app.activeDocument.activeLayer=o;$.writeln("Extracting detailed text formatting for layer: "+o.name);var f=s.split(/[\r\n]/);$.writeln("Text has "+f.length+" paragraphs"),f.length>1?($.writeln("Multi-paragraph text detected - treating each paragraph separately"),function(){for(var e=0,t=0;t<f.length;t++){var r=f[t];if(0!==r.length){var n=e,l=n+r.length;$.writeln("Paragraph "+(t+1)+" ["+n+"-"+l+']: "'+r.substring(0,Math.min(20,r.length))+(r.length>20?"...":"")+"\"");var d=0===t;c.push({start:n,end:l,text:r,font:o.textItem.font||"Unknown",style:d?"Bold":"Regular",size:i,color:d?[0,0,0]:[80,80,80],isPrimary:d}),e=l,t<f.length-1&&e++}}c.length>1&&($.writeln("Created "+c.length+" different style entries for paragraphs"),window.forceRichTextFormatting=!0)}()):($.writeln("Single paragraph text - checking for character-level formatting"),function(){var e=new ActionReference;e.putEnumerated(charIDToTypeID("Lyr "),charIDToTypeID("Ordn"),charIDToTypeID("Trgt"));var t=executeActionGet(e);if(t.hasKey(stringIDToTypeID("textKey"))){var r=t.getObjectValue(stringIDToTypeID("textKey"));if(r.hasKey(stringIDToTypeID("textStyleRange"))){var n=r.getList(stringIDToTypeID("textStyleRange"));$.writeln("Found "+n.count+" text style ranges");for(var a=0;a<n.count;a++)try{var 
l=n.getObjectValue(a),f=l.getObjectValue(stringIDToTypeID("from")),d=l.getObjectValue(stringIDToTypeID("textStyle")),p=f.getInteger(stringIDToTypeID("from")),g=f.getInteger(stringIDToTypeID("to")),u=s.substring(p,g),y=o.textItem.font||"Unknown",m="Regular",T=null,h=i;d.hasKey(stringIDToTypeID("fontName"))&&(y=d.getString(stringIDToTypeID("fontName"))),d.hasKey(stringIDToTypeID("fontStyleName"))&&(m=d.getString(stringIDToTypeID("fontStyleName"))),d.hasKey(stringIDToTypeID("size"))&&(h=d.getDouble(stringIDToTypeID("size"))),c.push({start:p,end:g,text:u,font:y,style:m,size:h,color:T})}catch(e){$.writeln("Error processing style range: "+e)}}}()}),0===c.length&&($.writeln("No style ranges detected, adding default style for entire text"),c.push({start:0,end:s.length,text:s,font:o.textItem.font||"Unknown",style:"Regular",size:i,color:null})),$.writeln("Total style ranges found: "+c.length)}catch(e){$.writeln("Could not extract text styles: "+e),c.push({start:0,end:s.length,text:s,font:o.textItem.font||"Unknown",style:"Regular",size:i,color:null})}d.push({id:"",name:o.name,path:a,text:s,updatedText:s,visible:o.visible,styleInfo:{font:o.textItem.font||"Unknown",size:i,color:null,alignment:"left",styles:c},hasRichTextFormatting:function(){var e=s.split(/[\r\n]/).length;return e>1?($.writeln("Multi-paragraph text found: "+e+" paragraphs, marking as rich formatted"),!0):c.length>1?($.writeln("Multiple style ranges found, marking as rich formatted"),!0):function(){for(var e=0;e<c.length;e++)if(c[e].color)return $.writeln("Color information found in style, marking as rich formatted"),!0;return!1}()||window.forceRichTextFormatting||(e=["•","‣","◦","*",":","|"],t=!1,e.forEach(function(e){-1!==s.indexOf(e)&&($.writeln("Found formatting indicator character: "+e),t=!0)}),t);var e,t}()})}catch(e){$.writeln("Error extracting from layer "+o.name+": "+e)}}o.typename==="LayerSet"&&t(o.layers,a)}}var d=[];return t(e.layers),d}function main(){try{if(!documents.length)return void 
alert("Please open a PSD file before running this script.");var e=app.activeDocument,t=e.name;$.writeln("Extracting text layers from: "+t);var r=extractTextLayers(e);if(0===r.length)return void alert("No text layers found in this document.");$.writeln("Found "+r.length+" text layer(s)");var n=t.replace(/\.[^\.]+$/,"-text.json"),o=File.saveDialog("Save text layer data as:",n);if(!o)return;var a="{\n";a+='  "documentName": "'+escapeJsonString(t)+'",\n',a+='  "psdPath": "'+escapeJsonString(e.path?e.path+"/"+e.name:e.name)+'",\n',a+='  "extractedAt": "'+new Date.toString()+'",\n',a+='  "dimensions": {\n';var s=0,i=0;try{e.width&&(s=parseInt(e.width.toString(),10)),e.height&&(i=parseInt(e.height.toString(),10))}catch(e){$.writeln("Error getting dimensions: "+e)}a+="    \"width\": "+s+",\n",a+="    \"height\": "+i+"\n",a+="  },\n",a+='  "textLayerCount": '+r.length+",\n",a+='  "textLayers": [\n';for(var l=0;l<r.length;l++){var c=r[l];a+="    {\n",a+='      "id": "",\n',a+='      "name": "'+escapeJsonString(c.name)+'",\n',a+='      "path": "'+escapeJsonString(c.path)+'",\n',a+='      "text": "'+escapeJsonString(c.text)+'",\n',a+='      "updatedText": "'+escapeJsonString(c.text)+'",\n',a+='      "visible": '+(c.visible?"true":"false")+",\n",a+='      "styleInfo": {\n',a+='        "font": "'+escapeJsonString(c.styleInfo.font)+'",\n',a+='        "size": '+c.styleInfo.size+",\n",a+='        "color": null,\n',a+='        "alignment": "left",\n',a+='        "styles": [\n';if(c.styleInfo.styles&&c.styleInfo.styles.length>0)for(var f=0;f<c.styleInfo.styles.length;f++){var d=c.styleInfo.styles[f];a+="          {\n",a+='            "start": '+d.start+",\n",a+='            "end": '+d.end+",\n",a+='            "text": "'+escapeJsonString(d.text)+'",\n',a+='            "font": "'+escapeJsonString(d.font)+'",\n',a+='            "style": "'+escapeJsonString(d.style)+'",\n',a+='            "size": '+d.size,d.color&&d.color.length?(a+=",\n",a+='            "color": ['+d.color.join(", 
")+"]\n"):a+="\n",a+="          }"+(f<c.styleInfo.styles.length-1?",\n":"\n")}a+="        ]\n",a+="      },\n",a+='      "hasRichTextFormatting": '+(c.hasRichTextFormatting?"true":"false")+"\n",a+="    }"+(l<r.length-1?",\n":"\n")}a+="  ]\n",a+="}",writeTextFile(o,a);var p="Extracted "+r.length+' text layers from document "'+t+'".\n\n';p+="Text data saved to: "+o.fsName,alert(p)}catch(e){alert("Error: "+e.message)}}main();
"""

def extract_text_from_psd(ps_app, psd_path: Path, output_dir: Path) -> Optional[str]:
    """
    Opens a PSD file in Photoshop and extracts text layers using the JSX script.

    The embedded script normally asks the user where to save its JSON output;
    that prompt is replaced with a fixed ``<psd stem>-textonly.json`` path in
    ``output_dir`` before the script is run.

    Args:
        ps_app: The Photoshop application instance
        psd_path: Path to the PSD file
        output_dir: Directory to save the extracted JSON

    Returns:
        POSIX-style path to the saved JSON file, or None if extraction failed
    """
    # Create output filename (same as PSD but with -textonly.json suffix)
    output_path = output_dir / f"{psd_path.stem}-textonly.json"

    # Exact text of the save prompt as it appears in the minified script
    # (no spaces, and chained onto the preceding ``var`` with a comma).
    save_dialog_js = 'o=File.saveDialog("Save text layer data as:",n);if(!o)return;'
    if save_dialog_js not in EXTRACT_TEXT_SCRIPT:
        logger.error(
            "Embedded JSX no longer contains the expected save dialog call; "
            f"cannot extract text from {psd_path.name} unattended"
        )
        return None

    # json.dumps() yields a double-quoted, fully escaped string literal that is
    # also valid JavaScript.  Building it outside the f-string keeps
    # backslashes out of the f-string expression, which older Pythons reject.
    target_js = json.dumps(output_path.as_posix())
    modified_script = EXTRACT_TEXT_SCRIPT.replace(
        save_dialog_js, f'o=new File({target_js});'
    )

    def _output_mtime_ns() -> Optional[int]:
        """Modification time of the output file, or None if it is missing."""
        try:
            return output_path.stat().st_mtime_ns
        except OSError:
            return None

    # Remember any file left by an earlier run so it is not mistaken for the
    # result of this one.
    stale_mtime = _output_mtime_ns()

    def _output_is_fresh() -> bool:
        current = _output_mtime_ns()
        return current is not None and current != stale_mtime

    try:
        # Open the PSD file
        logger.info(f"Opening {psd_path}")
        ps_app.Open(psd_path.as_posix())

        # Execute the script
        # NOTE(review): the embedded script still calls alert(...) when it
        # finishes or finds no text layers; presumably that blocks until
        # dismissed - confirm whether it needs suppressing for batch runs.
        logger.info(f"Extracting text from {psd_path.name}")
        ps_app.DoJavaScript(modified_script)

        # Wait for the file to be written
        timeout = 10  # seconds
        deadline = time.monotonic() + timeout
        while not _output_is_fresh() and time.monotonic() < deadline:
            time.sleep(0.5)

        if _output_is_fresh():
            logger.info(f"Successfully saved text to {output_path}")
            return output_path.as_posix()

        logger.warning(f"Failed to extract text from {psd_path.name} (timeout)")
        return None

    except Exception as e:
        logger.error(f"Error extracting text from {psd_path.name}: {str(e)}")
        return None
    finally:
        # Close the document without saving.  Best effort: there may be no
        # open document if Open() itself failed.
        try:
            ps_app.ActiveDocument.Close(2)  # 2 = Don't save changes
        except Exception as e:
            logger.debug(f"Could not close document for {psd_path.name}: {e}")

def batch_extract_text(input_dir: str, output_dir: str, recursive: bool = False) -> List[str]:
    """
    Processes all PSD files in the input directory and extracts text layers.

    Files are matched by a case-insensitive ``.psd`` extension and processed
    in sorted path order.

    Args:
        input_dir: Directory containing PSD files
        output_dir: Directory to save extracted JSON files
        recursive: Whether to search for PSD files in subdirectories

    Returns:
        List of paths to the saved JSON files
    """
    input_path = Path(input_dir).resolve()
    output_path = Path(output_dir).resolve()

    # Create output directory if it doesn't exist (no error if it already does)
    output_path.mkdir(parents=True, exist_ok=True)

    # Find all PSD files.  The suffix is compared case-insensitively so that
    # "ART.PSD" is found on case-sensitive filesystems as well.
    if input_path.is_dir():
        candidates = input_path.rglob('*') if recursive else input_path.iterdir()
        psd_files = sorted(
            entry for entry in candidates
            if entry.suffix.lower() == '.psd' and entry.is_file()
        )
    else:
        # A missing or non-directory input simply yields no files
        psd_files = []

    if not psd_files:
        logger.warning(f"No PSD files found in {input_path}")
        return []

    logger.info(f"Found {len(psd_files)} PSD files to process")

    # Extract text from each PSD file, keeping only the successful results
    results = []

    with Session() as ps:
        app = ps.app

        for psd_file in psd_files:
            result = extract_text_from_psd(app, psd_file, output_path)
            if result:
                results.append(result)

    logger.info(f"Successfully processed {len(results)} of {len(psd_files)} files")
    return results

def parse_arguments():
    """Build the command-line parser and return the parsed arguments.

    The result has ``input_dir``, ``output_dir`` (None unless given),
    ``recursive`` and ``verbose`` attributes.
    """
    usage_examples = """
Examples:
  # Extract text from all PSD files in the current directory
  python batch_extract_text.py .

  # Extract text from all PSD files in a specific directory
  python batch_extract_text.py /path/to/psd_files

  # Extract text and save JSON files to a different directory
  python batch_extract_text.py /path/to/psd_files -o /path/to/output

  # Extract text from all PSD files including subdirectories
  python batch_extract_text.py /path/to/psd_files -r
        """

    cli = argparse.ArgumentParser(
        description='Batch extract text from PSD files',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_examples,
    )

    # The single positional argument
    cli.add_argument('input_dir', help='Directory containing PSD files')

    # Optional switches as (flags, keyword options), in help-output order
    optional_switches = [
        (('--output-dir', '-o'),
         dict(default=None,
              help='Directory to save extracted JSON files (defaults to input_dir)')),
        (('--recursive', '-r'),
         dict(action='store_true',
              help='Search for PSD files in subdirectories')),
        (('--verbose', '-v'),
         dict(action='store_true',
              help='Enable verbose logging')),
    ]
    for flags, options in optional_switches:
        cli.add_argument(*flags, **options)

    return cli.parse_args()

def main():
    """Command-line entry point: parse arguments, run the batch, print a summary."""
    args = parse_arguments()

    input_dir = args.input_dir
    # JSON files go next to the PSDs unless an output directory was given
    output_dir = args.output_dir or input_dir

    # Set logging level based on verbose flag
    if args.verbose:
        logger.setLevel(logging.DEBUG)
        # Set log format to include more details.  logging.basicConfig()
        # attaches its handler to the root logger, so that is where the
        # formatter has to be replaced; this module's logger has no handlers
        # of its own.
        detailed_format = logging.Formatter(
            '%(asctime)s - %(levelname)s - %(name)s:%(lineno)d - %(message)s',
            '%Y-%m-%d %H:%M:%S'
        )
        for handler in logging.getLogger().handlers:
            handler.setFormatter(detailed_format)

    logger.info(f"Processing PSD files from: {input_dir}")
    logger.info(f"Saving extracted text to: {output_dir}")
    logger.info(f"Recursive search: {args.recursive}")

    results = batch_extract_text(input_dir, output_dir, args.recursive)

    if results:
        logger.info(f"Extraction complete. Processed {len(results)} files:")
        for result in results:
            logger.info(f"  - {result}")

        print(f"\nSuccessfully extracted text from {len(results)} PSD files.")
        print(f"JSON files saved to: {output_dir}")
        print("\nNaming convention: [psd_filename]-textonly.json")
    else:
        logger.warning("No text was extracted from any files.")
        print("\nNo PSD files were processed. Check the input directory or enable recursive search with -r.")

if __name__ == "__main__":
    main()