adobe-ps-scripts-loreal/ExtractTextWithBreaks.jsx

/**
 * Photoshop Script to Extract Text Layers With Exact Line Breaks
 *
 * This script extracts all text layers from the current Photoshop document
 * with their exact line breaks and formatting preserved.
 *
 * Usage:
 * 1. Open the PSD file in Photoshop
 * 2. Run this script: File > Scripts > Browse... > ExtractTextWithBreaks.jsx
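 * 3. The JSON is written to OUTPUT_PATH when that variable is defined,
 *    otherwise to ~/Desktop/<document name>-textonly.json (no save dialog is shown)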
 * 4. The script will extract all text layers with their formatting intact
 */

// Enable double clicking from the Finder/Explorer
#target photoshop

/**
 * Write a string to a file as UTF-8, replacing any existing content.
 *
 * ExtendScript's File.open() and File.write() report failure by returning
 * false instead of throwing, so both results are checked here; otherwise a
 * failed write would be indistinguishable from a successful one.
 *
 * @param {File} fileObj  File object to write to (opened in "w" mode here).
 * @param {String} text   Content to write.
 * @throws {Error} When the file cannot be opened or the write is rejected.
 */
function writeTextFile(fileObj, text) {
    fileObj.encoding = "UTF8";

    if (fileObj.open("w") === false) {
        throw new Error("Could not open file for writing: " + fileObj.fsName +
                        (fileObj.error ? " (" + fileObj.error + ")" : ""));
    }

    try {
        if (fileObj.write(text) === false) {
            throw new Error("Could not write to file: " + fileObj.fsName +
                            (fileObj.error ? " (" + fileObj.error + ")" : ""));
        }
    } finally {
        // Always release the handle, even when the write fails.
        fileObj.close();
    }
}

/**
 * Escape a value so it can be placed between double quotes in a JSON document.
 *
 * Backslash, double quote and every control character below U+0020 are
 * escaped. JSON forbids raw control characters inside strings, so any that
 * has no short escape (for example U+0003) is emitted as \u00XX.
 *
 * @param {*} str  Value to escape; non-strings are converted with String().
 * @returns {String} The escaped text without surrounding quotes, or "" for
 *                   null, undefined and any other falsy input.
 */
function escapeJsonString(str) {
    // Handle null, undefined and other falsy input
    if (!str) return "";

    var input = String(str);
    var out = [];

    // A plain character loop keeps this compatible with ExtendScript.
    for (var i = 0; i < input.length; i++) {
        var ch = input.charAt(i);

        switch (ch) {
            case "\\": out.push("\\\\"); break;
            case "\"": out.push("\\\""); break;
            case "\n": out.push("\\n"); break;
            case "\r": out.push("\\r"); break;
            case "\t": out.push("\\t"); break;
            case "\f": out.push("\\f"); break;
            case "\b": out.push("\\b"); break;
            default:
                var code = input.charCodeAt(i);
                if (code < 0x20) {
                    // Remaining control characters: zero-padded \u00XX form
                    var hex = code.toString(16);
                    out.push("\\u" + "0000".substring(hex.length) + hex);
                } else {
                    out.push(ch);
                }
        }
    }

    return out.join("");
}

/**
 * Collect every text layer of a document, walking layer groups recursively.
 *
 * For each text layer the result holds the raw text (line breaks untouched),
 * the layer path ("Group/Sub/Layer"), its visibility, a styleInfo object and
 * a hasRichTextFormatting flag.
 *
 * Style entries are produced in one of two ways:
 *  - text with more than one paragraph (split on \r or \n) gets one entry per
 *    non-empty paragraph, with PLACEHOLDER style/color values (the first
 *    paragraph is labelled "Bold"/black, the others "Regular"/grey);
 *  - single-paragraph text is read from the layer's textStyleRange list.
 * If neither yields an entry, one entry covering the whole text is used.
 *
 * Side effect: each text layer is made the active layer of app.activeDocument
 * while it is inspected.
 *
 * @param {Document} doc  Document whose `layers` collection is walked.
 * @returns {Array} One plain object per text layer, in traversal order.
 */
function extractTextLayers(doc) {
    var allLayers = [];

    // Characters whose presence is treated as a hint of mixed formatting:
    // bullet, triangle bullet, white bullet, asterisk, colon, vertical bar.
    var FORMATTING_INDICATORS = ['•', '‣', '◦', '*', ':', '|'];

    // Read the layer's font size as an integer; 12 when unavailable.
    function readFontSize(textItem) {
        var fontSize = 12;
        try {
            if (textItem.size) {
                fontSize = parseInt(textItem.size.toString(), 10);
                if (isNaN(fontSize)) fontSize = 12;
            }
        } catch (e) {
            $.writeln("Could not get font size: " + e);
        }
        return fontSize;
    }

    // Read the layer's font name; "Unknown" when empty or inaccessible.
    function readFontName(textItem) {
        try {
            return textItem.font || "Unknown";
        } catch (e) {
            $.writeln("Could not get font name: " + e);
            return "Unknown";
        }
    }

    // Build one placeholder style entry per non-empty paragraph.
    function buildParagraphStyles(paragraphs, fontName, fontSize) {
        var styles = [];
        var curPos = 0;

        for (var p = 0; p < paragraphs.length; p++) {
            var paraText = paragraphs[p];
            if (paraText.length === 0) {
                // Empty paragraph (just a line break)
                curPos++;
                continue;
            }

            var paraStart = curPos;
            var paraEnd = paraStart + paraText.length;

            $.writeln("Paragraph " + (p + 1) + " [" + paraStart + "-" + paraEnd + "]: \"" +
                      paraText.substring(0, Math.min(20, paraText.length)) +
                      (paraText.length > 20 ? "..." : "") + "\"");

            var isPrimaryParagraph = (p === 0);

            // Style and color are placeholders, not values read from Photoshop.
            styles.push({
                start: paraStart,
                end: paraEnd,
                text: paraText,
                font: fontName,
                style: isPrimaryParagraph ? "Bold" : "Regular",
                size: fontSize,
                color: isPrimaryParagraph ? [0, 0, 0] : [80, 80, 80],
                isPrimary: isPrimaryParagraph
            });

            // Skip over the separator that followed this paragraph.
            curPos = paraEnd;
            if (p < paragraphs.length - 1) {
                curPos++;
            }
        }

        return styles;
    }

    // Read character-level style ranges of the ACTIVE layer via Action Manager.
    function buildCharacterStyles(textContent, fontName, fontSize) {
        var styles = [];

        var ref = new ActionReference();
        ref.putEnumerated(charIDToTypeID("Lyr "), charIDToTypeID("Ordn"), charIDToTypeID("Trgt"));
        var layerDesc = executeActionGet(ref);

        var textKeyId = stringIDToTypeID('textKey');
        if (!layerDesc.hasKey(textKeyId)) return styles;
        var textKey = layerDesc.getObjectValue(textKeyId);

        var rangeListId = stringIDToTypeID('textStyleRange');
        if (!textKey.hasKey(rangeListId)) return styles;
        var stylesArray = textKey.getList(rangeListId);
        $.writeln("Found " + stylesArray.count + " text style ranges");

        var fromId = stringIDToTypeID('from');
        var toId = stringIDToTypeID('to');
        var textStyleId = stringIDToTypeID('textStyle');
        var fontNameId = stringIDToTypeID('fontName');
        var fontStyleNameId = stringIDToTypeID('fontStyleName');
        var sizeId = stringIDToTypeID('size');

        for (var j = 0; j < stylesArray.count; j++) {
            try {
                var styleDesc = stylesArray.getObjectValue(j);

                // 'from' and 'to' are integer keys on the range descriptor
                // itself, not members of a nested object.
                var rangeStart = styleDesc.getInteger(fromId);
                var rangeEnd = styleDesc.getInteger(toId);

                // Guard against the same range being listed twice in a row,
                // which would otherwise look like mixed formatting.
                var previous = styles.length ? styles[styles.length - 1] : null;
                if (previous && previous.start === rangeStart && previous.end === rangeEnd) {
                    continue;
                }

                var styleValueDesc = styleDesc.getObjectValue(textStyleId);

                var rangeFont = fontName;
                var rangeStyle = "Regular";
                var rangeSize = fontSize;

                if (styleValueDesc.hasKey(fontNameId)) {
                    rangeFont = styleValueDesc.getString(fontNameId);
                }

                if (styleValueDesc.hasKey(fontStyleNameId)) {
                    rangeStyle = styleValueDesc.getString(fontStyleNameId);
                }

                if (styleValueDesc.hasKey(sizeId)) {
                    // Pick the getter that matches the stored value type.
                    rangeSize = (styleValueDesc.getType(sizeId) === DescValueType.UNITDOUBLE)
                        ? styleValueDesc.getUnitDoubleValue(sizeId)
                        : styleValueDesc.getDouble(sizeId);
                }

                styles.push({
                    start: rangeStart,
                    end: rangeEnd,
                    text: textContent.substring(rangeStart, rangeEnd),
                    font: rangeFont,
                    style: rangeStyle,
                    size: rangeSize,
                    color: null // per-range color is not extracted
                });
            } catch (e) {
                $.writeln("Error processing style range: " + e);
            }
        }

        return styles;
    }

    // Decide whether a layer should be treated as rich-formatted text.
    function looksRichFormatted(textContent, styles) {
        var paragraphCount = textContent.split(/[\r\n]/).length;
        if (paragraphCount > 1) {
            $.writeln("Multi-paragraph text found: " + paragraphCount + " paragraphs, marking as rich formatted");
            return true;
        }

        if (styles.length > 1) {
            $.writeln("Multiple style ranges found, marking as rich formatted");
            return true;
        }

        for (var s = 0; s < styles.length; s++) {
            if (styles[s].color) {
                $.writeln("Color information found in style, marking as rich formatted");
                return true;
            }
        }

        // Heuristic: certain characters often accompany mixed formatting.
        for (var k = 0; k < FORMATTING_INDICATORS.length; k++) {
            if (textContent.indexOf(FORMATTING_INDICATORS[k]) !== -1) {
                $.writeln("Found formatting indicator character: " + FORMATTING_INDICATORS[k]);
                return true;
            }
        }

        return false;
    }

    // Build the result record for a single text layer.
    function describeTextLayer(layer, layerPath) {
        // Get the text content with exact line breaks
        var textContent = layer.textItem.contents;
        $.writeln("Text: " + textContent);

        var fontSize = readFontSize(layer.textItem);
        var fontName = readFontName(layer.textItem);

        // Capture visibility BEFORE selecting the layer, in case making it the
        // active layer changes its visibility.
        var wasVisible = layer.visible;

        var richTextStyles = [];
        try {
            // The Action Manager query below targets the active layer.
            app.activeDocument.activeLayer = layer;

            $.writeln("Extracting detailed text formatting for layer: " + layer.name);

            var paragraphs = textContent.split(/[\r\n]/);
            $.writeln("Text has " + paragraphs.length + " paragraphs");

            if (paragraphs.length > 1) {
                $.writeln("Multi-paragraph text detected - treating each paragraph separately");
                richTextStyles = buildParagraphStyles(paragraphs, fontName, fontSize);
                if (richTextStyles.length > 1) {
                    $.writeln("Created " + richTextStyles.length + " different style entries for paragraphs");
                }
            } else {
                $.writeln("Single paragraph text - checking for character-level formatting");
                richTextStyles = buildCharacterStyles(textContent, fontName, fontSize);
            }
        } catch (styleErr) {
            $.writeln("Could not extract text styles: " + styleErr);
            // Drop partial results so the fallback below stands alone.
            richTextStyles = [];
        }

        // Put visibility back if selecting the layer changed it (best effort).
        try {
            if (layer.visible !== wasVisible) {
                layer.visible = wasVisible;
            }
        } catch (visErr) {
            $.writeln("Could not restore visibility of layer " + layer.name + ": " + visErr);
        }

        // If we still don't have any styles, add a default one for the entire text
        if (richTextStyles.length === 0) {
            $.writeln("No style ranges detected, adding default style for entire text");
            richTextStyles.push({
                start: 0,
                end: textContent.length,
                text: textContent,
                font: fontName,
                style: "Regular",
                size: fontSize,
                color: null
            });
        }

        $.writeln("Total style ranges found: " + richTextStyles.length);

        return {
            id: "",
            name: layer.name,
            path: layerPath,
            text: textContent,
            updatedText: textContent,
            visible: wasVisible,
            styleInfo: {
                font: fontName,
                size: fontSize,
                color: null,
                alignment: "left",
                styles: richTextStyles
            },
            hasRichTextFormatting: looksRichFormatted(textContent, richTextStyles)
        };
    }

    // Helper function to process layers recursively
    function processLayers(layers, path) {
        path = path || "";

        for (var i = 0; i < layers.length; i++) {
            var layer = layers[i];
            var layerPath = path ? path + "/" + layer.name : layer.name;

            // Process text layers
            if (layer.kind === LayerKind.TEXT) {
                $.writeln("Found text layer: " + layer.name);

                try {
                    allLayers.push(describeTextLayer(layer, layerPath));
                } catch (err) {
                    // A failing layer is skipped; the remaining layers are still processed.
                    $.writeln("Error extracting from layer " + layer.name + ": " + err);
                }
            }

            // Process layer groups recursively
            if (layer.typename === "LayerSet") {
                processLayers(layer.layers, layerPath);
            }
        }
    }

    // Start from the root layers
    processLayers(doc.layers);
    return allLayers;
}

/**
 * Entry point: extract the active document's text layers and save them as JSON.
 *
 * Steps:
 *  1. Require an open document.
 *  2. Collect text layers with extractTextLayers().
 *  3. Choose the output file: OUTPUT_PATH when that global is defined and
 *     non-empty, otherwise ~/Desktop/<document name>-textonly.json. If the
 *     target folder fails a write probe, fall back to the Desktop file.
 *  4. Build the JSON by hand and write it with writeTextFile().
 *  5. Write "complete_signal.tmp" next to the output file (best effort).
 *
 * The same path is used whether or not any text layers were found. Errors
 * are logged with $.writeln and never rethrown. A dialog is shown only when
 * no document is open AND OUTPUT_PATH is not defined.
 */
function main() {
    // True when a non-empty OUTPUT_PATH global is defined.
    function hasOutputPath() {
        return (typeof OUTPUT_PATH !== 'undefined' && OUTPUT_PATH) ? true : false;
    }

    // Derive "<name>-textonly.json", stripping the extension when there is one.
    function defaultOutputName(docName) {
        return docName.replace(/\.[^\.]+$/, "") + "-textonly.json";
    }

    // Path + name of the document; only the name when the path is unavailable.
    function resolvePsdPath(doc) {
        try {
            return doc.path ? doc.path + "/" + doc.name : doc.name;
        } catch (e) {
            // Reading doc.path can throw, e.g. for a document that was never saved.
            $.writeln("Document path unavailable: " + e);
            return doc.name;
        }
    }

    // Leading integer of a dimension value such as "800 px"; 0 when unreadable.
    // NOTE(review): the number is in the document's current ruler units - confirm px is intended.
    function readDimension(unitValue) {
        var n = 0;
        try {
            if (unitValue) {
                n = parseInt(unitValue.toString(), 10);
            }
        } catch (e) {
            $.writeln("Error getting dimensions: " + e);
        }
        return isNaN(n) ? 0 : n;
    }

    // Render a number for JSON; NaN/Infinity/non-numbers become the fallback
    // because JSON has no representation for them.
    function jsonNumber(value, fallback) {
        return (typeof value === "number" && isFinite(value)) ? String(value) : String(fallback);
    }

    // Probe whether a file can be created in the given folder.
    // File.open() signals failure by returning false, so the result is checked.
    function canWriteIn(folderPath) {
        try {
            var probe = new File(folderPath + "/test_write.tmp");
            if (probe.open("w") === false) {
                $.writeln("Write test failed: " + probe.error);
                return false;
            }
            var wrote = probe.write("test");
            probe.close();
            probe.remove();
            if (wrote === false) {
                $.writeln("Write test failed: " + probe.error);
                return false;
            }
            $.writeln("Write test successful");
            return true;
        } catch (e) {
            $.writeln("Write test failed: " + e);
            return false;
        }
    }

    // Choose the output file, falling back to the Desktop when the folder is not writable.
    function chooseOutputFile(defaultName) {
        var target;
        if (hasOutputPath()) {
            target = new File(OUTPUT_PATH);
            $.writeln("Using provided output path: " + OUTPUT_PATH);
        } else {
            target = new File("~/Desktop/" + defaultName);
            $.writeln("No OUTPUT_PATH specified, using default: " + target.fsName);
        }

        if (!canWriteIn(target.path)) {
            target = new File("~/Desktop/" + defaultName);
            $.writeln("Write test failed, falling back to desktop: " + target.fsName);
        }
        return target;
    }

    // Serialize one style range as an indented JSON object.
    function serializeStyle(style, isLast) {
        var out = '          {\n';
        out += '            "start": ' + jsonNumber(style.start, 0) + ',\n';
        out += '            "end": ' + jsonNumber(style.end, 0) + ',\n';
        out += '            "text": "' + escapeJsonString(style.text) + '",\n';
        out += '            "font": "' + escapeJsonString(style.font) + '",\n';
        out += '            "style": "' + escapeJsonString(style.style) + '",\n';
        out += '            "size": ' + jsonNumber(style.size, 12);

        // Add color if available
        if (style.color && style.color.length) {
            out += ',\n';
            out += '            "color": [' + style.color.join(', ') + ']\n';
        } else {
            out += '\n';
        }

        out += '          }' + (isLast ? '\n' : ',\n');
        return out;
    }

    // Serialize one extracted text layer as an indented JSON object.
    function serializeLayer(layer, isLast) {
        var out = '    {\n';
        out += '      "id": "",\n';
        out += '      "name": "' + escapeJsonString(layer.name) + '",\n';
        out += '      "path": "' + escapeJsonString(layer.path) + '",\n';
        out += '      "text": "' + escapeJsonString(layer.text) + '",\n';
        out += '      "updatedText": "' + escapeJsonString(layer.text) + '",\n';
        out += '      "visible": ' + (layer.visible ? 'true' : 'false') + ',\n';
        out += '      "styleInfo": {\n';
        out += '        "font": "' + escapeJsonString(layer.styleInfo.font) + '",\n';
        out += '        "size": ' + jsonNumber(layer.styleInfo.size, 12) + ',\n';
        out += '        "color": null,\n';
        out += '        "alignment": "left",\n';
        out += '        "styles": [\n';

        var styles = layer.styleInfo.styles || [];
        for (var j = 0; j < styles.length; j++) {
            out += serializeStyle(styles[j], j === styles.length - 1);
        }

        out += '        ]\n';
        out += '      },\n';
        out += '      "hasRichTextFormatting": ' + (layer.hasRichTextFormatting ? 'true' : 'false') + '\n';
        out += '    }' + (isLast ? '\n' : ',\n');
        return out;
    }

    // Generate the JSON manually to avoid ExtendScript JSON issues
    function buildJson(doc, docName, textLayers) {
        var out = '{\n';
        out += '  "documentName": "' + escapeJsonString(docName) + '",\n';
        out += '  "psdPath": "' + escapeJsonString(resolvePsdPath(doc)) + '",\n';
        out += '  "extractedAt": "' + escapeJsonString(new Date().toString()) + '",\n';
        out += '  "dimensions": {\n';
        out += '    "width": ' + readDimension(doc.width) + ',\n';
        out += '    "height": ' + readDimension(doc.height) + '\n';
        out += '  },\n';
        out += '  "textLayerCount": ' + textLayers.length + ',\n';
        out += '  "textLayers": [\n';

        for (var i = 0; i < textLayers.length; i++) {
            out += serializeLayer(textLayers[i], i === textLayers.length - 1);
        }

        out += '  ]\n';
        out += '}';
        return out;
    }

    // Create a small marker file next to the output so a watcher can detect completion.
    function writeCompletionSignal(outputFile) {
        try {
            var signalFile = new File(outputFile.path + "/complete_signal.tmp");
            signalFile.open("w");
            signalFile.write("done");
            signalFile.close();
        } catch (e) {
            // Signalling is best effort; a failure must not abort the run.
            $.writeln("Could not write completion signal: " + e);
        }
    }

    try {
        // Check if a document is open
        if (!documents.length) {
            var noDocMessage = "Please open a PSD file before running this script.";
            if (hasOutputPath()) {
                // Automation mode: never block on a dialog.
                $.writeln(noDocMessage);
            } else {
                alert(noDocMessage);
            }
            return;
        }

        // Get the active document
        var doc = app.activeDocument;
        var docName = doc.name;

        // Extract text layers
        $.writeln("Extracting text layers from: " + docName);
        var textLayers = extractTextLayers(doc);

        if (textLayers.length === 0) {
            // An (empty) result file is still written below.
            $.writeln("No text layers found in this document.");
        } else {
            $.writeln("Found " + textLayers.length + " text layer(s)");
        }

        var saveFile = chooseOutputFile(defaultOutputName(docName));
        var jsonContent = buildJson(doc, docName, textLayers);

        // Write to file
        writeTextFile(saveFile, jsonContent);

        // Log the outcome; no alerts, so no user interaction is required
        var resultMessage = "Extracted " + textLayers.length + " text layers from document \"" + docName + "\".\n\n";
        resultMessage += "Text data saved to: " + saveFile.fsName;
        $.writeln(resultMessage);

        writeCompletionSignal(saveFile);
    } catch (err) {
        // Log errors without showing alert dialogs
        $.writeln("ERROR: " + (err && err.message ? err.message : err));
    }
}

// Run the script as soon as this file is evaluated.
// main() wraps its whole body in try/catch and logs failures via $.writeln,
// so no exception propagates from this call.
main();