adobe-ps-scripts-loreal/ExtractTextWithBreaks.jsx
DJP 4a192a8c97 Initial commit: Adobe Photoshop API text management scripts
Local and cloud-based workflows for extracting and updating
text layers in PSD files via ExtendScript and Adobe PS API.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-02 13:46:52 -05:00

525 lines
No EOL
26 KiB
JavaScript

/**
* Photoshop Script to Extract Text Layers With Exact Line Breaks
*
* This script extracts all text layers from the current Photoshop document
* with their exact line breaks and formatting preserved.
*
* Usage:
* 1. Open the PSD file in Photoshop
* 2. Run this script: File > Scripts > Browse... > ExtractTextWithBreaks.jsx
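* 3. No save dialog is shown: the JSON is written to OUTPUT_PATH when the calling script defines it, otherwise to ~/Desktop/<document name>-textonly.json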
* 4. The script will extract all text layers with their formatting intact
*/
// Enable double clicking from the Finder/Explorer:
// the #target directive names Photoshop as the application that should run this script.
#target photoshop
/**
 * Writes `text` to `fileObj` as UTF-8, replacing any previous content.
 *
 * The file is opened in "w" mode, written once and always closed again,
 * even when the write itself throws.
 *
 * @param {File} fileObj File-like object exposing `encoding`, `open`, `write` and `close`.
 * @param {String} text Content to write.
 * @returns {Boolean} true when the file was opened and the write was not reported
 *                    as failed; false otherwise. Earlier versions returned nothing,
 *                    so callers that ignore the result keep working.
 */
function writeTextFile(fileObj, text) {
    fileObj.encoding = "UTF8";

    // open() reports failure through its return value, so it has to be checked
    // explicitly; writing to a file that never opened would silently lose the data.
    if (!fileObj.open("w")) {
        return false;
    }

    var written;
    try {
        written = fileObj.write(text);
    } finally {
        // Release the handle no matter how the write ended.
        fileObj.close();
    }

    return written !== false;
}
/**
 * Escapes a value so it can be placed between double quotes in a JSON document.
 *
 * Backslash, double quote and the control characters that have a short JSON
 * escape (\b, \t, \n, \f, \r) use that short form; every other character below
 * U+0020 is written as \uXXXX, because JSON does not allow raw control
 * characters inside strings.
 * NOTE(review): layer text may contain such characters (e.g. U+0003 for a
 * forced line break) - confirm against real documents.
 *
 * @param {*} str Value to escape; non-strings are converted with String().
 * @returns {String} The escaped text without surrounding quotes; "" for
 *                   null, undefined or an empty string.
 */
function escapeJsonString(str) {
    if (str === null || str === undefined) {
        return "";
    }

    var text = String(str);
    var pieces = [];

    // A single pass over the characters keeps this independent of regular
    // expression replacement callbacks.
    for (var i = 0; i < text.length; i++) {
        var ch = text.charAt(i);
        switch (ch) {
            case "\\": pieces.push("\\\\"); break;
            case "\"": pieces.push("\\\""); break;
            case "\n": pieces.push("\\n"); break;
            case "\r": pieces.push("\\r"); break;
            case "\t": pieces.push("\\t"); break;
            case "\f": pieces.push("\\f"); break;
            case "\b": pieces.push("\\b"); break;
            default:
                var code = text.charCodeAt(i);
                if (code < 0x20) {
                    var hex = code.toString(16);
                    // Left-pad to four hex digits: 3 -> "0003".
                    pieces.push("\\u" + "0000".substring(hex.length) + hex);
                } else {
                    pieces.push(ch);
                }
        }
    }

    return pieces.join("");
}
/**
 * Collects every text layer of a document, descending into layer groups.
 *
 * For each layer whose kind is LayerKind.TEXT one entry is produced:
 *   { id, name, path, text, updatedText, visible,
 *     styleInfo: { font, size, color, alignment, styles[] },
 *     hasRichTextFormatting }
 * `path` is the "/"-joined chain of group names ending with the layer name.
 * `id`, `styleInfo.color` and `styleInfo.alignment` are fixed placeholders
 * ("", null and "left").
 *
 * Side effects: makes each text layer the active layer of app.activeDocument
 * (the style lookup targets the active layer) and logs progress with $.writeln.
 * A layer whose extraction throws is logged and left out of the result.
 *
 * @param {Document} doc Document whose `layers` collection is walked.
 * @returns {Array} One entry per extracted text layer, in traversal order.
 */
function extractTextLayers(doc) {
    var allLayers = [];
    var DEFAULT_FONT_SIZE = 12;
    // Characters whose mere presence makes a layer count as "rich" text.
    var FORMATTING_INDICATORS = [
        '•', // bullet
        '‣', // triangle bullet
        '◦', // white bullet
        '*', // asterisk often used for emphasis
        ':', // colon is sometimes differently formatted
        '|'  // vertical bar sometimes used to separate differently formatted text
    ];

    // Reads the font size as a plain number; falls back to DEFAULT_FONT_SIZE.
    function readFontSize(textItem) {
        var fontSize = DEFAULT_FONT_SIZE;
        try {
            if (textItem.size) {
                // parseFloat reads the leading number of the size string and keeps
                // fractional sizes such as 10.5 instead of truncating them.
                fontSize = parseFloat(textItem.size.toString());
                if (isNaN(fontSize)) fontSize = DEFAULT_FONT_SIZE;
            }
        } catch (e) {
            $.writeln("Could not get font size: " + e);
        }
        return fontSize;
    }

    // Style entry covering the complete text; used when nothing finer is known.
    function wholeTextStyle(textContent, fontName, fontSize) {
        return {
            start: 0,
            end: textContent.length,
            text: textContent,
            font: fontName,
            style: "Regular",
            size: fontSize,
            color: null
        };
    }

    // APPROACH 1: one style entry per non-empty paragraph of a multi-line text.
    function buildParagraphStyles(textContent, fontName, fontSize) {
        var styles = [];
        var paragraphs = textContent.split(/[\r\n]/);
        var curPos = 0;

        $.writeln("Multi-paragraph text detected - treating each paragraph separately");
        for (var p = 0; p < paragraphs.length; p++) {
            var paraText = paragraphs[p];
            if (paraText.length === 0) {
                // Empty paragraph: only the line break character is consumed.
                curPos++;
                continue;
            }

            var paraStart = curPos;
            var paraEnd = paraStart + paraText.length;
            $.writeln("Paragraph " + (p + 1) + " [" + paraStart + "-" + paraEnd + "]: \"" +
                paraText.substring(0, Math.min(20, paraText.length)) +
                (paraText.length > 20 ? "..." : "") + "\"");

            // The first paragraph is treated as the differently styled one. Style
            // and color are placeholders written by this script, not values read
            // from the layer.
            var isPrimaryParagraph = (p === 0);
            styles.push({
                start: paraStart,
                end: paraEnd,
                text: paraText,
                font: fontName,
                style: isPrimaryParagraph ? "Bold" : "Regular",
                size: fontSize,
                color: isPrimaryParagraph ? [0, 0, 0] : [80, 80, 80],
                isPrimary: isPrimaryParagraph
            });

            curPos = paraEnd;
            if (p < paragraphs.length - 1) {
                curPos++; // Account for the line break character
            }
        }

        if (styles.length > 1) {
            $.writeln("Created " + styles.length + " different style entries for paragraphs");
        }
        return styles;
    }

    // APPROACH 2: character-level style ranges of the currently active layer.
    function buildRangeStyles(textContent, defaultFont, fontSize) {
        var styles = [];
        $.writeln("Single paragraph text - checking for character-level formatting");

        var ref = new ActionReference();
        ref.putEnumerated(charIDToTypeID("Lyr "), charIDToTypeID("Ordn"), charIDToTypeID("Trgt"));
        var layerDesc = executeActionGet(ref);

        if (!layerDesc.hasKey(stringIDToTypeID('textKey'))) {
            return styles;
        }
        var textKey = layerDesc.getObjectValue(stringIDToTypeID('textKey'));
        if (!textKey.hasKey(stringIDToTypeID('textStyleRange'))) {
            return styles;
        }

        var stylesArray = textKey.getList(stringIDToTypeID('textStyleRange'));
        $.writeln("Found " + stylesArray.count + " text style ranges");

        for (var j = 0; j < stylesArray.count; j++) {
            // A broken range is logged and skipped; the other ranges are kept.
            try {
                var styleDesc = stylesArray.getObjectValue(j);
                // NOTE(review): assumes `from`/`to` are integer keys of each
                // textStyleRange entry, next to the `textStyle` object - confirm
                // against a real descriptor dump.
                var rangeStart = styleDesc.getInteger(stringIDToTypeID('from'));
                var rangeEnd = styleDesc.getInteger(stringIDToTypeID('to'));
                var styleValueDesc = styleDesc.getObjectValue(stringIDToTypeID('textStyle'));

                var fontName = defaultFont;
                var fontStyle = "Regular";
                var rangeSize = fontSize;
                if (styleValueDesc.hasKey(stringIDToTypeID('fontName'))) {
                    fontName = styleValueDesc.getString(stringIDToTypeID('fontName'));
                }
                if (styleValueDesc.hasKey(stringIDToTypeID('fontStyleName'))) {
                    fontStyle = styleValueDesc.getString(stringIDToTypeID('fontStyleName'));
                }
                if (styleValueDesc.hasKey(stringIDToTypeID('size'))) {
                    rangeSize = styleValueDesc.getDouble(stringIDToTypeID('size'));
                }

                styles.push({
                    start: rangeStart,
                    end: rangeEnd,
                    text: textContent.substring(rangeStart, rangeEnd),
                    font: fontName,
                    style: fontStyle,
                    size: rangeSize,
                    color: null // range colors are not read
                });
            } catch (e) {
                $.writeln("Error processing style range: " + e);
            }
        }
        return styles;
    }

    // Decides whether a layer is reported as rich text. The state is derived
    // per layer only; nothing is shared between layers.
    function detectRichText(textContent, styles) {
        var k;

        var paragraphCount = textContent.split(/[\r\n]/).length;
        if (paragraphCount > 1) {
            $.writeln("Multi-paragraph text found: " + paragraphCount + " paragraphs, marking as rich formatted");
            return true;
        }
        if (styles.length > 1) {
            $.writeln("Multiple style ranges found, marking as rich formatted");
            return true;
        }
        for (k = 0; k < styles.length; k++) {
            if (styles[k].color) {
                $.writeln("Color information found in style, marking as rich formatted");
                return true;
            }
        }
        // Heuristic: certain characters hint at mixed formatting.
        for (k = 0; k < FORMATTING_INDICATORS.length; k++) {
            if (textContent.indexOf(FORMATTING_INDICATORS[k]) !== -1) {
                $.writeln("Found formatting indicator character: " + FORMATTING_INDICATORS[k]);
                return true;
            }
        }
        return false;
    }

    // Builds the result entry for one text layer.
    function describeTextLayer(layer, layerPath) {
        var textContent = layer.textItem.contents;
        $.writeln("Text: " + textContent);

        var fontSize = readFontSize(layer.textItem);
        var fontName = layer.textItem.font || "Unknown";

        var richTextStyles;
        try {
            // The style lookup reads the active layer, so select this one first.
            app.activeDocument.activeLayer = layer;
            $.writeln("Extracting detailed text formatting for layer: " + layer.name);

            var paragraphCount = textContent.split(/[\r\n]/).length;
            $.writeln("Text has " + paragraphCount + " paragraphs");

            if (paragraphCount > 1) {
                richTextStyles = buildParagraphStyles(textContent, fontName, fontSize);
            } else {
                richTextStyles = buildRangeStyles(textContent, fontName, fontSize);
            }

            if (richTextStyles.length === 0) {
                $.writeln("No style ranges detected, adding default style for entire text");
                richTextStyles.push(wholeTextStyle(textContent, fontName, fontSize));
            }
            $.writeln("Total style ranges found: " + richTextStyles.length);
        } catch (styleErr) {
            $.writeln("Could not extract text styles: " + styleErr);
            // Fallback: exactly one style covering the whole text.
            richTextStyles = [wholeTextStyle(textContent, fontName, fontSize)];
        }

        return {
            id: "",
            name: layer.name,
            path: layerPath,
            text: textContent,
            updatedText: textContent,
            visible: layer.visible,
            styleInfo: {
                font: fontName,
                size: fontSize,
                color: null,
                alignment: "left",
                styles: richTextStyles
            },
            hasRichTextFormatting: detectRichText(textContent, richTextStyles)
        };
    }

    // Walks a layer collection; `path` is the "/"-joined chain of parent groups.
    function processLayers(layers, path) {
        path = path || "";
        for (var i = 0; i < layers.length; i++) {
            var layer = layers[i];
            var layerPath = path ? path + "/" + layer.name : layer.name;

            if (layer.kind === LayerKind.TEXT) {
                $.writeln("Found text layer: " + layer.name);
                try {
                    allLayers.push(describeTextLayer(layer, layerPath));
                } catch (err) {
                    $.writeln("Error extracting from layer " + layer.name + ": " + err);
                }
            }

            // Layer groups are processed recursively.
            if (layer.typename === "LayerSet") {
                processLayers(layer.layers, layerPath);
            }
        }
    }

    processLayers(doc.layers);
    return allLayers;
}
// Returns the value written to "psdPath": "<folder>/<name>" when the document
// has a path, otherwise just the document name.
function getDocumentPathForJson(doc) {
    try {
        // NOTE(review): reading `path` is reported to throw for documents that
        // were never saved - guarded so such documents can still be exported.
        if (doc.path) {
            return doc.path + "/" + doc.name;
        }
    } catch (e) {
        $.writeln("Document path not available, using the name only: " + e);
    }
    return doc.name;
}

// Reads the document width/height as unit-less integers; 0 when unavailable.
function readDocumentDimensions(doc) {
    var dims = { width: 0, height: 0 };
    try {
        if (doc.width) {
            // parseInt keeps only the leading number of strings such as "800 px".
            var w = parseInt(doc.width.toString(), 10);
            if (!isNaN(w)) dims.width = w;
        }
        if (doc.height) {
            var h = parseInt(doc.height.toString(), 10);
            if (!isNaN(h)) dims.height = h;
        }
    } catch (e) {
        $.writeln("Error getting dimensions: " + e);
    }
    return dims;
}

// Derives "<name>-textonly.json" from the document name, with or without extension.
function defaultOutputName(docName) {
    return docName.replace(/\.[^\.]+$/, "") + "-textonly.json";
}

// Picks the output file: OUTPUT_PATH when the caller defined it, else the Desktop.
function resolveOutputFile(defaultName) {
    if (typeof OUTPUT_PATH !== 'undefined' && OUTPUT_PATH) {
        $.writeln("Using provided output path: " + OUTPUT_PATH);
        return new File(OUTPUT_PATH);
    }
    var fallbackFile = new File("~/Desktop/" + defaultName);
    $.writeln("No OUTPUT_PATH specified, using default: " + fallbackFile.fsName);
    return fallbackFile;
}

// Checks whether a probe file can be created next to `fileObj`.
function isOutputFolderWritable(fileObj) {
    try {
        var probe = new File(fileObj.path + "/test_write.tmp");
        // open()/write() signal failure through their return value rather than
        // by throwing, so the results are inspected explicitly.
        if (!probe.open("w")) {
            $.writeln("Write test failed: could not open " + probe.fsName);
            return false;
        }
        var wrote = probe.write("test") !== false;
        probe.close();
        probe.remove();
        return wrote;
    } catch (e) {
        $.writeln("Write test failed: " + e);
        return false;
    }
}

// Serializes the extraction result by hand (the script does not use a JSON object).
function buildExtractionJson(doc, docName, textLayers) {
    var dims = readDocumentDimensions(doc);
    var out = [];

    out.push('{\n');
    out.push(' "documentName": "' + escapeJsonString(docName) + '",\n');
    out.push(' "psdPath": "' + escapeJsonString(getDocumentPathForJson(doc)) + '",\n');
    out.push(' "extractedAt": "' + escapeJsonString(new Date().toString()) + '",\n');
    out.push(' "dimensions": {\n');
    out.push(' "width": ' + dims.width + ',\n');
    out.push(' "height": ' + dims.height + '\n');
    out.push(' },\n');
    out.push(' "textLayerCount": ' + textLayers.length + ',\n');
    out.push(' "textLayers": [\n');

    for (var i = 0; i < textLayers.length; i++) {
        var layer = textLayers[i];
        var styles = layer.styleInfo.styles || [];

        out.push(' {\n');
        out.push(' "id": "",\n');
        out.push(' "name": "' + escapeJsonString(layer.name) + '",\n');
        out.push(' "path": "' + escapeJsonString(layer.path) + '",\n');
        out.push(' "text": "' + escapeJsonString(layer.text) + '",\n');
        out.push(' "updatedText": "' + escapeJsonString(layer.text) + '",\n');
        out.push(' "visible": ' + (layer.visible ? 'true' : 'false') + ',\n');
        out.push(' "styleInfo": {\n');
        out.push(' "font": "' + escapeJsonString(layer.styleInfo.font) + '",\n');
        out.push(' "size": ' + layer.styleInfo.size + ',\n');
        out.push(' "color": null,\n');
        out.push(' "alignment": "left",\n');
        out.push(' "styles": [\n');

        for (var j = 0; j < styles.length; j++) {
            var style = styles[j];
            out.push(' {\n');
            out.push(' "start": ' + style.start + ',\n');
            out.push(' "end": ' + style.end + ',\n');
            out.push(' "text": "' + escapeJsonString(style.text) + '",\n');
            out.push(' "font": "' + escapeJsonString(style.font) + '",\n');
            out.push(' "style": "' + escapeJsonString(style.style) + '",\n');
            out.push(' "size": ' + style.size);
            // The "color" key is only written when a color array is present.
            if (style.color && style.color.length) {
                out.push(',\n');
                out.push(' "color": [' + style.color.join(', ') + ']\n');
            } else {
                out.push('\n');
            }
            out.push(' }' + (j < styles.length - 1 ? ',\n' : '\n'));
        }

        out.push(' ]\n');
        out.push(' },\n');
        out.push(' "hasRichTextFormatting": ' + (layer.hasRichTextFormatting ? 'true' : 'false') + '\n');
        out.push(' }' + (i < textLayers.length - 1 ? ',\n' : '\n'));
    }

    out.push(' ]\n');
    out.push('}');
    return out.join('');
}

// Best effort: drops "complete_signal.tmp" next to the output file.
function writeCompletionSignal(saveFile) {
    try {
        var signalFile = new File(saveFile.path + "/complete_signal.tmp");
        if (signalFile.open("w")) {
            signalFile.write("done");
            signalFile.close();
        } else {
            $.writeln("Could not create completion signal: " + signalFile.fsName);
        }
    } catch (e) {
        // Signaling is optional; a failure must not abort the export.
        $.writeln("Could not create completion signal: " + e);
    }
}

/**
 * Script entry point: extracts the text layers of the active document and
 * writes them as JSON.
 *
 * Flow: extract the layers, choose the output file (OUTPUT_PATH, else
 * "~/Desktop/<document>-textonly.json"), fall back to the Desktop when the
 * chosen folder fails the write test, write the JSON and finally create
 * "complete_signal.tmp" next to it. A document without text layers produces a
 * JSON file with an empty "textLayers" list through the same path.
 *
 * Apart from the alert shown when no document is open, nothing is displayed;
 * progress and errors are logged with $.writeln and no error propagates to
 * the caller.
 * NOTE(review): the alert blocks until dismissed - confirm this is acceptable
 * for unattended runs.
 */
function main() {
    try {
        if (!documents.length) {
            alert("Please open a PSD file before running this script.");
            return;
        }

        var doc = app.activeDocument;
        var docName = doc.name;

        $.writeln("Extracting text layers from: " + docName);
        var textLayers = extractTextLayers(doc);
        if (textLayers.length === 0) {
            $.writeln("No text layers found in this document.");
        } else {
            $.writeln("Found " + textLayers.length + " text layer(s)");
        }

        var defaultName = defaultOutputName(docName);
        var saveFile = resolveOutputFile(defaultName);
        if (isOutputFolderWritable(saveFile)) {
            $.writeln("Write test successful");
        } else {
            saveFile = new File("~/Desktop/" + defaultName);
            $.writeln("Write test failed, falling back to desktop: " + saveFile.fsName);
        }

        writeTextFile(saveFile, buildExtractionJson(doc, docName, textLayers));

        var resultMessage = "Extracted " + textLayers.length + " text layers from document \"" + docName + "\".\n\n";
        resultMessage += "Text data saved to: " + saveFile.fsName;
        // Logged instead of shown, so no user interaction is needed.
        $.writeln(resultMessage);

        writeCompletionSignal(saveFile);
    } catch (err) {
        // Thrown values are not guaranteed to carry a `message`.
        $.writeln("ERROR: " + (err && err.message ? err.message : err));
    }
}
// Run the script: main() handles its own errors and logs them with $.writeln,
// so this call does not throw.
main();