From 508edf603c911454c056f18b3ea2b1015c55c410 Mon Sep 17 00:00:00 2001 From: waleed Date: Thu, 7 May 2026 18:30:25 -0700 Subject: [PATCH 01/16] improvement(sandbox): upgrade pptx/docx/pdf bootstrap with image helpers, MIME guards, and 256 MB isolate limit --- apps/sim/lib/execution/isolated-vm-worker.cjs | 4 +- apps/sim/sandbox-tasks/docx-generate.ts | 49 ++++++++++++++- apps/sim/sandbox-tasks/pdf-generate.ts | 61 +++++++++++++++++-- apps/sim/sandbox-tasks/pptx-generate.ts | 44 ++++++++++++- 4 files changed, 149 insertions(+), 9 deletions(-) diff --git a/apps/sim/lib/execution/isolated-vm-worker.cjs b/apps/sim/lib/execution/isolated-vm-worker.cjs index 0bca38e4c2b..73d244de75f 100644 --- a/apps/sim/lib/execution/isolated-vm-worker.cjs +++ b/apps/sim/lib/execution/isolated-vm-worker.cjs @@ -183,7 +183,7 @@ async function executeCode(request, executionId) { const externalCopies = [] try { - isolate = new ivm.Isolate({ memoryLimit: 128 }) + isolate = new ivm.Isolate({ memoryLimit: 256 }) if (executionId !== undefined) activeIsolates.set(executionId, isolate) context = await isolate.createContext() const jail = context.global @@ -511,7 +511,7 @@ async function executeTask(request, executionId) { let tPhase = tStart try { - isolate = new ivm.Isolate({ memoryLimit: 128 }) + isolate = new ivm.Isolate({ memoryLimit: 256 }) if (executionId !== undefined) activeIsolates.set(executionId, isolate) context = await isolate.createContext() const jail = context.global diff --git a/apps/sim/sandbox-tasks/docx-generate.ts b/apps/sim/sandbox-tasks/docx-generate.ts index 04efa68abeb..98f2fb70100 100644 --- a/apps/sim/sandbox-tasks/docx-generate.ts +++ b/apps/sim/sandbox-tasks/docx-generate.ts @@ -15,10 +15,57 @@ export const docxGenerateTask = defineSandboxTask({ globalThis.addSection = (section) => { globalThis.__docxSections.push(section); }; - globalThis.getFileBase64 = async (fileId) => { + + // Page geometry constants (twips, 1 twip = 1/1440 inch) for US Letter + globalThis.PAGE_W = 12240; // 8.5" + globalThis.PAGE_H = 15840; // 11" + globalThis.MARGIN = 1440; // 1" margins + globalThis.CONTENT_W = 9360; // PAGE_W - 2 * MARGIN + + // 6 MB raw ≈ 8 MB base64; reject above this to avoid sandbox OOM. + const _MAX_IMG_B64 = 8 * 1024 * 1024; + + /** + * getFileBase64(fileId) — load a workspace file as a full data URI string. + * Returns the complete "data:image/png;base64,..." string. + * Use addImage() rather than passing this directly to ImageRun. + */ + globalThis.getFileBase64 = async function getFileBase64(fileId) { + if (!fileId || typeof fileId !== 'string') { + throw new Error('getFileBase64: fileId must be a non-empty string'); + } const res = await globalThis.__brokers.workspaceFile({ fileId }); + if (!res || !res.dataUri) { + throw new Error('getFileBase64: broker returned no data for file ' + fileId); + } + if (res.dataUri.length > _MAX_IMG_B64) { + throw new Error( + 'getFileBase64: image exceeds the 6 MB embed limit (~8 MB base64). Use a smaller/compressed image.' + ); + } return res.dataUri; }; + + /** + * addImage(fileId, opts) — fetch a workspace file and return a docx.ImageRun. + * Required opts: width, height (pixels or EMUs via transformation option). + * Example: + * new docx.Paragraph({ children: [await addImage('abc123', { width: 200, height: 100 })] }) + */ + globalThis.addImage = async function addImage(fileId, opts) { + const dataUri = await globalThis.getFileBase64(fileId); + const comma = dataUri.indexOf(','); + const header = comma !== -1 ? dataUri.slice(0, comma) : ''; + const base64 = comma !== -1 ? dataUri.slice(comma + 1) : dataUri; + const mime = header.split(';')[0].replace('data:', '') || 'image/png'; + const ext = mime.includes('png') ? 'png' : mime.includes('gif') ? 'gif' : mime.includes('bmp') ? 'bmp' : 'jpg'; + if (!globalThis.Buffer) throw new Error('addImage: Buffer polyfill missing — ensure docx bundle is loaded'); + return new globalThis.docx.ImageRun(Object.assign({ + data: globalThis.Buffer.from(base64, 'base64'), + transformation: { width: (opts && opts.width) || 200, height: (opts && opts.height) || 200 }, + type: ext, + }, opts || {})); + }; `, // JSZip's browser build doesn't support nodebuffer output, so we go through // base64 and decode back to bytes inside the isolate (avoids DataURL / Blob). diff --git a/apps/sim/sandbox-tasks/pdf-generate.ts b/apps/sim/sandbox-tasks/pdf-generate.ts index 4a6cbc26f25..7152131c830 100644 --- a/apps/sim/sandbox-tasks/pdf-generate.ts +++ b/apps/sim/sandbox-tasks/pdf-generate.ts @@ -12,20 +12,73 @@ export const pdfGenerateTask = defineSandboxTask({ if (!PDFLib) throw new Error('pdf-lib bundle not loaded'); globalThis.PDFLib = PDFLib; globalThis.pdf = await PDFLib.PDFDocument.create(); - globalThis.embedImage = async (dataUri) => { + + // Convenience shortcuts — avoids verbose PDFLib.rgb() / PDFLib.StandardFonts.Helvetica + globalThis.rgb = PDFLib.rgb; + globalThis.StandardFonts = PDFLib.StandardFonts; + + // Page-size constants in points (1pt = 1/72 inch) + globalThis.LETTER = [612, 792]; // 8.5" × 11" + globalThis.A4 = [595.28, 841.89]; // 210mm × 297mm + + // 6 MB raw ≈ 8 MB base64; reject above this to avoid sandbox OOM. + const _MAX_IMG_B64 = 8 * 1024 * 1024; + + /** + * embedImage(dataUri) — embed a data-URI image into the active PDF document. + * Dispatches to embedPng or embedJpg based on MIME type. + */ + globalThis.embedImage = async function embedImage(dataUri) { + if (!dataUri || typeof dataUri !== 'string') { + throw new Error('embedImage: dataUri must be a non-empty string'); + } + if (dataUri.length > _MAX_IMG_B64) { + throw new Error( + 'embedImage: image exceeds the 6 MB embed limit (~8 MB base64). Use a smaller/compressed image.' + ); + } const comma = dataUri.indexOf(','); + if (comma === -1) throw new Error('embedImage: invalid data URI (no comma separator)'); const header = dataUri.slice(0, comma); const base64 = dataUri.slice(comma + 1); const binary = globalThis.Buffer ? globalThis.Buffer.from(base64, 'base64') : null; if (!binary) throw new Error('Buffer polyfill missing'); const mime = header.split(';')[0].split(':')[1] || ''; - if (mime.includes('png')) return globalThis.pdf.embedPng(binary); - return globalThis.pdf.embedJpg(binary); + // image/jpg is non-standard but tolerated; the canonical MIME is image/jpeg + if (mime === 'image/png') return globalThis.pdf.embedPng(binary); + if (mime === 'image/jpeg' || mime === 'image/jpg') return globalThis.pdf.embedJpg(binary); + throw new Error('embedImage: only PNG and JPEG are supported (got ' + (mime || 'unknown — check data URI header') + ')'); }; - globalThis.getFileBase64 = async (fileId) => { + + /** + * getFileBase64(fileId) — load a workspace file as a data URI string. + */ + globalThis.getFileBase64 = async function getFileBase64(fileId) { + if (!fileId || typeof fileId !== 'string') { + throw new Error('getFileBase64: fileId must be a non-empty string'); + } const res = await globalThis.__brokers.workspaceFile({ fileId }); + if (!res || !res.dataUri) { + throw new Error('getFileBase64: broker returned no data for file ' + fileId); + } + if (res.dataUri.length > _MAX_IMG_B64) { + throw new Error( + 'getFileBase64: image exceeds the 6 MB embed limit (~8 MB base64). Use a smaller/compressed image.' + ); + } return res.dataUri; }; + + /** + * drawImage(page, fileId, opts) — fetch a workspace file and draw it on the given page. + * Required opts: x, y, width, height (points). + * Example: await drawImage(page, 'abc123', { x: 50, y: 700, width: 200, height: 100 }); + */ + globalThis.drawImage = async function drawImage(page, fileId, opts) { + const dataUri = await globalThis.getFileBase64(fileId); + const img = await globalThis.embedImage(dataUri); + page.drawImage(img, opts || {}); + }; `, finalize: ` const pdf = globalThis.pdf; diff --git a/apps/sim/sandbox-tasks/pptx-generate.ts b/apps/sim/sandbox-tasks/pptx-generate.ts index bca608791dd..01d9b1dd403 100644 --- a/apps/sim/sandbox-tasks/pptx-generate.ts +++ b/apps/sim/sandbox-tasks/pptx-generate.ts @@ -11,9 +11,49 @@ export const pptxGenerateTask = defineSandboxTask({ const PptxGenJS = globalThis.__bundles['pptxgenjs']; if (!PptxGenJS) throw new Error('pptxgenjs bundle not loaded'); globalThis.pptx = new PptxGenJS(); - globalThis.getFileBase64 = async (fileId) => { + globalThis.pptx.layout = 'LAYOUT_16x9'; + + // Slide geometry for LAYOUT_16x9 (inches) + globalThis.SLIDE_W = 10; + globalThis.SLIDE_H = 5.625; + globalThis.MARGIN = 0.5; + globalThis.CONTENT_W = 9; // SLIDE_W - 2 * MARGIN + globalThis.CONTENT_H = 3.8; // usable body height below a standard title row + + // ── Image helpers ────────────────────────────────────────────────────────── + // 6 MB raw ≈ 8 MB base64; reject above this to avoid sandbox OOM. + const _MAX_IMG_B64 = 8 * 1024 * 1024; + + /** + * getFileBase64(fileId) — load a workspace file as a data URI string. + * PptxGenJS data format: "image/png;base64," (no "data:" prefix). + * Use as: slide.addImage({ data: await getFileBase64(fileId), x, y, w, h }) + */ + globalThis.getFileBase64 = async function getFileBase64(fileId) { + if (!fileId || typeof fileId !== 'string') { + throw new Error('getFileBase64: fileId must be a non-empty string'); + } const res = await globalThis.__brokers.workspaceFile({ fileId }); - return res.dataUri; + if (!res || !res.dataUri) { + throw new Error('getFileBase64: broker returned no data for file ' + fileId); + } + if (res.dataUri.length > _MAX_IMG_B64) { + throw new Error( + 'getFileBase64: image exceeds the 6 MB embed limit (~8 MB base64). Use a smaller/compressed image.' + ); + } + // PptxGenJS expects "image/png;base64,..." — strip the leading "data:" if present + return res.dataUri.replace(/^data:/, ''); + }; + + /** + * addImage(slide, fileId, opts) — fetch a workspace file and embed it. + * Required opts: x, y, w, h (inches). + * Example: await addImage(slide, 'abc123', { x: 0.5, y: 1, w: 2, h: 1 }); + */ + globalThis.addImage = async function addImage(slide, fileId, opts) { + const data = await globalThis.getFileBase64(fileId); + slide.addImage(Object.assign({ data }, opts || {})); }; `, finalize: ` From 9e38939755880cde6bbf32ed2d85addbe03951c9 Mon Sep 17 00:00:00 2001 From: waleed Date: Thu, 7 May 2026 18:38:37 -0700 Subject: [PATCH 02/16] fix(sandbox): strict MIME allowlist and nullish coalescing in docx addImage --- apps/sim/sandbox-tasks/docx-generate.ts | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/apps/sim/sandbox-tasks/docx-generate.ts b/apps/sim/sandbox-tasks/docx-generate.ts index 98f2fb70100..3572c815a02 100644 --- a/apps/sim/sandbox-tasks/docx-generate.ts +++ b/apps/sim/sandbox-tasks/docx-generate.ts @@ -58,11 +58,13 @@ export const docxGenerateTask = defineSandboxTask({ const header = comma !== -1 ? dataUri.slice(0, comma) : ''; const base64 = comma !== -1 ? dataUri.slice(comma + 1) : dataUri; const mime = header.split(';')[0].replace('data:', '') || 'image/png'; - const ext = mime.includes('png') ? 'png' : mime.includes('gif') ? 'gif' : mime.includes('bmp') ? 'bmp' : 'jpg'; + const extMap = { 'image/png': 'png', 'image/jpeg': 'jpg', 'image/jpg': 'jpg', 'image/gif': 'gif', 'image/bmp': 'bmp', 'image/svg+xml': 'svg' }; + const ext = extMap[mime]; + if (!ext) throw new Error('addImage: unsupported image type "' + mime + '". Use PNG, JPEG, GIF, BMP, or SVG.'); if (!globalThis.Buffer) throw new Error('addImage: Buffer polyfill missing — ensure docx bundle is loaded'); return new globalThis.docx.ImageRun(Object.assign({ data: globalThis.Buffer.from(base64, 'base64'), - transformation: { width: (opts && opts.width) || 200, height: (opts && opts.height) || 200 }, + transformation: { width: opts?.width ?? 200, height: opts?.height ?? 200 }, type: ext, }, opts || {})); }; From f3f1a45cbb5162fb7cecaf0f8d34c09d5e9ef15d Mon Sep 17 00:00:00 2001 From: waleed Date: Thu, 7 May 2026 18:39:28 -0700 Subject: [PATCH 03/16] fix(sandbox): validate required opts in pdf drawImage to prevent silent origin placement --- apps/sim/sandbox-tasks/pdf-generate.ts | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/apps/sim/sandbox-tasks/pdf-generate.ts b/apps/sim/sandbox-tasks/pdf-generate.ts index 7152131c830..d38ec2a6bb4 100644 --- a/apps/sim/sandbox-tasks/pdf-generate.ts +++ b/apps/sim/sandbox-tasks/pdf-generate.ts @@ -75,9 +75,12 @@ export const pdfGenerateTask = defineSandboxTask({ * Example: await drawImage(page, 'abc123', { x: 50, y: 700, width: 200, height: 100 }); */ globalThis.drawImage = async function drawImage(page, fileId, opts) { + if (!opts || opts.x == null || opts.y == null || opts.width == null || opts.height == null) { + throw new Error('drawImage: opts must include x, y, width, and height (in points)'); + } const dataUri = await globalThis.getFileBase64(fileId); const img = await globalThis.embedImage(dataUri); - page.drawImage(img, opts || {}); + page.drawImage(img, opts); }; `, finalize: ` From 86c6ba88b1b27a105f132a884ce3849bc532769e Mon Sep 17 00:00:00 2001 From: waleed Date: Thu, 7 May 2026 18:53:16 -0700 Subject: [PATCH 04/16] fix(sandbox): throw on malformed data URI in docx addImage --- apps/sim/sandbox-tasks/docx-generate.ts | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/apps/sim/sandbox-tasks/docx-generate.ts b/apps/sim/sandbox-tasks/docx-generate.ts index 3572c815a02..f6716c81e23 100644 --- a/apps/sim/sandbox-tasks/docx-generate.ts +++ b/apps/sim/sandbox-tasks/docx-generate.ts @@ -55,8 +55,9 @@ export const docxGenerateTask = defineSandboxTask({ globalThis.addImage = async function addImage(fileId, opts) { const dataUri = await globalThis.getFileBase64(fileId); const comma = dataUri.indexOf(','); - const header = comma !== -1 ? dataUri.slice(0, comma) : ''; - const base64 = comma !== -1 ? dataUri.slice(comma + 1) : dataUri; + if (comma === -1) throw new Error('addImage: invalid data URI (no comma separator)'); + const header = dataUri.slice(0, comma); + const base64 = dataUri.slice(comma + 1); const mime = header.split(';')[0].replace('data:', '') || 'image/png'; const extMap = { 'image/png': 'png', 'image/jpeg': 'jpg', 'image/jpg': 'jpg', 'image/gif': 'gif', 'image/bmp': 'bmp', 'image/svg+xml': 'svg' }; const ext = extMap[mime]; From 918954a51f044f5303919cec627e29b22158d91d Mon Sep 17 00:00:00 2001 From: waleed Date: Thu, 7 May 2026 18:57:30 -0700 Subject: [PATCH 05/16] fix(sandbox): prevent opts from clobbering computed ImageRun data/type/transformation --- apps/sim/sandbox-tasks/docx-generate.ts | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/apps/sim/sandbox-tasks/docx-generate.ts b/apps/sim/sandbox-tasks/docx-generate.ts index f6716c81e23..dfe446d7dd3 100644 --- a/apps/sim/sandbox-tasks/docx-generate.ts +++ b/apps/sim/sandbox-tasks/docx-generate.ts @@ -63,11 +63,12 @@ export const docxGenerateTask = defineSandboxTask({ const ext = extMap[mime]; if (!ext) throw new Error('addImage: unsupported image type "' + mime + '". Use PNG, JPEG, GIF, BMP, or SVG.'); if (!globalThis.Buffer) throw new Error('addImage: Buffer polyfill missing — ensure docx bundle is loaded'); - return new globalThis.docx.ImageRun(Object.assign({ + const { width, height, type: _t, data: _d, transformation: userTransform, ...passThrough } = opts || {}; + return new globalThis.docx.ImageRun(Object.assign(passThrough, { data: globalThis.Buffer.from(base64, 'base64'), - transformation: { width: opts?.width ?? 200, height: opts?.height ?? 200 }, type: ext, - }, opts || {})); + transformation: Object.assign({ width: width ?? 200, height: height ?? 200 }, userTransform || {}), + })); }; `, // JSZip's browser build doesn't support nodebuffer output, so we go through From 449ed273e947c968f388973f936e7ab646f0eec3 Mon Sep 17 00:00:00 2001 From: waleed Date: Thu, 7 May 2026 19:05:52 -0700 Subject: [PATCH 06/16] fix(sandbox): prevent opts from clobbering fetched data in pptx addImage --- apps/sim/sandbox-tasks/pptx-generate.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/sim/sandbox-tasks/pptx-generate.ts b/apps/sim/sandbox-tasks/pptx-generate.ts index 01d9b1dd403..9a5a7dffb89 100644 --- a/apps/sim/sandbox-tasks/pptx-generate.ts +++ b/apps/sim/sandbox-tasks/pptx-generate.ts @@ -53,7 +53,7 @@ export const pptxGenerateTask = defineSandboxTask({ */ globalThis.addImage = async function addImage(slide, fileId, opts) { const data = await globalThis.getFileBase64(fileId); - slide.addImage(Object.assign({ data }, opts || {})); + slide.addImage(Object.assign({}, opts || {}, { data })); }; `, finalize: ` From 20c23416ff9d546d37af52a6cab10d1a6a8a54cf Mon Sep 17 00:00:00 2001 From: waleed Date: Thu, 7 May 2026 19:16:18 -0700 Subject: [PATCH 07/16] fix(sandbox): validate required opts in pptx addImage --- apps/sim/sandbox-tasks/pptx-generate.ts | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/apps/sim/sandbox-tasks/pptx-generate.ts b/apps/sim/sandbox-tasks/pptx-generate.ts index 9a5a7dffb89..986954da8d6 100644 --- a/apps/sim/sandbox-tasks/pptx-generate.ts +++ b/apps/sim/sandbox-tasks/pptx-generate.ts @@ -52,8 +52,11 @@ export const pptxGenerateTask = defineSandboxTask({ * Example: await addImage(slide, 'abc123', { x: 0.5, y: 1, w: 2, h: 1 }); */ globalThis.addImage = async function addImage(slide, fileId, opts) { + if (!opts || opts.x == null || opts.y == null || opts.w == null || opts.h == null) { + throw new Error('addImage: opts must include x, y, w, and h (in inches)'); + } const data = await globalThis.getFileBase64(fileId); - slide.addImage(Object.assign({}, opts || {}, { data })); + slide.addImage(Object.assign({}, opts, { data })); }; `, finalize: ` From 925d2baa562f3b502431f5561557e517e2f100d9 Mon Sep 17 00:00:00 2001 From: waleed Date: Thu, 7 May 2026 19:26:49 -0700 Subject: [PATCH 08/16] fix(sandbox): remove silent image/png fallback in docx addImage MIME parsing --- apps/sim/sandbox-tasks/docx-generate.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/sim/sandbox-tasks/docx-generate.ts b/apps/sim/sandbox-tasks/docx-generate.ts index dfe446d7dd3..d689a8f9a02 100644 --- a/apps/sim/sandbox-tasks/docx-generate.ts +++ b/apps/sim/sandbox-tasks/docx-generate.ts @@ -58,7 +58,7 @@ export const docxGenerateTask = defineSandboxTask({ if (comma === -1) throw new Error('addImage: invalid data URI (no comma separator)'); const header = dataUri.slice(0, comma); const base64 = dataUri.slice(comma + 1); - const mime = header.split(';')[0].replace('data:', '') || 'image/png'; + const mime = header.split(';')[0].replace('data:', ''); const extMap = { 'image/png': 'png', 'image/jpeg': 'jpg', 'image/jpg': 'jpg', 'image/gif': 'gif', 'image/bmp': 'bmp', 'image/svg+xml': 'svg' }; const ext = extMap[mime]; if (!ext) throw new Error('addImage: unsupported image type "' + mime + '". Use PNG, JPEG, GIF, BMP, or SVG.'); From b7db38a5c700010de87aa8c2729fc2f7b8f05b0e Mon Sep 17 00:00:00 2001 From: waleed Date: Thu, 7 May 2026 19:46:35 -0700 Subject: [PATCH 09/16] fix(sandbox): consistency and cleanup pass on doc-gen tasks and worker - DOCX addImage: upfront width/height validation (matches PDF/PPTX pattern) - PDF embedImage: remove dead Buffer ternary; drop redundant size guard already enforced in getFileBase64 - isolated-vm-worker: add friendly MemoryLimitError branch in both execute paths so OOM produces a clear message instead of a raw V8 error --- apps/sim/lib/execution/isolated-vm-worker.cjs | 32 +++++++++++++++++++ apps/sim/sandbox-tasks/docx-generate.ts | 7 ++-- apps/sim/sandbox-tasks/pdf-generate.ts | 9 ++---- 3 files changed, 39 insertions(+), 9 deletions(-) diff --git a/apps/sim/lib/execution/isolated-vm-worker.cjs b/apps/sim/lib/execution/isolated-vm-worker.cjs index 73d244de75f..aa23858e151 100644 --- a/apps/sim/lib/execution/isolated-vm-worker.cjs +++ b/apps/sim/lib/execution/isolated-vm-worker.cjs @@ -398,6 +398,21 @@ async function executeCode(request, executionId) { } } + if ( + err.message.includes('Array buffer allocation failed') || + err.message.includes('memory limit') + ) { + return { + result: null, + stdout, + error: { + message: + 'Execution exceeded memory limit (256 MB). Reduce image sizes or split the work into smaller batches.', + name: 'MemoryLimitError', + }, + } + } + return { result: null, stdout, @@ -937,6 +952,23 @@ async function executeTask(request, executionId) { timings, } } + + if ( + err.message?.includes('Array buffer allocation failed') || + err.message?.includes('memory limit') + ) { + return { + result: null, + stdout, + error: { + message: + 'Execution exceeded memory limit (256 MB). Reduce image sizes or split the work into smaller batches.', + name: 'MemoryLimitError', + }, + timings, + } + } + return { result: null, stdout, diff --git a/apps/sim/sandbox-tasks/docx-generate.ts b/apps/sim/sandbox-tasks/docx-generate.ts index d689a8f9a02..214b9f8f41f 100644 --- a/apps/sim/sandbox-tasks/docx-generate.ts +++ b/apps/sim/sandbox-tasks/docx-generate.ts @@ -53,6 +53,9 @@ export const docxGenerateTask = defineSandboxTask({ * new docx.Paragraph({ children: [await addImage('abc123', { width: 200, height: 100 })] }) */ globalThis.addImage = async function addImage(fileId, opts) { + if (!opts || opts.width == null || opts.height == null) { + throw new Error('addImage: opts must include width and height (in pixels)'); + } const dataUri = await globalThis.getFileBase64(fileId); const comma = dataUri.indexOf(','); if (comma === -1) throw new Error('addImage: invalid data URI (no comma separator)'); @@ -63,11 +66,11 @@ export const docxGenerateTask = defineSandboxTask({ const ext = extMap[mime]; if (!ext) throw new Error('addImage: unsupported image type "' + mime + '". Use PNG, JPEG, GIF, BMP, or SVG.'); if (!globalThis.Buffer) throw new Error('addImage: Buffer polyfill missing — ensure docx bundle is loaded'); - const { width, height, type: _t, data: _d, transformation: userTransform, ...passThrough } = opts || {}; + const { width, height, type: _t, data: _d, transformation: userTransform, ...passThrough } = opts; return new globalThis.docx.ImageRun(Object.assign(passThrough, { data: globalThis.Buffer.from(base64, 'base64'), type: ext, - transformation: Object.assign({ width: width ?? 200, height: height ?? 200 }, userTransform || {}), + transformation: Object.assign({ width, height }, userTransform || {}), })); }; `, diff --git a/apps/sim/sandbox-tasks/pdf-generate.ts b/apps/sim/sandbox-tasks/pdf-generate.ts index d38ec2a6bb4..a7f23e710f1 100644 --- a/apps/sim/sandbox-tasks/pdf-generate.ts +++ b/apps/sim/sandbox-tasks/pdf-generate.ts @@ -32,17 +32,12 @@ export const pdfGenerateTask = defineSandboxTask({ if (!dataUri || typeof dataUri !== 'string') { throw new Error('embedImage: dataUri must be a non-empty string'); } - if (dataUri.length > _MAX_IMG_B64) { - throw new Error( - 'embedImage: image exceeds the 6 MB embed limit (~8 MB base64). Use a smaller/compressed image.' - ); - } const comma = dataUri.indexOf(','); if (comma === -1) throw new Error('embedImage: invalid data URI (no comma separator)'); const header = dataUri.slice(0, comma); const base64 = dataUri.slice(comma + 1); - const binary = globalThis.Buffer ? globalThis.Buffer.from(base64, 'base64') : null; - if (!binary) throw new Error('Buffer polyfill missing'); + if (!globalThis.Buffer) throw new Error('embedImage: Buffer polyfill missing'); + const binary = globalThis.Buffer.from(base64, 'base64'); const mime = header.split(';')[0].split(':')[1] || ''; // image/jpg is non-standard but tolerated; the canonical MIME is image/jpeg if (mime === 'image/png') return globalThis.pdf.embedPng(binary); From 9e9b5fc02f55f5cb1d3da07d2ddf0d457c733300 Mon Sep 17 00:00:00 2001 From: waleed Date: Thu, 7 May 2026 20:09:53 -0700 Subject: [PATCH 10/16] fix(sandbox): move OOM check before isDisposed guard so MemoryLimitError is reachable Isolate OOM auto-disposes the isolate before throwing, meaning isDisposed is true on the way into the catch block. The previous ordering caused OOM to surface as AbortError ('Execution cancelled') instead of MemoryLimitError. Move the message-based OOM check above the isDisposed check in both executeCode and executeTask so the friendly message is actually shown. --- apps/sim/lib/execution/isolated-vm-worker.cjs | 68 ++++++++++--------- 1 file changed, 37 insertions(+), 31 deletions(-) diff --git a/apps/sim/lib/execution/isolated-vm-worker.cjs b/apps/sim/lib/execution/isolated-vm-worker.cjs index aa23858e151..5f43c731402 100644 --- a/apps/sim/lib/execution/isolated-vm-worker.cjs +++ b/apps/sim/lib/execution/isolated-vm-worker.cjs @@ -376,6 +376,24 @@ async function executeCode(request, executionId) { stack: err.stack, } + // OOM check must run before the isDisposed guard: isolate OOM auto-disposes + // the isolate (isDisposed becomes true), so the cancel branch would fire first + // and mask the real cause. Message-based detection disambiguates the two. + if ( + err.message.includes('Array buffer allocation failed') || + err.message.includes('memory limit') + ) { + return { + result: null, + stdout, + error: { + message: + 'Execution exceeded memory limit (256 MB). Reduce image sizes or split the work into smaller batches.', + name: 'MemoryLimitError', + }, + } + } + // Host sent a `cancel` IPC which called `isolate.dispose()`. Any // in-flight compileScript/run then throws; detect that authoritatively // via the isolate flag rather than fuzzy-matching the error message. @@ -398,21 +416,6 @@ async function executeCode(request, executionId) { } } - if ( - err.message.includes('Array buffer allocation failed') || - err.message.includes('memory limit') - ) { - return { - result: null, - stdout, - error: { - message: - 'Execution exceeded memory limit (256 MB). Reduce image sizes or split the work into smaller batches.', - name: 'MemoryLimitError', - }, - } - } - return { result: null, stdout, @@ -930,6 +933,25 @@ async function executeTask(request, executionId) { timings.total = Date.now() - tStart if (err instanceof Error) { const errorInfo = { message: err.message, name: err.name, stack: err.stack } + // OOM check must run before the isDisposed guard: isolate OOM auto-disposes + // the isolate (isDisposed becomes true), so the cancel branch would fire first + // and mask the real cause. Message-based detection disambiguates the two. + if ( + err.message?.includes('Array buffer allocation failed') || + err.message?.includes('memory limit') + ) { + return { + result: null, + stdout, + error: { + message: + 'Execution exceeded memory limit (256 MB). Reduce image sizes or split the work into smaller batches.', + name: 'MemoryLimitError', + }, + timings, + } + } + // Cancellation: host sent `cancel` IPC which called `isolate.dispose()`. // Detect authoritatively via the isolate flag so we don't depend on // isolated-vm's internal error wording. @@ -953,22 +975,6 @@ async function executeTask(request, executionId) { } } - if ( - err.message?.includes('Array buffer allocation failed') || - err.message?.includes('memory limit') - ) { - return { - result: null, - stdout, - error: { - message: - 'Execution exceeded memory limit (256 MB). Reduce image sizes or split the work into smaller batches.', - name: 'MemoryLimitError', - }, - timings, - } - } - return { result: null, stdout, From 9d71d99b6e6f0e9d3863ae1ecb2c0313429a6044 Mon Sep 17 00:00:00 2001 From: waleed Date: Fri, 8 May 2026 14:55:35 -0700 Subject: [PATCH 11/16] fix(files): add preview accuracy disclaimer to pptx viewer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PptxViewJS renders slides as canvas->JPEG which has known layout fidelity issues with complex overlapping shapes. The downloaded binary is always correct — add a note so users know to download for exact rendering. --- .../files/components/file-viewer/pptx-preview.tsx | 3 +++ 1 file changed, 3 insertions(+) diff --git a/apps/sim/app/workspace/[workspaceId]/files/components/file-viewer/pptx-preview.tsx b/apps/sim/app/workspace/[workspaceId]/files/components/file-viewer/pptx-preview.tsx index d102bf94fa3..b5b4c05a748 100644 --- a/apps/sim/app/workspace/[workspaceId]/files/components/file-viewer/pptx-preview.tsx +++ b/apps/sim/app/workspace/[workspaceId]/files/components/file-viewer/pptx-preview.tsx @@ -258,6 +258,9 @@ export const PptxPreview = memo(function PptxPreview({ return (
+

+ Preview is approximate — download to view in PowerPoint or Keynote for exact rendering. +

{slides.map((src, i) => ( Date: Fri, 8 May 2026 15:15:42 -0700 Subject: [PATCH 12/16] revert(files): remove approximate preview disclaimer from pptx viewer --- .../files/components/file-viewer/pptx-preview.tsx | 3 --- 1 file changed, 3 deletions(-) diff --git a/apps/sim/app/workspace/[workspaceId]/files/components/file-viewer/pptx-preview.tsx b/apps/sim/app/workspace/[workspaceId]/files/components/file-viewer/pptx-preview.tsx index b5b4c05a748..d102bf94fa3 100644 --- a/apps/sim/app/workspace/[workspaceId]/files/components/file-viewer/pptx-preview.tsx +++ b/apps/sim/app/workspace/[workspaceId]/files/components/file-viewer/pptx-preview.tsx @@ -258,9 +258,6 @@ export const PptxPreview = memo(function PptxPreview({ return (
-

- Preview is approximate — download to view in PowerPoint or Keynote for exact rendering. -

{slides.map((src, i) => ( Date: Fri, 8 May 2026 15:40:06 -0700 Subject: [PATCH 13/16] =?UTF-8?q?improvement(style):=20expand=20style=20ex?= =?UTF-8?q?traction=20=E2=80=94=20DOCX=20inheritance/defaults/BodyText,=20?= =?UTF-8?q?PDF=20support?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../[id]/files/[fileId]/style/route.ts | 14 +- apps/sim/lib/copilot/vfs/document-style.ts | 287 +++++++++++++++--- apps/sim/lib/copilot/vfs/workspace-vfs.ts | 4 +- 3 files changed, 255 insertions(+), 50 deletions(-) diff --git a/apps/sim/app/api/workspaces/[id]/files/[fileId]/style/route.ts b/apps/sim/app/api/workspaces/[id]/files/[fileId]/style/route.ts index c30d0e9723f..47932c9f4e7 100644 --- a/apps/sim/app/api/workspaces/[id]/files/[fileId]/style/route.ts +++ b/apps/sim/app/api/workspaces/[id]/files/[fileId]/style/route.ts @@ -16,9 +16,9 @@ const logger = createLogger('WorkspaceFileStyleAPI') /** * GET /api/workspaces/[id]/files/[fileId]/style - * Extract a compact JSON style summary from an uploaded .docx or .pptx file. - * Uses OOXML theme XML to return theme colors, font pair, and named styles. - * Only works on binary OOXML files (ZIP format) — not on JS source files. + * Extract a compact JSON style summary from an uploaded .docx, .pptx, or .pdf file. + * OOXML files return theme colors, font pair, and named styles. + * PDF files return page dimensions and embedded font names. */ export const GET = withRouteHandler( async (request: NextRequest, context: { params: Promise<{ id: string; fileId: string }> }) => { @@ -42,13 +42,13 @@ export const GET = withRouteHandler( } const rawExt = fileRecord.name.split('.').pop()?.toLowerCase() - if (rawExt !== 'docx' && rawExt !== 'pptx') { + if (rawExt !== 'docx' && rawExt !== 'pptx' && rawExt !== 'pdf') { return NextResponse.json( - { error: 'Style extraction only supports .docx and .pptx files' }, + { error: 'Style extraction supports .docx, .pptx, and .pdf files' }, { status: 422 } ) } - const ext: 'docx' | 'pptx' = rawExt + const ext: 'docx' | 'pptx' | 'pdf' = rawExt let buffer: Buffer try { @@ -75,7 +75,7 @@ export const GET = withRouteHandler( logger.info('Extracted style summary via API', { fileId, format: ext, - themeName: summary.theme.name, + themeName: summary.theme?.name, }) return NextResponse.json(summary, { diff --git a/apps/sim/lib/copilot/vfs/document-style.ts b/apps/sim/lib/copilot/vfs/document-style.ts index 3c1ebac6c57..018c30846b9 100644 --- a/apps/sim/lib/copilot/vfs/document-style.ts +++ b/apps/sim/lib/copilot/vfs/document-style.ts @@ -22,12 +22,14 @@ interface ThemeColors { } export interface DocumentStyleSummary { - format: 'docx' | 'pptx' - theme: { + format: 'docx' | 'pptx' | 'pdf' + /** OOXML theme — present for pptx; present for docx when theme1.xml exists; absent for pdf */ + theme?: { name: string colors: Partial fonts: { major: string; minor: string } } + /** Named paragraph/character styles — docx only */ styles?: Array<{ id: string name: string @@ -37,6 +39,19 @@ export interface DocumentStyleSummary { color?: string font?: string }> + /** Document-wide default run properties (body text baseline) — docx only */ + defaults?: { + fontSize?: number + font?: string + } + /** Page dimensions in points — pdf only */ + pageSize?: { + widthPt: number + heightPt: number + preset: 'A4' | 'letter' | 'custom' + } + /** Embedded font names extracted from page resource dictionaries — pdf only */ + fonts?: string[] } function attr(xml: string, name: string): string { @@ -69,7 +84,7 @@ function parseFontScheme(xml: string): { major: string; minor: string } { return { major: attr(major, 'typeface') || '', minor: attr(minor, 'typeface') || '' } } -function parseThemeXml(xml: string): DocumentStyleSummary['theme'] { +function parseThemeXml(xml: string): NonNullable { const clrSchemeMatch = /]*name="([^"]*)"/.exec(xml) const slots: Array = [ 'dk1', @@ -93,52 +108,232 @@ function parseThemeXml(xml: string): DocumentStyleSummary['theme'] { return { name: clrSchemeMatch?.[1] ?? '', colors, fonts: parseFontScheme(xml) } } -function parseDocxStyles(xml: string): DocumentStyleSummary['styles'] { - const targetIds = new Set([ - 'Normal', - 'DefaultParagraphFont', - 'Heading1', - 'Heading2', - 'Heading3', - 'Title', - 'Subtitle', - ]) - const results: DocumentStyleSummary['styles'] = [] - const blocks = xml.split(' + defaults?: DocumentStyleSummary['defaults'] +} { + // Extract document-default run properties (the baseline for body text) + const defaults: DocumentStyleSummary['defaults'] = {} + const docDefaultsBlock = between(xml, '', '') + if (docDefaultsBlock) { + const rPrBlock = between(docDefaultsBlock, '', '') + if (rPrBlock) { + const szMatch = /]*)>/.exec(rPrBlock) + if (fontAttrMatch) { + const { font } = parseFontAttrs(fontAttrMatch[1], themeFonts) + if (font) defaults.font = font + } + } + } + + // Build a full style map for basedOn inheritance resolution + const styleMap = new Map() + for (const block of xml.split('/.test(block) && !/]*w:ascii="([^"]*)"/.exec(block) - const font = fontMatch?.[1] - results.push({ - id: styleId, - name, - type: styleType, - ...(fontSize !== undefined && { fontSize }), - ...(bold && { bold }), - ...(color && { color }), + const fontAttrMatch = /]*)>/.exec(block) + const { font, themeFont } = fontAttrMatch ? parseFontAttrs(fontAttrMatch[1], themeFonts) : {} + + styleMap.set(id, { + id, + name: nameMatch?.[1] ?? id, + type, + ...(basedOnMatch && { basedOn: basedOnMatch[1] }), + ...(szMatch && { fontSize: Math.round(Number.parseInt(szMatch[1]) / 2) }), + ...(//.test(block) && !/()): StyleRaw | undefined { + if (visited.has(id)) return undefined + visited.add(id) + const s = styleMap.get(id) + if (!s) return undefined + if (!s.basedOn) return s + const parent = resolveInheritance(s.basedOn, visited) + if (!parent) return s + // Own properties override parent; undefined falls through to parent + return { + ...parent, + ...s, + fontSize: s.fontSize ?? parent.fontSize, + bold: s.bold ?? parent.bold, + color: s.color ?? parent.color, + font: s.font ?? parent.font, + themeFont: s.themeFont ?? parent.themeFont, + } + } + + // Target styles: fixed set + all Heading* styles found in the document + const targetIds: string[] = [ + 'Normal', + 'DefaultParagraphFont', + 'BodyText', + 'Body Text', + 'Title', + 'Subtitle', + ] + for (const id of styleMap.keys()) { + if (id.startsWith('Heading') && !targetIds.includes(id)) targetIds.push(id) + } + + const styles: NonNullable = [] + const seen = new Set() + for (const id of targetIds) { + if (seen.has(id)) continue + seen.add(id) + const resolved = resolveInheritance(id) + if (!resolved) continue + + // Deferred theme font resolution (only reached when themeFonts was unavailable during parse) + let resolvedFont = resolved.font + if (!resolvedFont && resolved.themeFont && themeFonts) { + resolvedFont = resolveThemeFont(resolved.themeFont, themeFonts) + } + + styles.push({ + id: resolved.id, + name: resolved.name, + type: resolved.type, + ...(resolved.fontSize !== undefined && { fontSize: resolved.fontSize }), + ...(resolved.bold && { bold: resolved.bold }), + ...(resolved.color && { color: resolved.color }), + ...(resolvedFont && { font: resolvedFont }), + }) + } + + return { + styles, + ...(Object.keys(defaults).length > 0 && { defaults }), + } +} + +async function extractPdfStyle(buffer: Buffer): Promise { + try { + const { PDFDocument, PDFName, PDFDict } = await import('pdf-lib') + + let doc: Awaited> + try { + doc = await PDFDocument.load(buffer, { updateMetadata: false }) + } catch { + // Encrypted or corrupt + return null + } + + const pages = doc.getPages() + if (pages.length === 0) return null + + // Page dimensions (first page is canonical for preset detection) + const { width: widthPt, height: heightPt } = pages[0].getSize() + let preset: 'A4' | 'letter' | 'custom' = 'custom' + if (Math.abs(widthPt - 595.28) < 5 && Math.abs(heightPt - 841.89) < 5) preset = 'A4' + else if (Math.abs(widthPt - 612) < 5 && Math.abs(heightPt - 792) < 5) preset = 'letter' + + // Font names from page resource dictionaries (first 10 pages to bound cost) + const fontNamesSet = new Set() + const pagesToScan = Math.min(pages.length, 10) + for (let i = 0; i < pagesToScan; i++) { + try { + const resourcesRef = pages[i].node.get(PDFName.of('Resources')) + if (!resourcesRef) continue + const resources = doc.context.lookup(resourcesRef, PDFDict) + if (!resources) continue + const fontDictRef = resources.get(PDFName.of('Font')) + if (!fontDictRef) continue + const fontDict = doc.context.lookup(fontDictRef, PDFDict) + if (!fontDict) continue + for (const key of fontDict.keys()) { + try { + const fontRef = fontDict.get(key) + if (!fontRef) continue + const fontObj = doc.context.lookup(fontRef, PDFDict) + if (!fontObj) continue + const baseFontRef = fontObj.get(PDFName.of('BaseFont')) + if (!baseFontRef) continue + // Format: "/ABCDEF+FontName" (subset) or "/FontName" (full embed) + const raw = baseFontRef + .toString() + .replace(/^\//, '') + .replace(/^[A-Z]{6}\+/, '') + if (raw) fontNamesSet.add(raw) + } catch {} + } + } catch {} + } + + return { + format: 'pdf', + pageSize: { + widthPt: Math.round(widthPt), + heightPt: Math.round(heightPt), + preset, + }, + fonts: [...fontNamesSet], + } + } catch (err) { + logger.warn('Failed to extract PDF style', { error: toError(err).message }) + return null + } } /** - * Extract a compact style summary from a binary OOXML (.docx or .pptx) buffer. - * Returns null if the buffer is not a valid ZIP/OOXML file. + * Extract a compact style summary from a binary document buffer. + * Supports .docx and .pptx (OOXML/ZIP) and .pdf. + * Returns null if the buffer cannot be parsed or yields no useful data. */ export async function extractDocumentStyle( buffer: Buffer, - ext: 'docx' | 'pptx' + ext: 'docx' | 'pptx' | 'pdf' ): Promise { + if (ext === 'pdf') { + return extractPdfStyle(buffer) + } + if (buffer.length < 4) return null for (let i = 0; i < 4; i++) { if (buffer[i] !== ZIP_MAGIC[i]) return null @@ -150,17 +345,27 @@ export async function extractDocumentStyle( const themePath = ext === 'docx' ? 'word/theme/theme1.xml' : 'ppt/theme/theme1.xml' const themeFile = zip.file(themePath) - if (!themeFile) return null - const theme = parseThemeXml(await themeFile.async('string')) - const summary: DocumentStyleSummary = { format: ext, theme } + let theme: DocumentStyleSummary['theme'] + if (themeFile) { + theme = parseThemeXml(await themeFile.async('string')) + } else if (ext === 'pptx') { + // PPTX without a theme is malformed — nothing useful to return + return null + } + // DOCX without a theme is valid (e.g. LibreOffice-generated); continue with styles only + + const summary: DocumentStyleSummary = { format: ext, ...(theme && { theme }) } if (ext === 'docx') { const stylesFile = zip.file('word/styles.xml') if (stylesFile) { - const styles = parseDocxStyles(await stylesFile.async('string')) - if (styles && styles.length > 0) summary.styles = styles + const { styles, defaults } = parseDocxStyles(await stylesFile.async('string'), theme?.fonts) + if (styles.length > 0) summary.styles = styles + if (defaults) summary.defaults = defaults } + // If there's neither a theme nor any styles, there's nothing useful to return + if (!theme && !summary.styles?.length) return null } return summary diff --git a/apps/sim/lib/copilot/vfs/workspace-vfs.ts b/apps/sim/lib/copilot/vfs/workspace-vfs.ts index 5ab975876c7..c23d66889c2 100644 --- a/apps/sim/lib/copilot/vfs/workspace-vfs.ts +++ b/apps/sim/lib/copilot/vfs/workspace-vfs.ts @@ -518,8 +518,8 @@ export class WorkspaceVFS { const record = await getWorkspaceFile(this._workspaceId, fileId) if (!record) return null const rawExt = record.name.split('.').pop()?.toLowerCase() - if (rawExt !== 'docx' && rawExt !== 'pptx') return null - const ext: 'docx' | 'pptx' = rawExt + if (rawExt !== 'docx' && rawExt !== 'pptx' && rawExt !== 'pdf') return null + const ext: 'docx' | 'pptx' | 'pdf' = rawExt const buffer = await fetchWorkspaceFileBuffer(record) const summary = await extractDocumentStyle(buffer, ext) if (!summary) return null From ced273fd3bfa2f62053bbcc6451584b68910f06c Mon Sep 17 00:00:00 2001 From: waleed Date: Fri, 8 May 2026 15:43:40 -0700 Subject: [PATCH 14/16] improvement(style): remove noise from style extraction output --- .../[id]/files/[fileId]/style/route.ts | 6 +- apps/sim/lib/copilot/vfs/document-style.ts | 61 ++++++++++--------- 2 files changed, 34 insertions(+), 33 deletions(-) diff --git a/apps/sim/app/api/workspaces/[id]/files/[fileId]/style/route.ts b/apps/sim/app/api/workspaces/[id]/files/[fileId]/style/route.ts index 47932c9f4e7..5ba3a5d9070 100644 --- a/apps/sim/app/api/workspaces/[id]/files/[fileId]/style/route.ts +++ b/apps/sim/app/api/workspaces/[id]/files/[fileId]/style/route.ts @@ -72,11 +72,7 @@ export const GET = withRouteHandler( ) } - logger.info('Extracted style summary via API', { - fileId, - format: ext, - themeName: summary.theme?.name, - }) + logger.info('Extracted style summary via API', { fileId, format: ext }) return NextResponse.json(summary, { headers: { 'Cache-Control': 'private, max-age=300' }, diff --git a/apps/sim/lib/copilot/vfs/document-style.ts b/apps/sim/lib/copilot/vfs/document-style.ts index 018c30846b9..08609ffac55 100644 --- a/apps/sim/lib/copilot/vfs/document-style.ts +++ b/apps/sim/lib/copilot/vfs/document-style.ts @@ -17,15 +17,12 @@ interface ThemeColors { accent4: string accent5: string accent6: string - hlink: string - folHlink: string } export interface DocumentStyleSummary { format: 'docx' | 'pptx' | 'pdf' /** OOXML theme — present for pptx; present for docx when theme1.xml exists; absent for pdf */ theme?: { - name: string colors: Partial fonts: { major: string; minor: string } } @@ -44,11 +41,11 @@ export interface DocumentStyleSummary { fontSize?: number font?: string } - /** Page dimensions in points — pdf only */ + /** Page dimensions — pdf only. widthPt/heightPt present only when preset is 'custom' */ pageSize?: { - widthPt: number - heightPt: number preset: 'A4' | 'letter' | 'custom' + widthPt?: number + heightPt?: number } /** Embedded font names extracted from page resource dictionaries — pdf only */ fonts?: string[] @@ -85,7 +82,6 @@ function parseFontScheme(xml: string): { major: string; minor: string } { } function parseThemeXml(xml: string): NonNullable { - const clrSchemeMatch = /]*name="([^"]*)"/.exec(xml) const slots: Array = [ 'dk1', 'lt1', @@ -97,15 +93,13 @@ function parseThemeXml(xml: string): NonNullable 'accent4', 'accent5', 'accent6', - 'hlink', - 'folHlink', ] const colors: Partial = {} for (const slot of slots) { const hex = parseColorSlot(xml, slot) if (hex) colors[slot] = hex } - return { name: clrSchemeMatch?.[1] ?? '', colors, fonts: parseFontScheme(xml) } + return { colors, fonts: parseFontScheme(xml) } } type StyleRaw = { @@ -209,15 +203,8 @@ function parseDocxStyles( } } - // Target styles: fixed set + all Heading* styles found in the document - const targetIds: string[] = [ - 'Normal', - 'DefaultParagraphFont', - 'BodyText', - 'Body Text', - 'Title', - 'Subtitle', - ] + // Target paragraph styles (character styles excluded — generation works at paragraph level) + const targetIds: string[] = ['Normal', 'BodyText', 'Body Text', 'Title', 'Subtitle'] for (const id of styleMap.keys()) { if (id.startsWith('Heading') && !targetIds.includes(id)) targetIds.push(id) } @@ -228,7 +215,7 @@ function parseDocxStyles( if (seen.has(id)) continue seen.add(id) const resolved = resolveInheritance(id) - if (!resolved) continue + if (!resolved || resolved.type !== 'paragraph') continue // Deferred theme font resolution (only reached when themeFonts was unavailable during parse) let resolvedFont = resolved.font @@ -275,7 +262,7 @@ async function extractPdfStyle(buffer: Buffer): Promise() + const rawFontNames = new Set() const pagesToScan = Math.min(pages.length, 10) for (let i = 0; i < pagesToScan; i++) { try { @@ -300,20 +287,38 @@ async function extractPdfStyle(buffer: Buffer): Promise + name + .replace( + /[-]?(BoldMT|BoldItalic|Regular|Bold|Italic|Light|Medium|SemiBold|ExtraBold|Black|Oblique|Condensed|Expanded|MT)$/i, + '' + ) + .trim() + ) + ), + ].filter(Boolean) + + // Omit exact dimensions when the preset already encodes the page size + const pageSize: DocumentStyleSummary['pageSize'] = + preset === 'custom' + ? { widthPt: Math.round(widthPt), heightPt: Math.round(heightPt), preset } + : { preset } + return { format: 'pdf', - pageSize: { - widthPt: Math.round(widthPt), - heightPt: Math.round(heightPt), - preset, - }, - fonts: [...fontNamesSet], + pageSize, + ...(familyNames.length > 0 && { fonts: familyNames }), } } catch (err) { logger.warn('Failed to extract PDF style', { error: toError(err).message }) From 3310321aef5c3321007c89a1ff0908d80e6567fd Mon Sep 17 00:00:00 2001 From: waleed Date: Fri, 8 May 2026 16:02:22 -0700 Subject: [PATCH 15/16] improvement(style-api): fix auth order, add size guard, extract pptx slide meta --- .../[id]/files/[fileId]/style/route.ts | 19 ++++-- apps/sim/lib/api/contracts/workspace-files.ts | 21 ++++++- apps/sim/lib/copilot/vfs/document-style.ts | 59 +++++++++++++++++++ 3 files changed, 91 insertions(+), 8 deletions(-) diff --git a/apps/sim/app/api/workspaces/[id]/files/[fileId]/style/route.ts b/apps/sim/app/api/workspaces/[id]/files/[fileId]/style/route.ts index 5ba3a5d9070..cc68e4dc348 100644 --- a/apps/sim/app/api/workspaces/[id]/files/[fileId]/style/route.ts +++ b/apps/sim/app/api/workspaces/[id]/files/[fileId]/style/route.ts @@ -20,17 +20,19 @@ const logger = createLogger('WorkspaceFileStyleAPI') * OOXML files return theme colors, font pair, and named styles. * PDF files return page dimensions and embedded font names. */ +const MAX_STYLE_FILE_BYTES = 100 * 1024 * 1024 // 100 MB + export const GET = withRouteHandler( async (request: NextRequest, context: { params: Promise<{ id: string; fileId: string }> }) => { - const parsed = await parseRequest(workspaceFileStyleContract, request, context) - if (!parsed.success) return parsed.response - const { id: workspaceId, fileId } = parsed.data.params - const session = await getSession() if (!session?.user?.id) { return NextResponse.json({ error: 'Unauthorized' }, { status: 401 }) } + const parsed = await parseRequest(workspaceFileStyleContract, request, context) + if (!parsed.success) return parsed.response + const { id: workspaceId, fileId } = parsed.data.params + const membership = await verifyWorkspaceMembership(session.user.id, workspaceId) if (!membership) { return NextResponse.json({ error: 'Insufficient permissions' }, { status: 403 }) @@ -50,6 +52,13 @@ export const GET = withRouteHandler( } const ext: 'docx' | 'pptx' | 'pdf' = rawExt + if (fileRecord.size > MAX_STYLE_FILE_BYTES) { + return NextResponse.json( + { error: 'File is too large for style extraction (limit: 100 MB)' }, + { status: 422 } + ) + } + let buffer: Buffer try { buffer = await fetchWorkspaceFileBuffer(fileRecord) @@ -66,7 +75,7 @@ export const GET = withRouteHandler( return NextResponse.json( { error: - 'File is not a compiled binary document — style extraction requires an uploaded or compiled .docx/.pptx file', + 'Could not extract style — file may be encrypted, corrupt, image-only, or contain no parseable style information', }, { status: 422 } ) diff --git a/apps/sim/lib/api/contracts/workspace-files.ts b/apps/sim/lib/api/contracts/workspace-files.ts index 6aae8b2d2df..6f5d1009084 100644 --- a/apps/sim/lib/api/contracts/workspace-files.ts +++ b/apps/sim/lib/api/contracts/workspace-files.ts @@ -107,15 +107,30 @@ export const updateWorkspaceFileContentContract = defineRouteContract({ const documentStyleSummarySchema = z .object({ - format: z.enum(['docx', 'pptx']), + format: z.enum(['docx', 'pptx', 'pdf']), + // OOXML theme — present for pptx, present for docx when theme1.xml exists, absent for pdf theme: z .object({ - name: z.string(), colors: z.record(z.string(), z.string()), fonts: z.object({ major: z.string(), minor: z.string() }), }) - .passthrough(), + .optional(), + // docx only styles: z.array(z.object({}).passthrough()).optional(), + defaults: z.object({ fontSize: z.number().optional(), font: z.string().optional() }).optional(), + // pdf only + pageSize: z + .object({ + preset: z.enum(['A4', 'letter', 'custom']), + widthPt: z.number().optional(), + heightPt: z.number().optional(), + }) + .optional(), + fonts: z.array(z.string()).optional(), + // pptx only + slideCount: z.number().optional(), + aspectRatio: z.enum(['16:9', '4:3', 'custom']).optional(), + background: z.string().optional(), }) .passthrough() diff --git a/apps/sim/lib/copilot/vfs/document-style.ts b/apps/sim/lib/copilot/vfs/document-style.ts index 08609ffac55..e3a699b0cee 100644 --- a/apps/sim/lib/copilot/vfs/document-style.ts +++ b/apps/sim/lib/copilot/vfs/document-style.ts @@ -49,6 +49,12 @@ export interface DocumentStyleSummary { } /** Embedded font names extracted from page resource dictionaries — pdf only */ fonts?: string[] + /** Number of slides — pptx only */ + slideCount?: number + /** Slide aspect ratio — pptx only */ + aspectRatio?: '16:9' | '4:3' | 'custom' + /** Slide master background hex color (no #) — pptx only, absent when background is transparent/image */ + background?: string } function attr(xml: string, name: string): string { @@ -326,6 +332,43 @@ async function extractPdfStyle(buffer: Buffer): Promise', '') + const slideCount = (sldIdLst.match(/]*\bcx="(\d+)"[^>]*\bcy="(\d+)"/.exec(xml) + let aspectRatio: '16:9' | '4:3' | 'custom' = 'custom' + if (sldSzMatch) { + const cx = Number.parseInt(sldSzMatch[1]) + const cy = Number.parseInt(sldSzMatch[2]) + const ratio = cx / cy + // 16:9 ≈ 1.7778 (covers both 9144000×5143500 and 12192000×6858000) + // 4:3 ≈ 1.3333 (9144000×6858000 or 10×7.5 inches) + if (Math.abs(ratio - 16 / 9) < 0.01) aspectRatio = '16:9' + else if (Math.abs(ratio - 4 / 3) < 0.01) aspectRatio = '4:3' + } + + return { slideCount, aspectRatio } +} + +function parseSlideMasterBackground(xml: string): string | undefined { + // Look for a solid fill color in the slide master background + const bgBlock = between(xml, '', '') + if (!bgBlock) return undefined + // solidFill with srgbClr + const srgbMatch = /]*\bval="([A-Fa-f0-9]{6})"/.exec(bgBlock) + if (srgbMatch) return srgbMatch[1].toUpperCase() + // solidFill with sysClr fallback + const sysMatch = /]*\blastClr="([A-Fa-f0-9]{6})"/.exec(bgBlock) + if (sysMatch) return sysMatch[1].toUpperCase() + return undefined +} + /** * Extract a compact style summary from a binary document buffer. * Supports .docx and .pptx (OOXML/ZIP) and .pdf. @@ -373,6 +416,22 @@ export async function extractDocumentStyle( if (!theme && !summary.styles?.length) return null } + if (ext === 'pptx') { + const presFile = zip.file('ppt/presentation.xml') + if (presFile) { + const { slideCount, aspectRatio } = parsePptxPresentation(await presFile.async('string')) + if (slideCount > 0) summary.slideCount = slideCount + summary.aspectRatio = aspectRatio + } + const masterFile = + zip.file('ppt/slideMasters/slideMaster1.xml') ?? + zip.file('ppt/slidemaster/slidemaster1.xml') + if (masterFile) { + const bg = parseSlideMasterBackground(await masterFile.async('string')) + if (bg) summary.background = bg + } + } + return summary } catch (err) { logger.warn('Failed to extract document style from buffer', { error: toError(err).message }) From 8d0ae099f93c718414b1f1fddc1932f7f63f2ee4 Mon Sep 17 00:00:00 2001 From: waleed Date: Fri, 8 May 2026 16:22:11 -0700 Subject: [PATCH 16/16] fix(sandbox): align docx/pptx/pdf task guards, fix bold detection and font suffix stripping --- apps/sim/lib/copilot/vfs/document-style.ts | 32 +++++++++++++--------- apps/sim/lib/copilot/vfs/workspace-vfs.ts | 4 +-- apps/sim/sandbox-tasks/docx-generate.ts | 10 +++++-- apps/sim/sandbox-tasks/pptx-generate.ts | 3 ++ 4 files changed, 32 insertions(+), 17 deletions(-) diff --git a/apps/sim/lib/copilot/vfs/document-style.ts b/apps/sim/lib/copilot/vfs/document-style.ts index e3a699b0cee..eead749a2c9 100644 --- a/apps/sim/lib/copilot/vfs/document-style.ts +++ b/apps/sim/lib/copilot/vfs/document-style.ts @@ -182,7 +182,8 @@ function parseDocxStyles( type, ...(basedOnMatch && { basedOn: basedOnMatch[1] }), ...(szMatch && { fontSize: Math.round(Number.parseInt(szMatch[1]) / 2) }), - ...(//.test(block) && !//.test(block) && + !/]*\bw:val=["'](0|false)["']/.test(block) && { bold: true }), ...(colorMatch && { color: colorMatch[1].toUpperCase() }), ...(font && { font }), ...(themeFont && { themeFont }), @@ -212,7 +213,8 @@ function parseDocxStyles( // Target paragraph styles (character styles excluded — generation works at paragraph level) const targetIds: string[] = ['Normal', 'BodyText', 'Body Text', 'Title', 'Subtitle'] for (const id of styleMap.keys()) { - if (id.startsWith('Heading') && !targetIds.includes(id)) targetIds.push(id) + // Match both 'Heading1' (Office) and 'heading1' (LibreOffice) style IDs + if (/^[Hh]eading\d/.test(id) && !targetIds.includes(id)) targetIds.push(id) } const styles: NonNullable = [] @@ -299,19 +301,23 @@ async function extractPdfStyle(buffer: Buffer): Promise - name - .replace( - /[-]?(BoldMT|BoldItalic|Regular|Bold|Italic|Light|Medium|SemiBold|ExtraBold|Black|Oblique|Condensed|Expanded|MT)$/i, - '' - ) - .trim() - ) + [...rawFontNames].map((name) => { + let n = name + // Strip up to 3 suffix components to handle compound PostScript names + for (let i = 0; i < 3; i++) { + const stripped = n.replace(SUFFIX_RX, '').trim() + if (stripped === n) break + n = stripped + } + return n + }) ), ].filter(Boolean) diff --git a/apps/sim/lib/copilot/vfs/workspace-vfs.ts b/apps/sim/lib/copilot/vfs/workspace-vfs.ts index c23d66889c2..6e5cd70bb7d 100644 --- a/apps/sim/lib/copilot/vfs/workspace-vfs.ts +++ b/apps/sim/lib/copilot/vfs/workspace-vfs.ts @@ -316,7 +316,7 @@ function getStaticComponentFiles(): Map { * tables/{name}/meta.json * files/{name}/meta.json * files/by-id/{id}/meta.json - * files/by-id/{id}/style (dynamic — OOXML theme/font extraction for .docx/.pptx) + * files/by-id/{id}/style (dynamic — style extraction for .docx/.pptx/.pdf) * files/by-id/{id}/compiled-check (dynamic — compile generated source / validate diagrams, returns {ok,error?}) * jobs/{title}/meta.json * jobs/{title}/history.json @@ -457,7 +457,7 @@ export class WorkspaceVFS { * Attempt to read dynamic workspace file content from storage. * Handles images (base64), parseable documents (PDF, etc.), and text files. * Also handles: - * `files/by-id/{id}/style` — OOXML theme/style extraction (.docx / .pptx only) + * `files/by-id/{id}/style` — style extraction (.docx / .pptx / .pdf) * `files/by-id/{id}/compiled-check` — compile JS-source binary files or validate Mermaid diagrams * Returns null if the path doesn't match `files/{name}` / `files/by-id/{id}` or the file isn't found. */ diff --git a/apps/sim/sandbox-tasks/docx-generate.ts b/apps/sim/sandbox-tasks/docx-generate.ts index 214b9f8f41f..d93954d923c 100644 --- a/apps/sim/sandbox-tasks/docx-generate.ts +++ b/apps/sim/sandbox-tasks/docx-generate.ts @@ -15,6 +15,9 @@ export const docxGenerateTask = defineSandboxTask({ globalThis.addSection = (section) => { globalThis.__docxSections.push(section); }; + // Set globalThis.__docxDocOptions = { styles: {...}, numbering: {...} } in chunk 1 + // to configure document-wide styles and numbering in chunked (addSection) mode. + globalThis.__docxDocOptions = null; // Page geometry constants (twips, 1 twip = 1/1440 inch) for US Letter globalThis.PAGE_W = 12240; // 8.5" @@ -79,10 +82,13 @@ export const docxGenerateTask = defineSandboxTask({ finalize: ` let doc = globalThis.doc; if (!doc && globalThis.__docxSections.length > 0) { - doc = new globalThis.docx.Document({ sections: globalThis.__docxSections }); + doc = new globalThis.docx.Document({ + ...(globalThis.__docxDocOptions || {}), + sections: globalThis.__docxSections, + }); } if (!doc) { - throw new Error('No document created. Use addSection({ children: [...] }) for chunked writes, or set doc = new docx.Document({...}) for a single write.'); + throw new Error('No document created. Use addSection({ children: [...] }) for chunked writes, or set globalThis.doc = new docx.Document({...}) for a single write.'); } const b64 = await globalThis.docx.Packer.toBase64String(doc); const alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/'; diff --git a/apps/sim/sandbox-tasks/pptx-generate.ts b/apps/sim/sandbox-tasks/pptx-generate.ts index 986954da8d6..8319eb37948 100644 --- a/apps/sim/sandbox-tasks/pptx-generate.ts +++ b/apps/sim/sandbox-tasks/pptx-generate.ts @@ -60,6 +60,9 @@ export const pptxGenerateTask = defineSandboxTask({ }; `, finalize: ` + if (!globalThis.pptx) { + throw new Error('No presentation found. Do not overwrite globalThis.pptx — call globalThis.pptx.addSlide() directly.'); + } const bytes = await globalThis.pptx.write({ outputType: 'uint8array' }); return bytes; `,