Compare commits

...

3 Commits

Author SHA1 Message Date
Peter Steinberger
1314605b3a fix: restrict local media reads to workspace/media (#4880) (thanks @evanotero) 2026-01-31 03:43:50 +01:00
Evan Otero
0f7ed4213f Lint: remove unused variable hasValidMediaOnLine 2026-01-31 03:27:09 +01:00
Evan Otero
57ce1fe0ec Media: restrict local path extraction to prevent LFI 2026-01-31 03:27:09 +01:00
12 changed files with 166 additions and 44 deletions

View File

@@ -74,6 +74,7 @@ Status: stable.
- **BREAKING:** Gateway auth mode "none" is removed; gateway now requires token/password (Tailscale Serve identity still allowed).
### Fixes
- Security: restrict local media reads to workspace and `~/.openclaw/media`; store node captures + TTS output in the media directory. (#4880) Thanks @evanotero.
- Infra: resolve Control UI assets for npm global installs. (#4909) Thanks @YuriNachos.
- Gateway: prevent blank token prompts from storing "undefined". (#4873) Thanks @Hisleren.
- Telegram: use undici fetch for per-account proxy dispatcher. (#4456) Thanks @spiceoogway.

View File

@@ -2530,7 +2530,7 @@ Start the Gateway with `--verbose` to get more console detail. Then inspect the
### My skill generated an image/PDF but nothing was sent
Outbound attachments from the agent must include a `MEDIA:<path-or-url>` line (on its own line). See [OpenClaw assistant setup](/start/openclaw) and [Agent send](/tools/agent-send).
Outbound attachments from the agent must include a `MEDIA:<path-or-url>` line (on its own line). Local paths must live under the agent workspace or `~/.openclaw/media`. See [OpenClaw assistant setup](/start/openclaw) and [Agent send](/tools/agent-send).
CLI sending:
@@ -2541,6 +2541,7 @@ openclaw message send --target +15555550123 --message "Here you go" --media /pat
Also check:
- The target channel supports outbound media and isn't blocked by allowlists.
- The file is within the provider's size limits (images are resized to max 2048px).
- CLI runs are trusted, so local paths are allowed.
See [Images](/nodes/images).

View File

@@ -202,11 +202,11 @@ Inbound attachments (images/audio/docs) can be surfaced to your command via temp
- `{{MediaUrl}}` (pseudo-URL)
- `{{Transcript}}` (if audio transcription is enabled)
Outbound attachments from the agent: include `MEDIA:<path-or-url>` on its own line (no spaces). Example:
Outbound attachments from the agent: include `MEDIA:<path-or-url>` on its own line (no spaces). Local paths must live under the agent workspace or `~/.openclaw/media`. Example:
```
Here's the screenshot.
MEDIA:/tmp/screenshot.png
MEDIA:~/.openclaw/media/inbound/screenshot.png
```
OpenClaw extracts these and sends them as media alongside the text.

View File

@@ -397,7 +397,9 @@ export function createImageTool(options?: {
const media = isDataUrl
? decodeDataUrl(resolvedImage)
: await loadWebMedia(resolvedPath ?? resolvedImage, maxBytes);
: await loadWebMedia(resolvedPath ?? resolvedImage, maxBytes, {
localRoots: sandboxRoot ? [sandboxRoot] : undefined,
});
if (media.kind !== "image") {
throw new Error(`Unsupported media type: ${media.kind}`);
}

View File

@@ -1,24 +1,20 @@
import crypto from "node:crypto";
import path from "node:path";
import type { AgentToolResult } from "@mariozechner/pi-agent-core";
import { Type } from "@sinclair/typebox";
import {
type CameraFacing,
cameraTempPath,
parseCameraClipPayload,
parseCameraSnapPayload,
writeBase64ToFile,
} from "../../cli/nodes-camera.js";
import { parseEnvPairs, parseTimeoutMs } from "../../cli/nodes-run.js";
import {
parseScreenRecordPayload,
screenRecordTempPath,
writeScreenRecordToFile,
} from "../../cli/nodes-screen.js";
import { parseScreenRecordPayload } from "../../cli/nodes-screen.js";
import { parseDurationMs } from "../../cli/parse-duration.js";
import type { OpenClawConfig } from "../../config/config.js";
import { imageMimeFromFormat } from "../../media/mime.js";
import { saveMediaBuffer } from "../../media/store.js";
import { resolveSessionAgentId } from "../agent-scope.js";
import { optionalStringEnum, stringEnum } from "../schema/typebox.js";
import { sanitizeToolResultImages } from "../tool-images.js";
@@ -223,12 +219,17 @@ export function createNodesTool(options?: {
}
const isJpeg = normalizedFormat === "jpg" || normalizedFormat === "jpeg";
const filePath = cameraTempPath({
kind: "snap",
facing,
ext: isJpeg ? "jpg" : "png",
});
await writeBase64ToFile(filePath, payload.base64);
const buffer = Buffer.from(payload.base64, "base64");
const mimeType =
imageMimeFromFormat(payload.format) ?? (isJpeg ? "image/jpeg" : "image/png");
const saved = await saveMediaBuffer(
buffer,
mimeType,
"nodes",
Math.max(buffer.length, 1),
`camera-${facing}.${isJpeg ? "jpg" : "png"}`,
);
const filePath = saved.path;
content.push({ type: "text", text: `MEDIA:${filePath}` });
content.push({
type: "image",
@@ -293,12 +294,17 @@ export function createNodesTool(options?: {
idempotencyKey: crypto.randomUUID(),
})) as { payload?: unknown };
const payload = parseCameraClipPayload(raw?.payload);
const filePath = cameraTempPath({
kind: "clip",
facing,
ext: payload.format,
});
await writeBase64ToFile(filePath, payload.base64);
const buffer = Buffer.from(payload.base64, "base64");
const format = payload.format.toLowerCase();
const contentType = format === "mp4" ? "video/mp4" : undefined;
const saved = await saveMediaBuffer(
buffer,
contentType,
"nodes",
Math.max(buffer.length, 1),
`camera-clip-${facing}.${format || "mp4"}`,
);
const filePath = saved.path;
return {
content: [{ type: "text", text: `FILE:${filePath}` }],
details: {
@@ -339,15 +345,26 @@ export function createNodesTool(options?: {
idempotencyKey: crypto.randomUUID(),
})) as { payload?: unknown };
const payload = parseScreenRecordPayload(raw?.payload);
const filePath =
const buffer = Buffer.from(payload.base64, "base64");
const format = payload.format.toLowerCase() || "mp4";
const contentType = format === "mp4" ? "video/mp4" : undefined;
const outPath =
typeof params.outPath === "string" && params.outPath.trim()
? params.outPath.trim()
: screenRecordTempPath({ ext: payload.format || "mp4" });
const written = await writeScreenRecordToFile(filePath, payload.base64);
: "";
const fileName = outPath ? path.basename(outPath) : `screen-record.${format}`;
const saved = await saveMediaBuffer(
buffer,
contentType,
"nodes",
Math.max(buffer.length, 1),
fileName,
);
const filePath = saved.path;
return {
content: [{ type: "text", text: `FILE:${written.path}` }],
content: [{ type: "text", text: `FILE:${filePath}` }],
details: {
path: written.path,
path: filePath,
durationMs: payload.durationMs,
fps: payload.fps,
screenIndex: payload.screenIndex,

View File

@@ -248,7 +248,7 @@ export async function runPreparedReply(
const prefixedBody = [threadStarterNote, prefixedBodyBase].filter(Boolean).join("\n\n");
const mediaNote = buildInboundMediaNote(ctx);
const mediaReplyHint = mediaNote
? "To send an image back, prefer the message tool (media/path/filePath). If you must inline, use MEDIA:/path or MEDIA:https://example.com/image.jpg (spaces ok, quote if needed). Keep caption in the text body."
? "To send an image back, prefer the message tool (media/path/filePath). If you must inline, use MEDIA:/path or MEDIA:https://example.com/image.jpg (spaces ok, quote if needed). Local paths must be inside the agent workspace or ~/.openclaw/media. Keep caption in the text body."
: undefined;
let prefixedCommandBody = mediaNote
? [mediaNote, mediaReplyHint, prefixedBody ?? ""].filter(Boolean).join("\n").trim()

View File

@@ -27,6 +27,7 @@ export async function runCli(argv: string[] = process.argv) {
const normalizedArgv = stripWindowsNodeExec(argv);
loadDotEnv({ quiet: true });
normalizeEnv();
process.env.OPENCLAW_MEDIA_ALLOW_ANY_LOCAL = "1";
ensureOpenClawCliOnPath();
// Enforce the minimum supported runtime before doing any work.

21
src/media/local-roots.ts Normal file
View File

@@ -0,0 +1,21 @@
import path from "node:path";
import type { OpenClawConfig } from "../config/config.js";
import { listAgentIds, resolveAgentWorkspaceDir } from "../agents/agent-scope.js";
import { resolveSandboxConfigForAgent } from "../agents/sandbox/config.js";
import { resolveConfigDir, resolveUserPath } from "../utils.js";
/**
 * Collects the directories from which local media files may be read.
 *
 * The result always starts with the `media` directory under the config dir,
 * followed, for every agent id listed in the config, by that agent's workspace
 * directory and (when one is configured) its sandbox `workspaceRoot` passed
 * through `resolveUserPath`. Duplicates are dropped; first-seen order is kept.
 *
 * @param cfg - Loaded OpenClaw configuration used to enumerate agents.
 * @returns De-duplicated list of allowed root directories.
 */
export function resolveMediaLocalRoots(cfg: OpenClawConfig): string[] {
  const candidates: string[] = [path.join(resolveConfigDir(), "media")];
  for (const agentId of listAgentIds(cfg)) {
    candidates.push(resolveAgentWorkspaceDir(cfg, agentId));
    const { workspaceRoot } = resolveSandboxConfigForAgent(cfg, agentId);
    if (workspaceRoot) {
      candidates.push(resolveUserPath(workspaceRoot));
    }
  }
  // A Set preserves insertion order, so de-duplicating here keeps first occurrences.
  return Array.from(new Set(candidates));
}

View File

@@ -85,10 +85,8 @@ export function splitMediaFromOutput(raw: string): {
continue;
}
foundMediaToken = true;
const pieces: string[] = [];
let cursor = 0;
let hasValidMedia = false;
for (const match of matches) {
const start = match.index ?? 0;
@@ -101,11 +99,13 @@ export function splitMediaFromOutput(raw: string): {
const mediaStartIndex = media.length;
let validCount = 0;
const invalidParts: string[] = [];
let hasValidMedia = false;
for (const part of parts) {
const candidate = normalizeMediaSource(cleanCandidate(part));
if (isValidMedia(candidate, unwrapped ? { allowSpaces: true } : undefined)) {
media.push(candidate);
hasValidMedia = true;
foundMediaToken = true;
validCount += 1;
} else {
invalidParts.push(part);
@@ -130,6 +130,7 @@ export function splitMediaFromOutput(raw: string): {
if (isValidMedia(fallback, { allowSpaces: true })) {
media.splice(mediaStartIndex, media.length - mediaStartIndex, fallback);
hasValidMedia = true;
foundMediaToken = true;
validCount = 1;
invalidParts.length = 0;
}
@@ -140,12 +141,18 @@ export function splitMediaFromOutput(raw: string): {
if (isValidMedia(fallback, { allowSpaces: true })) {
media.push(fallback);
hasValidMedia = true;
foundMediaToken = true;
invalidParts.length = 0;
}
}
if (hasValidMedia && invalidParts.length > 0) {
pieces.push(invalidParts.join(" "));
if (hasValidMedia) {
if (invalidParts.length > 0) {
pieces.push(invalidParts.join(" "));
}
} else {
// If no valid media was found in this match, keep the original token text.
pieces.push(match[0]);
}
cursor = start + match[0].length;

View File

@@ -27,6 +27,7 @@ import type {
} from "../config/types.tts.js";
import { logVerbose } from "../globals.js";
import { isVoiceCompatibleAudio } from "../media/audio.js";
import { saveMediaSource } from "../media/store.js";
import { CONFIG_DIR, resolveUserPath } from "../utils.js";
import { getApiKeyForModel, requireApiKey } from "../agents/model-auth.js";
import {
@@ -1160,12 +1161,18 @@ export async function textToSpeech(params: {
}
}
scheduleCleanup(tempDir);
const voiceCompatible = isVoiceCompatibleAudio({ fileName: edgeResult.audioPath });
let savedPath = edgeResult.audioPath;
try {
const saved = await saveMediaSource(edgeResult.audioPath, undefined, "tts");
savedPath = saved.path;
} finally {
scheduleCleanup(tempDir);
}
const voiceCompatible = isVoiceCompatibleAudio({ fileName: savedPath });
return {
success: true,
audioPath: edgeResult.audioPath,
audioPath: savedPath,
latencyMs: Date.now() - providerStart,
provider,
outputFormat: edgeResult.outputFormat,
@@ -1221,11 +1228,17 @@ export async function textToSpeech(params: {
const tempDir = mkdtempSync(path.join(tmpdir(), "tts-"));
const audioPath = path.join(tempDir, `voice-${Date.now()}${output.extension}`);
writeFileSync(audioPath, audioBuffer);
scheduleCleanup(tempDir);
let savedPath = audioPath;
try {
const saved = await saveMediaSource(audioPath, undefined, "tts");
savedPath = saved.path;
} finally {
scheduleCleanup(tempDir);
}
return {
success: true,
audioPath,
audioPath: savedPath,
latencyMs,
provider,
outputFormat: provider === "openai" ? output.openai : output.elevenlabs,

View File

@@ -51,7 +51,7 @@ describe("web media loading", () => {
const file = await writeTempFile(buffer, ".jpg");
const cap = Math.floor(buffer.length * 0.8);
const result = await loadWebMedia(file, cap);
const result = await loadWebMedia(file, cap, { localRoots: [path.dirname(file)] });
expect(result.kind).toBe("image");
expect(result.buffer.length).toBeLessThanOrEqual(cap);
@@ -66,12 +66,32 @@ describe("web media loading", () => {
.toBuffer();
const wrongExt = await writeTempFile(pngBuffer, ".bin");
const result = await loadWebMedia(wrongExt, 1024 * 1024);
const result = await loadWebMedia(wrongExt, 1024 * 1024, {
localRoots: [path.dirname(wrongExt)],
});
expect(result.kind).toBe("image");
expect(result.contentType).toBe("image/jpeg");
});
// Verifies that loadWebMedia only reads local files located under one of the
// supplied `localRoots`, and rejects anything else.
it("rejects local files outside allowed roots", async () => {
  const allowedDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-media-root-"));
  try {
    const allowedFile = path.join(allowedDir, "ok.txt");
    await fs.writeFile(allowedFile, Buffer.from("ok"));
    const blockedFile = await writeTempFile(Buffer.from("nope"), ".txt");

    // A file inside the allowed root loads normally.
    const okResult = await loadWebMedia(allowedFile, 1024 * 1024, {
      localRoots: [allowedDir],
    });
    expect(okResult.buffer.length).toBeGreaterThan(0);

    // A file outside the allowed root must be refused.
    await expect(
      loadWebMedia(blockedFile, 1024 * 1024, { localRoots: [allowedDir] }),
    ).rejects.toThrow(/outside allowed roots/i);
  } finally {
    // Remove the temp root even when an assertion above fails.
    await fs.rm(allowedDir, { recursive: true, force: true });
  }
});
it("adds extension to URL fileName when missing", async () => {
const fetchMock = vi.spyOn(globalThis, "fetch").mockResolvedValueOnce({
ok: true,
@@ -169,7 +189,7 @@ describe("web media loading", () => {
const file = await writeTempFile(gifBuffer, ".gif");
const result = await loadWebMedia(file, 1024 * 1024);
const result = await loadWebMedia(file, 1024 * 1024, { localRoots: [path.dirname(file)] });
expect(result.kind).toBe("image");
expect(result.contentType).toBe("image/gif");
@@ -215,7 +235,7 @@ describe("web media loading", () => {
const file = await writeTempFile(buffer, ".png");
const result = await loadWebMedia(file, 1024 * 1024);
const result = await loadWebMedia(file, 1024 * 1024, { localRoots: [path.dirname(file)] });
expect(result.kind).toBe("image");
expect(result.contentType).toBe("image/png");
@@ -255,7 +275,7 @@ describe("web media loading", () => {
const file = await writeTempFile(pngBuffer, ".png");
const result = await loadWebMedia(file, cap);
const result = await loadWebMedia(file, cap, { localRoots: [path.dirname(file)] });
expect(result.kind).toBe("image");
expect(result.contentType).toBe("image/jpeg");

View File

@@ -2,8 +2,10 @@ import fs from "node:fs/promises";
import path from "node:path";
import { fileURLToPath } from "node:url";
import { loadConfig } from "../config/config.js";
import { logVerbose, shouldLogVerbose } from "../globals.js";
import { type MediaKind, maxBytesForKind, mediaKindFromMime } from "../media/constants.js";
import { resolveMediaLocalRoots } from "../media/local-roots.js";
import { resolveUserPath } from "../utils.js";
import { fetchRemoteMedia } from "../media/fetch.js";
import {
@@ -13,6 +15,7 @@ import {
resizeToJpeg,
} from "../media/image-ops.js";
import { detectMime, extensionForMime } from "../media/mime.js";
import { assertSandboxPath } from "../agents/sandbox-paths.js";
export type WebMediaResult = {
buffer: Buffer;
@@ -24,6 +27,8 @@ export type WebMediaResult = {
type WebMediaOptions = {
maxBytes?: number;
optimizeImages?: boolean;
allowAnyLocal?: boolean;
localRoots?: string[];
};
const HEIC_MIME_RE = /^image\/hei[cf]$/i;
@@ -107,6 +112,29 @@ async function optimizeImageWithFallback(params: {
return { ...optimized, format: "jpeg" };
}
/**
 * Determines which root directories constrain local media reads.
 *
 * @param options - Media options; `allowAnyLocal` and `localRoots` are consulted.
 * @returns `null` when `options.allowAnyLocal` is truthy or the
 *   `OPENCLAW_MEDIA_ALLOW_ANY_LOCAL` environment variable equals `"1"` (no root
 *   restriction); otherwise `options.localRoots` when it is a non-empty array,
 *   falling back to the roots derived from the loaded config.
 */
function resolveAllowedLocalRoots(options: WebMediaOptions): string[] | null {
  const unrestricted =
    options.allowAnyLocal || process.env.OPENCLAW_MEDIA_ALLOW_ANY_LOCAL === "1";
  if (unrestricted) {
    return null;
  }
  const explicitRoots = options.localRoots;
  // Explicit roots win over config-derived ones, but only if at least one is given.
  return Array.isArray(explicitRoots) && explicitRoots.length > 0
    ? explicitRoots
    : resolveMediaLocalRoots(loadConfig());
}
/**
 * Resolves a local media path against a list of allowed root directories.
 *
 * Roots are tried in order by calling `assertSandboxPath` with the root used as
 * both `cwd` and `root`. The `resolved` path from the first call that does not
 * throw is returned.
 *
 * @param mediaUrl - Local path requested by the caller.
 * @param roots - Allowed root directories, tried in order.
 * @returns The `resolved` path reported by `assertSandboxPath` for the first accepting root.
 * @throws Error when no root accepts the path (including when `roots` is empty).
 */
async function resolveLocalMediaPath(mediaUrl: string, roots: string[]): Promise<string> {
  for (const root of roots) {
    try {
      const validated = await assertSandboxPath({ filePath: mediaUrl, cwd: root, root });
      return validated.resolved;
    } catch {
      // This root rejected the path; a later root may still accept it.
    }
  }
  throw new Error(`Local media path is outside allowed roots. Checked ${roots.length} root(s).`);
}
async function loadWebMediaInternal(
mediaUrl: string,
options: WebMediaOptions = {},
@@ -201,6 +229,10 @@ async function loadWebMediaInternal(
}
// Local path
const allowedRoots = resolveAllowedLocalRoots(options);
if (allowedRoots && allowedRoots.length > 0) {
mediaUrl = await resolveLocalMediaPath(mediaUrl, allowedRoots);
}
const data = await fs.readFile(mediaUrl);
const mime = await detectMime({ buffer: data, filePath: mediaUrl });
const kind = mediaKindFromMime(mime);
@@ -217,8 +249,13 @@ async function loadWebMediaInternal(
});
}
export async function loadWebMedia(mediaUrl: string, maxBytes?: number): Promise<WebMediaResult> {
export async function loadWebMedia(
mediaUrl: string,
maxBytes?: number,
options: Omit<WebMediaOptions, "maxBytes" | "optimizeImages"> = {},
): Promise<WebMediaResult> {
return await loadWebMediaInternal(mediaUrl, {
...options,
maxBytes,
optimizeImages: true,
});
@@ -227,8 +264,10 @@ export async function loadWebMedia(mediaUrl: string, maxBytes?: number): Promise
export async function loadWebMediaRaw(
mediaUrl: string,
maxBytes?: number,
options: Omit<WebMediaOptions, "maxBytes" | "optimizeImages"> = {},
): Promise<WebMediaResult> {
return await loadWebMediaInternal(mediaUrl, {
...options,
maxBytes,
optimizeImages: false,
});