Compare commits
3 Commits
dev/ci
...
fix/lfi-me
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
1314605b3a | ||
|
|
0f7ed4213f | ||
|
|
57ce1fe0ec |
@@ -74,6 +74,7 @@ Status: stable.
|
||||
- **BREAKING:** Gateway auth mode "none" is removed; gateway now requires token/password (Tailscale Serve identity still allowed).
|
||||
|
||||
### Fixes
|
||||
- Security: restrict local media reads to workspace and `~/.openclaw/media`; store node captures + TTS output in the media directory. (#4880) Thanks @evanotero.
|
||||
- Infra: resolve Control UI assets for npm global installs. (#4909) Thanks @YuriNachos.
|
||||
- Gateway: prevent blank token prompts from storing "undefined". (#4873) Thanks @Hisleren.
|
||||
- Telegram: use undici fetch for per-account proxy dispatcher. (#4456) Thanks @spiceoogway.
|
||||
|
||||
@@ -2530,7 +2530,7 @@ Start the Gateway with `--verbose` to get more console detail. Then inspect the
|
||||
|
||||
### My skill generated an image/PDF but nothing was sent
|
||||
|
||||
Outbound attachments from the agent must include a `MEDIA:<path-or-url>` line (on its own line). See [OpenClaw assistant setup](/start/openclaw) and [Agent send](/tools/agent-send).
|
||||
Outbound attachments from the agent must include a `MEDIA:<path-or-url>` line (on its own line). Local paths must live under the agent workspace or `~/.openclaw/media`. See [OpenClaw assistant setup](/start/openclaw) and [Agent send](/tools/agent-send).
|
||||
|
||||
CLI sending:
|
||||
|
||||
@@ -2541,6 +2541,7 @@ openclaw message send --target +15555550123 --message "Here you go" --media /pat
|
||||
Also check:
|
||||
- The target channel supports outbound media and isn’t blocked by allowlists.
|
||||
- The file is within the provider’s size limits (images are resized to max 2048px).
|
||||
- CLI runs are trusted, so local paths outside the agent workspace and `~/.openclaw/media` are allowed.
|
||||
|
||||
See [Images](/nodes/images).
|
||||
|
||||
|
||||
@@ -202,11 +202,11 @@ Inbound attachments (images/audio/docs) can be surfaced to your command via temp
|
||||
- `{{MediaUrl}}` (pseudo-URL)
|
||||
- `{{Transcript}}` (if audio transcription is enabled)
|
||||
|
||||
Outbound attachments from the agent: include `MEDIA:<path-or-url>` on its own line (no spaces). Example:
|
||||
Outbound attachments from the agent: include `MEDIA:<path-or-url>` on its own line (no spaces). Local paths must live under the agent workspace or `~/.openclaw/media`. Example:
|
||||
|
||||
```
|
||||
Here’s the screenshot.
|
||||
MEDIA:/tmp/screenshot.png
|
||||
MEDIA:~/.openclaw/media/inbound/screenshot.png
|
||||
```
|
||||
|
||||
OpenClaw extracts these and sends them as media alongside the text.
|
||||
|
||||
@@ -397,7 +397,9 @@ export function createImageTool(options?: {
|
||||
|
||||
const media = isDataUrl
|
||||
? decodeDataUrl(resolvedImage)
|
||||
: await loadWebMedia(resolvedPath ?? resolvedImage, maxBytes);
|
||||
: await loadWebMedia(resolvedPath ?? resolvedImage, maxBytes, {
|
||||
localRoots: sandboxRoot ? [sandboxRoot] : undefined,
|
||||
});
|
||||
if (media.kind !== "image") {
|
||||
throw new Error(`Unsupported media type: ${media.kind}`);
|
||||
}
|
||||
|
||||
@@ -1,24 +1,20 @@
|
||||
import crypto from "node:crypto";
|
||||
import path from "node:path";
|
||||
|
||||
import type { AgentToolResult } from "@mariozechner/pi-agent-core";
|
||||
import { Type } from "@sinclair/typebox";
|
||||
|
||||
import {
|
||||
type CameraFacing,
|
||||
cameraTempPath,
|
||||
parseCameraClipPayload,
|
||||
parseCameraSnapPayload,
|
||||
writeBase64ToFile,
|
||||
} from "../../cli/nodes-camera.js";
|
||||
import { parseEnvPairs, parseTimeoutMs } from "../../cli/nodes-run.js";
|
||||
import {
|
||||
parseScreenRecordPayload,
|
||||
screenRecordTempPath,
|
||||
writeScreenRecordToFile,
|
||||
} from "../../cli/nodes-screen.js";
|
||||
import { parseScreenRecordPayload } from "../../cli/nodes-screen.js";
|
||||
import { parseDurationMs } from "../../cli/parse-duration.js";
|
||||
import type { OpenClawConfig } from "../../config/config.js";
|
||||
import { imageMimeFromFormat } from "../../media/mime.js";
|
||||
import { saveMediaBuffer } from "../../media/store.js";
|
||||
import { resolveSessionAgentId } from "../agent-scope.js";
|
||||
import { optionalStringEnum, stringEnum } from "../schema/typebox.js";
|
||||
import { sanitizeToolResultImages } from "../tool-images.js";
|
||||
@@ -223,12 +219,17 @@ export function createNodesTool(options?: {
|
||||
}
|
||||
|
||||
const isJpeg = normalizedFormat === "jpg" || normalizedFormat === "jpeg";
|
||||
const filePath = cameraTempPath({
|
||||
kind: "snap",
|
||||
facing,
|
||||
ext: isJpeg ? "jpg" : "png",
|
||||
});
|
||||
await writeBase64ToFile(filePath, payload.base64);
|
||||
const buffer = Buffer.from(payload.base64, "base64");
|
||||
const mimeType =
|
||||
imageMimeFromFormat(payload.format) ?? (isJpeg ? "image/jpeg" : "image/png");
|
||||
const saved = await saveMediaBuffer(
|
||||
buffer,
|
||||
mimeType,
|
||||
"nodes",
|
||||
Math.max(buffer.length, 1),
|
||||
`camera-${facing}.${isJpeg ? "jpg" : "png"}`,
|
||||
);
|
||||
const filePath = saved.path;
|
||||
content.push({ type: "text", text: `MEDIA:${filePath}` });
|
||||
content.push({
|
||||
type: "image",
|
||||
@@ -293,12 +294,17 @@ export function createNodesTool(options?: {
|
||||
idempotencyKey: crypto.randomUUID(),
|
||||
})) as { payload?: unknown };
|
||||
const payload = parseCameraClipPayload(raw?.payload);
|
||||
const filePath = cameraTempPath({
|
||||
kind: "clip",
|
||||
facing,
|
||||
ext: payload.format,
|
||||
});
|
||||
await writeBase64ToFile(filePath, payload.base64);
|
||||
const buffer = Buffer.from(payload.base64, "base64");
|
||||
const format = payload.format.toLowerCase();
|
||||
const contentType = format === "mp4" ? "video/mp4" : undefined;
|
||||
const saved = await saveMediaBuffer(
|
||||
buffer,
|
||||
contentType,
|
||||
"nodes",
|
||||
Math.max(buffer.length, 1),
|
||||
`camera-clip-${facing}.${format || "mp4"}`,
|
||||
);
|
||||
const filePath = saved.path;
|
||||
return {
|
||||
content: [{ type: "text", text: `FILE:${filePath}` }],
|
||||
details: {
|
||||
@@ -339,15 +345,26 @@ export function createNodesTool(options?: {
|
||||
idempotencyKey: crypto.randomUUID(),
|
||||
})) as { payload?: unknown };
|
||||
const payload = parseScreenRecordPayload(raw?.payload);
|
||||
const filePath =
|
||||
const buffer = Buffer.from(payload.base64, "base64");
|
||||
const format = payload.format.toLowerCase() || "mp4";
|
||||
const contentType = format === "mp4" ? "video/mp4" : undefined;
|
||||
const outPath =
|
||||
typeof params.outPath === "string" && params.outPath.trim()
|
||||
? params.outPath.trim()
|
||||
: screenRecordTempPath({ ext: payload.format || "mp4" });
|
||||
const written = await writeScreenRecordToFile(filePath, payload.base64);
|
||||
: "";
|
||||
const fileName = outPath ? path.basename(outPath) : `screen-record.${format}`;
|
||||
const saved = await saveMediaBuffer(
|
||||
buffer,
|
||||
contentType,
|
||||
"nodes",
|
||||
Math.max(buffer.length, 1),
|
||||
fileName,
|
||||
);
|
||||
const filePath = saved.path;
|
||||
return {
|
||||
content: [{ type: "text", text: `FILE:${written.path}` }],
|
||||
content: [{ type: "text", text: `FILE:${filePath}` }],
|
||||
details: {
|
||||
path: written.path,
|
||||
path: filePath,
|
||||
durationMs: payload.durationMs,
|
||||
fps: payload.fps,
|
||||
screenIndex: payload.screenIndex,
|
||||
|
||||
@@ -248,7 +248,7 @@ export async function runPreparedReply(
|
||||
const prefixedBody = [threadStarterNote, prefixedBodyBase].filter(Boolean).join("\n\n");
|
||||
const mediaNote = buildInboundMediaNote(ctx);
|
||||
const mediaReplyHint = mediaNote
|
||||
? "To send an image back, prefer the message tool (media/path/filePath). If you must inline, use MEDIA:/path or MEDIA:https://example.com/image.jpg (spaces ok, quote if needed). Keep caption in the text body."
|
||||
? "To send an image back, prefer the message tool (media/path/filePath). If you must inline, use MEDIA:/path or MEDIA:https://example.com/image.jpg (spaces ok, quote if needed). Local paths must be inside the agent workspace or ~/.openclaw/media. Keep caption in the text body."
|
||||
: undefined;
|
||||
let prefixedCommandBody = mediaNote
|
||||
? [mediaNote, mediaReplyHint, prefixedBody ?? ""].filter(Boolean).join("\n").trim()
|
||||
|
||||
@@ -27,6 +27,7 @@ export async function runCli(argv: string[] = process.argv) {
|
||||
const normalizedArgv = stripWindowsNodeExec(argv);
|
||||
loadDotEnv({ quiet: true });
|
||||
normalizeEnv();
|
||||
process.env.OPENCLAW_MEDIA_ALLOW_ANY_LOCAL = "1";
|
||||
ensureOpenClawCliOnPath();
|
||||
|
||||
// Enforce the minimum supported runtime before doing any work.
|
||||
|
||||
21
src/media/local-roots.ts
Normal file
21
src/media/local-roots.ts
Normal file
@@ -0,0 +1,21 @@
|
||||
import path from "node:path";
|
||||
|
||||
import type { OpenClawConfig } from "../config/config.js";
|
||||
import { listAgentIds, resolveAgentWorkspaceDir } from "../agents/agent-scope.js";
|
||||
import { resolveSandboxConfigForAgent } from "../agents/sandbox/config.js";
|
||||
import { resolveConfigDir, resolveUserPath } from "../utils.js";
|
||||
|
||||
export function resolveMediaLocalRoots(cfg: OpenClawConfig): string[] {
|
||||
const roots = new Set<string>();
|
||||
roots.add(path.join(resolveConfigDir(), "media"));
|
||||
|
||||
for (const agentId of listAgentIds(cfg)) {
|
||||
roots.add(resolveAgentWorkspaceDir(cfg, agentId));
|
||||
const sandboxRoot = resolveSandboxConfigForAgent(cfg, agentId).workspaceRoot;
|
||||
if (sandboxRoot) {
|
||||
roots.add(resolveUserPath(sandboxRoot));
|
||||
}
|
||||
}
|
||||
|
||||
return Array.from(roots);
|
||||
}
|
||||
@@ -85,10 +85,8 @@ export function splitMediaFromOutput(raw: string): {
|
||||
continue;
|
||||
}
|
||||
|
||||
foundMediaToken = true;
|
||||
const pieces: string[] = [];
|
||||
let cursor = 0;
|
||||
let hasValidMedia = false;
|
||||
|
||||
for (const match of matches) {
|
||||
const start = match.index ?? 0;
|
||||
@@ -101,11 +99,13 @@ export function splitMediaFromOutput(raw: string): {
|
||||
const mediaStartIndex = media.length;
|
||||
let validCount = 0;
|
||||
const invalidParts: string[] = [];
|
||||
let hasValidMedia = false;
|
||||
for (const part of parts) {
|
||||
const candidate = normalizeMediaSource(cleanCandidate(part));
|
||||
if (isValidMedia(candidate, unwrapped ? { allowSpaces: true } : undefined)) {
|
||||
media.push(candidate);
|
||||
hasValidMedia = true;
|
||||
foundMediaToken = true;
|
||||
validCount += 1;
|
||||
} else {
|
||||
invalidParts.push(part);
|
||||
@@ -130,6 +130,7 @@ export function splitMediaFromOutput(raw: string): {
|
||||
if (isValidMedia(fallback, { allowSpaces: true })) {
|
||||
media.splice(mediaStartIndex, media.length - mediaStartIndex, fallback);
|
||||
hasValidMedia = true;
|
||||
foundMediaToken = true;
|
||||
validCount = 1;
|
||||
invalidParts.length = 0;
|
||||
}
|
||||
@@ -140,12 +141,18 @@ export function splitMediaFromOutput(raw: string): {
|
||||
if (isValidMedia(fallback, { allowSpaces: true })) {
|
||||
media.push(fallback);
|
||||
hasValidMedia = true;
|
||||
foundMediaToken = true;
|
||||
invalidParts.length = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (hasValidMedia && invalidParts.length > 0) {
|
||||
pieces.push(invalidParts.join(" "));
|
||||
if (hasValidMedia) {
|
||||
if (invalidParts.length > 0) {
|
||||
pieces.push(invalidParts.join(" "));
|
||||
}
|
||||
} else {
|
||||
// If no valid media was found in this match, keep the original token text.
|
||||
pieces.push(match[0]);
|
||||
}
|
||||
|
||||
cursor = start + match[0].length;
|
||||
|
||||
@@ -27,6 +27,7 @@ import type {
|
||||
} from "../config/types.tts.js";
|
||||
import { logVerbose } from "../globals.js";
|
||||
import { isVoiceCompatibleAudio } from "../media/audio.js";
|
||||
import { saveMediaSource } from "../media/store.js";
|
||||
import { CONFIG_DIR, resolveUserPath } from "../utils.js";
|
||||
import { getApiKeyForModel, requireApiKey } from "../agents/model-auth.js";
|
||||
import {
|
||||
@@ -1160,12 +1161,18 @@ export async function textToSpeech(params: {
|
||||
}
|
||||
}
|
||||
|
||||
scheduleCleanup(tempDir);
|
||||
const voiceCompatible = isVoiceCompatibleAudio({ fileName: edgeResult.audioPath });
|
||||
let savedPath = edgeResult.audioPath;
|
||||
try {
|
||||
const saved = await saveMediaSource(edgeResult.audioPath, undefined, "tts");
|
||||
savedPath = saved.path;
|
||||
} finally {
|
||||
scheduleCleanup(tempDir);
|
||||
}
|
||||
const voiceCompatible = isVoiceCompatibleAudio({ fileName: savedPath });
|
||||
|
||||
return {
|
||||
success: true,
|
||||
audioPath: edgeResult.audioPath,
|
||||
audioPath: savedPath,
|
||||
latencyMs: Date.now() - providerStart,
|
||||
provider,
|
||||
outputFormat: edgeResult.outputFormat,
|
||||
@@ -1221,11 +1228,17 @@ export async function textToSpeech(params: {
|
||||
const tempDir = mkdtempSync(path.join(tmpdir(), "tts-"));
|
||||
const audioPath = path.join(tempDir, `voice-${Date.now()}${output.extension}`);
|
||||
writeFileSync(audioPath, audioBuffer);
|
||||
scheduleCleanup(tempDir);
|
||||
let savedPath = audioPath;
|
||||
try {
|
||||
const saved = await saveMediaSource(audioPath, undefined, "tts");
|
||||
savedPath = saved.path;
|
||||
} finally {
|
||||
scheduleCleanup(tempDir);
|
||||
}
|
||||
|
||||
return {
|
||||
success: true,
|
||||
audioPath,
|
||||
audioPath: savedPath,
|
||||
latencyMs,
|
||||
provider,
|
||||
outputFormat: provider === "openai" ? output.openai : output.elevenlabs,
|
||||
|
||||
@@ -51,7 +51,7 @@ describe("web media loading", () => {
|
||||
const file = await writeTempFile(buffer, ".jpg");
|
||||
|
||||
const cap = Math.floor(buffer.length * 0.8);
|
||||
const result = await loadWebMedia(file, cap);
|
||||
const result = await loadWebMedia(file, cap, { localRoots: [path.dirname(file)] });
|
||||
|
||||
expect(result.kind).toBe("image");
|
||||
expect(result.buffer.length).toBeLessThanOrEqual(cap);
|
||||
@@ -66,12 +66,32 @@ describe("web media loading", () => {
|
||||
.toBuffer();
|
||||
const wrongExt = await writeTempFile(pngBuffer, ".bin");
|
||||
|
||||
const result = await loadWebMedia(wrongExt, 1024 * 1024);
|
||||
const result = await loadWebMedia(wrongExt, 1024 * 1024, {
|
||||
localRoots: [path.dirname(wrongExt)],
|
||||
});
|
||||
|
||||
expect(result.kind).toBe("image");
|
||||
expect(result.contentType).toBe("image/jpeg");
|
||||
});
|
||||
|
||||
it("rejects local files outside allowed roots", async () => {
|
||||
const allowedDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-media-root-"));
|
||||
const allowedFile = path.join(allowedDir, "ok.txt");
|
||||
await fs.writeFile(allowedFile, Buffer.from("ok"));
|
||||
const blockedFile = await writeTempFile(Buffer.from("nope"), ".txt");
|
||||
|
||||
const okResult = await loadWebMedia(allowedFile, 1024 * 1024, {
|
||||
localRoots: [allowedDir],
|
||||
});
|
||||
expect(okResult.buffer.length).toBeGreaterThan(0);
|
||||
|
||||
await expect(
|
||||
loadWebMedia(blockedFile, 1024 * 1024, { localRoots: [allowedDir] }),
|
||||
).rejects.toThrow(/outside allowed roots/i);
|
||||
|
||||
await fs.rm(allowedDir, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
it("adds extension to URL fileName when missing", async () => {
|
||||
const fetchMock = vi.spyOn(globalThis, "fetch").mockResolvedValueOnce({
|
||||
ok: true,
|
||||
@@ -169,7 +189,7 @@ describe("web media loading", () => {
|
||||
|
||||
const file = await writeTempFile(gifBuffer, ".gif");
|
||||
|
||||
const result = await loadWebMedia(file, 1024 * 1024);
|
||||
const result = await loadWebMedia(file, 1024 * 1024, { localRoots: [path.dirname(file)] });
|
||||
|
||||
expect(result.kind).toBe("image");
|
||||
expect(result.contentType).toBe("image/gif");
|
||||
@@ -215,7 +235,7 @@ describe("web media loading", () => {
|
||||
|
||||
const file = await writeTempFile(buffer, ".png");
|
||||
|
||||
const result = await loadWebMedia(file, 1024 * 1024);
|
||||
const result = await loadWebMedia(file, 1024 * 1024, { localRoots: [path.dirname(file)] });
|
||||
|
||||
expect(result.kind).toBe("image");
|
||||
expect(result.contentType).toBe("image/png");
|
||||
@@ -255,7 +275,7 @@ describe("web media loading", () => {
|
||||
|
||||
const file = await writeTempFile(pngBuffer, ".png");
|
||||
|
||||
const result = await loadWebMedia(file, cap);
|
||||
const result = await loadWebMedia(file, cap, { localRoots: [path.dirname(file)] });
|
||||
|
||||
expect(result.kind).toBe("image");
|
||||
expect(result.contentType).toBe("image/jpeg");
|
||||
|
||||
@@ -2,8 +2,10 @@ import fs from "node:fs/promises";
|
||||
import path from "node:path";
|
||||
import { fileURLToPath } from "node:url";
|
||||
|
||||
import { loadConfig } from "../config/config.js";
|
||||
import { logVerbose, shouldLogVerbose } from "../globals.js";
|
||||
import { type MediaKind, maxBytesForKind, mediaKindFromMime } from "../media/constants.js";
|
||||
import { resolveMediaLocalRoots } from "../media/local-roots.js";
|
||||
import { resolveUserPath } from "../utils.js";
|
||||
import { fetchRemoteMedia } from "../media/fetch.js";
|
||||
import {
|
||||
@@ -13,6 +15,7 @@ import {
|
||||
resizeToJpeg,
|
||||
} from "../media/image-ops.js";
|
||||
import { detectMime, extensionForMime } from "../media/mime.js";
|
||||
import { assertSandboxPath } from "../agents/sandbox-paths.js";
|
||||
|
||||
export type WebMediaResult = {
|
||||
buffer: Buffer;
|
||||
@@ -24,6 +27,8 @@ export type WebMediaResult = {
|
||||
type WebMediaOptions = {
|
||||
maxBytes?: number;
|
||||
optimizeImages?: boolean;
|
||||
allowAnyLocal?: boolean;
|
||||
localRoots?: string[];
|
||||
};
|
||||
|
||||
const HEIC_MIME_RE = /^image\/hei[cf]$/i;
|
||||
@@ -107,6 +112,29 @@ async function optimizeImageWithFallback(params: {
|
||||
return { ...optimized, format: "jpeg" };
|
||||
}
|
||||
|
||||
function resolveAllowedLocalRoots(options: WebMediaOptions): string[] | null {
|
||||
if (options.allowAnyLocal || process.env.OPENCLAW_MEDIA_ALLOW_ANY_LOCAL === "1") {
|
||||
return null;
|
||||
}
|
||||
if (Array.isArray(options.localRoots) && options.localRoots.length > 0) {
|
||||
return options.localRoots;
|
||||
}
|
||||
return resolveMediaLocalRoots(loadConfig());
|
||||
}
|
||||
|
||||
async function resolveLocalMediaPath(mediaUrl: string, roots: string[]): Promise<string> {
|
||||
const errors: string[] = [];
|
||||
for (const root of roots) {
|
||||
try {
|
||||
const validated = await assertSandboxPath({ filePath: mediaUrl, cwd: root, root });
|
||||
return validated.resolved;
|
||||
} catch (err) {
|
||||
errors.push(err instanceof Error ? err.message : String(err));
|
||||
}
|
||||
}
|
||||
throw new Error(`Local media path is outside allowed roots. Checked ${roots.length} root(s).`);
|
||||
}
|
||||
|
||||
async function loadWebMediaInternal(
|
||||
mediaUrl: string,
|
||||
options: WebMediaOptions = {},
|
||||
@@ -201,6 +229,10 @@ async function loadWebMediaInternal(
|
||||
}
|
||||
|
||||
// Local path
|
||||
const allowedRoots = resolveAllowedLocalRoots(options);
|
||||
if (allowedRoots && allowedRoots.length > 0) {
|
||||
mediaUrl = await resolveLocalMediaPath(mediaUrl, allowedRoots);
|
||||
}
|
||||
const data = await fs.readFile(mediaUrl);
|
||||
const mime = await detectMime({ buffer: data, filePath: mediaUrl });
|
||||
const kind = mediaKindFromMime(mime);
|
||||
@@ -217,8 +249,13 @@ async function loadWebMediaInternal(
|
||||
});
|
||||
}
|
||||
|
||||
export async function loadWebMedia(mediaUrl: string, maxBytes?: number): Promise<WebMediaResult> {
|
||||
export async function loadWebMedia(
|
||||
mediaUrl: string,
|
||||
maxBytes?: number,
|
||||
options: Omit<WebMediaOptions, "maxBytes" | "optimizeImages"> = {},
|
||||
): Promise<WebMediaResult> {
|
||||
return await loadWebMediaInternal(mediaUrl, {
|
||||
...options,
|
||||
maxBytes,
|
||||
optimizeImages: true,
|
||||
});
|
||||
@@ -227,8 +264,10 @@ export async function loadWebMedia(mediaUrl: string, maxBytes?: number): Promise
|
||||
export async function loadWebMediaRaw(
|
||||
mediaUrl: string,
|
||||
maxBytes?: number,
|
||||
options: Omit<WebMediaOptions, "maxBytes" | "optimizeImages"> = {},
|
||||
): Promise<WebMediaResult> {
|
||||
return await loadWebMediaInternal(mediaUrl, {
|
||||
...options,
|
||||
maxBytes,
|
||||
optimizeImages: false,
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user