fix: avoid auth cooldowns for format-only failures

fix: transcript corruption resilience (3 fixes)
Fix 1 - Strip tool_use blocks from aborted/errored assistant messages Instead of keeping aborted assistant messages with their (potentially incomplete) tool_use blocks, strip the tool_use blocks entirely. This prevents the API from expecting matching tool_results that don't exist, which causes 400 errors. If the message has no remaining content after stripping, drop it entirely. If it has text content alongside tool calls, keep the text. Fix 2 - Don't cooldown auth profiles for format errors Format errors (400 Bad Request) indicate malformed session input, not provider unavailability. Previously, format errors put the auth profile into cooldown, which cascaded failures to ALL sessions sharing that profile (Slack, Telegram, webchat all go down from one corrupted session). Now format errors are treated as session-scoped: the failing session gets its error, but other sessions continue working normally. Fix 3 - Updated tests for new behavior Tests updated to reflect that aborted/errored assistant messages have their tool_use blocks stripped rather than passed through unchanged. Fixes: https://github.com/openclaw/openclaw/issues/15037
2026-02-12 17:08:01 -08:00 · 2026-02-12 16:37:54 -06:00
4 changed files with 148 additions and 27 deletions
--- a/src/agents/pi-embedded-runner/run.overflow-compaction.test.ts
+++ b/src/agents/pi-embedded-runner/run.overflow-compaction.test.ts
@@ -158,6 +158,8 @@ vi.mock("../pi-embedded-helpers.js", async () => {
 });

 import type { EmbeddedRunAttemptResult } from "./run/types.js";
+import { markAuthProfileFailure } from "../auth-profiles.js";
+import * as piEmbeddedHelpers from "../pi-embedded-helpers.js";
 import { compactEmbeddedPiSessionDirect } from "./compact.js";
 import { log } from "./logger.js";
 import { runEmbeddedPiAgent } from "./run.js";
@@ -173,6 +175,9 @@ const mockedSessionLikelyHasOversizedToolResults = vi.mocked(sessionLikelyHasOve
 const mockedTruncateOversizedToolResultsInSession = vi.mocked(
  truncateOversizedToolResultsInSession,
 );
+const mockedMarkAuthProfileFailure = vi.mocked(markAuthProfileFailure);
+const mockedClassifyFailoverReason = vi.mocked(piEmbeddedHelpers.classifyFailoverReason);
+const mockedIsFailoverAssistantError = vi.mocked(piEmbeddedHelpers.isFailoverAssistantError);

 function makeAttemptResult(
  overrides: Partial<EmbeddedRunAttemptResult> = {},
@@ -433,4 +438,43 @@ describe("overflow compaction in run loop", () => {
    expect(mockedCompactDirect).not.toHaveBeenCalled();
    expect(log.warn).not.toHaveBeenCalledWith(expect.stringContaining("source=assistantError"));
  });
+
+  it("does not cooldown auth profile for assistant format errors", async () => {
+    mockedClassifyFailoverReason.mockReturnValue("format");
+    mockedIsFailoverAssistantError.mockReturnValue(true);
+
+    mockedRunEmbeddedAttempt.mockResolvedValueOnce(
+      makeAttemptResult({
+        promptError: null,
+        lastAssistant: {
+          stopReason: "error",
+          errorMessage: "Cloud Code Assist format error",
+        } as EmbeddedRunAttemptResult["lastAssistant"],
+      }),
+    );
+
+    const result = await runEmbeddedPiAgent(baseParams);
+
+    expect(result.meta.error).toBeUndefined();
+    expect(mockedMarkAuthProfileFailure).not.toHaveBeenCalled();
+    expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(1);
+  });
+
+  it("does not cooldown auth profile for prompt format errors", async () => {
+    mockedClassifyFailoverReason.mockReturnValue("format");
+
+    mockedRunEmbeddedAttempt.mockResolvedValueOnce(
+      makeAttemptResult({
+        promptError: new Error("Cloud Code Assist format error"),
+        lastAssistant: {
+          stopReason: "error",
+          errorMessage: "Cloud Code Assist format error",
+        } as EmbeddedRunAttemptResult["lastAssistant"],
+      }),
+    );
+
+    await expect(runEmbeddedPiAgent(baseParams)).rejects.toThrow("Cloud Code Assist format error");
+    expect(mockedMarkAuthProfileFailure).not.toHaveBeenCalled();
+    expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(1);
+  });
 });
--- a/src/agents/pi-embedded-runner/run.ts
+++ b/src/agents/pi-embedded-runner/run.ts
@@ -673,7 +673,17 @@ export async function runEmbeddedPiAgent(
              };
            }
            const promptFailoverReason = classifyFailoverReason(errorText);
-            if (promptFailoverReason && promptFailoverReason !== "timeout" && lastProfileId) {
+            // Don't mark auth profile as failed for format errors (400 Bad Request).
+            // Format errors indicate malformed session input (e.g., corrupted transcript),
+            // NOT a provider/auth issue. Cooling down the profile cascades failures to
+            // all sessions sharing the same auth profile.
+            // See: https://github.com/openclaw/openclaw/issues/15037
+            if (
+              promptFailoverReason &&
+              promptFailoverReason !== "timeout" &&
+              promptFailoverReason !== "format" &&
+              lastProfileId
+            ) {
              await markAuthProfileFailure({
                store: authStore,
                profileId: lastProfileId,
@@ -753,8 +763,11 @@ export async function runEmbeddedPiAgent(
            );
          }

-          // Treat timeout as potential rate limit (Antigravity hangs on rate limit)
-          const shouldRotate = (!aborted && failoverFailure) || timedOut;
+          // Treat timeout as potential rate limit (Antigravity hangs on rate limit).
+          // Don't rotate profiles for format errors; those are usually session input
+          // issues and shouldn't affect shared auth profile health.
+          const shouldRotate =
+            timedOut || (!aborted && failoverFailure && assistantFailoverReason !== "format");

          if (shouldRotate) {
            if (lastProfileId) {
--- a/src/agents/session-transcript-repair.test.ts
+++ b/src/agents/session-transcript-repair.test.ts
@@ -114,10 +114,12 @@ describe("sanitizeToolUseResultPairing", () => {
    expect(out.map((m) => m.role)).toEqual(["user", "assistant"]);
  });

-  it("skips tool call extraction for assistant messages with stopReason 'error'", () => {
+  it("strips tool_use blocks from assistant messages with stopReason 'error'", () => {
    // When an assistant message has stopReason: "error", its tool_use blocks may be
-    // incomplete/malformed. We should NOT create synthetic tool_results for them,
-    // as this causes API 400 errors: "unexpected tool_use_id found in tool_result blocks"
+    // incomplete/malformed. We strip them to prevent the API from expecting matching
+    // tool_results that don't exist.
+    // See: https://github.com/openclaw/openclaw/issues/4597
+    // See: https://github.com/openclaw/openclaw/issues/15037
    const input = [
      {
        role: "assistant",
@@ -131,15 +133,58 @@ describe("sanitizeToolUseResultPairing", () => {

    // Should NOT add synthetic tool results for errored messages
    expect(result.added).toHaveLength(0);
-    // The assistant message should be passed through unchanged
-    expect(result.messages[0]?.role).toBe("assistant");
-    expect(result.messages[1]?.role).toBe("user");
+    // The assistant message with only tool calls should be dropped entirely
+    expect(result.messages).toHaveLength(1);
+    expect(result.messages[0]?.role).toBe("user");
+  });
+
+  it("keeps errored assistant text-only messages unchanged", () => {
+    const input = [
+      {
+        role: "assistant",
+        content: [{ type: "text", text: "I ran into trouble and explained it." }],
+        stopReason: "error",
+      },
+      { role: "user", content: "okay" },
+    ] as AgentMessage[];
+
+    const result = repairToolUseResultPairing(input);
+
+    // No tool calls were removed, so no transcript rewrite should happen.
+    expect(result.messages).toBe(input);
    expect(result.messages).toHaveLength(2);
  });

-  it("skips tool call extraction for assistant messages with stopReason 'aborted'", () => {
+  it("strips tool_use blocks but keeps text from errored assistant messages", () => {
+    // When an errored assistant message has both text and tool_use blocks,
+    // strip the tool_use blocks but keep the text content.
+    const input = [
+      {
+        role: "assistant",
+        content: [
+          { type: "text", text: "Let me try that..." },
+          { type: "toolCall", id: "call_error", name: "exec", arguments: {} },
+        ],
+        stopReason: "error",
+      },
+      { role: "user", content: "something went wrong" },
+    ] as AgentMessage[];
+
+    const result = repairToolUseResultPairing(input);
+
+    expect(result.added).toHaveLength(0);
+    expect(result.messages).toHaveLength(2);
+    expect(result.messages[0]?.role).toBe("assistant");
+    // The assistant message should only have the text block, not the tool call
+    const content = (result.messages[0] as { content: unknown[] }).content;
+    expect(content).toHaveLength(1);
+    expect((content[0] as { type: string }).type).toBe("text");
+    expect(result.messages[1]?.role).toBe("user");
+  });
+
+  it("strips tool_use blocks from assistant messages with stopReason 'aborted'", () => {
    // When a request is aborted mid-stream, the assistant message may have incomplete
-    // tool_use blocks (with partialJson). We should NOT create synthetic tool_results.
+    // tool_use blocks (with partialJson). We strip them to prevent API 400 errors.
    const input = [
      {
        role: "assistant",
@@ -153,10 +198,9 @@ describe("sanitizeToolUseResultPairing", () => {

    // Should NOT add synthetic tool results for aborted messages
    expect(result.added).toHaveLength(0);
-    // Messages should be passed through without synthetic insertions
-    expect(result.messages).toHaveLength(2);
-    expect(result.messages[0]?.role).toBe("assistant");
-    expect(result.messages[1]?.role).toBe("user");
+    // The assistant message with only tool calls should be dropped
+    expect(result.messages).toHaveLength(1);
+    expect(result.messages[0]?.role).toBe("user");
  });

  it("still repairs tool results for normal assistant messages with stopReason 'toolUse'", () => {
@@ -178,9 +222,8 @@ describe("sanitizeToolUseResultPairing", () => {
  });

  it("drops orphan tool results that follow an aborted assistant message", () => {
-    // When an assistant message is aborted, any tool results that follow should be
-    // dropped as orphans (since we skip extracting tool calls from aborted messages).
-    // This addresses the edge case where a partial tool result was persisted before abort.
+    // When an assistant message is aborted, its tool_use blocks are stripped.
+    // Any tool results that follow should also be dropped as orphans.
    const input = [
      {
        role: "assistant",
@@ -199,11 +242,11 @@ describe("sanitizeToolUseResultPairing", () => {

    const result = repairToolUseResultPairing(input);

-    // The orphan tool result should be dropped
+    // The orphan tool result should be dropped, and the empty assistant message too
    expect(result.droppedOrphanCount).toBe(1);
-    expect(result.messages).toHaveLength(2);
-    expect(result.messages[0]?.role).toBe("assistant");
-    expect(result.messages[1]?.role).toBe("user");
+    // Only the user message should remain
+    expect(result.messages).toHaveLength(1);
+    expect(result.messages[0]?.role).toBe("user");
    // No synthetic results should be added
    expect(result.added).toHaveLength(0);
  });
--- a/src/agents/session-transcript-repair.ts
+++ b/src/agents/session-transcript-repair.ts
@@ -214,15 +214,36 @@ export function repairToolUseResultPairing(messages: AgentMessage[]): ToolUseRep

    const assistant = msg as Extract<AgentMessage, { role: "assistant" }>;

-    // Skip tool call extraction for aborted or errored assistant messages.
+    // Handle aborted or errored assistant messages.
    // When stopReason is "error" or "aborted", the tool_use blocks may be incomplete
-    // (e.g., partialJson: true) and should not have synthetic tool_results created.
-    // Creating synthetic results for incomplete tool calls causes API 400 errors:
-    // "unexpected tool_use_id found in tool_result blocks"
+    // (e.g., partialJson: true). We must NOT create synthetic tool_results for incomplete
+    // tool calls, but we also must NOT leave tool_use blocks in the message that the API
+    // will expect matching tool_results for.
+    // Fix: strip tool_use blocks from aborted/errored messages entirely.
+    // If the message has no remaining content after stripping, drop it.
    // See: https://github.com/openclaw/openclaw/issues/4597
+    // See: https://github.com/openclaw/openclaw/issues/15037
    const stopReason = (assistant as { stopReason?: string }).stopReason;
    if (stopReason === "error" || stopReason === "aborted") {
-      out.push(msg);
+      if (Array.isArray(assistant.content)) {
+        const nonToolContent = assistant.content.filter((block) => {
+          if (!block || typeof block !== "object") {
+            return true;
+          }
+          return !isToolCallBlock(block);
+        });
+        const removedToolCalls = nonToolContent.length !== assistant.content.length;
+        if (nonToolContent.length > 0) {
+          out.push({ ...msg, content: nonToolContent } as AgentMessage);
+        }
+        if (removedToolCalls) {
+          // If all content was tool calls, drop the entire message.
+          // If only non-tool blocks remain, keep the message unchanged.
+          changed = true;
+        }
+      } else {
+        out.push(msg);
+      }
      continue;
    }