Compare commits
2 Commits
feat/routi
...
fix/transc
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
274ec379b0 | ||
|
|
fb8862bb11 |
@@ -158,6 +158,8 @@ vi.mock("../pi-embedded-helpers.js", async () => {
|
||||
});
|
||||
|
||||
import type { EmbeddedRunAttemptResult } from "./run/types.js";
|
||||
import { markAuthProfileFailure } from "../auth-profiles.js";
|
||||
import * as piEmbeddedHelpers from "../pi-embedded-helpers.js";
|
||||
import { compactEmbeddedPiSessionDirect } from "./compact.js";
|
||||
import { log } from "./logger.js";
|
||||
import { runEmbeddedPiAgent } from "./run.js";
|
||||
@@ -173,6 +175,9 @@ const mockedSessionLikelyHasOversizedToolResults = vi.mocked(sessionLikelyHasOve
|
||||
const mockedTruncateOversizedToolResultsInSession = vi.mocked(
|
||||
truncateOversizedToolResultsInSession,
|
||||
);
|
||||
const mockedMarkAuthProfileFailure = vi.mocked(markAuthProfileFailure);
|
||||
const mockedClassifyFailoverReason = vi.mocked(piEmbeddedHelpers.classifyFailoverReason);
|
||||
const mockedIsFailoverAssistantError = vi.mocked(piEmbeddedHelpers.isFailoverAssistantError);
|
||||
|
||||
function makeAttemptResult(
|
||||
overrides: Partial<EmbeddedRunAttemptResult> = {},
|
||||
@@ -433,4 +438,43 @@ describe("overflow compaction in run loop", () => {
|
||||
expect(mockedCompactDirect).not.toHaveBeenCalled();
|
||||
expect(log.warn).not.toHaveBeenCalledWith(expect.stringContaining("source=assistantError"));
|
||||
});
|
||||
|
||||
it("does not cooldown auth profile for assistant format errors", async () => {
|
||||
mockedClassifyFailoverReason.mockReturnValue("format");
|
||||
mockedIsFailoverAssistantError.mockReturnValue(true);
|
||||
|
||||
mockedRunEmbeddedAttempt.mockResolvedValueOnce(
|
||||
makeAttemptResult({
|
||||
promptError: null,
|
||||
lastAssistant: {
|
||||
stopReason: "error",
|
||||
errorMessage: "Cloud Code Assist format error",
|
||||
} as EmbeddedRunAttemptResult["lastAssistant"],
|
||||
}),
|
||||
);
|
||||
|
||||
const result = await runEmbeddedPiAgent(baseParams);
|
||||
|
||||
expect(result.meta.error).toBeUndefined();
|
||||
expect(mockedMarkAuthProfileFailure).not.toHaveBeenCalled();
|
||||
expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
it("does not cooldown auth profile for prompt format errors", async () => {
|
||||
mockedClassifyFailoverReason.mockReturnValue("format");
|
||||
|
||||
mockedRunEmbeddedAttempt.mockResolvedValueOnce(
|
||||
makeAttemptResult({
|
||||
promptError: new Error("Cloud Code Assist format error"),
|
||||
lastAssistant: {
|
||||
stopReason: "error",
|
||||
errorMessage: "Cloud Code Assist format error",
|
||||
} as EmbeddedRunAttemptResult["lastAssistant"],
|
||||
}),
|
||||
);
|
||||
|
||||
await expect(runEmbeddedPiAgent(baseParams)).rejects.toThrow("Cloud Code Assist format error");
|
||||
expect(mockedMarkAuthProfileFailure).not.toHaveBeenCalled();
|
||||
expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -673,7 +673,17 @@ export async function runEmbeddedPiAgent(
|
||||
};
|
||||
}
|
||||
const promptFailoverReason = classifyFailoverReason(errorText);
|
||||
if (promptFailoverReason && promptFailoverReason !== "timeout" && lastProfileId) {
|
||||
// Don't mark auth profile as failed for format errors (400 Bad Request).
|
||||
// Format errors indicate malformed session input (e.g., corrupted transcript),
|
||||
// NOT a provider/auth issue. Cooling down the profile cascades failures to
|
||||
// all sessions sharing the same auth profile.
|
||||
// See: https://github.com/openclaw/openclaw/issues/15037
|
||||
if (
|
||||
promptFailoverReason &&
|
||||
promptFailoverReason !== "timeout" &&
|
||||
promptFailoverReason !== "format" &&
|
||||
lastProfileId
|
||||
) {
|
||||
await markAuthProfileFailure({
|
||||
store: authStore,
|
||||
profileId: lastProfileId,
|
||||
@@ -753,8 +763,11 @@ export async function runEmbeddedPiAgent(
|
||||
);
|
||||
}
|
||||
|
||||
// Treat timeout as potential rate limit (Antigravity hangs on rate limit)
|
||||
const shouldRotate = (!aborted && failoverFailure) || timedOut;
|
||||
// Treat timeout as potential rate limit (Antigravity hangs on rate limit).
|
||||
// Don't rotate profiles for format errors; those are usually session input
|
||||
// issues and shouldn't affect shared auth profile health.
|
||||
const shouldRotate =
|
||||
timedOut || (!aborted && failoverFailure && assistantFailoverReason !== "format");
|
||||
|
||||
if (shouldRotate) {
|
||||
if (lastProfileId) {
|
||||
|
||||
@@ -114,10 +114,12 @@ describe("sanitizeToolUseResultPairing", () => {
|
||||
expect(out.map((m) => m.role)).toEqual(["user", "assistant"]);
|
||||
});
|
||||
|
||||
it("skips tool call extraction for assistant messages with stopReason 'error'", () => {
|
||||
it("strips tool_use blocks from assistant messages with stopReason 'error'", () => {
|
||||
// When an assistant message has stopReason: "error", its tool_use blocks may be
|
||||
// incomplete/malformed. We should NOT create synthetic tool_results for them,
|
||||
// as this causes API 400 errors: "unexpected tool_use_id found in tool_result blocks"
|
||||
// incomplete/malformed. We strip them to prevent the API from expecting matching
|
||||
// tool_results that don't exist.
|
||||
// See: https://github.com/openclaw/openclaw/issues/4597
|
||||
// See: https://github.com/openclaw/openclaw/issues/15037
|
||||
const input = [
|
||||
{
|
||||
role: "assistant",
|
||||
@@ -131,15 +133,58 @@ describe("sanitizeToolUseResultPairing", () => {
|
||||
|
||||
// Should NOT add synthetic tool results for errored messages
|
||||
expect(result.added).toHaveLength(0);
|
||||
// The assistant message should be passed through unchanged
|
||||
expect(result.messages[0]?.role).toBe("assistant");
|
||||
expect(result.messages[1]?.role).toBe("user");
|
||||
// The assistant message with only tool calls should be dropped entirely
|
||||
expect(result.messages).toHaveLength(1);
|
||||
expect(result.messages[0]?.role).toBe("user");
|
||||
});
|
||||
|
||||
it("keeps errored assistant text-only messages unchanged", () => {
|
||||
const input = [
|
||||
{
|
||||
role: "assistant",
|
||||
content: [{ type: "text", text: "I ran into trouble and explained it." }],
|
||||
stopReason: "error",
|
||||
},
|
||||
{ role: "user", content: "okay" },
|
||||
] as AgentMessage[];
|
||||
|
||||
const result = repairToolUseResultPairing(input);
|
||||
|
||||
// No tool calls were removed, so no transcript rewrite should happen.
|
||||
expect(result.messages).toBe(input);
|
||||
expect(result.messages).toHaveLength(2);
|
||||
});
|
||||
|
||||
it("skips tool call extraction for assistant messages with stopReason 'aborted'", () => {
|
||||
it("strips tool_use blocks but keeps text from errored assistant messages", () => {
|
||||
// When an errored assistant message has both text and tool_use blocks,
|
||||
// strip the tool_use blocks but keep the text content.
|
||||
const input = [
|
||||
{
|
||||
role: "assistant",
|
||||
content: [
|
||||
{ type: "text", text: "Let me try that..." },
|
||||
{ type: "toolCall", id: "call_error", name: "exec", arguments: {} },
|
||||
],
|
||||
stopReason: "error",
|
||||
},
|
||||
{ role: "user", content: "something went wrong" },
|
||||
] as AgentMessage[];
|
||||
|
||||
const result = repairToolUseResultPairing(input);
|
||||
|
||||
expect(result.added).toHaveLength(0);
|
||||
expect(result.messages).toHaveLength(2);
|
||||
expect(result.messages[0]?.role).toBe("assistant");
|
||||
// The assistant message should only have the text block, not the tool call
|
||||
const content = (result.messages[0] as { content: unknown[] }).content;
|
||||
expect(content).toHaveLength(1);
|
||||
expect((content[0] as { type: string }).type).toBe("text");
|
||||
expect(result.messages[1]?.role).toBe("user");
|
||||
});
|
||||
|
||||
it("strips tool_use blocks from assistant messages with stopReason 'aborted'", () => {
|
||||
// When a request is aborted mid-stream, the assistant message may have incomplete
|
||||
// tool_use blocks (with partialJson). We should NOT create synthetic tool_results.
|
||||
// tool_use blocks (with partialJson). We strip them to prevent API 400 errors.
|
||||
const input = [
|
||||
{
|
||||
role: "assistant",
|
||||
@@ -153,10 +198,9 @@ describe("sanitizeToolUseResultPairing", () => {
|
||||
|
||||
// Should NOT add synthetic tool results for aborted messages
|
||||
expect(result.added).toHaveLength(0);
|
||||
// Messages should be passed through without synthetic insertions
|
||||
expect(result.messages).toHaveLength(2);
|
||||
expect(result.messages[0]?.role).toBe("assistant");
|
||||
expect(result.messages[1]?.role).toBe("user");
|
||||
// The assistant message with only tool calls should be dropped
|
||||
expect(result.messages).toHaveLength(1);
|
||||
expect(result.messages[0]?.role).toBe("user");
|
||||
});
|
||||
|
||||
it("still repairs tool results for normal assistant messages with stopReason 'toolUse'", () => {
|
||||
@@ -178,9 +222,8 @@ describe("sanitizeToolUseResultPairing", () => {
|
||||
});
|
||||
|
||||
it("drops orphan tool results that follow an aborted assistant message", () => {
|
||||
// When an assistant message is aborted, any tool results that follow should be
|
||||
// dropped as orphans (since we skip extracting tool calls from aborted messages).
|
||||
// This addresses the edge case where a partial tool result was persisted before abort.
|
||||
// When an assistant message is aborted, its tool_use blocks are stripped.
|
||||
// Any tool results that follow should also be dropped as orphans.
|
||||
const input = [
|
||||
{
|
||||
role: "assistant",
|
||||
@@ -199,11 +242,11 @@ describe("sanitizeToolUseResultPairing", () => {
|
||||
|
||||
const result = repairToolUseResultPairing(input);
|
||||
|
||||
// The orphan tool result should be dropped
|
||||
// The orphan tool result should be dropped, and the empty assistant message too
|
||||
expect(result.droppedOrphanCount).toBe(1);
|
||||
expect(result.messages).toHaveLength(2);
|
||||
expect(result.messages[0]?.role).toBe("assistant");
|
||||
expect(result.messages[1]?.role).toBe("user");
|
||||
// Only the user message should remain
|
||||
expect(result.messages).toHaveLength(1);
|
||||
expect(result.messages[0]?.role).toBe("user");
|
||||
// No synthetic results should be added
|
||||
expect(result.added).toHaveLength(0);
|
||||
});
|
||||
|
||||
@@ -214,15 +214,36 @@ export function repairToolUseResultPairing(messages: AgentMessage[]): ToolUseRep
|
||||
|
||||
const assistant = msg as Extract<AgentMessage, { role: "assistant" }>;
|
||||
|
||||
// Skip tool call extraction for aborted or errored assistant messages.
|
||||
// Handle aborted or errored assistant messages.
|
||||
// When stopReason is "error" or "aborted", the tool_use blocks may be incomplete
|
||||
// (e.g., partialJson: true) and should not have synthetic tool_results created.
|
||||
// Creating synthetic results for incomplete tool calls causes API 400 errors:
|
||||
// "unexpected tool_use_id found in tool_result blocks"
|
||||
// (e.g., partialJson: true). We must NOT create synthetic tool_results for incomplete
|
||||
// tool calls, but we also must NOT leave tool_use blocks in the message that the API
|
||||
// will expect matching tool_results for.
|
||||
// Fix: strip tool_use blocks from aborted/errored messages entirely.
|
||||
// If the message has no remaining content after stripping, drop it.
|
||||
// See: https://github.com/openclaw/openclaw/issues/4597
|
||||
// See: https://github.com/openclaw/openclaw/issues/15037
|
||||
const stopReason = (assistant as { stopReason?: string }).stopReason;
|
||||
if (stopReason === "error" || stopReason === "aborted") {
|
||||
out.push(msg);
|
||||
if (Array.isArray(assistant.content)) {
|
||||
const nonToolContent = assistant.content.filter((block) => {
|
||||
if (!block || typeof block !== "object") {
|
||||
return true;
|
||||
}
|
||||
return !isToolCallBlock(block);
|
||||
});
|
||||
const removedToolCalls = nonToolContent.length !== assistant.content.length;
|
||||
if (nonToolContent.length > 0) {
|
||||
out.push({ ...msg, content: nonToolContent } as AgentMessage);
|
||||
}
|
||||
if (removedToolCalls) {
|
||||
// If all content was tool calls, drop the entire message.
|
||||
// If only non-tool blocks remain, keep the message unchanged.
|
||||
changed = true;
|
||||
}
|
||||
} else {
|
||||
out.push(msg);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user