Skip to content

Commit 585d253

Browse files
authored
Merge pull request #9596 from continuedev/dallin/duplicate-tool-messages
fix: duplicate tool messages added
2 parents c559b95 + ccd0b74 commit 585d253

File tree

2 files changed

+422
-45
lines changed

2 files changed

+422
-45
lines changed
Lines changed: 392 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,392 @@
1+
import type { ChatHistoryItem, ToolStatus } from "core/index.js";
2+
import { convertFromUnifiedHistory } from "core/util/messageConversion.js";
3+
import { beforeEach, describe, expect, it, vi } from "vitest";
4+
5+
import { services } from "../services/index.js";
6+
7+
// eslint-disable-next-line
8+
import { handleToolCalls } from "./handleToolCalls.js"; // for some reason can't resolve import groups here
9+
10+
// Mock the services
11+
vi.mock("../services/index.js", () => ({
12+
services: {
13+
chatHistory: {
14+
isReady: vi.fn(),
15+
addAssistantMessage: vi.fn(),
16+
addToolResult: vi.fn(),
17+
addHistoryItem: vi.fn(),
18+
},
19+
toolPermissions: {
20+
getState: vi.fn().mockReturnValue({
21+
permissions: { defaultPermission: "allow" },
22+
}),
23+
},
24+
},
25+
serviceContainer: {
26+
get: vi.fn().mockResolvedValue({
27+
permissions: { defaultPermission: "allow" },
28+
}),
29+
},
30+
SERVICE_NAMES: {
31+
TOOL_PERMISSIONS: "toolPermissions",
32+
},
33+
}));
34+
35+
// Mock the helper functions
36+
vi.mock("./streamChatResponse.helpers.js", () => ({
37+
preprocessStreamedToolCalls: vi.fn(),
38+
executeStreamedToolCalls: vi.fn(),
39+
}));
40+
41+
// Import the mocked modules
42+
import {
43+
executeStreamedToolCalls,
44+
preprocessStreamedToolCalls,
45+
} from "./streamChatResponse.helpers.js";
46+
47+
const mockPreprocess = vi.mocked(preprocessStreamedToolCalls);
48+
const mockExecute = vi.mocked(executeStreamedToolCalls);
49+
50+
describe("handleToolCalls - duplicate tool_result prevention", () => {
51+
let chatHistory: ChatHistoryItem[];
52+
53+
beforeEach(() => {
54+
vi.clearAllMocks();
55+
chatHistory = [
56+
{
57+
message: { role: "user", content: "Test prompt" },
58+
contextItems: [],
59+
},
60+
];
61+
62+
// Default: service is ready
63+
vi.mocked(services.chatHistory.isReady).mockReturnValue(true);
64+
});
65+
66+
/**
67+
* This test verifies that preprocessing errors are stored in toolCallStates
68+
* (on the assistant message) rather than as separate tool history items.
69+
*
70+
* If errors were added as separate history items, convertFromUnifiedHistory
71+
* would generate duplicate tool_result messages - one from the standalone
72+
* tool history item and one from the toolCallStates.
73+
*/
74+
it("should store preprocessing errors in toolCallStates, not as separate history items", async () => {
75+
const toolCalls = [
76+
{
77+
id: "tool-call-1",
78+
name: "invalid_tool",
79+
arguments: {},
80+
argumentsStr: "{}",
81+
startNotified: false,
82+
},
83+
];
84+
85+
// Simulate preprocessing error
86+
mockPreprocess.mockResolvedValue({
87+
preprocessedCalls: [],
88+
errorChatEntries: [
89+
{
90+
role: "tool",
91+
tool_call_id: "tool-call-1",
92+
content: "Tool invalid_tool not found",
93+
},
94+
],
95+
});
96+
97+
mockExecute.mockResolvedValue({
98+
hasRejection: false,
99+
chatHistoryEntries: [],
100+
});
101+
102+
await handleToolCalls({
103+
toolCalls,
104+
chatHistory,
105+
content: "Let me try that tool",
106+
callbacks: undefined,
107+
isHeadless: false,
108+
});
109+
110+
// CRITICAL: addToolResult should be called to update the toolCallState
111+
expect(services.chatHistory.addToolResult).toHaveBeenCalledWith(
112+
"tool-call-1",
113+
"Tool invalid_tool not found",
114+
"errored",
115+
);
116+
117+
// CRITICAL: addHistoryItem should NOT be called
118+
// (previously, errors were added as separate history items which caused duplicates)
119+
expect(services.chatHistory.addHistoryItem).not.toHaveBeenCalled();
120+
});
121+
122+
/**
123+
* This test verifies that when converting history to API format,
124+
* we get exactly one tool_result per tool call - not duplicates.
125+
*/
126+
it("should produce exactly one tool_result per tool call when converting to API format", async () => {
127+
// Simulate history with an assistant message that has toolCallStates with output
128+
const historyWithToolResults: ChatHistoryItem[] = [
129+
{
130+
message: { role: "user", content: "Run a command" },
131+
contextItems: [],
132+
},
133+
{
134+
message: {
135+
role: "assistant",
136+
content: "I'll run that for you",
137+
toolCalls: [
138+
{
139+
id: "tool-call-1",
140+
type: "function",
141+
function: { name: "run_command", arguments: '{"cmd":"ls"}' },
142+
},
143+
],
144+
},
145+
contextItems: [],
146+
toolCallStates: [
147+
{
148+
toolCallId: "tool-call-1",
149+
toolCall: {
150+
id: "tool-call-1",
151+
type: "function",
152+
function: { name: "run_command", arguments: '{"cmd":"ls"}' },
153+
},
154+
status: "done" as ToolStatus,
155+
parsedArgs: { cmd: "ls" },
156+
output: [
157+
{
158+
content: "file1.txt\nfile2.txt",
159+
name: "Tool Result",
160+
description: "Tool execution result",
161+
},
162+
],
163+
},
164+
],
165+
},
166+
];
167+
168+
const messages = convertFromUnifiedHistory(historyWithToolResults);
169+
170+
// Count tool messages
171+
const toolMessages = messages.filter((m) => m.role === "tool");
172+
173+
// Should have exactly ONE tool message, not duplicates
174+
expect(toolMessages).toHaveLength(1);
175+
expect(toolMessages[0]).toEqual({
176+
role: "tool",
177+
content: "file1.txt\nfile2.txt",
178+
tool_call_id: "tool-call-1",
179+
});
180+
});
181+
182+
/**
183+
* This test simulates the bug that was occurring:
184+
* If tool errors are added as separate history items AND stored in toolCallStates,
185+
* convertFromUnifiedHistory would produce duplicate tool_result messages.
186+
*/
187+
it("should NOT produce duplicate tool_results even with errored tool calls", async () => {
188+
// Simulate history where error is stored in toolCallStates (correct approach)
189+
const historyWithErroredTool: ChatHistoryItem[] = [
190+
{
191+
message: { role: "user", content: "Use invalid tool" },
192+
contextItems: [],
193+
},
194+
{
195+
message: {
196+
role: "assistant",
197+
content: "",
198+
toolCalls: [
199+
{
200+
id: "tool-call-error",
201+
type: "function",
202+
function: { name: "nonexistent", arguments: "{}" },
203+
},
204+
],
205+
},
206+
contextItems: [],
207+
toolCallStates: [
208+
{
209+
toolCallId: "tool-call-error",
210+
toolCall: {
211+
id: "tool-call-error",
212+
type: "function",
213+
function: { name: "nonexistent", arguments: "{}" },
214+
},
215+
status: "errored" as ToolStatus,
216+
parsedArgs: {},
217+
output: [
218+
{
219+
content: "Tool nonexistent not found",
220+
name: "Tool Result",
221+
description: "Tool execution result",
222+
},
223+
],
224+
},
225+
],
226+
},
227+
];
228+
229+
const messages = convertFromUnifiedHistory(historyWithErroredTool);
230+
231+
// Count tool messages with this specific ID
232+
const toolMessagesForCall = messages.filter(
233+
(m) => m.role === "tool" && (m as any).tool_call_id === "tool-call-error",
234+
);
235+
236+
// Should have exactly ONE tool message for this tool call
237+
expect(toolMessagesForCall).toHaveLength(1);
238+
expect(toolMessagesForCall[0]).toEqual({
239+
role: "tool",
240+
content: "Tool nonexistent not found",
241+
tool_call_id: "tool-call-error",
242+
});
243+
});
244+
245+
/**
246+
* This test verifies the problematic pattern that WOULD cause duplicates
247+
* (for documentation purposes - this is what we're preventing).
248+
*/
249+
it("demonstrates the duplicate bug when tool messages are stored separately", () => {
250+
// BAD PATTERN: Having both a standalone tool message AND toolCallStates
251+
// This is what the old code did, and it caused the duplicate tool_result error
252+
const badHistoryWithDuplicates: ChatHistoryItem[] = [
253+
{
254+
message: { role: "user", content: "Run command" },
255+
contextItems: [],
256+
},
257+
{
258+
message: {
259+
role: "assistant",
260+
content: "",
261+
toolCalls: [
262+
{
263+
id: "duplicate-call",
264+
type: "function",
265+
function: { name: "run_command", arguments: '{"cmd":"ls"}' },
266+
},
267+
],
268+
},
269+
contextItems: [],
270+
toolCallStates: [
271+
{
272+
toolCallId: "duplicate-call",
273+
toolCall: {
274+
id: "duplicate-call",
275+
type: "function",
276+
function: { name: "run_command", arguments: '{"cmd":"ls"}' },
277+
},
278+
status: "done" as ToolStatus,
279+
parsedArgs: { cmd: "ls" },
280+
output: [
281+
{
282+
content: "Result from toolCallStates",
283+
name: "Tool Result",
284+
description: "Tool execution result",
285+
},
286+
],
287+
},
288+
],
289+
},
290+
// BAD: Standalone tool message that should NOT exist
291+
// (This is what the old buggy code was adding)
292+
{
293+
message: {
294+
role: "tool",
295+
content: "Result from standalone message",
296+
toolCallId: "duplicate-call",
297+
},
298+
contextItems: [],
299+
},
300+
];
301+
302+
const messages = convertFromUnifiedHistory(badHistoryWithDuplicates);
303+
304+
// Count tool messages with this ID - this demonstrates the bug
305+
const toolMessagesForCall = messages.filter(
306+
(m) => m.role === "tool" && (m as any).tool_call_id === "duplicate-call",
307+
);
308+
309+
// With the bad pattern, we get TWO tool messages (the bug!)
310+
// This would cause: "each tool_use must have a single result"
311+
expect(toolMessagesForCall).toHaveLength(2);
312+
313+
// The fix ensures we never create this bad pattern in the first place
314+
});
315+
316+
/**
317+
* Test that the fallback path (when service is not ready) also
318+
* updates toolCallStates instead of adding separate items.
319+
*/
320+
it("should update toolCallStates in fallback mode for preprocessing errors", async () => {
321+
// Service not ready - use fallback path
322+
vi.mocked(services.chatHistory.isReady).mockReturnValue(false);
323+
324+
const toolCalls = [
325+
{
326+
id: "fallback-tool-1",
327+
name: "broken_tool",
328+
arguments: { arg: "value" },
329+
argumentsStr: '{"arg":"value"}',
330+
startNotified: false,
331+
},
332+
];
333+
334+
// Simulate preprocessing error
335+
mockPreprocess.mockResolvedValue({
336+
preprocessedCalls: [],
337+
errorChatEntries: [
338+
{
339+
role: "tool",
340+
tool_call_id: "fallback-tool-1",
341+
content: "Tool broken_tool not found",
342+
},
343+
],
344+
});
345+
346+
mockExecute.mockResolvedValue({
347+
hasRejection: false,
348+
chatHistoryEntries: [],
349+
});
350+
351+
await handleToolCalls({
352+
toolCalls,
353+
chatHistory,
354+
content: "",
355+
callbacks: undefined,
356+
isHeadless: false,
357+
});
358+
359+
// In fallback mode, service methods should NOT be called for adding results
360+
expect(services.chatHistory.addToolResult).not.toHaveBeenCalled();
361+
expect(services.chatHistory.addHistoryItem).not.toHaveBeenCalled();
362+
363+
// The local chatHistory should have the assistant message with toolCallStates
364+
// (handleToolCalls adds this via createHistoryItem in fallback mode)
365+
const assistantItem = chatHistory.find(
366+
(item) =>
367+
item.message.role === "assistant" && item.toolCallStates?.length,
368+
);
369+
370+
expect(assistantItem).toBeDefined();
371+
const toolState = assistantItem!.toolCallStates!.find(
372+
(ts) => ts.toolCallId === "fallback-tool-1",
373+
);
374+
375+
// The toolState should be updated with the error
376+
expect(toolState).toBeDefined();
377+
expect(toolState!.status).toBe("errored");
378+
expect(toolState!.output).toEqual([
379+
{
380+
content: "Tool broken_tool not found",
381+
name: "Tool Result",
382+
description: "Tool execution result",
383+
},
384+
]);
385+
386+
// CRITICAL: No separate tool message should be added to chatHistory
387+
const separateToolMessages = chatHistory.filter(
388+
(item) => item.message.role === "tool",
389+
);
390+
expect(separateToolMessages).toHaveLength(0);
391+
});
392+
});

0 commit comments

Comments
 (0)