Agent#

Agent #

Bases: ABC

Source code in src/agoge/agent/agent.py
class Agent(ABC):
    @abstractmethod
    def __init__(
        self,
        inference_mngr: ActorHandle[InferenceManager],
    ):
        self.inference_mngr = inference_mngr
        # Optional tracer for observability - set by Runner after instantiation
        self.tracer: Tracer | None = None

    def init_after_reset(self):
        """Will be called right after the environment is reset"""
        return

    @abstractmethod
    async def act(self, observation: Chat, available_tool_schemas: list[dict]) -> tuple[list[Chat], AssistantMessage]:
        """Receives an observation from the environment and decides on an action.

        A list of Chats can be returned which will be forwarded to the training buffer.

        Args:
            observation: A chat, typically generated by the environment after it resets or after a step
            available_tool_schemas: List of schemas that the agent can use as tools

        Returns:
            Tuple[List[Chat], AssistantMessage]:
                * List[Chat]: The messages which will be forwarded to the training buffer (and typically be trained on)
                * AssistantMessage: The action that will be performed in the environment.
                                    This typically consists of one or more tool_calls.
        """
        ...
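
The following is a minimal sketch of a concrete subclass, shown to illustrate the contract that act has to satisfy: return the Chats destined for the training buffer plus the AssistantMessage to execute. The import paths and the AssistantMessage/TextPart constructor arguments are assumptions inferred from the listings on this page, not verbatim library usage.

from agoge.agent.agent import Agent  # assumed import path
# Chat, AssistantMessage, TextPart, MessageLabel are agoge's message types;
# their exact module is assumed here.


class EchoAgent(Agent):
    """Toy agent that keeps a running Chat and always answers with a fixed message."""

    def __init__(self, inference_mngr):
        super().__init__(inference_mngr=inference_mngr)
        self.history = Chat(messages=[])

    def init_after_reset(self):
        # Fresh history at the start of every episode.
        self.history = Chat(messages=[])

    async def act(self, observation, available_tool_schemas):
        self.history += observation
        action = AssistantMessage(
            content=[TextPart(text="ok")],
            label=MessageLabel.AGENT,
        )
        self.history += Chat(messages=[action])
        # First element: Chats forwarded to the training buffer.
        # Second element: the action the environment will execute.
        return [self.history], action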

act(observation, available_tool_schemas) abstractmethod async #

Receives an observation from the environment and decides on an action.

A list of Chats can be returned which will be forwarded to the training buffer.

Parameters:

- observation (Chat): A chat, typically generated by the environment after it resets or after a step. Required.
- available_tool_schemas (list[dict]): List of schemas that the agent can use as tools. Required.

Returns:

- tuple[list[Chat], AssistantMessage]:
    - list[Chat]: The messages which will be forwarded to the training buffer (and typically trained on).
    - AssistantMessage: The action that will be performed in the environment, typically consisting of one or more tool_calls.

Source code in src/agoge/agent/agent.py
@abstractmethod
async def act(self, observation: Chat, available_tool_schemas: list[dict]) -> tuple[list[Chat], AssistantMessage]:
    """Receives an observation from the environment and decides on an action.

    A list of Chats can be returned which will be forwarded to the training buffer.

    Args:
        observation: A chat, typically generated by the environment after it resets or after a step
        available_tool_schemas: List of schemas that the agent can use as tools

    Returns:
        Tuple[List[Chat], AssistantMessage]:
            * List[Chat]: The messages which will be forwarded to the training buffer (and typically be trained on)
            * AssistantMessage: The action that will be performed in the environment.
                                This typically consists of one or more tool_calls.
    """
    ...

init_after_reset() #

Will be called right after the environment is reset

Source code in src/agoge/agent/agent.py
def init_after_reset(self):
    """Will be called right after the environment is reset"""
    return

DynamicPromptAgent #

Bases: SimpleAgent

Agent that handles dynamic system prompts from environment observations.
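
A usage sketch of the prompt-extraction behaviour, assuming an already-created Ray ActorHandle to the InferenceManager; the model name and the environment messages below are purely illustrative.

# Illustrative only; inference_mngr is a Ray ActorHandle[InferenceManager] created elsewhere.
agent = DynamicPromptAgent(inference_mngr=inference_mngr, model="my-model")
agent.init_after_reset()

# If the first observation of an episode begins with a SystemMessage, the agent adopts
# it as self.system_prompt and keeps it at the head of its history; the remaining
# messages are appended as the regular observation. Otherwise it falls back to the
# default system prompt passed to the constructor.
first_obs = Chat(messages=[
    SystemMessage(content="You are a booking assistant for the demo environment."),
    # ...task messages produced by the environment's reset()...
])
chats, action = await agent.act(first_obs, available_tool_schemas=None)  # inside an async context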

Source code in src/agoge/agent/dynamic_prompt_agent.py
class DynamicPromptAgent(SimpleAgent):
    """Agent that handles dynamic system prompts from environment observations."""

    def __init__(
        self,
        inference_mngr: ActorHandle[InferenceManager],
        *,
        model: str,
        history_messages_limit: dict[MessageLabel, int] | None = None,
        system_prompt: str = "You are a helpful assistant.",
        message_label: MessageLabel = MessageLabel.AGENT,
        request_overrides: dict[str, Any] | None = None,
    ) -> None:
        super().__init__(
            inference_mngr=inference_mngr,
            model=model,
            history_messages_limit=history_messages_limit,
            system_prompt=system_prompt,
            message_label=message_label,
            request_overrides=request_overrides,
        )
        self._first_observation = True
        self._extracted_system_prompt = False

    def init_after_reset(self) -> None:
        """Reset for new episode. Wait for first observation to extract system prompt."""
        self._first_observation = True
        self._extracted_system_prompt = False
        self.history = Chat(messages=[])

    async def act(
        self,
        observation: Chat,
        available_tool_schemas: list[dict[str, Any]] | None = None,
    ) -> tuple[list[Chat], AssistantMessage]:
        if self._first_observation:
            self._first_observation = False
            observation = self._extract_system_prompt_from_observation(observation)

        if observation.messages:
            self.history += observation

        response = await self.generate_output(available_tool_schemas=available_tool_schemas)
        return [self.get_history_view()], response

    def _extract_system_prompt_from_observation(self, observation: Chat) -> Chat:
        """Extract system prompt from first observation if present."""
        if not observation.messages:
            self._initialize_with_default_prompt()
            return observation

        first_msg = observation.messages[0]

        if isinstance(first_msg, SystemMessage):
            system_prompt_content = first_msg.content
            if isinstance(system_prompt_content, list):
                text_parts = [part.get("text", "") for part in system_prompt_content if part.get("type") == "text"]
                system_prompt_content = " ".join(text_parts)

            self.system_prompt = system_prompt_content
            self._extracted_system_prompt = True
            self.history = Chat(messages=[first_msg])
            logger.info(f"Extracted dynamic system prompt ({len(system_prompt_content)} chars)")
            return Chat(messages=observation.messages[1:])

        self._initialize_with_default_prompt()
        return observation

    def _initialize_with_default_prompt(self) -> None:
        """Initialize history with default system prompt."""
        self.history = Chat(messages=[SystemMessage(content=self.system_prompt)])

init_after_reset() #

Reset for new episode. Wait for first observation to extract system prompt.

Source code in src/agoge/agent/dynamic_prompt_agent.py
def init_after_reset(self) -> None:
    """Reset for new episode. Wait for first observation to extract system prompt."""
    self._first_observation = True
    self._extracted_system_prompt = False
    self.history = Chat(messages=[])

DynamicPromptReactAgent #

Bases: ReactAgent

ReAct agent that extracts system prompt from first observation.

This agent combines:

1. ReAct loop for text-based tool calling
2. Dynamic system prompt extraction
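
A construction sketch: the constructor forwards everything to ReactAgent, so it accepts the same keyword arguments. The actor handle, model name, and override values below are assumptions.

# Sketch; inference_mngr is an existing Ray ActorHandle[InferenceManager].
agent = DynamicPromptReactAgent(
    inference_mngr=inference_mngr,
    model="my-model",
    request_overrides={"temperature": 0.0},  # merged into every InferenceRequest (illustrative field)
)
# system_prompt is deliberately left at its empty default: the SystemMessage in the
# environment's first observation replaces it and becomes the head of the history.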

Source code in src/agoge/agent/dynamic_prompt_react_agent.py
class DynamicPromptReactAgent(ReactAgent):
    """ReAct agent that extracts system prompt from first observation.

    This agent combines:
    1. ReAct loop for text-based tool calling
    2. Dynamic system prompt extraction
    """

    def __init__(
        self,
        *args,
        system_prompt: str = "",  # Default will be overridden
        **kwargs,
    ) -> None:
        super().__init__(*args, system_prompt=system_prompt, **kwargs)
        self._first_observation = True

    def init_after_reset(self) -> None:
        """Reset state for new episode."""
        self._first_observation = True
        self.history = Chat(messages=[])

    async def act(
        self,
        observation: Chat,
        available_tool_schemas: list[dict[str, Any]] | None = None,
    ) -> tuple[list[Chat], AssistantMessage]:
        """Process observation, extracting system prompt if this is the first observation."""
        if self._first_observation:
            self._first_observation = False
            observation = self._extract_system_prompt_from_observation(observation)

        if observation.messages:
            self.history += observation

        response = await self.generate_output(available_tool_schemas=available_tool_schemas)
        return [self.get_history_view()], response

    def _extract_system_prompt_from_observation(self, observation: Chat) -> Chat:
        """Extract system prompt from first observation if present.

        Args:
            observation: First observation from environment

        Returns:
            Observation with system message removed (if it was extracted)
        """
        if not observation.messages:
            self._initialize_with_default_prompt()
            return observation

        first_msg = observation.messages[0]

        if isinstance(first_msg, SystemMessage):
            # Extract system prompt content
            system_prompt_content = first_msg.content
            if isinstance(system_prompt_content, list):
                text_parts = [part.get("text", "") for part in system_prompt_content if part.get("type") == "text"]
                system_prompt_content = " ".join(text_parts)

            self.system_prompt = system_prompt_content
            self.history = Chat(messages=[first_msg])
            logger.info(f"Extracted dynamic system prompt ({len(system_prompt_content)} chars)")

            # Return observation without the system message
            return Chat(messages=observation.messages[1:])

        # No system message found, use default
        self._initialize_with_default_prompt()
        return observation

    def _initialize_with_default_prompt(self) -> None:
        """Initialize history with default system prompt."""
        self.history = Chat(messages=[SystemMessage(content=self.system_prompt)])

act(observation, available_tool_schemas=None) async #

Process observation, extracting system prompt if this is the first observation.

Source code in src/agoge/agent/dynamic_prompt_react_agent.py
async def act(
    self,
    observation: Chat,
    available_tool_schemas: list[dict[str, Any]] | None = None,
) -> tuple[list[Chat], AssistantMessage]:
    """Process observation, extracting system prompt if this is the first observation."""
    if self._first_observation:
        self._first_observation = False
        observation = self._extract_system_prompt_from_observation(observation)

    if observation.messages:
        self.history += observation

    response = await self.generate_output(available_tool_schemas=available_tool_schemas)
    return [self.get_history_view()], response

init_after_reset() #

Reset state for new episode.

Source code in src/agoge/agent/dynamic_prompt_react_agent.py
def init_after_reset(self) -> None:
    """Reset state for new episode."""
    self._first_observation = True
    self.history = Chat(messages=[])

ReactAgent #

Bases: Agent

ReAct agent following ARE's implementation pattern.

This agent uses the ReAct (Reasoning + Acting) loop:

- Model outputs: Thought + Action (JSON format)
- Environment provides: Observation
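
An illustrative model turn in the text format this agent parses, assuming the conventional Thought/Action layout; the tool name and argument keys are hypothetical, and the exact template is whatever the system prompt specifies.

# Sketch of one ReAct turn as plain text emitted by the model (not verbatim library output).
example_model_output = (
    "Thought: I should check the user's calendar before answering.\n"
    'Action: {"name": "calendar_lookup", "arguments": {"date": "2024-05-01"}}<end_action>'
)
# Generation stops at "<end_action>" or "Observation:"; the agent truncates the reply to
# the first action, parses it into tool calls, and the environment's result comes back
# as the next "Observation:" message in the loop.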

Source code in src/agoge/agent/react_agent.py
class ReactAgent(Agent):
    """ReAct agent following ARE's implementation pattern.

    This agent uses the ReAct (Reasoning + Acting) loop:
    - Model outputs: Thought + Action (JSON format)
    - Environment provides: Observation
    """

    def __init__(
        self,
        inference_mngr: ActorHandle[InferenceManager],
        *,
        model: str,
        history_messages_limit: dict[MessageLabel, int] | None = None,
        system_prompt: str = "You are a helpful assistant.",  # Default will be overridden
        message_label: MessageLabel = MessageLabel.AGENT,
        request_overrides: dict[str, Any] | None = None,
    ) -> None:
        super().__init__(inference_mngr=inference_mngr)
        self.model = model
        self.history_messages_limit = history_messages_limit or {}
        self.system_prompt = system_prompt
        self.message_label = message_label
        self.request_overrides = request_overrides or {}

        self.init_after_reset()

    def init_after_reset(self) -> None:
        self.history = Chat(messages=[SystemMessage(content=self.system_prompt)])
        # Tracing: track response flow from previous vllm.inference for chaining
        self._trace_prev_response_flow_id = None
        self._trace_last_response_flow_id = None  # For termination at traj_out

    def get_history_view(self) -> Chat:
        return self.history.history_view(limits=self.history_messages_limit)

    def _normalize_content(self, raw_content: str | list | None) -> list[TextPart]:
        """Normalize raw content from API response into TextPart objects.

        Args:
            raw_content: Raw content from message payload (string, list, or None)

        Returns:
            List of TextPart objects with Python boolean strings normalized to JSON format
        """
        if isinstance(raw_content, str):
            normalized = raw_content.replace("False", "false").replace("True", "true")
            return [TextPart(text=normalized)]
        elif isinstance(raw_content, list):
            content_parts = []
            for chunk in raw_content:
                if chunk.get("type") == "text" and "text" in chunk:
                    text = chunk["text"]
                    text = text.replace("False", "false").replace("True", "true")
                    content_parts.append(TextPart(text=text))
            return content_parts
        else:
            return []

    def _truncate_content_parts(self, content_parts: list[TextPart]) -> list[TextPart]:
        """Truncate content parts to first action only.

        Args:
            content_parts: Original content parts from API response

        Returns:
            Truncated content parts containing only the first action
        """
        if not content_parts:
            return []

        truncated_parts = []
        for part in content_parts:
            truncated_text = truncate_to_first_action(part.text)
            truncated_parts.append(TextPart(text=truncated_text))
        return truncated_parts

    def _parse_tool_calls_from_content(self, content_parts: list[TextPart]) -> list[ToolFunctionCall]:
        """Parse ReAct format content to extract tool calls.

        Args:
            content_parts: Content parts to parse (should be truncated first)

        Returns:
            List of parsed tool calls
        """
        tool_calls = []
        if content_parts:
            for part in content_parts:
                parsed_calls = parse_react_response(part.text)
                tool_calls.extend(parsed_calls)

        if tool_calls:
            tool_names = [tc.function.get("name", "unknown") for tc in tool_calls]
            logger.debug(f"ReAct agent parsed {len(tool_calls)} tool call(s): {', '.join(tool_names)}")
        else:
            logger.debug("ReAct agent: No tool calls found in response")

        return tool_calls

    async def generate_output(
        self,
        available_tool_schemas: list[dict[str, Any]] | None = None,
        response_format: type[BaseModel] | None = None,
        append_assistant_message: bool = True,
    ) -> AssistantMessage:
        """Generate output using ReAct format."""
        chat = self.get_history_view()

        request_payload: dict[str, Any] = {
            "messages": chat,
            "model": self.model,
            "stop": ["<end_action>", "Observation:"],
        }

        # Don't send tool_schemas for ReAct - tools are described in system prompt
        if available_tool_schemas:
            logger.debug(
                f"ReAct agent has {len(available_tool_schemas)} tools available "
                "(described in system prompt, not sent to API)"
            )

        if response_format is not None:
            request_payload["response_format"] = response_format

        request_payload.update(self.request_overrides)
        request = InferenceRequest(**request_payload)

        logger.debug(f"Sending ReAct inference request to model: {self.model}")

        # Create flow ID for try->dispatch connection
        try_dispatch_flow_id = obs.create_flow_id("try_dispatch")

        with obs.retry_span_safe(
            self.tracer,
            "agent.try_inference",
            category="inference",
            flow_ids=try_dispatch_flow_id,
            terminating_flow_ids=self._trace_prev_response_flow_id,
            max_attempts=1,
            model=self.model,
            semantic="agent_inference_attempt",
        ) as retry:
            # Emit dispatch instant and setup request flow IDs
            obs.emit_inference_dispatch(self.tracer, request, try_dispatch_flow_id, self.model)

            # Start remote call (returns ObjectRef immediately)
            response_ref = self.inference_mngr.create_chat_completion.remote(request)

            # Await happens outside the micro-span
            response: ChatCompletion = await response_ref

            # Record inference response receipt and get chain flow for next iteration
            _trace_chain_flow_id = obs.record_inference_response(
                self.tracer, request.trace_response_flow_id, self.model
            )

            # Process response content
            message_payload = response.choices[0].message.model_dump()
            content_parts = self._normalize_content(message_payload.get("content"))
            truncated_content_parts = self._truncate_content_parts(content_parts)
            tool_calls = self._parse_tool_calls_from_content(truncated_content_parts)

            retry.set_status("success", tool_count=len(tool_calls) if tool_calls else 0)

        assistant_message = AssistantMessage(
            content=truncated_content_parts if truncated_content_parts else None,
            tool_calls=tool_calls if tool_calls else None,
            label=self.message_label,
        )

        if append_assistant_message:
            self.history = self.history + Chat(messages=[assistant_message])

        # Update flow tracking for chaining (use chain_flow, not response_flow)
        # response_flow was already terminated at receive_inference, chain_flow continues
        self._trace_prev_response_flow_id = _trace_chain_flow_id
        self._trace_last_response_flow_id = _trace_chain_flow_id

        return assistant_message

    @obs.traced_method("agent.act", category="inference")
    async def act(
        self,
        observation: Chat,
        available_tool_schemas: list[dict[str, Any]] | None,
    ) -> tuple[list[Chat], AssistantMessage]:
        """Process observation and generate action using ReAct format."""
        self.history += observation
        response = await self.generate_output(available_tool_schemas=available_tool_schemas)
        return [self.get_history_view()], response

act(observation, available_tool_schemas) async #

Process observation and generate action using ReAct format.

Source code in src/agoge/agent/react_agent.py
@obs.traced_method("agent.act", category="inference")
async def act(
    self,
    observation: Chat,
    available_tool_schemas: list[dict[str, Any]] | None,
) -> tuple[list[Chat], AssistantMessage]:
    """Process observation and generate action using ReAct format."""
    self.history += observation
    response = await self.generate_output(available_tool_schemas=available_tool_schemas)
    return [self.get_history_view()], response

generate_output(available_tool_schemas=None, response_format=None, append_assistant_message=True) async #

Generate output using ReAct format.
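
A sketch of requesting structured output through this method; the Pydantic model and its fields are hypothetical.

from pydantic import BaseModel

class Verdict(BaseModel):
    done: bool
    reason: str

# Inside an async context, after the agent has accumulated some history:
msg = await agent.generate_output(
    response_format=Verdict,          # forwarded as request_payload["response_format"]
    append_assistant_message=False,   # keep this probe turn out of the history
)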

Source code in src/agoge/agent/react_agent.py
async def generate_output(
    self,
    available_tool_schemas: list[dict[str, Any]] | None = None,
    response_format: type[BaseModel] | None = None,
    append_assistant_message: bool = True,
) -> AssistantMessage:
    """Generate output using ReAct format."""
    chat = self.get_history_view()

    request_payload: dict[str, Any] = {
        "messages": chat,
        "model": self.model,
        "stop": ["<end_action>", "Observation:"],
    }

    # Don't send tool_schemas for ReAct - tools are described in system prompt
    if available_tool_schemas:
        logger.debug(
            f"ReAct agent has {len(available_tool_schemas)} tools available "
            "(described in system prompt, not sent to API)"
        )

    if response_format is not None:
        request_payload["response_format"] = response_format

    request_payload.update(self.request_overrides)
    request = InferenceRequest(**request_payload)

    logger.debug(f"Sending ReAct inference request to model: {self.model}")

    # Create flow ID for try->dispatch connection
    try_dispatch_flow_id = obs.create_flow_id("try_dispatch")

    with obs.retry_span_safe(
        self.tracer,
        "agent.try_inference",
        category="inference",
        flow_ids=try_dispatch_flow_id,
        terminating_flow_ids=self._trace_prev_response_flow_id,
        max_attempts=1,
        model=self.model,
        semantic="agent_inference_attempt",
    ) as retry:
        # Emit dispatch instant and setup request flow IDs
        obs.emit_inference_dispatch(self.tracer, request, try_dispatch_flow_id, self.model)

        # Start remote call (returns ObjectRef immediately)
        response_ref = self.inference_mngr.create_chat_completion.remote(request)

        # Await happens outside the micro-span
        response: ChatCompletion = await response_ref

        # Record inference response receipt and get chain flow for next iteration
        _trace_chain_flow_id = obs.record_inference_response(
            self.tracer, request.trace_response_flow_id, self.model
        )

        # Process response content
        message_payload = response.choices[0].message.model_dump()
        content_parts = self._normalize_content(message_payload.get("content"))
        truncated_content_parts = self._truncate_content_parts(content_parts)
        tool_calls = self._parse_tool_calls_from_content(truncated_content_parts)

        retry.set_status("success", tool_count=len(tool_calls) if tool_calls else 0)

    assistant_message = AssistantMessage(
        content=truncated_content_parts if truncated_content_parts else None,
        tool_calls=tool_calls if tool_calls else None,
        label=self.message_label,
    )

    if append_assistant_message:
        self.history = self.history + Chat(messages=[assistant_message])

    # Update flow tracking for chaining (use chain_flow, not response_flow)
    # response_flow was already terminated at receive_inference, chain_flow continues
    self._trace_prev_response_flow_id = _trace_chain_flow_id
    self._trace_last_response_flow_id = _trace_chain_flow_id

    return assistant_message

SimpleAgent #

Bases: Agent

Minimal agent that routes every request through the InferenceManager.
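
A minimal usage sketch, assuming an existing Ray ActorHandle[InferenceManager]; the model name, limit, and override values are illustrative.

agent = SimpleAgent(
    inference_mngr=inference_mngr,
    model="my-model",
    system_prompt="You are a terse assistant.",
    history_messages_limit={MessageLabel.AGENT: 8},  # illustrative: cap agent messages in the history view
    request_overrides={"max_tokens": 512},           # merged into every InferenceRequest (illustrative field)
)

# Inside the rollout loop (async context):
chats, action = await agent.act(observation, tool_schemas)
# `chats` is forwarded to the training buffer; `action.tool_calls` (if any) is what
# the environment executes next.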

Source code in src/agoge/agent/simple_agent.py
class SimpleAgent(Agent):
    """Minimal agent that routes every request through the ``InferenceManager``."""

    def __init__(
        self,
        inference_mngr: ActorHandle[InferenceManager],
        *,
        model: str,
        history_messages_limit: dict[MessageLabel, int] | None = None,
        system_prompt: str = "You are a helpful assistant.",
        message_label: MessageLabel = MessageLabel.AGENT,
        request_overrides: dict[str, Any] | None = None,
    ) -> None:
        super().__init__(inference_mngr=inference_mngr)
        self.model = model
        self.history_messages_limit = history_messages_limit or {}
        self.system_prompt = system_prompt
        self.message_label = message_label
        self.request_overrides = request_overrides or {}

        self.init_after_reset()

    def init_after_reset(self) -> None:
        self.history = Chat(messages=[SystemMessage(content=self.system_prompt)])
        # Tracing: track response flow from previous vllm.inference for chaining
        self._trace_prev_response_flow_id = None
        self._trace_last_response_flow_id = None  # For termination at traj_out

    def get_history_view(self) -> Chat:
        return self.history.history_view(limits=self.history_messages_limit)

    async def generate_output(
        self,
        available_tool_schemas: list[dict[str, Any]] | None = None,
        response_format: type[BaseModel] | None = None,
        append_assistant_message: bool = True,
    ) -> AssistantMessage:
        chat = self.get_history_view()

        request_payload: dict[str, Any] = {
            "messages": chat,
            "model": self.model,
        }

        if available_tool_schemas:
            request_payload["tool_schemas"] = available_tool_schemas
            logger.debug(f"Generating output with {len(available_tool_schemas)} tools available")

        if response_format is not None:
            request_payload["response_format"] = response_format

        request_payload.update(self.request_overrides)

        request = InferenceRequest(**request_payload)

        logger.debug(f"Sending inference request to model: {self.model}")

        # Create flow ID for try->dispatch connection
        try_dispatch_flow_id = obs.create_flow_id("try_dispatch")

        with obs.retry_span_safe(
            self.tracer,
            "agent.try_inference",
            category="inference",
            flow_ids=try_dispatch_flow_id,
            terminating_flow_ids=self._trace_prev_response_flow_id,
            max_attempts=1,
            model=self.model,
            semantic="agent_inference_attempt",
        ) as retry:
            # Emit dispatch instant and setup request flow IDs
            obs.emit_inference_dispatch(self.tracer, request, try_dispatch_flow_id, self.model)

            # Start remote call (returns ObjectRef immediately)
            response_ref = self.inference_mngr.create_chat_completion.remote(request)

            # Await happens outside the micro-span
            response: ChatCompletion = await response_ref

            # Record inference response receipt and get chain flow for next iteration
            _trace_chain_flow_id = obs.record_inference_response(
                self.tracer, request.trace_response_flow_id, self.model
            )

            message = response.choices[0].message
            message_payload = message.model_dump()

            content_parts = _normalise_content(message_payload.get("content"))
            tool_calls = _normalise_tool_calls(message_payload.get("tool_calls"))

            if tool_calls:
                tool_names = [tc.function.get("name", "unknown") for tc in tool_calls]
                logger.debug(f"Received response with {len(tool_calls)} tool call(s): {', '.join(tool_names)}")

            retry.set_status("success", tool_count=len(tool_calls) if tool_calls else 0)

        assistant_message = AssistantMessage(
            content=content_parts if content_parts else None,
            tool_calls=tool_calls if tool_calls else None,
            label=self.message_label,
        )

        if append_assistant_message:
            self.history = self.history + Chat(messages=[assistant_message])

        # Update flow tracking for chaining (use chain_flow, not response_flow)
        # response_flow was already terminated at receive_inference, chain_flow continues
        self._trace_prev_response_flow_id = _trace_chain_flow_id
        self._trace_last_response_flow_id = _trace_chain_flow_id

        return assistant_message

    @obs.traced_method("agent.act", category="inference")
    async def act(
        self,
        observation: Chat,
        available_tool_schemas: list[dict[str, Any]] | None,
    ) -> tuple[list[Chat], AssistantMessage]:
        self.history += observation
        response = await self.generate_output(available_tool_schemas=available_tool_schemas)
        return [self.get_history_view()], response