Skip to content

generation

Generation

Generation(
    server: BaseServer,
    args: GenArgs,
    *,
    max_relay_rounds: int = 0,
    mock_response: Optional[
        Union[CompletionResponse, str]
    ] = None,
    llm_executor_type: ExecutorType = ExecutorType.LLM_THREAD_POOL,
    lazy_eval: bool = False,
    _ctx: Optional[PromptContext] = None,
    **kwargs: Any
)

Bases: Generic[M]

Represents a generation call to the model.

Parameters:

  • server (BaseServer) –

    An LLM server where the generation request will be sent.

  • args (GenArgs) –

    The arguments of the generation call.

  • max_relay_rounds (int, default: 0 ) –

    The maximum number of relay rounds to continue the unfinished text generation.

  • mock_response (Optional[Union[CompletionResponse, str]], default: None ) –

    A mock response for the generation call.

  • llm_executor_type (ExecutorType, default: LLM_THREAD_POOL ) –

    The type of the executor to run the LLM call.

  • lazy_eval (bool, default: False ) –

    If True, the generation call will be evaluated lazily.

  • _ctx (Optional[PromptContext], default: None ) –

    The prompt context filled automatically by the APPL function.

  • **kwargs (Any, default: {} ) –

    Extra arguments for the generation call.

Source code in src/appl/core/generation.py
def __init__(
    self,
    server: BaseServer,
    args: GenArgs,
    *,
    max_relay_rounds: int = 0,
    mock_response: Optional[Union[CompletionResponse, str]] = None,
    llm_executor_type: ExecutorType = ExecutorType.LLM_THREAD_POOL,
    lazy_eval: bool = False,
    _ctx: Optional[PromptContext] = None,
    **kwargs: Any,
    # kwargs used for extra args for the create method
) -> None:
    """Initialize the Generation object.

    Args:
        server: An LLM server where the generation request will be sent.
        args: The arguments of the generation call.
        max_relay_rounds: The maximum number of relay rounds to continue
            the unfinished text generation.
        mock_response: A mock response for the generation call.
        llm_executor_type: The type of the executor to run the LLM call.
        lazy_eval: If True, the generation call will be evaluated lazily.
        _ctx: The prompt context filled automatically by the APPL function.
        **kwargs: Extra arguments for the generation call.
    """
    # The generation name needs to be unique and ordered, so the counter is
    # incremented in the calling (main) thread rather than in the executor.
    gen_name_prefix = get_gen_name_prefix()
    # inc_thread_local returns the post-increment value; subtract 1 to use
    # the value before the increment as this generation's index.
    # NOTE(review): when gen_name_prefix is None the counter key becomes the
    # literal "None_gen_cnt" — all unprefixed generations in a thread share
    # it, which appears intentional; confirm against get_gen_name_prefix.
    self._cnt = inc_thread_local(f"{gen_name_prefix}_gen_cnt") - 1
    if gen_name_prefix is None:
        self._id = f"@gen_{self._cnt}"
    else:
        self._id = f"@{gen_name_prefix}_gen_{self._cnt}"

    # Store the call configuration on the instance.
    self._server = server
    self._model_name = server.model_name
    self._args = args
    self._max_relay_rounds = max_relay_rounds
    self._mock_response = mock_response
    self._llm_executor_type = llm_executor_type
    self._lazy_eval = lazy_eval
    self._ctx = _ctx
    self._extra_args = kwargs
    self._num_raw_completions = 0
    self._cached_response: Optional[CompletionResponse] = None

    # Record this generation in the trace before any LLM work starts.
    add_to_trace(GenerationInitEvent(name=self.id))
    log_llm_call_args = global_vars.configs.settings.logging.display.llm_call_args
    if log_llm_call_args:
        logger.info(
            f"Call generation [{self.id}] with args: {args} and kwargs: {kwargs}"
        )

    if isinstance(mock_response, CompletionResponse):
        # A full CompletionResponse mock short-circuits the LLM call entirely.

        def get_response() -> CompletionResponse:
            return mock_response

        self._call = self._wrap_response(get_response)
    else:
        if mock_response:
            # A (non-empty) string mock is forwarded to litellm's built-in
            # mock_response mechanism via the extra kwargs.
            kwargs.update({"mock_response": mock_response})
        self._call = self._wrap_response(self._call_llm())

    # Index the tools by name for dispatching tool calls later.
    self._tools: Sequence[BaseTool] = args.tools
    self._name2tools = {tool.name: tool for tool in self._tools}

id property

id: str

The unique ID of the generation.

is_message property

is_message: bool

Whether the response is a text message.

is_obj property

is_obj: bool

Whether the response is an object.

is_tool_call property

is_tool_call: bool

Whether the response is a tool call.

message property

message: Optional[str]

The message of the response.

response property

The response of the generation call.

response_obj property

response_obj: M

The object of the response.

response_type property

response_type: ResponseType

The type of the response.

results property

results: Union[M, str, List[ToolCall]]

The results of the response.

str_future property

str_future: StringFuture

The StringFuture representation of the response.

text_stream property

text_stream

Get the response of the generation as a text stream.

tool_calls property

tool_calls: List[ToolCall]

The tool calls of the response.

as_prompt

as_prompt() -> Union[AIMessage, StringFuture]

Get the response of the generation as a promptable object.

Source code in src/appl/core/generation.py
def as_prompt(self) -> Union[AIMessage, StringFuture]:
    """Convert the generation's response into a promptable object.

    Returns an AIMessage carrying the tool calls when tools were provided
    and the model responded with a tool call; otherwise a lazy StringFuture
    wrapping the (possibly still pending) text response.
    """
    if self._args.tools and self.is_tool_call:
        return AIMessage(tool_calls=self.tool_calls)
    # Defer str(self._call()) behind a future so this call never blocks.
    return StringFuture(CallFuture(self._call))

run_tool_calls

run_tool_calls(
    filter_fn: Optional[
        Callable[[List[ToolCall]], List[ToolCall]]
    ] = None,
    parallel: bool = False,
    executor_type: ExecutorType = ExecutorType.GENERAL_THREAD_POOL,
    log_results: Optional[bool] = None,
) -> List[ToolMessage]

Run all tool calls in the generation and return the results.

Parameters:

  • filter_fn (Optional[Callable[[List[ToolCall]], List[ToolCall]]], default: None ) –

    A function that takes a list of ToolCall objects and returns a filtered list of ToolCall objects. This function can be used to filter the tool calls that will be run.

  • parallel (bool, default: False ) –

    If True, run the tool calls in parallel. Default to False.

  • executor_type (ExecutorType, default: GENERAL_THREAD_POOL ) –

    The type of the executor to run the tool calls, can be "general_thread_pool", "general_process_pool", "new_thread" or "new_process".

  • log_results (Optional[bool], default: None ) –

    If True, log the results of the tool calls. Note: this will wait for the results to be ready. Defaults to the setting in configs.

Returns:

  A list of ToolMessage objects.

Source code in src/appl/core/generation.py
def run_tool_calls(
    self,
    filter_fn: Optional[Callable[[List[ToolCall]], List[ToolCall]]] = None,
    parallel: bool = False,
    executor_type: ExecutorType = ExecutorType.GENERAL_THREAD_POOL,
    log_results: Optional[bool] = None,
) -> List[ToolMessage]:
    """Run all tool calls in the generation and return the results.

    Args:
        filter_fn:
            A function that takes a list of ToolCall objects and returns
            a filtered list of ToolCall objects; used to select which
            tool calls will actually be run.
        parallel: If True, run the tool calls in parallel. Defaults to False.
        executor_type:
            The type of the executor to run the tool calls, can be
            "general_thread_pool", "general_process_pool", "new_thread" or
            "new_process".
        log_results:
            If True, log the results of the tool calls. Note: this waits
            for the results to be ready. Defaults to the setting in configs.

    Returns:
        A list of ToolMessage objects.

    Raises:
        ValueError: If this generation's response is not a tool call.
    """
    if not self.is_tool_call:
        raise ValueError("Error: The Generation is not a tool call")
    if log_results is None:
        log_results = global_vars.configs.settings.logging.display.tool_results

    calls = self.tool_calls
    if filter_fn:
        calls = filter_fn(calls)

    results: List[ToolMessage] = []
    for call in calls:
        role = MessageRole(MessageRoleType.TOOL, call.name)
        try:
            outcome = self._call_tool(
                call.name, call.args, parallel=parallel, executor_type=executor_type
            )
            results.append(
                ToolMessage(outcome, role=role, tool_call_id=call.id, has_error=False)
            )
        except Exception as e:
            # Failures are captured per-call as error messages rather than
            # aborting the remaining tool calls.
            logger.error(f"Error running tool call: {call.name}({call.args})")
            logger.error(e)
            results.append(
                ToolMessage(str(e), role=role, tool_call_id=call.id, has_error=True)
            )

    if log_results:  # this will wait for the results to be ready
        for msg in results:
            logger.info(f"Tool call result: {msg}")
    return results

get_gen_name_prefix

get_gen_name_prefix() -> Optional[str]

Get the prefix for generation names in the current thread.

Source code in src/appl/core/generation.py
def get_gen_name_prefix() -> Optional[str]:
    """Return the prefix for generation names in the current thread.

    An explicitly stored thread-local prefix (see set_gen_name_prefix) takes
    precedence. Otherwise, non-main threads fall back to their thread name,
    and the main thread gets no prefix (None).
    """
    prefix = get_thread_local(APPL_GEN_NAME_PREFIX_KEY, None)
    if prefix is not None:
        return prefix
    thread_name = threading.current_thread().name
    return thread_name if thread_name != "MainThread" else None

set_gen_name_prefix

set_gen_name_prefix(prefix: str) -> None

Set the prefix for generation names in the current thread.

Source code in src/appl/core/generation.py
def set_gen_name_prefix(prefix: str) -> None:
    """Set the prefix for generation names in the current thread.

    The prefix is stored thread-locally, so prefixes set in different
    threads do not interfere with each other.
    """
    set_thread_local(APPL_GEN_NAME_PREFIX_KEY, prefix)