
AdaptorWorker

QuantizationLLMAdaptor

This is the adaptor for the Quantization LLM model

Source code in Agent/modules/quantization_llm/adaptor_worker.py
class QuantizationLLMAdaptor:
    """
    This is the adaptor for the Quantization LLM model
    """

    def __init__(self, model_config: QuantizationLLMModelConfig):
        self.model_config = model_config
        self.model_path = model_config.model_path()
        self.llm = self.model_config.llm

    def create_completion(self, prompt: str) -> str:
        """
        Create completion for the given prompt
        Args:
            prompt (str): The prompt to generate completion for the model

        Returns:
            str: The completion generated by the model

        """

        output = self.llm(
            f"Q: {prompt} A: ",
            max_tokens=500,  # Generate up to 500 tokens; set to None to generate up to the end of the context window
            stop=[
                "Q:",
                "\n",
            ],  # Stop generating just before the model would generate a new question
            echo=True,  # Echo the prompt back in the output
        )
        logger.info(f"Response: {output}")
        return output

    def create_chat_completion(
        self,
        prompt: str = None,
        messages: List[Dict[str, str]] = None,
        tools: List[ChatCompletionTool] = None,
        tool_choice: ChatCompletionToolChoiceOption = None,
        *args,
        **kwargs,
    ):
        """
        Create chat completion for the given prompt and messages
        Args:
            prompt (str): The prompt to generate completion for the model
            messages (List[Dict[str, str]]): The messages to generate completion for the model
            tools (List[ChatCompletionTool]): The tools to use for chat completion
            tool_choice (ChatCompletionToolChoiceOption): The tool choice to use for chat completion
            *args:
            **kwargs:

        Returns:
            The chat completion response produced by the underlying llama_cpp model.

        """
        if messages is not None:
            """
            This is trying to replicate passing all params chat completion provided via llama_cpp
            """

            logger.info(f"Creating chat completion for messages: {messages}")
            return self.llm.create_chat_completion(
                messages=messages, tools=tools, tool_choice=tool_choice
            )

        if prompt:
            """
            Simple version of it, without message "role" definition
            """

            res = self.llm.create_chat_completion(
                messages=[
                    {"role": "user", "content": prompt},
                ]
            )
            return res

        raise ValueError("Prompt or messages are required")

    def create_embedding(self, text: str) -> List[float]:
        """
        Create embedding for the given text
        Args:
            text (str): The text to generate embedding for

        Returns:
            List[float]: The embedding generated by the model

        """
        if text is None:
            raise ValueError("Text is required")

        logger.info(f"Creating embedding for text: {text}")
        return self.llm.create_embedding(text)
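
A minimal usage sketch follows. The import path and constructor arguments for QuantizationLLMModelConfig are assumptions; only the adaptor methods shown in the source above come from this module, and the response layout follows llama_cpp's OpenAI-compatible chat completion format.

# Hypothetical wiring: the config class's module path and constructor
# arguments are assumptions, not part of this module's documented API.
from Agent.modules.quantization_llm.adaptor_worker import QuantizationLLMAdaptor
from Agent.modules.quantization_llm.models import QuantizationLLMModelConfig  # assumed path

config = QuantizationLLMModelConfig(model_name="llama-2-7b-q4")  # assumed signature
adaptor = QuantizationLLMAdaptor(model_config=config)

# Prompt-only chat completion; llama_cpp returns an OpenAI-style response dict.
response = adaptor.create_chat_completion(prompt="What does quantization do to a model?")
print(response["choices"][0]["message"]["content"])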

create_chat_completion(prompt=None, messages=None, tools=None, tool_choice=None, *args, **kwargs)

Create chat completion for the given prompt and messages.

Parameters:

    prompt (str): The prompt to generate a completion for.
    messages (List[Dict[str, str]]): The messages to generate a completion for.
    tools (List[ChatCompletionTool]): The tools to use for chat completion.
    tool_choice (ChatCompletionToolChoiceOption): The tool choice to use for chat completion.

Returns:

    The chat completion response produced by the underlying llama_cpp model.

Source code in Agent/modules/quantization_llm/adaptor_worker.py
def create_chat_completion(
    self,
    prompt: str = None,
    messages: List[Dict[str, str]] = None,
    tools: List[ChatCompletionTool] = None,
    tool_choice: ChatCompletionToolChoiceOption = None,
    *args,
    **kwargs,
):
    """
    Create chat completion for the given prompt and messages
    Args:
        prompt (str): The prompt to generate completion for the model
        messages (List[Dict[str, str]]): The messages to generate completion for the model
        tools (List[ChatCompletionTool]): The tools to use for chat completion
        tool_choice (ChatCompletionToolChoiceOption): The tool choice to use for chat completion
        *args:
        **kwargs:

    Returns:
        The chat completion response produced by the underlying llama_cpp model.

    """
    if messages is not None:
        """
        This is trying to replicate passing all params chat completion provided via llama_cpp
        """

        logger.info(f"Creating chat completion for messages: {messages}")
        return self.llm.create_chat_completion(
            messages=messages, tools=tools, tool_choice=tool_choice
        )

    if prompt:
        """
        Simple version of it, without message "role" definition
        """

        res = self.llm.create_chat_completion(
            messages=[
                {"role": "user", "content": prompt},
            ]
        )
        return res

    raise ValueError("Prompt or messages are required")
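
A hedged sketch of the messages-based path, assuming an adaptor instance built as in the earlier example; the response layout follows llama_cpp's OpenAI-compatible chat completion format.

# Assumes `adaptor` is a QuantizationLLMAdaptor instance (see the earlier sketch).
messages = [
    {"role": "system", "content": "You are a concise assistant."},
    {"role": "user", "content": "Explain 4-bit quantization in one sentence."},
]
response = adaptor.create_chat_completion(messages=messages)
print(response["choices"][0]["message"]["content"])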

create_completion(prompt)

Create completion for the given prompt.

Parameters:

    prompt (str): The prompt to generate a completion for.

Returns:

    str: The completion generated by the model.

Source code in Agent/modules/quantization_llm/adaptor_worker.py
def create_completion(self, prompt: str) -> str:
    """
    Create completion for the given prompt
    Args:
        prompt (str): The prompt to generate completion for the model

    Returns:
        str: The completion generated by the model

    """

    output = self.llm(
        f"Q: {prompt} A: ",
        max_tokens=500,  # Generate up to 500 tokens; set to None to generate up to the end of the context window
        stop=[
            "Q:",
            "\n",
        ],  # Stop generating just before the model would generate a new question
        echo=True,  # Echo the prompt back in the output
    )
    logger.info(f"Response: {output}")
    return output
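
A usage sketch, assuming an adaptor instance as above. The code returns the raw llama_cpp completion response rather than a plain string, so the sketch reads the generated text out of the response dict; the field names are llama_cpp's completion format.

# Assumes `adaptor` is a QuantizationLLMAdaptor instance (see the earlier sketch).
output = adaptor.create_completion("Name one benefit of model quantization.")
# The prompt is echoed back (echo=True), followed by the generated answer.
print(output["choices"][0]["text"])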

create_embedding(text)

Create embedding for the given text.

Parameters:

    text (str): The text to generate an embedding for.

Returns:

    List[float]: The embedding generated by the model.

Source code in Agent/modules/quantization_llm/adaptor_worker.py
def create_embedding(self, text: str) -> List[float]:
    """
    Create embedding for the given text
    Args:
        text (str): The text to generate embedding for

    Returns:
        List[float]: The embedding generated by the model

    """
    if text is None:
        raise ValueError("Text is required")

    logger.info(f"Creating embedding for text: {text}")
    return self.llm.create_embedding(text)
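
A short sketch, assuming an adaptor instance as above. Current llama_cpp versions return an embedding response dict rather than a bare list of floats, so the vector is taken from the "data" field; treat that layout as an assumption about the installed llama_cpp version.

# Assumes `adaptor` is a QuantizationLLMAdaptor instance (see the earlier sketch).
embedding_response = adaptor.create_embedding("quantized large language models")
vector = embedding_response["data"][0]["embedding"]  # llama_cpp embedding response layout
print(len(vector))  # dimensionality of the embedding vector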