
AdaptorWorker

QuantizationLLMAdaptor

This is the adaptor for the Quantization LLM model

Source code in Agent/modules/quantization_llm/adaptor_worker.py
class QuantizationLLMAdaptor:
    """
    This is the adaptor for the Quantization LLM model
    """

    def __init__(self, model_config: QuantizationLLMModelConfig):
        self.model_config = model_config
        self.model_path = model_config.model_path()
        self.llm = self.model_config.llm

    def create_completion(self, prompt: str) -> str:
        """
        Create completion for the given prompt
        Args:
            prompt (str): The prompt to generate completion for the model

        Returns:
            str: The completion generated by the model

        """

        output = self.llm(
            f"Q: {prompt} A: ",
            max_tokens=500,  # Generate up to 500 tokens; set to None to generate up to the end of the context window
            stop=[
                "Q:",
                "\n",
            ],  # Stop generating just before the model would generate a new question
            echo=True,  # Echo the prompt back in the output
        )
        logger.info(f"Response: {output}")
        return output

    def create_chat_completion(
        self,
        prompt: str = None,
        messages: List[Dict[str, str]] = None,
        tools: List[ChatCompletionTool] = None,
        tool_choice: ChatCompletionToolChoiceOption = None,
        *args,
        **kwargs,
    ):
        """
        Create chat completion for the given prompt and messages
        Args:
            prompt (str): The prompt to generate completion for the model
            messages (List[Dict[str, str]]): The messages to generate completion for the model
            tools (List[ChatCompletionTool]): The tools to use for chat completion
            tool_choice (ChatCompletionToolChoiceOption): The tool choice to use for chat completion
            *args:
            **kwargs:

        Returns:
            The chat completion response produced by the underlying llama_cpp model.

        """
        if messages is not None:
            """
            This is trying to replicate passing all params chat completion provided via llama_cpp
            """

            logger.info(f"Creating chat completion for messages: {messages}")
            return self.llm.create_chat_completion(
                messages=messages, tools=tools, tool_choice=tool_choice
            )

        if prompt:
            """
            Simple version of it, without message "role" definition
            """

            res = self.llm.create_chat_completion(
                messages=[
                    {"role": "user", "content": prompt},
                ]
            )
            return res

        raise ValueError("Prompt or messages are required")

    def create_embedding(self, text: str) -> List[float]:
        """
        Create embedding for the given text
        Args:
            text (str): The text to generate embedding for

        Returns:
            List[float]: The embedding generated by the model

        """
        if text is None:
            raise ValueError("Text is required")

        logger.info(f"Creating embedding for text: {text}")
        return self.llm.create_embedding(text)
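
A minimal usage sketch follows. The import path and constructor arguments for QuantizationLLMModelConfig are assumptions; only the adaptor methods shown in the source above come from this module, and the response layout follows llama_cpp's OpenAI-compatible chat completion format.

# Hypothetical wiring: the config class's module path and constructor
# arguments are assumptions, not part of this module's documented API.
from Agent.modules.quantization_llm.adaptor_worker import QuantizationLLMAdaptor
from Agent.modules.quantization_llm.models import QuantizationLLMModelConfig  # assumed path

config = QuantizationLLMModelConfig(model_name="llama-2-7b-q4")  # assumed signature
adaptor = QuantizationLLMAdaptor(model_config=config)

# Prompt-only chat completion; llama_cpp returns an OpenAI-style response dict.
response = adaptor.create_chat_completion(prompt="What does quantization do to a model?")
print(response["choices"][0]["message"]["content"])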

create_chat_completion(prompt=None, messages=None, tools=None, tool_choice=None, *args, **kwargs)

Create chat completion for the given prompt and messages.

Parameters:

    prompt (str): The prompt to generate a completion for.
    messages (List[Dict[str, str]]): The messages to generate a completion for.
    tools (List[ChatCompletionTool]): The tools to use for chat completion.
    tool_choice (ChatCompletionToolChoiceOption): The tool choice to use for chat completion.

Returns:

    The chat completion response produced by the underlying llama_cpp model.

Source code in Agent/modules/quantization_llm/adaptor_worker.py
def create_chat_completion(
    self,
    prompt: str = None,
    messages: List[Dict[str, str]] = None,
    tools: List[ChatCompletionTool] = None,
    tool_choice: ChatCompletionToolChoiceOption = None,
    *args,
    **kwargs,
):
    """
    Create chat completion for the given prompt and messages
    Args:
        prompt (str): The prompt to generate completion for the model
        messages (List[Dict[str, str]]): The messages to generate completion for the model
        tools (List[ChatCompletionTool]): The tools to use for chat completion
        tool_choice (ChatCompletionToolChoiceOption): The tool choice to use for chat completion
        *args:
        **kwargs:

    Returns:
        The chat completion response produced by the underlying llama_cpp model.

    """
    if messages is not None:
        """
        This is trying to replicate passing all params chat completion provided via llama_cpp
        """

        logger.info(f"Creating chat completion for messages: {messages}")
        return self.llm.create_chat_completion(
            messages=messages, tools=tools, tool_choice=tool_choice
        )

    if prompt:
        """
        Simple version of it, without message "role" definition
        """

        res = self.llm.create_chat_completion(
            messages=[
                {"role": "user", "content": prompt},
            ]
        )
        return res

    raise ValueError("Prompt or messages are required")
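
A hedged sketch of the messages-based path, assuming an adaptor instance built as in the earlier example; the response layout follows llama_cpp's OpenAI-compatible chat completion format.

# Assumes `adaptor` is a QuantizationLLMAdaptor instance (see the earlier sketch).
messages = [
    {"role": "system", "content": "You are a concise assistant."},
    {"role": "user", "content": "Explain 4-bit quantization in one sentence."},
]
response = adaptor.create_chat_completion(messages=messages)
print(response["choices"][0]["message"]["content"])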

create_completion(prompt)

Create completion for the given prompt.

Parameters:

    prompt (str): The prompt to generate a completion for.

Returns:

    str: The completion generated by the model.

Source code in Agent/modules/quantization_llm/adaptor_worker.py
def create_completion(self, prompt: str) -> str:
    """
    Create completion for the given prompt
    Args:
        prompt (str): The prompt to generate completion for the model

    Returns:
        str: The completion generated by the model

    """

    output = self.llm(
        f"Q: {prompt} A: ",
        max_tokens=500,  # Generate up to 500 tokens; set to None to generate up to the end of the context window
        stop=[
            "Q:",
            "\n",
        ],  # Stop generating just before the model would generate a new question
        echo=True,  # Echo the prompt back in the output
    )
    logger.info(f"Response: {output}")
    return output
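
A usage sketch, assuming an adaptor instance as above. The code returns the raw llama_cpp completion response rather than a plain string, so the sketch reads the generated text out of the response dict; the field names are llama_cpp's completion format.

# Assumes `adaptor` is a QuantizationLLMAdaptor instance (see the earlier sketch).
output = adaptor.create_completion("Name one benefit of model quantization.")
# The prompt is echoed back (echo=True), followed by the generated answer.
print(output["choices"][0]["text"])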

create_embedding(text)

Create embedding for the given text.

Parameters:

    text (str): The text to generate an embedding for.

Returns:

    List[float]: The embedding generated by the model.

Source code in Agent/modules/quantization_llm/adaptor_worker.py
def create_embedding(self, text: str) -> List[float]:
    """
    Create embedding for the given text
    Args:
        text (str): The text to generate embedding for

    Returns:
        List[float]: The embedding generated by the model

    """
    if text is None:
        raise ValueError("Text is required")

    logger.info(f"Creating embedding for text: {text}")
    return self.llm.create_embedding(text)
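
A short sketch, assuming an adaptor instance as above. Current llama_cpp versions return an embedding response dict rather than a bare list of floats, so the vector is taken from the "data" field; treat that layout as an assumption about the installed llama_cpp version.

# Assumes `adaptor` is a QuantizationLLMAdaptor instance (see the earlier sketch).
embedding_response = adaptor.create_embedding("quantized large language models")
vector = embedding_response["data"][0]["embedding"]  # llama_cpp embedding response layout
print(len(vector))  # dimensionality of the embedding vector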