Text2Speech

`Text2Speech`

Source code in Agent/modules/text_to_speech/text2speech.py

class Text2Speech:
    """Task handler that turns the text of a task into an mp3 audio file."""

    def __init__(self, model_name: str = "openai", to_s3: bool = False):
        """
        Initialize the TTS object

        Args:
            model_name (str): The name of the model to use
            to_s3 (bool): If the audio file should be uploaded to S3
        """

        self.tts = None
        self.model_name = model_name
        self.to_s3 = to_s3

    def handle_task(self, task: Task) -> Task:
        """
        Run text to speech for the task with the configured model.

        Args:
            task (Task): The task to handle

        Returns:
            The task with the result
        """
        TimeLogger.log_task(task, "start_text2speech")
        text2speech_parameters = Text2SpeechParameters(**task.parameters)
        logger.info(f"Text to speech: {text2speech_parameters.text}")

        if self.model_name == "openai":
            task = self.text_to_speech_openai(
                task=task, task_param=text2speech_parameters
            )
        else:
            # Only "openai" is implemented; the task is returned unchanged.
            logger.warning(f"Unsupported text to speech model: {self.model_name}")

        # Log the end marker on every path. Returning straight from the
        # "openai" branch used to skip it, leaving the start marker unpaired.
        TimeLogger.log_task(task, "end_text2speech")
        return task

    def text_to_speech_openai(
        self, task: Task, task_param: Text2SpeechParameters
    ) -> Task:
        """
        Convert the text to speech using OpenAI API

        The audio is saved to ``DATA_DIR / "tts" / "<task.id>.mp3"`` and, when
        ``self.to_s3`` is set, uploaded under the S3 key ``tts/<task.id>.mp3``.

        Args:
            task (Task): The task to handle
            task_param (Text2SpeechParameters): The parameters for the task

        Returns:
            The same task, marked as completed, with its result profile
            (audio file name) and latency profile updated
        """
        result_profile = {}
        latency_profile = {}
        audio_path = DATA_DIR / "tts" / f"{task.id}.mp3"
        # if folder does not exist, create it
        audio_path.parent.mkdir(parents=True, exist_ok=True)
        audio_file_path = audio_path.as_posix()

        client = OpenAI()
        with time_tracker("openai_tts", latency_profile, TrackType.MODEL.value):
            response = client.audio.speech.create(
                model="tts-1",
                voice="alloy",
                input=task_param.text,
            )
        with time_tracker("save_audio", latency_profile, TrackType.TRANSFER.value):
            response.stream_to_file(audio_file_path)

        # Only the file name (not the full local path) is reported back.
        result_profile["audio_file_path"] = audio_path.name

        if self.to_s3:
            with time_tracker("to_s3", latency_profile, TrackType.TRANSFER.value):
                self.upload_to_s3(audio_file_path, f"tts/{task.id}.mp3")

        task.result_status = ResultStatus.completed.value
        task.result_json.result_profile.update(result_profile)
        task.result_json.latency_profile.update(latency_profile)
        return task

    @staticmethod
    def upload_to_s3(file_path: str, s3_key: str):
        """
        Upload the file to S3

        Args:
            file_path (str): The path to the file
            s3_key (str): The key to use in S3
        """
        s3_client = BOTO3_SESSION.client("s3")
        s3_client.upload_file(
            file_path,
            S3_BUCKET,
            s3_key,
        )

`__init__(model_name='openai', to_s3=False)`

Initialize the TTS object

Parameters:

Name	Type	Description	Default
`model_name`	`str`	The name of the model to use	`'openai'`
`to_s3`	`bool`	If the audio file should be uploaded to S3	`False`

Source code in Agent/modules/text_to_speech/text2speech.py

def __init__(self, model_name: str = "openai", to_s3: bool = False):
    """
    Set up the text-to-speech handler.

    Args:
        model_name (str): The name of the model to use
        to_s3 (bool): If the audio file should be uploaded to S3
    """
    self.to_s3 = to_s3
    self.model_name = model_name
    # No engine object is built here; the attribute simply starts as None.
    self.tts = None

`handle_task(task)`

Parameters:

Name	Type	Description	Default
`task`	`Task`	The task to handle	required

Returns:

Type	Description
`Task`	The task with the result

Source code in Agent/modules/text_to_speech/text2speech.py

def handle_task(self, task: Task) -> Task:
    """
    Run text to speech for the task with the configured model.

    Args:
        task (Task): The task to handle

    Returns:
        The task with the result
    """
    TimeLogger.log_task(task, "start_text2speech")
    text2speech_parameters = Text2SpeechParameters(**task.parameters)
    logger.info(f"Text to speech: {text2speech_parameters.text}")

    if self.model_name == "openai":
        task = self.text_to_speech_openai(
            task=task, task_param=text2speech_parameters
        )
    else:
        # Only "openai" is implemented; the task is returned unchanged.
        logger.warning(f"Unsupported text to speech model: {self.model_name}")

    # Log the end marker on every path. Returning straight from the
    # "openai" branch used to skip it, leaving the start marker unpaired.
    TimeLogger.log_task(task, "end_text2speech")
    return task

`text_to_speech_openai(task, task_param)`

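Convert the text to speech using the OpenAI API.

Parameters: `task` (`Task`) is the task to handle; `task_param` (`Text2SpeechParameters`) holds the parameters for the task.

Returns: the `Task`, marked as completed, with its result and latency profiles updated.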

Source code in Agent/modules/text_to_speech/text2speech.py

def text_to_speech_openai(
    self, task: Task, task_param: Text2SpeechParameters
) -> Task:
    """
    Synthesize speech for the task text through the OpenAI speech endpoint.

    The audio is saved to ``DATA_DIR / "tts" / "<task.id>.mp3"`` and, when
    ``self.to_s3`` is set, uploaded under the S3 key ``tts/<task.id>.mp3``.

    Args:
        task (Task): The task to handle
        task_param (Text2SpeechParameters): The parameters for the task

    Returns:
        The same task, marked as completed, with its result profile
        (audio file name) and latency profile updated
    """
    timings = {}
    target = DATA_DIR / "tts" / f"{task.id}.mp3"
    # make sure the output folder exists before anything is written to it
    target.parent.mkdir(parents=True, exist_ok=True)
    local_path = target.as_posix()

    openai_client = OpenAI()
    with time_tracker("openai_tts", timings, TrackType.MODEL.value):
        speech = openai_client.audio.speech.create(
            model="tts-1", voice="alloy", input=task_param.text
        )
    with time_tracker("save_audio", timings, TrackType.TRANSFER.value):
        speech.stream_to_file(local_path)

    if self.to_s3:
        with time_tracker("to_s3", timings, TrackType.TRANSFER.value):
            self.upload_to_s3(local_path, f"tts/{task.id}.mp3")

    # Only the file name (last path component) is reported back.
    outcome = {"audio_file_path": local_path.rsplit("/", 1)[-1]}

    task.result_status = ResultStatus.completed.value
    task.result_json.result_profile.update(outcome)
    task.result_json.latency_profile.update(timings)
    return task

`upload_to_s3(file_path, s3_key)` `staticmethod`

Upload the file to S3.

Parameters: `file_path` (`str`) is the path to the file; `s3_key` (`str`) is the key to use in S3.

Source code in Agent/modules/text_to_speech/text2speech.py

@staticmethod
def upload_to_s3(file_path: str, s3_key: str):
    """
    Send a local file to the S3 bucket named by ``S3_BUCKET``.

    Args:
        file_path (str): The path to the file
        s3_key (str): The key to use in S3
    """
    BOTO3_SESSION.client("s3").upload_file(file_path, S3_BUCKET, s3_key)

Text2Speech

Text2Speech

__init__(model_name='openai', to_s3=False)

handle_task(task)

text_to_speech_openai(task, task_param)

upload_to_s3(file_path, s3_key) staticmethod

`Text2Speech`

`__init__(model_name='openai', to_s3=False)`

`handle_task(task)`

`text_to_speech_openai(task, task_param)`

`upload_to_s3(file_path, s3_key)` `staticmethod`