o
    "i2~                     @   s   d dl Z d dlZd dlmZ ddlmZ ddlmZm	Z	 ddl
mZ ddlmZ ddlmZ dd	lmZ dd
lmZ e e jdZG dd dZG dd dZdS )    N)JSONDecodeError   )ApiError)AsyncClientWrapperSyncClientWrapper)jsonable_encoder)remove_none_from_dict)RequestOptions)%PronunciationDictionaryVersionLocator)VoiceSettings.c                   @      e Zd ZdefddZddeeedddedeje	 deje d	ed
eje deje
 dejeje  deje deje fddZddeeedddedeje	 deje d	ed
eje deje
 dejeje  deje deje fddZdS )TextToSpeechClientclient_wrapperc                C   
   || _ d S N_client_wrapperselfr    r   d/var/www/html/voicebot/backend/venv/lib/python3.10/site-packages/elevenlabs/text_to_speech/client.py__init__      
zTextToSpeechClient.__init__Noptimize_streaming_latencyoutput_formatmodel_idvoice_settings!pronunciation_dictionary_locatorsrequest_optionsvoice_idr   r   textr   r   r   r   returnc                c   s   d|i}	|t ur||	d< |t ur||	d< |t ur||	d< | jjjdtj| j  ddt| tt	||d|d	urC|
d
i ni |d	u sR|
dd	u rVt|	ni t|	tt	|
di tt	i | j |d	urw|
di ni |d	ur|
dd	ur|
dndd|d	ur|
dnddB}
d|
j  krdk rn n|
 D ]}|V  q	 W d	   d	S |
  z|
 }W n ty   t|
j|
jdw t|
j|d1 sw   Y  d	S )u  
        Converts text into speech using a voice of your choice and returns audio.

        Parameters:
            - voice_id: str. Voice ID to be used, you can use https://api.elevenlabs.io/v1/voices to list all the available voices.

            - optimize_streaming_latency: typing.Optional[int]. You can turn on latency optimizations at some cost of quality. The best possible final latency varies by model. Possible values:
                                                                0 - default mode (no latency optimizations)
                                                                1 - normal latency optimizations (about 50% of possible latency improvement of option 3)
                                                                2 - strong latency optimizations (about 75% of possible latency improvement of option 3)
                                                                3 - max latency optimizations
                                                                4 - max latency optimizations, but also with text normalizer turned off for even more latency savings (best latency, but can mispronounce eg numbers and dates).

                                                                Defaults to 0.
            - output_format: typing.Optional[str]. Output format of the generated audio. Must be one of:
                                                   mp3_22050_32 - output format, mp3 with 22.05kHz sample rate at 32kbps.
                                                   mp3_44100_32 - output format, mp3 with 44.1kHz sample rate at 32kbps.
                                                   mp3_44100_64 - output format, mp3 with 44.1kHz sample rate at 64kbps.
                                                   mp3_44100_96 - output format, mp3 with 44.1kHz sample rate at 96kbps.
                                                   mp3_44100_128 - default output format, mp3 with 44.1kHz sample rate at 128kbps.
                                                   mp3_44100_192 - output format, mp3 with 44.1kHz sample rate at 192kbps. Requires you to be subscribed to Creator tier or above.
                                                   pcm_16000 - PCM format (S16LE) with 16kHz sample rate.
                                                   pcm_22050 - PCM format (S16LE) with 22.05kHz sample rate.
                                                   pcm_24000 - PCM format (S16LE) with 24kHz sample rate.
                                                   pcm_44100 - PCM format (S16LE) with 44.1kHz sample rate. Requires you to be subscribed to Independent Publisher tier or above.
                                                   ulaw_8000 - μ-law format (sometimes written mu-law, often approximated as u-law) with 8kHz sample rate. Note that this format is commonly used for Twilio audio inputs.
            - text: str. The text that will get converted into speech.

            - model_id: typing.Optional[str]. Identifier of the model that will be used, you can query them using GET /v1/models. The model needs to have support for text to speech, you can check this using the can_do_text_to_speech property.

            - voice_settings: typing.Optional[VoiceSettings]. Voice settings overriding stored setttings for the given voice. They are applied only on the given request.

            - pronunciation_dictionary_locators: typing.Optional[typing.Sequence[PronunciationDictionaryVersionLocator]]. A list of pronunciation dictionary locators (id, version_id) to be applied to the text. They will be applied in order. You may have up to 3 locators per request

            - request_options: typing.Optional[RequestOptions]. Request-specific configuration.
        ---
        from elevenlabs import PronunciationDictionaryVersionLocator, VoiceSettings
        from elevenlabs.client import ElevenLabs

        client = ElevenLabs(
            api_key="YOUR_API_KEY",
        )
        client.text_to_speech.convert(
            voice_id="string",
            optimize_streaming_latency=1,
            output_format="string",
            text="string",
            model_id="string",
            voice_settings=VoiceSettings(
                stability=1.1,
                similarity_boost=1.1,
                style=1.1,
                use_speaker_boost=True,
            ),
            pronunciation_dictionary_locators=[
                PronunciationDictionaryVersionLocator(
                    pronunciation_dictionary_id="string",
                    version_id="string",
                )
            ],
        )
        r!   r   r   r   POST/v1/text-to-speech/r   r   Nadditional_query_parametersadditional_body_parametersadditional_headerstimeout_in_seconds<   r   max_retriesparamsjsonheaderstimeoutretriesr,      ,  status_codebodyOMITr   httpx_clientstreamurllibparseurljoinget_base_urlr   r   getget_headersr6   
iter_bytesreadr/   r   r   r!   r   r    r   r   r!   r   r   r   r   _request	_response_chunk_response_jsonr   r   r   convert   sx   L	
%*zTextToSpeechClient.convertc                c   s   d|i}	|t ur||	d< |t ur||	d< |t ur||	d< | jjjdtj| j  ddt| dtt	||d	|d
urD|
di ni |d
u sS|
dd
u rWt|	ni t|	tt	|
di tt	i | j |d
urx|
di ni |d
ur|
dd
ur|
dndd|d
ur|
dnddB}
d|
j  krdk rn n|
 D ]}|V  q	 W d
   d
S |
  z|
 }W n ty   t|
j|
jdw t|
j|d1 sw   Y  d
S )u  
        Converts text into speech using a voice of your choice and returns audio as an audio stream.

        Parameters:
            - voice_id: str. Voice ID to be used, you can use https://api.elevenlabs.io/v1/voices to list all the available voices.

            - optimize_streaming_latency: typing.Optional[int]. You can turn on latency optimizations at some cost of quality. The best possible final latency varies by model. Possible values:
                                                                0 - default mode (no latency optimizations)
                                                                1 - normal latency optimizations (about 50% of possible latency improvement of option 3)
                                                                2 - strong latency optimizations (about 75% of possible latency improvement of option 3)
                                                                3 - max latency optimizations
                                                                4 - max latency optimizations, but also with text normalizer turned off for even more latency savings (best latency, but can mispronounce eg numbers and dates).

                                                                Defaults to 0.
            - output_format: typing.Optional[str]. Output format of the generated audio. Must be one of:
                                                   mp3_22050_32 - output format, mp3 with 22.05kHz sample rate at 32kbps.
                                                   mp3_44100_32 - output format, mp3 with 44.1kHz sample rate at 32kbps.
                                                   mp3_44100_64 - output format, mp3 with 44.1kHz sample rate at 64kbps.
                                                   mp3_44100_96 - output format, mp3 with 44.1kHz sample rate at 96kbps.
                                                   mp3_44100_128 - default output format, mp3 with 44.1kHz sample rate at 128kbps.
                                                   mp3_44100_192 - output format, mp3 with 44.1kHz sample rate at 192kbps. Requires you to be subscribed to Creator tier or above.
                                                   pcm_16000 - PCM format (S16LE) with 16kHz sample rate.
                                                   pcm_22050 - PCM format (S16LE) with 22.05kHz sample rate.
                                                   pcm_24000 - PCM format (S16LE) with 24kHz sample rate.
                                                   pcm_44100 - PCM format (S16LE) with 44.1kHz sample rate. Requires you to be subscribed to Independent Publisher tier or above.
                                                   ulaw_8000 - μ-law format (sometimes written mu-law, often approximated as u-law) with 8kHz sample rate. Note that this format is commonly used for Twilio audio inputs.
            - text: str. The text that will get converted into speech.

            - model_id: typing.Optional[str]. Identifier of the model that will be used, you can query them using GET /v1/models. The model needs to have support for text to speech, you can check this using the can_do_text_to_speech property.

            - voice_settings: typing.Optional[VoiceSettings]. Voice settings overriding stored setttings for the given voice. They are applied only on the given request.

            - pronunciation_dictionary_locators: typing.Optional[typing.Sequence[PronunciationDictionaryVersionLocator]]. A list of pronunciation dictionary locators (id, version_id) to be applied to the text. They will be applied in order. You may have up to 3 locators per request

            - request_options: typing.Optional[RequestOptions]. Request-specific configuration.
        ---
        from elevenlabs import PronunciationDictionaryVersionLocator, VoiceSettings
        from elevenlabs.client import ElevenLabs

        client = ElevenLabs(
            api_key="YOUR_API_KEY",
        )
        client.text_to_speech.convert_as_stream(
            voice_id="string",
            optimize_streaming_latency=1,
            output_format="string",
            text="string",
            model_id="string",
            voice_settings=VoiceSettings(
                stability=1.1,
                similarity_boost=1.1,
                style=1.1,
                use_speaker_boost=True,
            ),
            pronunciation_dictionary_locators=[
                PronunciationDictionaryVersionLocator(
                    pronunciation_dictionary_id="string",
                    version_id="string",
                )
            ],
        )
        r!   r   r   r   r#   r$   r%   /streamr&   Nr'   r(   r)   r*   r+   r   r,   r-   r3   r4   r5   r8   rD   r   r   r   convert_as_stream   sx   L	
%*z$TextToSpeechClient.convert_as_stream)__name__
__module____qualname__r   r   r9   strtypingOptionalintr   Sequencer
   r	   IteratorbytesrI   rK   r   r   r   r   r      r    	
 		r   c                   @   r   )AsyncTextToSpeechClientr   c                C   r   r   r   r   r   r   r   r   !  r   z AsyncTextToSpeechClient.__init__Nr   r    r   r   r!   r   r   r   r   r"   c                C  s  d|i}	|t ur||	d< |t ur||	d< |t ur||	d< | jjjdtj| j  ddt| tt	||d|d	urC|
d
i ni |d	u sR|
dd	u rVt|	ni t|	tt	|
di tt	i | j |d	urw|
di ni |d	ur|
dd	ur|
dndd|d	ur|
dndd4 I d	H M}
d|
j  krdk rn n|
 2 z	3 d	H W }|V  q6 	 W d	  I d	H  d	S |
 I d	H  z|
 }W n ty   t|
j|
jdw t|
j|d1 I d	H sw   Y  d	S )u
  
        Converts text into speech using a voice of your choice and returns audio.

        Parameters:
            - voice_id: str. Voice ID to be used, you can use https://api.elevenlabs.io/v1/voices to list all the available voices.

            - optimize_streaming_latency: typing.Optional[int]. You can turn on latency optimizations at some cost of quality. The best possible final latency varies by model. Possible values:
                                                                0 - default mode (no latency optimizations)
                                                                1 - normal latency optimizations (about 50% of possible latency improvement of option 3)
                                                                2 - strong latency optimizations (about 75% of possible latency improvement of option 3)
                                                                3 - max latency optimizations
                                                                4 - max latency optimizations, but also with text normalizer turned off for even more latency savings (best latency, but can mispronounce eg numbers and dates).

                                                                Defaults to 0.
            - output_format: typing.Optional[str]. Output format of the generated audio. Must be one of:
                                                   mp3_22050_32 - output format, mp3 with 22.05kHz sample rate at 32kbps.
                                                   mp3_44100_32 - output format, mp3 with 44.1kHz sample rate at 32kbps.
                                                   mp3_44100_64 - output format, mp3 with 44.1kHz sample rate at 64kbps.
                                                   mp3_44100_96 - output format, mp3 with 44.1kHz sample rate at 96kbps.
                                                   mp3_44100_128 - default output format, mp3 with 44.1kHz sample rate at 128kbps.
                                                   mp3_44100_192 - output format, mp3 with 44.1kHz sample rate at 192kbps. Requires you to be subscribed to Creator tier or above.
                                                   pcm_16000 - PCM format (S16LE) with 16kHz sample rate.
                                                   pcm_22050 - PCM format (S16LE) with 22.05kHz sample rate.
                                                   pcm_24000 - PCM format (S16LE) with 24kHz sample rate.
                                                   pcm_44100 - PCM format (S16LE) with 44.1kHz sample rate. Requires you to be subscribed to Independent Publisher tier or above.
                                                   ulaw_8000 - μ-law format (sometimes written mu-law, often approximated as u-law) with 8kHz sample rate. Note that this format is commonly used for Twilio audio inputs.
            - text: str. The text that will get converted into speech.

            - model_id: typing.Optional[str]. Identifier of the model that will be used, you can query them using GET /v1/models. The model needs to have support for text to speech, you can check this using the can_do_text_to_speech property.

            - voice_settings: typing.Optional[VoiceSettings]. Voice settings overriding stored setttings for the given voice. They are applied only on the given request.

            - pronunciation_dictionary_locators: typing.Optional[typing.Sequence[PronunciationDictionaryVersionLocator]]. A list of pronunciation dictionary locators (id, version_id) to be applied to the text. They will be applied in order. You may have up to 3 locators per request

            - request_options: typing.Optional[RequestOptions]. Request-specific configuration.
        ---
        from elevenlabs import PronunciationDictionaryVersionLocator, VoiceSettings
        from elevenlabs.client import AsyncElevenLabs

        client = AsyncElevenLabs(
            api_key="YOUR_API_KEY",
        )
        await client.text_to_speech.convert(
            voice_id="string",
            optimize_streaming_latency=1,
            output_format="string",
            text="string",
            model_id="string",
            voice_settings=VoiceSettings(
                stability=1.1,
                similarity_boost=1.1,
                style=1.1,
                use_speaker_boost=True,
            ),
            pronunciation_dictionary_locators=[
                PronunciationDictionaryVersionLocator(
                    pronunciation_dictionary_id="string",
                    version_id="string",
                )
            ],
        )
        r!   r   r   r   r#   r$   r%   r&   Nr'   r(   r)   r*   r+   r   r,   r-   r3   r4   r5   r9   r   r:   r;   r<   r=   r>   r?   r   r   r@   rA   r6   aiter_bytesareadr/   r   r   r!   rD   r   r   r   rI   $  sz   L	
%*zAsyncTextToSpeechClient.convertc                C  s  d|i}	|t ur||	d< |t ur||	d< |t ur||	d< | jjjdtj| j  ddt| dtt	||d	|d
urD|
di ni |d
u sS|
dd
u rWt|	ni t|	tt	|
di tt	i | j |d
urx|
di ni |d
ur|
dd
ur|
dndd|d
ur|
dndd4 I d
H M}
d|
j  krdk rn n|
 2 z	3 d
H W }|V  q6 	 W d
  I d
H  d
S |
 I d
H  z|
 }W n ty   t|
j|
jdw t|
j|d1 I d
H sw   Y  d
S )u'  
        Converts text into speech using a voice of your choice and returns audio as an audio stream.

        Parameters:
            - voice_id: str. Voice ID to be used, you can use https://api.elevenlabs.io/v1/voices to list all the available voices.

            - optimize_streaming_latency: typing.Optional[int]. You can turn on latency optimizations at some cost of quality. The best possible final latency varies by model. Possible values:
                                                                0 - default mode (no latency optimizations)
                                                                1 - normal latency optimizations (about 50% of possible latency improvement of option 3)
                                                                2 - strong latency optimizations (about 75% of possible latency improvement of option 3)
                                                                3 - max latency optimizations
                                                                4 - max latency optimizations, but also with text normalizer turned off for even more latency savings (best latency, but can mispronounce eg numbers and dates).

                                                                Defaults to 0.
            - output_format: typing.Optional[str]. Output format of the generated audio. Must be one of:
                                                   mp3_22050_32 - output format, mp3 with 22.05kHz sample rate at 32kbps.
                                                   mp3_44100_32 - output format, mp3 with 44.1kHz sample rate at 32kbps.
                                                   mp3_44100_64 - output format, mp3 with 44.1kHz sample rate at 64kbps.
                                                   mp3_44100_96 - output format, mp3 with 44.1kHz sample rate at 96kbps.
                                                   mp3_44100_128 - default output format, mp3 with 44.1kHz sample rate at 128kbps.
                                                   mp3_44100_192 - output format, mp3 with 44.1kHz sample rate at 192kbps. Requires you to be subscribed to Creator tier or above.
                                                   pcm_16000 - PCM format (S16LE) with 16kHz sample rate.
                                                   pcm_22050 - PCM format (S16LE) with 22.05kHz sample rate.
                                                   pcm_24000 - PCM format (S16LE) with 24kHz sample rate.
                                                   pcm_44100 - PCM format (S16LE) with 44.1kHz sample rate. Requires you to be subscribed to Independent Publisher tier or above.
                                                   ulaw_8000 - μ-law format (sometimes written mu-law, often approximated as u-law) with 8kHz sample rate. Note that this format is commonly used for Twilio audio inputs.
            - text: str. The text that will get converted into speech.

            - model_id: typing.Optional[str]. Identifier of the model that will be used, you can query them using GET /v1/models. The model needs to have support for text to speech, you can check this using the can_do_text_to_speech property.

            - voice_settings: typing.Optional[VoiceSettings]. Voice settings overriding stored setttings for the given voice. They are applied only on the given request.

            - pronunciation_dictionary_locators: typing.Optional[typing.Sequence[PronunciationDictionaryVersionLocator]]. A list of pronunciation dictionary locators (id, version_id) to be applied to the text. They will be applied in order. You may have up to 3 locators per request

            - request_options: typing.Optional[RequestOptions]. Request-specific configuration.
        ---
        from elevenlabs import PronunciationDictionaryVersionLocator, VoiceSettings
        from elevenlabs.client import AsyncElevenLabs

        client = AsyncElevenLabs(
            api_key="YOUR_API_KEY",
        )
        await client.text_to_speech.convert_as_stream(
            voice_id="string",
            optimize_streaming_latency=1,
            output_format="string",
            text="string",
            model_id="string",
            voice_settings=VoiceSettings(
                stability=1.1,
                similarity_boost=1.1,
                style=1.1,
                use_speaker_boost=True,
            ),
            pronunciation_dictionary_locators=[
                PronunciationDictionaryVersionLocator(
                    pronunciation_dictionary_id="string",
                    version_id="string",
                )
            ],
        )
        r!   r   r   r   r#   r$   r%   rJ   r&   Nr'   r(   r)   r*   r+   r   r,   r-   r3   r4   r5   rX   rD   r   r   r   rK     sz   L	
%*z)AsyncTextToSpeechClient.convert_as_stream)rL   rM   rN   r   r   r9   rO   rP   rQ   rR   r   rS   r
   r	   AsyncIteratorrU   rI   rK   r   r   r   r   rW      rV   rW   )rP   urllib.parser<   json.decoderr   core.api_errorr   core.client_wrapperr   r   core.jsonable_encoderr   core.remove_none_from_dictr   core.request_optionsr	   .types.pronunciation_dictionary_version_locatorr
   types.voice_settingsr   castAnyr9   r   rW   r   r   r   r   <module>   s     