o
    "iQ                  	   @   s   d dl Z d dlZd dlZd dlZd dlZd dl mZmZmZmZmZ ddl	m
Z
mZ ddlmZmZ ddlmZmZmZmZ ddlmZ ddlmZ ed	d
eddddddZeZeZeZdedefddZe e j dZ!G dd de
Z"G dd deZ#dS )    N)IteratorOptionalUnionr   AsyncIterator   )BaseElevenLabsAsyncBaseElevenLabs)RequestOptionsApiError)VoiceVoiceSettings%PronunciationDictionaryVersionLocatorModel)ElevenLabsEnvironment)RealtimeTextToSpeechClientEXAVITQu4vr4xnSDxMaLRachelgQ?g      ?g        T)	stabilitysimilarity_booststyleuse_speaker_boost)voice_idnamesettingsvalreturnc                 C   s   t td| S )Nz^[a-zA-Z0-9]{20}$)boolrematch)r    r   U/var/www/html/voicebot/backend/venv/lib/python3.10/site-packages/elevenlabs/client.pyis_voice_id"   s   r!   .c                       s4  e Zd ZdZdejedddddej	e
 dedej	e
 d	ej	e d
ej	ej f
 fddZ		d$de
deje
 de
dej	e
 dej	e defddZeejddddedddee
ee
 f deeeef dej	e deeef dej	e ded e	e
 d!ej	eje   dej	e dee!ee! f fd"d#Z"  Z#S )%
ElevenLabsa  
    Use this class to access the different functions within the SDK. You can instantiate any number of clients with different configuration that will propogate to these functions.

    Parameters:
        - base_url: typing.Optional[str]. The base url to use for requests from the client.

        - environment: ElevenLabsEnvironment. The environment to use for requests from the client. from .environment import ElevenLabsEnvironment

                                              Defaults to ElevenLabsEnvironment.PRODUCTION

        - api_key: typing.Optional[str].

        - timeout: typing.Optional[float]. The timeout to be used, in seconds, for requests by default the timeout is 60 seconds.

        - httpx_client: typing.Optional[httpx.Client]. The httpx client to use for making requests, a preconfigured client is used by default, however this is useful should you want to pass in any custom httpx configuration.
    ---
    from elevenlabs.client import ElevenLabs

    client = ElevenLabs(
        api_key="YOUR_API_KEY",
    )
    NELEVEN_API_KEY<   base_urlenvironmentapi_keytimeouthttpx_clientr&   r'   r(   r)   r*   c                   s(   t  j|||||d t| jd| _d S )Nr%   )client_wrapper)super__init__r   _client_wrappertext_to_speech)selfr&   r'   r(   r)   r*   	__class__r   r    r-   A   s   	zElevenLabs.__init__r   filesdescriptionlabelsrequest_optionsr   c                 C   s>   | j j||dd |D tt|pi d}| j j|j|dS )a  
          This is a manually maintained helper function that clones a voice from a set of audio files.
          **NOTE**: This function is a helper function and is simply making 
          calls to the `add` and `get` functions of the `voices` endpoint.

          Parameters:
              - name: str. The name that identifies this voice. This will be displayed in the dropdown of the website.

              - files: typing.List[str]. The filepaths of the audio files to be used to create the voice.

              - description: str. How would you describe the voice?

              - labels: str. Serialized labels dictionary for the voice.

              - request_options: typing.Optional[RequestOptions]. Request-specific configuration.
        c                 S      g | ]}t |d qS rbopen.0filer   r   r    
<listcomp>n       z$ElevenLabs.clone.<locals>.<listcomp>r   r4   r3   r5   r6   voicesaddstrjsondumpsgetr   r0   r   r3   r4   r5   r6   add_voice_responser   r   r    cloneS   s   zElevenLabs.cloneeleven_monolingual_v1r   Fmp3_44100_128voicevoice_settingsmodeloptimize_streaming_latencystreamoutput_format!pronunciation_dictionary_locatorsr6   textrP   rQ   rR   rS   rT   rU   rV   c       	      
      sJ  t  trt r }
nGt  tr:| jj|	d}td| t fdd|jD d}|du r7td  dd|}
nt  trP j	}
|t
jkrO jdurO j}nt
j	}
t |tr[|}nt |trc|j}|rt |trx| jj|
|||||	||d	S t |tr| jj|
|||	|d
S tddt |tstdd| jj|
||||||	|dS )uP  
            - text: Union[str, Iterator[str]]. The string or stream of strings that will get converted into speech.

            - voice: str. A voice id, name, or voice response. Defaults to the Rachel voice. 

            - model: typing.Optional[str]. Identifier of the model that will be used, you can query them using GET /v1/models. 
                                           The model needs to have support for text to speech, you can check this using the 
                                           can_do_text_to_speech property.                                                                

            - optimize_streaming_latency: typing.Optional[int]. You can turn on latency optimizations at some cost of quality. The best possible final latency varies by model. Possible values:
                                                                0 - default mode (no latency optimizations)
                                                                1 - normal latency optimizations (about 50% of possible latency improvement of option 3)
                                                                2 - strong latency optimizations (about 75% of possible latency improvement of option 3)
                                                                3 - max latency optimizations
                                                                4 - max latency optimizations, but also with text normalizer turned off for even more latency savings (best latency, but can mispronounce eg numbers and dates).

                                                                Defaults to 0.
            
            - stream: bool. If true, the function will return a generator that will yield the audio in chunks.    

                            Defaults to False.                                                                

            - output_format: typing.Optional[str]. Output format of the generated audio. Must be one of:
                                                   mp3_22050_32 - output format, mp3 with 22.05kHz sample rate at 32kbps.
                                                   mp3_44100_32 - output format, mp3 with 44.1kHz sample rate at 32kbps.
                                                   mp3_44100_64 - output format, mp3 with 44.1kHz sample rate at 64kbps.
                                                   mp3_44100_96 - output format, mp3 with 44.1kHz sample rate at 96kbps.
                                                   mp3_44100_128 - default output format, mp3 with 44.1kHz sample rate at 128kbps.
                                                   mp3_44100_192 - output format, mp3 with 44.1kHz sample rate at 192kbps. Requires you to be subscribed to Creator tier or above.
                                                   pcm_16000 - PCM format (S16LE) with 16kHz sample rate.
                                                   pcm_22050 - PCM format (S16LE) with 22.05kHz sample rate.
                                                   pcm_24000 - PCM format (S16LE) with 24kHz sample rate.
                                                   pcm_44100 - PCM format (S16LE) with 44.1kHz sample rate. Requires you to be subscribed to Independent Publisher tier or above.
                                                   ulaw_8000 - μ-law format (sometimes written mu-law, often approximated as u-law) with 8kHz sample rate. Note that this format is commonly used for Twilio audio inputs.

                                                    Defaults to mp3_44100_128.
            
            - voice_settings: typing.Optional[VoiceSettings]. Voice settings overriding stored setttings for the given voice. They are applied only on the given request.

            - pronunciation_dictionary_locators: typing.Optional[typing.Sequence[PronunciationDictionaryVersionLocator]]. A list of pronunciation dictionary locators (id, version_id) to be applied to the text. They will be applied in order. You may have up to 3 locators per request                                                    

            - request_options: typing.Optional[RequestOptions]. Request-specific configuration.
        rB   voices_responsec                 3        | ]}|j  kr|jV  qd S Nr   r   r=   vrP   r   r    	<genexpr>       z&ElevenLabs.generate.<locals>.<genexpr>NVoice  not found.body)r   rQ   rS   rU   rW   r6   rV   model_id)r   rQ   rW   r6   re   z)Text is neither a string nor an iterator.+Text must be a string when stream is False.r   re   rQ   rS   rU   rW   r6   rV   )
isinstancerF   r!   rD   get_allprintnextr
   r   r   DEFAULT_VOICEr   r   re   r/   convert_as_streamr   convert_realtimeconvertr0   rW   rP   rQ   rR   rS   rT   rU   rV   r6   r   rX   maybe_voice_idre   r   r^   r    generatev   sj   :












zElevenLabs.generate)NN)$__name__
__module____qualname____doc__r   
PRODUCTIONosgetenvtypingr   rF   floathttpxClientr-   Listr	   r   rL   rl   r   OMITr   r   VoiceId	VoiceNamer   ModelIdr   intr   Sequencer   bytesrr   __classcell__r   r   r1   r    r"   *   s    

'
	
r"   c                   @   s   e Zd ZdZ	ddedeje dededeje de	fd	d
Z
eejddddedddedeeee	f deje deeef deje dedee dejeje  deje deeee f fddZdS )AsyncElevenLabsa(  
    Use this class to access the different functions within the SDK. You can instantiate any number of clients with different configuration that will propogate to these functions.

    Parameters:
        - base_url: typing.Optional[str]. The base url to use for requests from the client.

        - environment: ElevenLabsEnvironment. The environment to use for requests from the client. from .environment import ElevenLabsEnvironment

                                              Defaults to ElevenLabsEnvironment.PRODUCTION

        - api_key: typing.Optional[str].

        - timeout: typing.Optional[float]. The timeout to be used, in seconds, for requests by default the timeout is 60 seconds.

        - httpx_client: typing.Optional[httpx.AsyncClient]. The httpx client to use for making requests, a preconfigured client is used by default, however this is useful should you want to pass in any custom httpx configuration.
    ---
    from elevenlabs.client import AsyncElevenLabs

    client = AsyncElevenLabs(
        api_key="YOUR_API_KEY",
    )
    Nr   r3   r4   r5   r6   r   c                    sL   | j j||dd |D tt|pi dI dH }| j j|j|dI dH S )a  
          This is a manually mnaintained helper function that generates a 
          voice from provided text.

          **NOTE**: This function is a helper function and is simply making 
          calls to the `text_to_speech.convert` and`text_to_speech.convert_as_stream`
          functions.

          Parameters:
              - name: str. The name that identifies this voice. This will be displayed in the dropdown of the website.

              - files: typing.List[str]. The filepaths of the audio files to be used to create the voice.

              - description: str. How would you describe the voice?

              - labels: str. Serialized labels dictionary for the voice.

              - request_options: typing.Optional[RequestOptions]. Request-specific configuration.
        c                 S   r7   r8   r:   r<   r   r   r    r?   "  r@   z)AsyncElevenLabs.clone.<locals>.<listcomp>rA   NrB   rC   rJ   r   r   r    rL     s   zAsyncElevenLabs.clonerM   r   FrN   rO   rW   rP   rQ   rR   rS   rT   rU   rV   c       	      
      s  t  trt r }
nCt  tr7| jj|	dI dH }t fdd|jD d}|s4td  dd|}
nt  trM j}
|t	j
krL j
durL j
}nt	j}
t |trX|}nt |tr`|j}|rp| jj|
||||||	|dS t |tsztd	d| jj|
||||||	|dS )
u{  
          This is a manually mnaintained helper function that generates a 
          voice from provided text.

          **NOTE**: This function is a helper function and is simply making 
          calls to the `text_to_speech.convert` and`text_to_speech.convert_as_stream`
          functions.

            - text: str. The string that will get converted into speech. The Async client does not support streaming.

            - voice: str. A voice id, name, or voice response. Defaults to the Rachel voice. 

            - model: typing.Optional[str]. Identifier of the model that will be used, you can query them using GET /v1/models. 
                                           The model needs to have support for text to speech, you can check this using the 
                                           can_do_text_to_speech property.                                                                

            - optimize_streaming_latency: typing.Optional[int]. You can turn on latency optimizations at some cost of quality. The best possible final latency varies by model. Possible values:
                                                                0 - default mode (no latency optimizations)
                                                                1 - normal latency optimizations (about 50% of possible latency improvement of option 3)
                                                                2 - strong latency optimizations (about 75% of possible latency improvement of option 3)
                                                                3 - max latency optimizations
                                                                4 - max latency optimizations, but also with text normalizer turned off for even more latency savings (best latency, but can mispronounce eg numbers and dates).

                                                                Defaults to 0.
            
            - stream: bool. If true, the function will return a generator that will yield the audio in chunks.    

                            Defaults to False.                                                                

            - output_format: typing.Optional[str]. Output format of the generated audio. Must be one of:
                                                   mp3_22050_32 - output format, mp3 with 22.05kHz sample rate at 32kbps.
                                                   mp3_44100_32 - output format, mp3 with 44.1kHz sample rate at 32kbps.
                                                   mp3_44100_64 - output format, mp3 with 44.1kHz sample rate at 64kbps.
                                                   mp3_44100_96 - output format, mp3 with 44.1kHz sample rate at 96kbps.
                                                   mp3_44100_128 - default output format, mp3 with 44.1kHz sample rate at 128kbps.
                                                   mp3_44100_192 - output format, mp3 with 44.1kHz sample rate at 192kbps. Requires you to be subscribed to Creator tier or above.
                                                   pcm_16000 - PCM format (S16LE) with 16kHz sample rate.
                                                   pcm_22050 - PCM format (S16LE) with 22.05kHz sample rate.
                                                   pcm_24000 - PCM format (S16LE) with 24kHz sample rate.
                                                   pcm_44100 - PCM format (S16LE) with 44.1kHz sample rate. Requires you to be subscribed to Independent Publisher tier or above.
                                                   ulaw_8000 - μ-law format (sometimes written mu-law, often approximated as u-law) with 8kHz sample rate. Note that this format is commonly used for Twilio audio inputs.

                                                    Defaults to mp3_44100_128.
            
            - voice_settings: typing.Optional[VoiceSettings]. Voice settings overriding stored setttings for the given voice. They are applied only on the given request.

            - pronunciation_dictionary_locators: typing.Optional[typing.Sequence[PronunciationDictionaryVersionLocator]]. A list of pronunciation dictionary locators (id, version_id) to be applied to the text. They will be applied in order. You may have up to 3 locators per request                                                    

            - request_options: typing.Optional[RequestOptions]. Request-specific configuration.
        rB   Nc                 3   rY   rZ   r[   r\   r^   r   r    r_   o  r`   z+AsyncElevenLabs.generate.<locals>.<genexpr>ra   rb   rc   rg   rf   )rh   rF   r!   rD   ri   rk   r
   r   r   rl   r   r   re   r/   rm   ro   rp   r   r^   r    rr   *  sV   A







zAsyncElevenLabs.generaterZ   )rs   rt   ru   rv   rF   rz   r~   r   r	   r   rL   rl   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rr   r   r   r   r    r      s^    
*
	
r   )$rz   rG   r   rx   r|   r   r   r   r   base_clientr   r   corer	   r
   typesr   r   r   r   r'   r   realtime_ttsr   rl   rF   r   r   r   r   r!   castAnyr   r"   r   r   r   r   r    <module>   s4     C