o
    (i                  
   @   s8  d dl Z d dlmZ d dlZd dlZd dlmZmZmZm	Z	 d dl
mZ d dlmZmZmZ d dlmZ d dlmZ d dlmZ eejZed	d	ed
Ze ZeejdZG dd deZejjed de de de fddZ!d$de de"de"dee  fddZ#de de de de fddZ$d%de d e d!e"de fd"d#Z%dS )&    N)List)create_engineColumn
ForeignKeyText)UUID)declarative_basesessionmakerSession)Vector)OpenAI)settingsF)
autocommit	autoflushbind)api_keyc                   @   sZ   e Zd ZdZeedddejdZeeddZ	eeddZ
eeddZeedZdS )	DocumentChunkdocument_chunksT)as_uuid)primary_keydefaultF)nullablei   N)__name__
__module____qualname____tablename__r   pg_UUIDuuiduuid4id
project_iddocument_idr   contentr   	embedding r$   r$   -/var/www/html/voicebot/backend/rag_manager.pyr      s    r   )r   	file_path	file_typereturnc           	      C   s   d}|dkr5t | d }t|}|jD ]}| }|r"||d 7 }qW d    |S 1 s.w   Y  |S |dkrMt| }|jD ]	}||jd 7 }qA|S |dkrmt | ddd	}|	 }W d    |S 1 shw   Y  |S )
N pdfrb
docxtxtrzutf-8)encoding)
openPyPDF2	PdfReaderpagesextract_textr-   Document
paragraphstextread)	r&   r'   r8   freaderpage	extracteddocparar$   r$   r%   r5      s4   






r5        r8   
chunk_sizeoverlapc                 C   sJ   g }d}t | }||k r#|| }|| ||  ||| 7 }||k s|S )Nr   )lenappend)r8   rB   rC   chunksstarttext_lengthendr$   r$   r%   
chunk_text/   s   rJ   r    r!   c              
   C   s   t ||}t|}t }zRz&|D ]}tjj|dd}|jd j}	t| |||	d}
|	|
 q|
  W n tyP } z|  td|  W Y d }~nd }~ww W |  d S W |  d S |  w )Ntext-embedding-3-smallinputmodelr   )r    r!   r"   r#   zError processing document: )r5   rJ   SessionLocalclient
embeddingscreatedatar#   r   addcommit	Exceptionrollbackprintclose)r    r!   r&   r'   r8   rF   dbchunkresponseembedding_vector	doc_chunker$   r$   r%   process_and_store_document9   s6   
r`      querylimitc                 C   s~   t jj|dd}|jd j}t }z(|ttj	| k
tj|| }ddd |D }|W |  S |  w )NrK   rL   r   z

c                 S   s   g | ]}|j qS r$   )r"   ).0r/   r$   r$   r%   
<listcomp>c   s    z)search_knowledge_base.<locals>.<listcomp>)rP   rQ   rR   rS   r#   rO   rb   r   filterr    order_bycosine_distancerc   alljoinrY   )r    rb   rc   r\   query_embeddingrZ   resultscontextr$   r$   r%   search_knowledge_baseT   s$   


rn   )r@   rA   )ra   )&r   typingr   r2   r-   
sqlalchemyr   r   r   r   sqlalchemy.dialects.postgresqlr   r   sqlalchemy.ormr   r	   r
   pgvector.sqlalchemyr   openair   configr   DATABASE_URLenginerO   BaseOPENAI_API_KEYrP   r   metadata
create_allstrr5   intrJ   r`   rn   r$   r$   r$   r%   <module>   s(    
 
 