import sys
from pathlib import Path
from typing import Tuple, List, Dict, Optional

# Add project root to sys.path so sibling packages resolve
sys.path.append(str(Path(__file__).parent.parent))

import config
from modules.retriever import Retriever
from modules.reranker import Reranker
from modules.llm_handler import LLMHandler


class RAGSystem:
    def __init__(self):
        print("Initializing RAG System Components...")
        self.retriever = Retriever()
        # Load heavy models eagerly so the first query does not pay the loading cost.
        self.reranker = Reranker()
        self.llm = LLMHandler()
        print("RAG System Ready.")

    def process_query(self,
                      query: str,
                      api_key: str,
                      message_history: Optional[List] = None,
                      use_reranker: bool = True,
                      top_k_retrieval: int = config.DEFAULT_TOP_K_RETRIEVAL,
                      top_k_rerank: int = config.DEFAULT_TOP_K_RERANK,
                      temperature: float = config.DEFAULT_TEMPERATURE,
                      search_method: str = 'hybrid',
                      legal_area: Optional[str] = None
                      ) -> Tuple[str, List[Dict]]:
        """
        Main RAG pipeline:
        1. Retrieve documents (hybrid / BM25 / semantic search)
        2. Rerank documents (cross-encoder)
        3. Generate answer (LLM)
        """
        # Ensure top_k values are integers (Gradio sliders may pass floats).
        top_k_retrieval = int(top_k_retrieval)
        top_k_rerank = int(top_k_rerank)

        if not api_key:
            # Ukrainian: "Please enter an API key (Groq) to continue."
            return "Будь ласка, введіть API ключ (Groq) для продовження.", []

        # 1. Retrieval
        print(f"Retrieving for: {query} (method: {search_method}, legal_area: {legal_area})")
        retrieved_chunks = self.retriever.search(query,
                                                 top_k=top_k_retrieval,
                                                 method=search_method,
                                                 legal_area=legal_area)

        # 2. Reranking: score retrieved chunks against the query, or just truncate
        # the retrieval list when the reranker is disabled.
        if use_reranker and retrieved_chunks:
            print("Reranking...")
            final_chunks = self.reranker.rank(query, retrieved_chunks, top_k=top_k_rerank)
        else:
            final_chunks = retrieved_chunks[:top_k_rerank]

        # 3. Generation
        print("Generating answer...")
        answer = self.llm.generate_answer(query, final_chunks, api_key, temperature)

        return answer, final_chunks


# Singleton instance to reuse loaded models across API calls
rag_system = RAGSystem()
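

# --- Usage sketch (illustration only, not part of the pipeline) ---
# A minimal way to exercise the singleton end to end. The GROQ_API_KEY
# environment variable name and the sample query are assumptions for
# demonstration; substitute whatever key handling the surrounding app uses.
if __name__ == "__main__":
    import os

    demo_key = os.environ.get("GROQ_API_KEY", "")  # hypothetical env var
    answer, sources = rag_system.process_query(
        query="What are the grounds for contract termination?",  # sample query
        api_key=demo_key,
        search_method="hybrid",
        top_k_retrieval=20,
        top_k_rerank=5,
    )
    print(answer)
    # Each source chunk is a dict produced by the retriever/reranker.
    for i, chunk in enumerate(sources, 1):
        print(f"[{i}] {chunk}")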