Fix README, update config (Top-K=10, Alpha=0.3), fix requirements, and add data files
- .gitignore +5 -1
- app.py +1 -1
- config.py +3 -3
- data/embeddings.pt +3 -0
- data/parsed_chunks.json +3 -0
- requirements.txt +1 -1
.gitignore CHANGED

```diff
@@ -1,4 +1,8 @@
 .DS_Store
 __pycache__
 .env
-.venv
+.venv
+*debug*
+*test*
+*verify*
+*example*
```
app.py CHANGED

```diff
@@ -99,7 +99,7 @@ with gr.Blocks(title="Асистент із Законодавства") as demo
 
     with gr.Accordion("🛠️ Розширені параметри", open=False):
        use_reranker = gr.Checkbox(label="Використовувати Reranker", value=True)
-        top_k = gr.Slider(label="Кількість джерел", minimum=1, maximum=
+        top_k = gr.Slider(label="Кількість джерел", minimum=1, maximum=20, step=1, value=config.DEFAULT_TOP_K_RERANK)
         temperature = gr.Slider(label="Температура генерації", minimum=0.0, maximum=1.0, step=0.1, value=0.5)
 
     # --- Main Content Area ---
```
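The new slider takes its default from `config.DEFAULT_TOP_K_RERANK`, so changing the config changes the UI default. The snippet below is a minimal, self-contained sketch of that wiring; the `answer_question` handler, the Textbox/Markdown components, and the `submit` hookup are illustrative assumptions, not code from app.py.

```python
import gradio as gr

import config  # repo module providing DEFAULT_TOP_K_RERANK


def answer_question(query, use_reranker, top_k, temperature):
    # Hypothetical handler: gr.Slider passes top_k as a number, so cast it
    # before using it as a result-count limit.
    top_k = int(top_k)
    return f"(demo) would return up to {top_k} sources, reranker={use_reranker}, T={temperature}"


with gr.Blocks(title="Асистент із Законодавства") as demo:
    with gr.Accordion("🛠️ Розширені параметри", open=False):
        use_reranker = gr.Checkbox(label="Використовувати Reranker", value=True)
        top_k = gr.Slider(label="Кількість джерел", minimum=1, maximum=20,
                          step=1, value=config.DEFAULT_TOP_K_RERANK)
        temperature = gr.Slider(label="Температура генерації", minimum=0.0,
                                maximum=1.0, step=0.1, value=0.5)
    query = gr.Textbox(label="Питання")  # assumed component
    answer = gr.Markdown()               # assumed component
    query.submit(answer_question, [query, use_reranker, top_k, temperature], answer)

if __name__ == "__main__":
    demo.launch()
```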
config.py CHANGED

```diff
@@ -20,13 +20,13 @@ LLM_MODEL_NAME = "groq/llama-3.3-70b-versatile"
 
 # Search Parameters
 # Search Parameters
-DEFAULT_TOP_K_RETRIEVAL =
-DEFAULT_TOP_K_RERANK =
+DEFAULT_TOP_K_RETRIEVAL = 100  # Increased to improve recall
+DEFAULT_TOP_K_RERANK = 10  # Reduce context window noise, show only best matches
 DEFAULT_TEMPERATURE = 0.5  # Balanced temperature for helpful but accurate answers
 
 # Retriever Tuning
 MIN_CHUNK_LENGTH = 50  # Filter out stub chunks like "Стаття 207."
-HYBRID_ALPHA = 0.3  # Semantic weight (higher = more semantic focus)
+HYBRID_ALPHA = 0.3  # Semantic weight (higher = more semantic focus)
 MIN_BM25_SCORE = 0.05  # Lower threshold to let good semantic hits through
 
 # System Prompts
```
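Taken together, the new values describe a retrieve-then-rerank pipeline: fetch a wide candidate pool (DEFAULT_TOP_K_RETRIEVAL = 100) using a hybrid score with 0.3 weight on the semantic side, then keep only the best 10 after reranking. The repo's retriever implements this in its own code; the sketch below only illustrates the usual convex-combination reading of HYBRID_ALPHA and is an assumption, not the project's actual scoring logic.

```python
import numpy as np

import config  # HYBRID_ALPHA, DEFAULT_TOP_K_RETRIEVAL, DEFAULT_TOP_K_RERANK


def minmax(x: np.ndarray) -> np.ndarray:
    """Scale scores to [0, 1] so BM25 and semantic scores are comparable."""
    rng = x.max() - x.min()
    return (x - x.min()) / rng if rng > 0 else np.zeros_like(x)


def hybrid_scores(semantic: np.ndarray, bm25: np.ndarray,
                  alpha: float = config.HYBRID_ALPHA) -> np.ndarray:
    """alpha weights the semantic score; 1 - alpha weights BM25."""
    return alpha * minmax(semantic) + (1 - alpha) * minmax(bm25)


# Toy corpus of 1000 chunks with random scores, just to show the two stages.
gen = np.random.default_rng(0)
scores = hybrid_scores(gen.random(1000), gen.random(1000))

# Stage 1: wide, recall-oriented candidate pool.
candidates = np.argsort(scores)[::-1][:config.DEFAULT_TOP_K_RETRIEVAL]
# Stage 2: a cross-encoder would re-score `candidates`; here we just truncate.
final = candidates[:config.DEFAULT_TOP_K_RERANK]
print(len(candidates), "candidates ->", len(final), "shown to the model")
```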
data/embeddings.pt ADDED

```diff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a5733ed92c92f88d694cec4f5b6b6ce0515c3236410f17833614b13fb883db6c
+size 32942251
```
data/parsed_chunks.json ADDED

```diff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fd5fdcc621fd1a352f3e172dd01d976b3076c9efdacd35da05875afaa0c6f6d6
+size 27563582
```
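Both data files are committed as Git LFS pointers, so the diff records only the object hash and size (roughly 33 MB of embeddings and 28 MB of parsed chunks); `git lfs pull` fetches the actual payloads. A minimal loading sketch follows; the tensor layout and JSON schema are assumptions about the files' contents, not documented facts.

```python
import json

import torch

# Requires the real payloads, i.e. a clone with `git lfs pull` already run.
embeddings = torch.load("data/embeddings.pt", map_location="cpu")
with open("data/parsed_chunks.json", encoding="utf-8") as f:
    chunks = json.load(f)

# Shape/length checks only; the exact schema of each chunk is not specified here.
print("embeddings:", type(embeddings), getattr(embeddings, "shape", None))
print("chunks:", len(chunks))
```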
requirements.txt CHANGED

```diff
@@ -4,7 +4,7 @@ python-dotenv
 sentence-transformers
 rank_bm25
 torch
-numpy
+numpy<2
 pymorphy3
 pymorphy3-dicts-uk
 beautifulsoup4
```
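Pinning numpy<2 is most likely a compatibility guard: NumPy 2.0 changed the ABI, and wheels built against 1.x (older torch builds and similar compiled dependencies) can fail to import with it. The commit does not state its reason, so treat that as an inference; a quick post-install sanity check might look like this:

```python
# Run after `pip install -r requirements.txt` to confirm the pin took effect.
import numpy as np
import torch

major = int(np.__version__.split(".")[0])
assert major < 2, f"expected numpy<2, found {np.__version__}"
print("numpy", np.__version__, "| torch", torch.__version__, "- OK")
```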