dmytrotm committed on
Commit
ffb4f8d
·
1 Parent(s): a046a96

Fix README, update config (Top-K=10, Alpha=0.3), fix requirements, and add data files

Browse files
Files changed (6) hide show
  1. .gitignore +5 -1
  2. app.py +1 -1
  3. config.py +3 -3
  4. data/embeddings.pt +3 -0
  5. data/parsed_chunks.json +3 -0
  6. requirements.txt +1 -1
.gitignore CHANGED
@@ -1,4 +1,8 @@
1
  .DS_Store
2
  __pycache__
3
  .env
4
- .venv
 
 
 
 
 
1
  .DS_Store
2
  __pycache__
3
  .env
4
+ .venv
5
+ *debug*
6
+ *test*
7
+ *verify*
8
+ *example*
app.py CHANGED
@@ -99,7 +99,7 @@ with gr.Blocks(title="Асистент із Законодавства") as demo
99
 
100
  with gr.Accordion("🛠️ Розширені параметри", open=False):
101
  use_reranker = gr.Checkbox(label="Використовувати Reranker", value=True)
102
- top_k = gr.Slider(label="Кількість джерел", minimum=1, maximum=10, step=1, value=5)
103
  temperature = gr.Slider(label="Температура генерації", minimum=0.0, maximum=1.0, step=0.1, value=0.5)
104
 
105
  # --- Main Content Area ---
 
99
 
100
  with gr.Accordion("🛠️ Розширені параметри", open=False):
101
  use_reranker = gr.Checkbox(label="Використовувати Reranker", value=True)
102
+ top_k = gr.Slider(label="Кількість джерел", minimum=1, maximum=20, step=1, value=config.DEFAULT_TOP_K_RERANK)
103
  temperature = gr.Slider(label="Температура генерації", minimum=0.0, maximum=1.0, step=0.1, value=0.5)
104
 
105
  # --- Main Content Area ---
config.py CHANGED
@@ -20,13 +20,13 @@ LLM_MODEL_NAME = "groq/llama-3.3-70b-versatile"
20
 
21
  # Search Parameters
22
  # Search Parameters
23
- DEFAULT_TOP_K_RETRIEVAL = 60 # Increased to improved recall
24
- DEFAULT_TOP_K_RERANK = 5 # Reduce context window noise, show only best matches
25
  DEFAULT_TEMPERATURE = 0.5 # Balanced temperature for helpful but accurate answers
26
 
27
  # Retriever Tuning
28
  MIN_CHUNK_LENGTH = 50 # Filter out stub chunks like "Стаття 207."
29
- HYBRID_ALPHA = 0.7 # Semantic weight (higher = more semantic focus)
30
  MIN_BM25_SCORE = 0.05 # Lower threshold to let good semantic hits through
31
 
32
  # System Prompts
 
20
 
21
  # Search Parameters
22
  # Search Parameters
23
+ DEFAULT_TOP_K_RETRIEVAL = 100 # Increased to improve recall
24
+ DEFAULT_TOP_K_RERANK = 10 # Reduce context window noise, show only best matches
25
  DEFAULT_TEMPERATURE = 0.5 # Balanced temperature for helpful but accurate answers
26
 
27
  # Retriever Tuning
28
  MIN_CHUNK_LENGTH = 50 # Filter out stub chunks like "Стаття 207."
29
+ HYBRID_ALPHA = 0.3 # Semantic weight (higher = more semantic focus)
30
  MIN_BM25_SCORE = 0.05 # Lower threshold to let good semantic hits through
31
 
32
  # System Prompts
data/embeddings.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5733ed92c92f88d694cec4f5b6b6ce0515c3236410f17833614b13fb883db6c
3
+ size 32942251
data/parsed_chunks.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd5fdcc621fd1a352f3e172dd01d976b3076c9efdacd35da05875afaa0c6f6d6
3
+ size 27563582
requirements.txt CHANGED
@@ -4,7 +4,7 @@ python-dotenv
4
  sentence-transformers
5
  rank_bm25
6
  torch
7
- numpy
8
  pymorphy3
9
  pymorphy3-dicts-uk
10
  beautifulsoup4
 
4
  sentence-transformers
5
  rank_bm25
6
  torch
7
+ numpy<2
8
  pymorphy3
9
  pymorphy3-dicts-uk
10
  beautifulsoup4