import gradio as gr
import requests
import json
import os
import threading
from models import OptimizeRequest, AutotuneRequest, QARequest
from api import start_api
# Start FastAPI server in background
threading.Thread(target=start_api, daemon=True).start()
# Base URL for internal calls
BASE_INTERNAL = "http://127.0.0.1:8000"
def call_api(endpoint: str, payload: dict) -> str:
    """POST a JSON payload to the internal FastAPI server and return the response (or error) as a string."""
    try:
        r = requests.post(f"{BASE_INTERNAL}{endpoint}", json=payload, timeout=120)
        return json.dumps(r.json(), indent=2)
    except Exception as e:
        return str(e)
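# Example call (illustrative values; the full field set is defined by the request models in models.py):
#   call_api("/autotune_rag", {"docs_path": "data/docs", "search_type": "grid", "trials": 5})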
def clear_cache_tool(docs_path="data/docs"):
    """
    🧹 Clear Cache MCP Tool
    Deletes all files and directories inside docs_path on the server.
    Accepts:
    - docs_path: local path (str), default "data/docs"
    """
try:
r = requests.post(
f"{BASE_INTERNAL}/clear_cache",
data={"docs_path": docs_path},
timeout=60
)
r.raise_for_status()
return r.json()
except Exception as e:
return {"error": str(e)}
def upload_docs_tool(files, docs_path="data/docs"):
"""
Upload documents to the server's docs folder via FastAPI /upload_docs.
Accepts:
- local file paths (str)
- URLs (str)
- file-like objects
"""
import shutil, tempfile
os.makedirs(docs_path, exist_ok=True)
files_payload = []
temp_files = []
try:
for f in files:
if isinstance(f, str) and f.startswith(("http://", "https://")):
                # Download the URL to a temp file (text responses are re-encoded as UTF-8)
resp = requests.get(f, timeout=60)
resp.raise_for_status()
# create temp file with proper extension
ext = os.path.splitext(f)[1] or ".txt"
tmp = tempfile.NamedTemporaryFile(delete=False, suffix=ext)
if "text" in resp.headers.get("Content-Type", "").lower():
tmp.write(resp.text.encode("utf-8"))
else:
tmp.write(resp.content)
tmp.close()
temp_files.append(tmp.name)
files_payload.append(("files", open(tmp.name, "rb")))
elif isinstance(f, str):
# Local file path
files_payload.append(("files", open(f, "rb")))
else:
# File-like object
files_payload.append(("files", f))
        resp = requests.post(
            f"{BASE_INTERNAL}/upload_docs",
            files=files_payload,
            data={"docs_path": docs_path},
            timeout=120
        )
resp.raise_for_status()
return resp.json()
finally:
        # Close all file handles we opened (caller-provided file-likes may lack .closed)
        for _, file_obj in files_payload:
            if not getattr(file_obj, "closed", True):
                file_obj.close()
# Clean up temp files
for tmp_file in temp_files:
try:
os.unlink(tmp_file)
except Exception:
pass
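# Example usage (illustrative URL): upload_docs_tool(["https://example.com/sample.txt"], "data/docs")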
def optimize_rag_tool_(payload: str) -> str:
"""🔧 Explicit optimization request: user provides all pipeline configs manually."""
return call_api("/optimize_rag", json.loads(payload))
def autotune_tool_(payload: str) -> str:
"""🔧 Autotune RAG: recommends chunk sizes and embedding models automatically."""
return call_api("/autotune_rag", json.loads(payload))
def generate_qa_tool_(payload: str) -> str:
"""🧩 Generates a validation QA dataset for RAG evaluation."""
return call_api("/generate_validation_qa", json.loads(payload))
# Expose the full Pydantic request schemas as the tools' docstrings (overrides the short ones above)
optimize_rag_tool_.__doc__ = OptimizeRequest.__doc__
autotune_tool_.__doc__ = AutotuneRequest.__doc__
generate_qa_tool_.__doc__ = QARequest.__doc__
def model_to_json(model_cls) -> str:
    """Render a Pydantic model's field defaults as an indented JSON template."""
    return json.dumps({k: v.default for k, v in model_cls.model_fields.items()}, indent=2)
# Default inputs
DEFAULT_UPLOAD_PATH = "data/docs"
DEFAULT_OPTIMIZE_JSON = model_to_json(OptimizeRequest)
DEFAULT_AUTOTUNE_JSON = model_to_json(AutotuneRequest)
DEFAULT_QA_JSON = model_to_json(QARequest)
with gr.Blocks(theme=gr.themes.Soft()) as demo:
gr.Markdown("# 🤖 Ragmint MCP Server")
gr.HTML("""
<div style="display:flex; gap:5px; flex-wrap:wrap; align-items:center;">
<a href="https://huggingface.co/spaces/andyolivers/ragmint-mcp-server">
<img src="https://img.shields.io/badge/HF-Space-blue" alt="HF Space">
</a>
<img src="https://img.shields.io/badge/Python-3.9%2B-blue?logo=python" alt="Python">
<a href="https://pypi.org/project/ragmint/">
<img src="https://img.shields.io/pypi/v/ragmint?color=blue" alt="HF Space">
</a>
<img src="https://img.shields.io/badge/License-Apache%202.0-green" alt="License">
<img src="https://img.shields.io/badge/MCP-Enabled-green" alt="MCP">
<img src="https://img.shields.io/badge/Status-Beta-orange" alt="Status">
<img src="https://img.shields.io/badge/Optuna-Bayesian%20Optimization-6f42c1?logo=optuna&logoColor=white" alt="Optuna">
<img src="https://img.shields.io/badge/Google%20Gemini-LLM-lightblue?logo=google&logoColor=white" alt="Google Gemini 2.5">
</div>
""")
gr.Markdown("""
**AI-Powered Optimization for RAG Pipelines**
This server provides **6 MCP Tools** for RAG pipeline tuning, dataset generation & workspace control — all programmatically accessible through MCP clients like **Claude Desktop, Cursor, VS Code MCP Extension**, and more.
<br>
## 🔧 MCP Tools (AI-Driven & Automated)
- 📄 **Upload Docs**: Upload .txt files to workspace for evaluation
- 🔗 **Upload URLs**: Import remote .txt docs via URLs
- 🧠 **Optimize RAG**: Full hyperparameter search (Grid / Random / Bayesian) with metrics
- ⚙️ **Autotune RAG**: Automated recommendations for best chunking + embeddings
- ❓ **Generate QA Dataset**: Create validation QA pairs with LLMs for benchmarking
- 🧹 **Clear Cache**: Reset workspace and delete stored docs
<br>
## 🧠 What Ragmint Solves
- Automated RAG hyperparameter optimization
- Retriever, embedding, reranker selection
- Synthetic validation QA generation
- Evaluation metrics (faithfulness, latency, etc.)
- Experiment tracking & reproducible pipeline comparison
🔬 **Built for RAG engineers, researchers, and LLM developers** who want consistent performance improvement without trial-and-error.
<br>
## 🧠 Powered by
- **Optuna** (Bayesian Optimization)
- **Google Gemini 2.5 Flash Lite / Pro**
- **FAISS, Chroma, BM25, scikit-learn retrievers**
- **Sentence-Transformers / BGE embeddings**
<br>
## 🌐 MCP Connection
**HuggingFace Space**
https://huggingface.co/spaces/andyolivers/ragmint-mcp-server
**MCP Endpoint (SSE — Recommended)**
https://andyolivers-ragmint-mcp-server.hf.space/gradio_api/mcp/sse
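A minimal connection sketch using the official `mcp` Python SDK (assuming `pip install mcp`; client setup details can vary across SDK versions):
```python
import asyncio
from mcp import ClientSession
from mcp.client.sse import sse_client

URL = "https://andyolivers-ragmint-mcp-server.hf.space/gradio_api/mcp/sse"

async def main():
    # Open an SSE transport to the Space and list the exposed MCP tools
    async with sse_client(URL) as (read, write):
        async with ClientSession(read, write) as session:
            await session.initialize()
            tools = await session.list_tools()
            print([tool.name for tool in tools.tools])

asyncio.run(main())
```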
<br>
## 📦 Example MCP Use Cases
- 🧠 Run Auto-Optimization for RAG pipelines
- 📊 Compare embedding + retriever combinations
- ❓ Automatically generate QA validation datasets
- 🔁 Rapid experiment iteration inside Claude / Cursor
<br>
## 🧩 MCP Tools Overview
| MCP Tool | Core Function |
|----------|---------------|
| upload_docs | Upload .txt documents |
| upload_urls | Import documents from external URLs |
| optimize_rag | Hyperparameter search with metrics |
| autotune | Automated RAG configuration suggestions |
| generate_qa | Synthetic QA generation |
| clear_cache | Clean workspace |
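Each tool wraps a FastAPI route on the internal server (`/upload_docs`, `/optimize_rag`, `/autotune_rag`, `/generate_validation_qa`, `/clear_cache`). As a quick sanity check against a locally running instance, the routes can also be called over plain HTTP (illustrative values; see the request models for the full schema):
```python
import requests

payload = {"docs_path": "data/docs", "search_type": "grid", "trials": 5}
r = requests.post("http://127.0.0.1:8000/autotune_rag", json=payload, timeout=120)
print(r.json())
```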
---
""")
with gr.Tab("📂 Upload"):
with gr.Row():
# Upload Documents
with gr.Column(scale=1):
gr.Markdown("## Upload Documents")
gr.Markdown("📂 Upload files (local paths or URLs) to your `data/docs` folder")
upload_files = gr.File(file_count="multiple", type="filepath")
upload_path = gr.Textbox(value=DEFAULT_UPLOAD_PATH, label="Docs Path")
upload_btn = gr.Button("Upload", variant="primary")
upload_out = gr.JSON(label="Response")
upload_btn.click(upload_docs_tool, inputs=[upload_files, upload_path], outputs=upload_out)
# Upload MCP Documents (no file uploader)
with gr.Column(scale=1):
gr.Markdown("## Upload Documents from URLs")
gr.Markdown("📂 Upload files (URLs) to your `data/docs` folder on MCP.")
                upload_mcp_input = gr.TextArea(
                    placeholder="Paste URLs, one per line",
                    label="URLs"
                )
                def upload_urls_tool(text, path):
                    """
                    Upload remote documents to the server's docs folder via FastAPI /upload_docs.
                    Accepts:
                    - text: newline-separated URLs (str)
                    - path: destination docs folder (str)
                    """
                    urls = [u.strip() for u in text.split("\n") if u.strip()]
                    return upload_docs_tool(urls, path)
upload_mcp_path = gr.Textbox(value=DEFAULT_UPLOAD_PATH, label="Docs Path")
upload_mcp_btn = gr.Button("Upload", variant="primary")
upload_mcp_out = gr.JSON(label="Response")
upload_mcp_btn.click(
upload_urls_tool,
inputs=[upload_mcp_input, upload_mcp_path],
outputs=upload_mcp_out
)
gr.Markdown("---")
with gr.Tab("⚡ Autotune"):
# Autotune RAG
with gr.Column():
gr.Markdown("## Autotune RAG")
gr.Markdown(" ⚡ Automatically tunes RAG pipeline parameters based on document analysis.")
with gr.Accordion("⚙ Settings", open=False):
docs_path = gr.Textbox(value="data/docs", label="Docs Path")
embedding_model = gr.Textbox(
value="sentence-transformers/all-MiniLM-L6-v2",
label="Embedding Model"
)
                num_chunk_pairs = gr.Slider(
                    minimum=1, maximum=20, step=1, value=5, label="Number of Chunk Pairs"
                )
metric = gr.Dropdown(
choices=["faithfulness"],
value="faithfulness",
label="Metric"
)
search_type = gr.Dropdown(
choices=["grid", "random", "bayesian"],
value="grid",
label="Search Type"
)
trials = gr.Slider(
minimum=1, maximum=100, step=1, value=5, label="Optimization Trials"
)
validation_choice = gr.Dropdown(
choices=["generate", ""],
value="generate",
label="Validation Choice"
)
llm_model = gr.Textbox(
value="gemini-2.5-flash-lite",
label="LLM Model"
)
autotune_btn = gr.Button("Autotune", variant="primary")
autotune_out = gr.Textbox(label="Response", lines=15)
def autotune_tool(*args):
(
docs_path, embedding_model, num_chunk_pairs, metric,
search_type, trials, validation_choice, llm_model
) = args
payload = {
"docs_path": docs_path,
"embedding_model": embedding_model,
"num_chunk_pairs": num_chunk_pairs,
"metric": metric,
"search_type": search_type,
"trials": trials,
"validation_choice": validation_choice,
"llm_model": llm_model
}
return autotune_tool_(json.dumps(payload))
autotune_tool.__doc__ = AutotuneRequest.__doc__
autotune_btn.click(
autotune_tool,
inputs=[
docs_path, embedding_model, num_chunk_pairs, metric,
search_type, trials, validation_choice, llm_model
],
outputs=autotune_out
)
with gr.Accordion("Parameter Information", open=False):
gr.Markdown(AutotuneRequest.__doc__ or "No description available.")
gr.Markdown("---")
with gr.Tab("🔧 Optimize"):
# Optimize RAG
with gr.Column():
gr.Markdown("## Optimize RAG")
gr.Markdown("🔧 Explicit optimization request for RAG (Retrieval-Augmented Generation) pipelines.")
# Parameters accordion
with gr.Accordion("⚙ Settings", open=False):
docs_path = gr.Textbox(value="data/docs", label="Docs Path")
            retriever = gr.CheckboxGroup(
                choices=["faiss", "chroma", "numpy", "sklearn", "bm25"],
                value=["faiss"],
                label="Retriever(s)"
            )
embedding_model = gr.Textbox(
value="sentence-transformers/all-MiniLM-L6-v2",
label="Embedding Model(s) (comma-separated)"
)
            strategy = gr.CheckboxGroup(
                choices=["fixed", "token", "sentence"],
                value=["fixed"],
                label="Chunking Strategy"
            )
chunk_sizes = gr.Textbox(
value="200,400,600",
label="Chunk Sizes (comma-separated integers)"
)
overlaps = gr.Textbox(
value="50,100,200",
label="Overlaps (comma-separated integers)"
)
rerankers = gr.Dropdown(
choices=["mmr"],
value="mmr",
label="Rerankers"
)
search_type = gr.Dropdown(
choices=["grid", "random", "bayesian"],
value="grid",
label="Search Type"
)
trials = gr.Slider(
minimum=1, maximum=100, step=1, value=5,
label="Number of Trials"
)
metric = gr.Dropdown(
choices=["faithfulness"],
value="faithfulness",
label="Metric"
)
validation_choice = gr.Dropdown(
choices=["generate", ""],
value="generate",
label="Validation Choice"
)
llm_model = gr.Textbox(
value="gemini-2.5-flash-lite",
label="LLM Model"
)
optimize_btn = gr.Button("Optimize", variant="primary")
optimize_out = gr.Textbox(label="Response", lines=15)
# Function to convert inputs into payload and call API
        def optimize_rag_tool(*args):
            (
                docs_path, retriever, embedding_model, strategy, chunk_sizes,
                overlaps, rerankers, search_type, trials, metric,
                validation_choice, llm_model
            ) = args
            # CheckboxGroups already return lists; comma-separated textboxes
            # and dropdowns still need splitting.
            def as_list(value):
                if isinstance(value, list):
                    return value
                return [v.strip() for v in value.split(",") if v.strip()]
            payload = {
                "docs_path": docs_path,
                "retriever": as_list(retriever),
                "embedding_model": as_list(embedding_model),
                "strategy": as_list(strategy),
                "chunk_sizes": [int(c) for c in chunk_sizes.split(",") if c.strip()],
                "overlaps": [int(o) for o in overlaps.split(",") if o.strip()],
                "rerankers": as_list(rerankers),
                "search_type": search_type,
                "trials": trials,
                "metric": metric,
                "validation_choice": validation_choice,
                "llm_model": llm_model
            }
            return optimize_rag_tool_(json.dumps(payload))
optimize_rag_tool.__doc__ = OptimizeRequest.__doc__
optimize_btn.click(
optimize_rag_tool,
inputs=[
docs_path, retriever, embedding_model, strategy, chunk_sizes,
overlaps, rerankers, search_type, trials, metric,
validation_choice, llm_model
],
outputs=optimize_out
)
with gr.Accordion("Parameter Information", open=False):
gr.Markdown(OptimizeRequest.__doc__ or "No description available.")
gr.Markdown("---")
with gr.Tab("🧩 Generate QA"):
# Generate QA
with gr.Column():
'''gr.Markdown("## Generate QA")
gr.Markdown(QARequest.__doc__ or "No description available.")
qa_input = gr.Textbox(lines=12, value=DEFAULT_QA_JSON, label="QARequest JSON")
qa_btn = gr.Button("Submit", variant="primary")
qa_out = gr.Textbox(lines=15, label="Response")
qa_btn.click(generate_qa_tool, inputs=qa_input, outputs=qa_out)
gr.Markdown("---")'''
gr.Markdown("## Generate QA")
gr.Markdown("🧩 Generate a validation QA dataset from documents for RAG evaluation.")
with gr.Tab("🧩 Generate QA"):
with gr.Accordion("⚙ Settings", open=False):
docs_path = gr.Textbox(value="data/docs", label="Docs Path")
llm_model = gr.Textbox(value="gemini-2.5-flash-lite", label="LLM Model")
batch_size = gr.Slider(1, 50, step=1, value=5, label="Batch Size")
min_q = gr.Slider(1, 20, step=1, value=3, label="Min Questions")
max_q = gr.Slider(1, 50, step=1, value=25, label="Max Questions")
qa_btn = gr.Button("Generate QA", variant="primary")
qa_out = gr.Textbox(lines=15, label="Response")
def generate_qa_tool(*args):
docs_path, llm_model, batch_size, min_q, max_q = args
return generate_qa_tool_(json.dumps({
"docs_path": docs_path,
"llm_model": llm_model,
"batch_size": batch_size,
"min_q": min_q,
"max_q": max_q
}))
generate_qa_tool.__doc__ = QARequest.__doc__
qa_btn.click(
generate_qa_tool,
inputs=[docs_path, llm_model, batch_size, min_q, max_q],
outputs=qa_out
)
with gr.Accordion("Parameter Information", open=False):
gr.Markdown(QARequest.__doc__ or "No description available.")
gr.Markdown("---")
with gr.Tab("🧹 Clear Cache"):
# Clear Cache
with gr.Column():
gr.Markdown("## Clear Cache")
gr.Markdown("🧹 Deletes all files and directories inside docs_path on the server.")
clear_path = gr.Textbox(value=DEFAULT_UPLOAD_PATH, label="Docs Path to Clear")
clear_btn = gr.Button("Clear Cache", variant="primary")
clear_out = gr.JSON(label="Response")
clear_btn.click(clear_cache_tool, inputs=[clear_path], outputs=clear_out)
gr.Markdown("---")
if __name__ == "__main__":
demo.launch(
server_name="0.0.0.0",
server_port=7860,
mcp_server=True,
show_error=True
)