import gradio as gr
import requests
import json
import os
import threading
from models import OptimizeRequest, AutotuneRequest, QARequest
from api import start_api
# Start FastAPI server in background
threading.Thread(target=start_api, daemon=True).start()
# Base URL for internal calls
BASE_INTERNAL = "http://127.0.0.1:8000"
def call_api(endpoint: str, payload: dict) -> str:
    """POST a JSON payload to the internal FastAPI server and return the response (or error) as a string."""
    try:
        r = requests.post(f"{BASE_INTERNAL}{endpoint}", json=payload, timeout=120)
        return json.dumps(r.json(), indent=2)
    except Exception as e:
        return str(e)
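# Example call (illustrative values; the full field set is defined by the request models in models.py):
#   call_api("/autotune_rag", {"docs_path": "data/docs", "search_type": "grid", "trials": 5})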
def clear_cache_tool(docs_path="data/docs"):
    """
    🧹 Clear Cache MCP Tool
    Deletes all files and directories inside docs_path on the server.
    Accepts:
    - docs_path: local path (str), default "data/docs"
    """
try:
r = requests.post(
f"{BASE_INTERNAL}/clear_cache",
data={"docs_path": docs_path},
timeout=60
)
r.raise_for_status()
return r.json()
except Exception as e:
return {"error": str(e)}
def upload_docs_tool(files, docs_path="data/docs"):
"""
Upload documents to the server's docs folder via FastAPI /upload_docs.
Accepts:
- local file paths (str)
- URLs (str)
- file-like objects
"""
import shutil, tempfile
os.makedirs(docs_path, exist_ok=True)
files_payload = []
temp_files = []
try:
for f in files:
if isinstance(f, str) and f.startswith(("http://", "https://")):
                # Download the URL to a temp file (text responses are re-encoded as UTF-8)
resp = requests.get(f, timeout=60)
resp.raise_for_status()
# create temp file with proper extension
ext = os.path.splitext(f)[1] or ".txt"
tmp = tempfile.NamedTemporaryFile(delete=False, suffix=ext)
if "text" in resp.headers.get("Content-Type", "").lower():
tmp.write(resp.text.encode("utf-8"))
else:
tmp.write(resp.content)
tmp.close()
temp_files.append(tmp.name)
files_payload.append(("files", open(tmp.name, "rb")))
elif isinstance(f, str):
# Local file path
files_payload.append(("files", open(f, "rb")))
else:
# File-like object
files_payload.append(("files", f))
        resp = requests.post(
            f"{BASE_INTERNAL}/upload_docs",
            files=files_payload,
            data={"docs_path": docs_path},
            timeout=120
        )
resp.raise_for_status()
return resp.json()
finally:
        # Close all file handles we opened (caller-provided file-likes may lack .closed)
        for _, file_obj in files_payload:
            if not getattr(file_obj, "closed", True):
                file_obj.close()
# Clean up temp files
for tmp_file in temp_files:
try:
os.unlink(tmp_file)
except Exception:
pass
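# Example usage (illustrative URL): upload_docs_tool(["https://example.com/sample.txt"], "data/docs")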
def optimize_rag_tool_(payload: str) -> str:
"""🔧 Explicit optimization request: user provides all pipeline configs manually."""
return call_api("/optimize_rag", json.loads(payload))
def autotune_tool_(payload: str) -> str:
"""🔧 Autotune RAG: recommends chunk sizes and embedding models automatically."""
return call_api("/autotune_rag", json.loads(payload))
def generate_qa_tool_(payload: str) -> str:
"""🧩 Generates a validation QA dataset for RAG evaluation."""
return call_api("/generate_validation_qa", json.loads(payload))
# Expose the full Pydantic request schemas as the tools' docstrings (overrides the short ones above)
optimize_rag_tool_.__doc__ = OptimizeRequest.__doc__
autotune_tool_.__doc__ = AutotuneRequest.__doc__
generate_qa_tool_.__doc__ = QARequest.__doc__
def model_to_json(model_cls) -> str:
    """Render a Pydantic model's field defaults as an indented JSON template."""
    return json.dumps({k: v.default for k, v in model_cls.model_fields.items()}, indent=2)
# Default inputs
DEFAULT_UPLOAD_PATH = "data/docs"
DEFAULT_OPTIMIZE_JSON = model_to_json(OptimizeRequest)
DEFAULT_AUTOTUNE_JSON = model_to_json(AutotuneRequest)
DEFAULT_QA_JSON = model_to_json(QARequest)
with gr.Blocks(theme=gr.themes.Soft()) as demo:
gr.Markdown("# 🤖 Ragmint MCP Server")
gr.HTML("""
<div style="display:flex; gap:5px; flex-wrap:wrap; align-items:center;">
<a href="https://huggingface.co/spaces/andyolivers/ragmint-mcp-server">
<img src="https://img.shields.io/badge/HF-Space-blue" alt="HF Space">
</a>
<img src="https://img.shields.io/badge/Python-3.9%2B-blue?logo=python" alt="Python">
<a href="https://pypi.org/project/ragmint/">
<img src="https://img.shields.io/pypi/v/ragmint?color=blue" alt="HF Space">
</a>
<img src="https://img.shields.io/badge/License-Apache%202.0-green" alt="License">
<img src="https://img.shields.io/badge/MCP-Enabled-green" alt="MCP">
<img src="https://img.shields.io/badge/Status-Beta-orange" alt="Status">
<img src="https://img.shields.io/badge/Optuna-Bayesian%20Optimization-6f42c1?logo=optuna&logoColor=white" alt="Optuna">
<img src="https://img.shields.io/badge/Google%20Gemini-LLM-lightblue?logo=google&logoColor=white" alt="Google Gemini 2.5">
</div>
""")
gr.Markdown("""
**AI-Powered Optimization for RAG Pipelines**
This server provides **6 MCP Tools** for RAG pipeline tuning, dataset generation & workspace control — all programmatically accessible through MCP clients like **Claude Desktop, Cursor, VS Code MCP Extension**, and more.
<br>
## 🔧 MCP Tools (AI-Driven & Automated)
- 📄 **Upload Docs**: Upload .txt files to workspace for evaluation
- 🔗 **Upload URLs**: Import remote .txt docs via URLs
- 🧠 **Optimize RAG**: Full hyperparameter search (Grid / Random / Bayesian) with metrics
- ⚙️ **Autotune RAG**: Automated recommendations for best chunking + embeddings
- ❓ **Generate QA Dataset**: Create validation QA pairs with LLMs for benchmarking
- 🧹 **Clear Cache**: Reset workspace and delete stored docs
<br>
## 🧠 What Ragmint Solves
- Automated RAG hyperparameter optimization
- Retriever, embedding, reranker selection
- Synthetic validation QA generation
- Evaluation metrics (faithfulness, latency, etc.)
- Experiment tracking & reproducible pipeline comparison
🔬 **Built for RAG engineers, researchers, and LLM developers** who want consistent performance improvement without trial-and-error.
<br>
## 🧠 Powered by
- **Optuna** (Bayesian Optimization)
- **Google Gemini 2.5 Flash Lite / Pro**
- **FAISS, Chroma, BM25, scikit-learn retrievers**
- **Sentence-Transformers / BGE embeddings**
<br>
## 🌐 MCP Connection
**HuggingFace Space**
https://huggingface.co/spaces/andyolivers/ragmint-mcp-server
**MCP Endpoint (SSE — Recommended)**
https://andyolivers-ragmint-mcp-server.hf.space/gradio_api/mcp/sse
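A minimal connection sketch using the official `mcp` Python SDK (assuming `pip install mcp`; client setup details can vary across SDK versions):
```python
import asyncio
from mcp import ClientSession
from mcp.client.sse import sse_client

URL = "https://andyolivers-ragmint-mcp-server.hf.space/gradio_api/mcp/sse"

async def main():
    # Open an SSE transport to the Space and list the exposed MCP tools
    async with sse_client(URL) as (read, write):
        async with ClientSession(read, write) as session:
            await session.initialize()
            tools = await session.list_tools()
            print([tool.name for tool in tools.tools])

asyncio.run(main())
```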
<br>
## 📦 Example MCP Use Cases
- 🧠 Run Auto-Optimization for RAG pipelines
- 📊 Compare embedding + retriever combinations
- ❓ Automatically generate QA validation datasets
- 🔁 Rapid experiment iteration inside Claude / Cursor
<br>
## 🧩 MCP Tools Overview
| MCP Tool | Core Function |
|----------|---------------|
| upload_docs | Upload .txt documents |
| upload_urls | Import documents from external URLs |
| optimize_rag | Hyperparameter search with metrics |
| autotune | Automated RAG configuration suggestions |
| generate_qa | Synthetic QA generation |
| clear_cache | Clean workspace |
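Each tool wraps a FastAPI route on the internal server (`/upload_docs`, `/optimize_rag`, `/autotune_rag`, `/generate_validation_qa`, `/clear_cache`). As a quick sanity check against a locally running instance, the routes can also be called over plain HTTP (illustrative values; see the request models for the full schema):
```python
import requests

payload = {"docs_path": "data/docs", "search_type": "grid", "trials": 5}
r = requests.post("http://127.0.0.1:8000/autotune_rag", json=payload, timeout=120)
print(r.json())
```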
---
""")
with gr.Tab("📂 Upload"):
with gr.Row():
# Upload Documents
with gr.Column(scale=1):
gr.Markdown("## Upload Documents")
gr.Markdown("📂 Upload files (local paths or URLs) to your `data/docs` folder")
upload_files = gr.File(file_count="multiple", type="filepath")
upload_path = gr.Textbox(value=DEFAULT_UPLOAD_PATH, label="Docs Path")
upload_btn = gr.Button("Upload", variant="primary")
upload_out = gr.JSON(label="Response")
upload_btn.click(upload_docs_tool, inputs=[upload_files, upload_path], outputs=upload_out)
# Upload MCP Documents (no file uploader)
with gr.Column(scale=1):
gr.Markdown("## Upload Documents from URLs")
gr.Markdown("📂 Upload files (URLs) to your `data/docs` folder on MCP.")
                upload_mcp_input = gr.TextArea(
                    placeholder="Paste URLs, one per line",
                    label="URLs"
                )
                def upload_urls_tool(text, path):
                    """
                    Upload remote documents to the server's docs folder via FastAPI /upload_docs.
                    Accepts:
                    - text: newline-separated URLs (str)
                    - path: destination docs folder (str)
                    """
                    urls = [u.strip() for u in text.split("\n") if u.strip()]
                    return upload_docs_tool(urls, path)
upload_mcp_path = gr.Textbox(value=DEFAULT_UPLOAD_PATH, label="Docs Path")
upload_mcp_btn = gr.Button("Upload", variant="primary")
upload_mcp_out = gr.JSON(label="Response")
upload_mcp_btn.click(
upload_urls_tool,
inputs=[upload_mcp_input, upload_mcp_path],
outputs=upload_mcp_out
)
gr.Markdown("---")
with gr.Tab("⚡ Autotune"):
# Autotune RAG
with gr.Column():
gr.Markdown("## Autotune RAG")
gr.Markdown(" ⚡ Automatically tunes RAG pipeline parameters based on document analysis.")
with gr.Accordion("⚙ Settings", open=False):
docs_path = gr.Textbox(value="data/docs", label="Docs Path")
embedding_model = gr.Textbox(
value="sentence-transformers/all-MiniLM-L6-v2",
label="Embedding Model"
)
                num_chunk_pairs = gr.Slider(
                    minimum=1, maximum=20, step=1, value=5, label="Number of Chunk Pairs"
                )
metric = gr.Dropdown(
choices=["faithfulness"],
value="faithfulness",
label="Metric"
)
search_type = gr.Dropdown(
choices=["grid", "random", "bayesian"],
value="grid",
label="Search Type"
)
trials = gr.Slider(
minimum=1, maximum=100, step=1, value=5, label="Optimization Trials"
)
validation_choice = gr.Dropdown(
choices=["generate", ""],
value="generate",
label="Validation Choice"
)
llm_model = gr.Textbox(
value="gemini-2.5-flash-lite",
label="LLM Model"
)
autotune_btn = gr.Button("Autotune", variant="primary")
autotune_out = gr.Textbox(label="Response", lines=15)
def autotune_tool(*args):
(
docs_path, embedding_model, num_chunk_pairs, metric,
search_type, trials, validation_choice, llm_model
) = args
payload = {
"docs_path": docs_path,
"embedding_model": embedding_model,
"num_chunk_pairs": num_chunk_pairs,
"metric": metric,
"search_type": search_type,
"trials": trials,
"validation_choice": validation_choice,
"llm_model": llm_model
}
return autotune_tool_(json.dumps(payload))
autotune_tool.__doc__ = AutotuneRequest.__doc__
autotune_btn.click(
autotune_tool,
inputs=[
docs_path, embedding_model, num_chunk_pairs, metric,
search_type, trials, validation_choice, llm_model
],
outputs=autotune_out
)
with gr.Accordion("Parameter Information", open=False):
gr.Markdown(AutotuneRequest.__doc__ or "No description available.")
gr.Markdown("---")
with gr.Tab("🔧 Optimize"):
# Optimize RAG
with gr.Column():
gr.Markdown("## Optimize RAG")
gr.Markdown("🔧 Explicit optimization request for RAG (Retrieval-Augmented Generation) pipelines.")
# Parameters accordion
with gr.Accordion("⚙ Settings", open=False):
docs_path = gr.Textbox(value="data/docs", label="Docs Path")
            retriever = gr.CheckboxGroup(
                choices=["faiss", "chroma", "numpy", "sklearn", "bm25"],
                value=["faiss"],
                label="Retriever(s)"
            )
embedding_model = gr.Textbox(
value="sentence-transformers/all-MiniLM-L6-v2",
label="Embedding Model(s) (comma-separated)"
)
            strategy = gr.CheckboxGroup(
                choices=["fixed", "token", "sentence"],
                value=["fixed"],
                label="Chunking Strategy"
            )
chunk_sizes = gr.Textbox(
value="200,400,600",
label="Chunk Sizes (comma-separated integers)"
)
overlaps = gr.Textbox(
value="50,100,200",
label="Overlaps (comma-separated integers)"
)
rerankers = gr.Dropdown(
choices=["mmr"],
value="mmr",
label="Rerankers"
)
search_type = gr.Dropdown(
choices=["grid", "random", "bayesian"],
value="grid",
label="Search Type"
)
trials = gr.Slider(
minimum=1, maximum=100, step=1, value=5,
label="Number of Trials"
)
metric = gr.Dropdown(
choices=["faithfulness"],
value="faithfulness",
label="Metric"
)
validation_choice = gr.Dropdown(
choices=["generate", ""],
value="generate",
label="Validation Choice"
)
llm_model = gr.Textbox(
value="gemini-2.5-flash-lite",
label="LLM Model"
)
optimize_btn = gr.Button("Optimize", variant="primary")
optimize_out = gr.Textbox(label="Response", lines=15)
# Function to convert inputs into payload and call API
        def optimize_rag_tool(*args):
            (
                docs_path, retriever, embedding_model, strategy, chunk_sizes,
                overlaps, rerankers, search_type, trials, metric,
                validation_choice, llm_model
            ) = args
            # CheckboxGroups already return lists; comma-separated textboxes
            # and dropdowns still need splitting.
            def as_list(value):
                if isinstance(value, list):
                    return value
                return [v.strip() for v in value.split(",") if v.strip()]
            payload = {
                "docs_path": docs_path,
                "retriever": as_list(retriever),
                "embedding_model": as_list(embedding_model),
                "strategy": as_list(strategy),
                "chunk_sizes": [int(c) for c in chunk_sizes.split(",") if c.strip()],
                "overlaps": [int(o) for o in overlaps.split(",") if o.strip()],
                "rerankers": as_list(rerankers),
                "search_type": search_type,
                "trials": trials,
                "metric": metric,
                "validation_choice": validation_choice,
                "llm_model": llm_model
            }
            return optimize_rag_tool_(json.dumps(payload))
optimize_rag_tool.__doc__ = OptimizeRequest.__doc__
optimize_btn.click(
optimize_rag_tool,
inputs=[
docs_path, retriever, embedding_model, strategy, chunk_sizes,
overlaps, rerankers, search_type, trials, metric,
validation_choice, llm_model
],
outputs=optimize_out
)
with gr.Accordion("Parameter Information", open=False):
gr.Markdown(OptimizeRequest.__doc__ or "No description available.")
gr.Markdown("---")
with gr.Tab("🧩 Generate QA"):
# Generate QA
with gr.Column():
'''gr.Markdown("## Generate QA")
gr.Markdown(QARequest.__doc__ or "No description available.")
qa_input = gr.Textbox(lines=12, value=DEFAULT_QA_JSON, label="QARequest JSON")
qa_btn = gr.Button("Submit", variant="primary")
qa_out = gr.Textbox(lines=15, label="Response")
qa_btn.click(generate_qa_tool, inputs=qa_input, outputs=qa_out)
gr.Markdown("---")'''
gr.Markdown("## Generate QA")
gr.Markdown("🧩 Generate a validation QA dataset from documents for RAG evaluation.")
with gr.Tab("🧩 Generate QA"):
with gr.Accordion("⚙ Settings", open=False):
docs_path = gr.Textbox(value="data/docs", label="Docs Path")
llm_model = gr.Textbox(value="gemini-2.5-flash-lite", label="LLM Model")
batch_size = gr.Slider(1, 50, step=1, value=5, label="Batch Size")
min_q = gr.Slider(1, 20, step=1, value=3, label="Min Questions")
max_q = gr.Slider(1, 50, step=1, value=25, label="Max Questions")
qa_btn = gr.Button("Generate QA", variant="primary")
qa_out = gr.Textbox(lines=15, label="Response")
def generate_qa_tool(*args):
docs_path, llm_model, batch_size, min_q, max_q = args
return generate_qa_tool_(json.dumps({
"docs_path": docs_path,
"llm_model": llm_model,
"batch_size": batch_size,
"min_q": min_q,
"max_q": max_q
}))
generate_qa_tool.__doc__ = QARequest.__doc__
qa_btn.click(
generate_qa_tool,
inputs=[docs_path, llm_model, batch_size, min_q, max_q],
outputs=qa_out
)
with gr.Accordion("Parameter Information", open=False):
gr.Markdown(QARequest.__doc__ or "No description available.")
gr.Markdown("---")
with gr.Tab("🧹 Clear Cache"):
# Clear Cache
with gr.Column():
gr.Markdown("## Clear Cache")
gr.Markdown("🧹 Deletes all files and directories inside docs_path on the server.")
clear_path = gr.Textbox(value=DEFAULT_UPLOAD_PATH, label="Docs Path to Clear")
clear_btn = gr.Button("Clear Cache", variant="primary")
clear_out = gr.JSON(label="Response")
clear_btn.click(clear_cache_tool, inputs=[clear_path], outputs=clear_out)
gr.Markdown("---")
if __name__ == "__main__":
demo.launch(
server_name="0.0.0.0",
server_port=7860,
mcp_server=True,
show_error=True
)