sedrick-keh-tri
support datasets table
bccb934
import gradio as gr
import sqlite3
import pandas as pd
from huggingface_hub import hf_hub_download
import os
# Download the database from HF dataset
def download_database():
    """Fetch ``vla_foundry.db`` from the ``TRI-ML/vla_foundry_db`` dataset repo.

    Returns:
        The path returned by ``hf_hub_download`` on success, or ``None`` when
        the download raised (the error is printed, not re-raised).
    """
    try:
        local_path = hf_hub_download(
            repo_type="dataset",
            repo_id="TRI-ML/vla_foundry_db",
            filename="vla_foundry.db",
        )
    except Exception as err:
        # Best-effort: callers treat None as "no database available".
        print(f"Error downloading database: {err}")
        return None
    else:
        return local_path
# Load data from database
def load_database_tables(db_path):
    """Load every table of the SQLite database at *db_path* into DataFrames.

    Args:
        db_path: Filesystem path of the SQLite file. ``None``, an empty
            string, or a path that does not exist yields an empty result.

    Returns:
        A ``(table_data, tables)`` tuple. ``table_data`` maps table name to
        its DataFrame for every table that loaded successfully; ``tables``
        lists every table name found in ``sqlite_master`` (including any that
        failed to load). Both are empty when the file is missing or cannot be
        read as a SQLite database.
    """
    if not db_path or not os.path.exists(db_path):
        return {}, []

    conn = sqlite3.connect(db_path)
    try:
        # Get all table names
        try:
            cursor = conn.execute("SELECT name FROM sqlite_master WHERE type='table';")
            tables = [row[0] for row in cursor.fetchall()]
        except sqlite3.Error as e:
            # e.g. the file exists but is not a valid SQLite database
            print(f"Error reading table list from {db_path}: {e}")
            return {}, []

        # Load each table into a DataFrame
        table_data = {}
        for table in tables:
            # Quote the identifier so names with spaces, keywords, or quotes
            # still produce valid SQL (embedded double quotes are doubled).
            quoted = '"' + table.replace('"', '""') + '"'
            try:
                table_data[table] = pd.read_sql_query(f"SELECT * FROM {quoted}", conn)
            except Exception as e:
                print(f"Error loading table {table}: {e}")
    finally:
        # Always release the connection, even if something above raised.
        conn.close()

    return table_data, tables
# Initialize database
# Runs at import time: fetch the SQLite file, then read every table into memory.
# When the download fails, db_path is None and load_database_tables returns
# an empty dict and an empty list.
db_path = download_database()
table_data, table_names = load_database_tables(db_path)
# Column width configuration (in pixels)
# Specify widths for specific columns across all tables
# Format: {"column_name": width_in_px}
# Columns not listed here fall back to 300 px where the widths are built.
column_widths = {
"created_by": 150,
"cfg": 500,
"git_commit_hash": 150,
"git_branch": 150,
"git_remote_url": 150,
"git_has_local_changes": 150,
"git_local_chances": 150,  # NOTE(review): possibly a typo for "git_local_changes" — confirm against the real column name
"dataset_source_paths": 500,
"model_checkpoints_path": 500,
"fixed_model_path": 500,
"completed": 150,
}
# Function to get table data with filters
def get_filtered_data(table_name, search_query="", column_filter="All", sort_column="", sort_order="Ascending"):
    """Return a filtered and sorted copy of one loaded table.

    Args:
        table_name: Key into the module-level ``table_data`` mapping.
        search_query: Text to look for in any column. Matched literally and
            case-insensitively against each cell's string form; blank or
            ``None`` disables the search.
        column_filter: ``"All"`` for no filter, otherwise a column name; only
            rows with a non-null value in that column are kept.
        sort_column: Column to sort by; falsy or unknown means no sorting.
        sort_order: ``"Ascending"`` sorts ascending; any other value sorts
            descending.

    Returns:
        A new DataFrame (the cached table is never mutated). An empty
        DataFrame is returned when *table_name* is not loaded.
    """
    if table_name not in table_data:
        return pd.DataFrame()

    df = table_data[table_name].copy()

    # Apply search filter across all columns
    if search_query and search_query.strip():
        # regex=False: the query is user-typed text, so characters such as
        # "(" or "[" must match literally instead of raising re.error.
        mask = df.astype(str).apply(
            lambda col: col.str.contains(search_query, case=False, na=False, regex=False)
        ).any(axis=1)
        df = df[mask]

    # Apply column-specific filter: keep rows where the column is non-null
    if column_filter != "All" and column_filter in df.columns:
        df = df[df[column_filter].notna()]

    # Apply sorting
    if sort_column and sort_column in df.columns:
        ascending = sort_order == "Ascending"
        try:
            df = df.sort_values(by=sort_column, ascending=ascending)
        except TypeError:
            # Mixed-type object columns (e.g. ints and strings) cannot be
            # compared directly; order them by their string form instead.
            df = df.sort_values(
                by=sort_column,
                ascending=ascending,
                key=lambda col: col.astype(str),
            )

    return df
def update_column_choices(table_name):
    """Build dropdown updates for the filter and sort selectors of a table.

    Returns:
        A ``(filter_update, sort_update)`` pair of ``gr.update`` results. For
        a loaded table the filter choices are ``"All"`` plus its columns
        (value reset to ``"All"``) and the sort choices are its columns
        (value reset to ``""``). For an unknown table only the choices are
        reset, to ``["All"]`` and ``[]`` respectively.
    """
    if table_name in table_data:
        column_names = list(table_data[table_name].columns)
        filter_update = gr.update(choices=["All", *column_names], value="All")
        sort_update = gr.update(choices=column_names, value="")
        return filter_update, sort_update

    return gr.update(choices=["All"]), gr.update(choices=[])
def get_table_info(table_name):
    """Return a Markdown summary (row count, column count, column names).

    Returns the literal ``"No table selected"`` when *table_name* is not a
    key of the module-level ``table_data`` mapping.
    """
    if table_name not in table_data:
        return "No table selected"

    frame = table_data[table_name]
    column_names = frame.columns
    summary_lines = [
        # The trailing newline here yields the blank line after the heading.
        f"**Table: {table_name}**\n",
        f"- Total rows: {len(frame)}",
        f"- Total columns: {len(column_names)}",
        f"- Columns: {', '.join(column_names)}",
    ]
    return "\n".join(summary_lines) + "\n"
# Custom stylesheet handed to gr.Blocks(css=...) when the interface is created.
# It keeps table cells on a single line with compact font and padding, caps the
# .table-wrap height at 600px with vertical scrolling, forces a fixed table
# layout, and strips background, border, and focus outline from cell inputs.
# NOTE(review): none of these rules disables input, so whether cells can be
# edited is decided by the Dataframe component, not by this stylesheet —
# confirm that matches the "read-only" intent stated in the CSS comments.
css = """
/* Default table cell styling */
table td,
table th {
white-space: nowrap !important;
overflow-x: auto !important;
font-size: 12px !important;
padding: 4px 8px !important;
}
/* Make table container scrollable */
.table-wrap {
max-height: 600px !important;
overflow-y: auto !important;
overflow-x: visible !important;
}
/* Ensure table uses fixed layout */
table {
table-layout: fixed !important;
}
/* Prevent editing - make cells read-only but allow selection */
table input,
table textarea {
background-color: transparent !important;
border: none !important;
cursor: text !important;
user-select: text !important;
}
/* Make cells read-only by preventing editing on focus */
table input:focus,
table textarea:focus {
outline: none !important;
}
"""
def _column_width_list(df):
    """Return the pixel width for each column of *df*, or None if it has no columns.

    Widths come from the module-level ``column_widths`` mapping, defaulting to
    300. They depend on the columns only, so a filter that matches zero rows
    keeps the same layout.
    """
    widths = [column_widths.get(col, 300) for col in df.columns]
    return widths or None


with gr.Blocks(title="VLA Foundry Database Viewer", css=css) as demo:
    gr.Markdown("# 🤖 VLA Foundry Database Viewer")
    gr.Markdown("Explore the VLA Foundry database with searchable, filterable, and sortable tables.")

    if not table_data:
        gr.Markdown("⚠️ **Error**: Could not load database. Please check if the database file exists in the dataset.")
    else:
        # Start on the first table that actually loaded; table_names may also
        # list tables whose load failed, which have no entry in table_data.
        initial_table = next((name for name in table_names if name in table_data), None)
        initial_df = table_data[initial_table] if initial_table is not None else pd.DataFrame()
        initial_columns = list(initial_df.columns)

        # Top section: Table selector and info
        with gr.Row():
            with gr.Column(scale=2):
                table_selector = gr.Dropdown(
                    choices=table_names,
                    label="Select Table",
                    value=initial_table,
                )
            with gr.Column(scale=3):
                table_info = gr.Markdown(
                    value=get_table_info(initial_table) if initial_table is not None else ""
                )

        # Filters and sorting section
        with gr.Row():
            search_box = gr.Textbox(
                label="Search (across all columns)",
                placeholder="Enter search term...",
                value="",
                scale=2,
            )
            # Seed the choices from the initially displayed table: the
            # selector's change event does not fire on first render, so
            # otherwise these dropdowns would be empty until a table switch.
            column_filter = gr.Dropdown(
                choices=["All"] + initial_columns,
                label="Filter by Column (show non-null)",
                value="All",
                scale=1,
            )
            sort_column = gr.Dropdown(
                choices=initial_columns,
                label="Sort by Column",
                value="",
                scale=1,
            )
            sort_order = gr.Radio(
                choices=["Ascending", "Descending"],
                label="Sort Order",
                value="Ascending",
                scale=1,
            )
            clear_btn = gr.Button("Clear Filters", scale=1)

        # Data table with custom column widths
        data_table = gr.Dataframe(
            value=initial_df,
            label="Table Data",
            interactive=True,
            wrap=False,
            column_widths=_column_width_list(initial_df),
        )

        # Event handlers
        def update_table(table_name, search, col_filter, sort_col, sort_ord):
            """Re-run the current filters/sort and refresh the table and info panel."""
            filtered_df = get_filtered_data(table_name, search, col_filter, sort_col, sort_ord)
            info = get_table_info(table_name)
            table_update = gr.update(
                value=filtered_df,
                column_widths=_column_width_list(filtered_df),
            )
            return table_update, info

        # Update columns when table changes
        def on_table_change(table_name):
            """Switch tables: reset every filter and repopulate the column dropdowns."""
            col_filter_update, sort_col_update = update_column_choices(table_name)
            # Show the new table with all filters cleared
            filtered_df = get_filtered_data(table_name, "", "All", "", "Ascending")
            info = get_table_info(table_name)
            table_update = gr.update(
                value=filtered_df,
                column_widths=_column_width_list(filtered_df),
            )
            return col_filter_update, sort_col_update, table_update, info, "", "Ascending"

        table_selector.change(
            fn=on_table_change,
            inputs=[table_selector],
            outputs=[column_filter, sort_column, data_table, table_info, search_box, sort_order],
        )

        # Update table when filters/sorting change
        for component in [search_box, column_filter, sort_column, sort_order]:
            component.change(
                fn=update_table,
                inputs=[table_selector, search_box, column_filter, sort_column, sort_order],
                outputs=[data_table, table_info],
            )

        # Clear filters
        def clear_filters():
            """Return the default values for search, column filter, sort column, sort order."""
            return "", "All", "", "Ascending"

        clear_btn.click(
            fn=clear_filters,
            outputs=[search_box, column_filter, sort_column, sort_order],
        ).then(
            fn=update_table,
            inputs=[table_selector, search_box, column_filter, sort_column, sort_order],
            outputs=[data_table, table_info],
        )

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()