Update src/App.js
Browse files- src/App.js +60 -150
src/App.js
CHANGED
|
@@ -9,164 +9,60 @@ const UrologyLeaderboard = () => {
|
|
| 9 |
const [loading, setLoading] = useState(true);
|
| 10 |
const [refreshing, setRefreshing] = useState(false);
|
| 11 |
const [lastUpdated, setLastUpdated] = useState(null);
|
| 12 |
-
|
| 13 |
-
const mockData = [
|
| 14 |
-
{
|
| 15 |
-
model: "openai/gpt-4o-mini",
|
| 16 |
-
baseModel: "",
|
| 17 |
-
accuracy: 0.18543046357615803,
|
| 18 |
-
totalQuestions: 151,
|
| 19 |
-
correctAnswers: 28,
|
| 20 |
-
license: "API Service",
|
| 21 |
-
submitType: "openrouter",
|
| 22 |
-
submittedTime: "2025-05-22T16:53:10Z",
|
| 23 |
-
params: 0,
|
| 24 |
-
precision: "float16",
|
| 25 |
-
status: "FINISHED"
|
| 26 |
-
},
|
| 27 |
-
{
|
| 28 |
-
model: "anthropic/claude-3-sonnet",
|
| 29 |
-
baseModel: "claude-3-sonnet",
|
| 30 |
-
accuracy: 0.32450331125827815,
|
| 31 |
-
totalQuestions: 151,
|
| 32 |
-
correctAnswers: 49,
|
| 33 |
-
license: "API Service",
|
| 34 |
-
submitType: "openrouter",
|
| 35 |
-
submittedTime: "2025-05-22T14:30:25Z",
|
| 36 |
-
params: 0,
|
| 37 |
-
precision: "float16",
|
| 38 |
-
status: "FINISHED"
|
| 39 |
-
},
|
| 40 |
-
{
|
| 41 |
-
model: "meta-llama/llama-3.1-70b",
|
| 42 |
-
baseModel: "llama-3.1-70b",
|
| 43 |
-
accuracy: 0.27814569536423844,
|
| 44 |
-
totalQuestions: 151,
|
| 45 |
-
correctAnswers: 42,
|
| 46 |
-
license: "Apache 2.0",
|
| 47 |
-
submitType: "huggingface",
|
| 48 |
-
submittedTime: "2025-05-22T12:15:30Z",
|
| 49 |
-
params: 70000000000,
|
| 50 |
-
precision: "bfloat16",
|
| 51 |
-
status: "FINISHED"
|
| 52 |
-
},
|
| 53 |
-
{
|
| 54 |
-
model: "mistralai/mixtral-8x7b",
|
| 55 |
-
baseModel: "mixtral-8x7b",
|
| 56 |
-
accuracy: 0.23841059602649006,
|
| 57 |
-
totalQuestions: 151,
|
| 58 |
-
correctAnswers: 36,
|
| 59 |
-
license: "Apache 2.0",
|
| 60 |
-
submitType: "huggingface",
|
| 61 |
-
submittedTime: "2025-05-22T10:45:15Z",
|
| 62 |
-
params: 46700000000,
|
| 63 |
-
precision: "bfloat16",
|
| 64 |
-
status: "FINISHED"
|
| 65 |
-
},
|
| 66 |
-
{
|
| 67 |
-
model: "google/gemini-pro",
|
| 68 |
-
baseModel: "gemini-pro",
|
| 69 |
-
accuracy: 0.29801324503311255,
|
| 70 |
-
totalQuestions: 151,
|
| 71 |
-
correctAnswers: 45,
|
| 72 |
-
license: "API Service",
|
| 73 |
-
submitType: "google",
|
| 74 |
-
submittedTime: "2025-05-22T08:20:40Z",
|
| 75 |
-
params: 0,
|
| 76 |
-
precision: "float16",
|
| 77 |
-
status: "FINISHED"
|
| 78 |
-
},
|
| 79 |
-
{
|
| 80 |
-
model: "cohere/command-r-plus",
|
| 81 |
-
baseModel: "command-r-plus",
|
| 82 |
-
accuracy: 0.35761589403973510,
|
| 83 |
-
totalQuestions: 151,
|
| 84 |
-
correctAnswers: 54,
|
| 85 |
-
license: "API Service",
|
| 86 |
-
submitType: "cohere",
|
| 87 |
-
submittedTime: "2025-05-21T18:12:30Z",
|
| 88 |
-
params: 0,
|
| 89 |
-
precision: "float16",
|
| 90 |
-
status: "FINISHED"
|
| 91 |
-
},
|
| 92 |
-
{
|
| 93 |
-
model: "openai/gpt-4-turbo",
|
| 94 |
-
baseModel: "gpt-4-turbo",
|
| 95 |
-
accuracy: 0.41721854304635763,
|
| 96 |
-
totalQuestions: 151,
|
| 97 |
-
correctAnswers: 63,
|
| 98 |
-
license: "API Service",
|
| 99 |
-
submitType: "openai",
|
| 100 |
-
submittedTime: "2025-05-21T16:45:10Z",
|
| 101 |
-
params: 0,
|
| 102 |
-
precision: "float16",
|
| 103 |
-
status: "FINISHED"
|
| 104 |
-
},
|
| 105 |
-
{
|
| 106 |
-
model: "microsoft/phi-3-medium",
|
| 107 |
-
baseModel: "phi-3-medium",
|
| 108 |
-
accuracy: 0.21854304635761590,
|
| 109 |
-
totalQuestions: 151,
|
| 110 |
-
correctAnswers: 33,
|
| 111 |
-
license: "MIT",
|
| 112 |
-
submitType: "azure",
|
| 113 |
-
submittedTime: "2025-05-21T14:22:45Z",
|
| 114 |
-
params: 14000000000,
|
| 115 |
-
precision: "float16",
|
| 116 |
-
status: "FINISHED"
|
| 117 |
-
}
|
| 118 |
-
];
|
| 119 |
|
| 120 |
const loadData = async () => {
|
| 121 |
try {
|
| 122 |
-
// Intentar cargar datos reales desde HuggingFace
|
| 123 |
const response = await fetch('https://datasets-server.huggingface.co/rows?dataset=SASLeaderboard/results&config=default&split=train');
|
| 124 |
|
| 125 |
-
if (response.ok) {
|
| 126 |
-
|
| 127 |
-
console.log('Raw HuggingFace data:', data);
|
| 128 |
-
|
| 129 |
-
// Procesar los datos reales
|
| 130 |
-
const processedData = data.rows.map(row => {
|
| 131 |
-
const config = row.row.config;
|
| 132 |
-
const results = row.row.results;
|
| 133 |
-
|
| 134 |
-
return {
|
| 135 |
-
model: config.model || 'Unknown Model',
|
| 136 |
-
baseModel: config.base_model || '',
|
| 137 |
-
accuracy: results.overall?.accuracy || 0,
|
| 138 |
-
totalQuestions: results.overall?.total_questions || 151,
|
| 139 |
-
correctAnswers: Math.round((results.overall?.accuracy || 0) * (results.overall?.total_questions || 151)),
|
| 140 |
-
license: config.license || 'Unknown',
|
| 141 |
-
submitType: config.submit_type || 'unknown',
|
| 142 |
-
submittedTime: config.submitted_time || new Date().toISOString(),
|
| 143 |
-
params: config.params || 0,
|
| 144 |
-
precision: config.precision || 'unknown',
|
| 145 |
-
status: config.status || 'UNKNOWN'
|
| 146 |
-
};
|
| 147 |
-
});
|
| 148 |
-
|
| 149 |
-
console.log('Processed data:', processedData);
|
| 150 |
-
return processedData;
|
| 151 |
-
} else {
|
| 152 |
-
console.log('HuggingFace API failed, using mock data');
|
| 153 |
-
return mockData;
|
| 154 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 155 |
} catch (error) {
|
| 156 |
-
console.
|
| 157 |
-
|
| 158 |
}
|
| 159 |
};
|
| 160 |
|
| 161 |
const refreshData = async () => {
|
| 162 |
setRefreshing(true);
|
|
|
|
| 163 |
try {
|
| 164 |
const newData = await loadData();
|
| 165 |
-
|
| 166 |
-
setData(shuffledData);
|
| 167 |
setLastUpdated(new Date());
|
| 168 |
} catch (error) {
|
| 169 |
-
|
|
|
|
| 170 |
} finally {
|
| 171 |
setRefreshing(false);
|
| 172 |
}
|
|
@@ -174,10 +70,16 @@ const UrologyLeaderboard = () => {
|
|
| 174 |
|
| 175 |
useEffect(() => {
|
| 176 |
const initializeData = async () => {
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 181 |
};
|
| 182 |
initializeData();
|
| 183 |
}, []);
|
|
@@ -666,15 +568,23 @@ const UrologyLeaderboard = () => {
|
|
| 666 |
React.createElement('div', { style: styles.infoCard },
|
| 667 |
React.createElement('h3', { style: { fontSize: '20px', fontWeight: '600', color: 'white', marginBottom: '12px' } }, '📊 About This Evaluation'),
|
| 668 |
React.createElement('p', { style: { color: '#d1d5db', marginBottom: '12px' } }, 'This leaderboard evaluates natural language models on their ability to answer urology questions. Models must respond to multiple-choice questions about urological knowledge, demonstrating their understanding and mastery of this medical specialty.'),
|
| 669 |
-
React.createElement('p', { style: { color: '#d1d5db', marginBottom: '16px' } },
|
| 670 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 671 |
React.createElement('span', null, 'Dataset: ', React.createElement('a', {
|
| 672 |
href: 'https://huggingface.co/datasets/SASLeaderboard/results',
|
| 673 |
style: { color: '#60a5fa', textDecoration: 'none' },
|
| 674 |
target: '_blank',
|
| 675 |
rel: 'noopener noreferrer'
|
| 676 |
-
}, 'SASLeaderboard/results'))
|
| 677 |
-
lastUpdated && React.createElement('span', { style: { fontSize: '14px' } }, `Last updated: ${lastUpdated.toLocaleString('en-US')}`)
|
| 678 |
)
|
| 679 |
),
|
| 680 |
React.createElement('div', { style: styles.academicCard },
|
|
|
|
| 9 |
const [loading, setLoading] = useState(true);
|
| 10 |
const [refreshing, setRefreshing] = useState(false);
|
| 11 |
const [lastUpdated, setLastUpdated] = useState(null);
|
| 12 |
+
const [error, setError] = useState(null);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
|
| 14 |
const loadData = async () => {
|
| 15 |
try {
|
|
|
|
| 16 |
const response = await fetch('https://datasets-server.huggingface.co/rows?dataset=SASLeaderboard/results&config=default&split=train');
|
| 17 |
|
| 18 |
+
if (!response.ok) {
|
| 19 |
+
throw new Error(`HTTP ${response.status}: ${response.statusText}`);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
}
|
| 21 |
+
|
| 22 |
+
const data = await response.json();
|
| 23 |
+
console.log('Raw HuggingFace data:', data);
|
| 24 |
+
|
| 25 |
+
if (!data.rows || data.rows.length === 0) {
|
| 26 |
+
throw new Error('No data found in the dataset');
|
| 27 |
+
}
|
| 28 |
+
|
| 29 |
+
const processedData = data.rows.map(row => {
|
| 30 |
+
const config = row.row.config;
|
| 31 |
+
const results = row.row.results;
|
| 32 |
+
|
| 33 |
+
return {
|
| 34 |
+
model: config.model || 'Unknown Model',
|
| 35 |
+
baseModel: config.base_model || '',
|
| 36 |
+
accuracy: results.overall?.accuracy || 0,
|
| 37 |
+
totalQuestions: results.overall?.total_questions || 151,
|
| 38 |
+
correctAnswers: Math.round((results.overall?.accuracy || 0) * (results.overall?.total_questions || 151)),
|
| 39 |
+
license: config.license || 'Unknown',
|
| 40 |
+
submitType: config.submit_type || 'unknown',
|
| 41 |
+
submittedTime: config.submitted_time || new Date().toISOString(),
|
| 42 |
+
params: config.params || 0,
|
| 43 |
+
precision: config.precision || 'unknown',
|
| 44 |
+
status: config.status || 'UNKNOWN'
|
| 45 |
+
};
|
| 46 |
+
});
|
| 47 |
+
|
| 48 |
+
console.log('Processed data:', processedData);
|
| 49 |
+
return processedData;
|
| 50 |
} catch (error) {
|
| 51 |
+
console.error('Error loading data from HuggingFace:', error);
|
| 52 |
+
throw error;
|
| 53 |
}
|
| 54 |
};
|
| 55 |
|
| 56 |
const refreshData = async () => {
|
| 57 |
setRefreshing(true);
|
| 58 |
+
setError(null);
|
| 59 |
try {
|
| 60 |
const newData = await loadData();
|
| 61 |
+
setData(newData);
|
|
|
|
| 62 |
setLastUpdated(new Date());
|
| 63 |
} catch (error) {
|
| 64 |
+
setError(`Failed to load data: ${error.message}`);
|
| 65 |
+
setData([]);
|
| 66 |
} finally {
|
| 67 |
setRefreshing(false);
|
| 68 |
}
|
|
|
|
| 70 |
|
| 71 |
useEffect(() => {
|
| 72 |
const initializeData = async () => {
|
| 73 |
+
try {
|
| 74 |
+
const initialData = await loadData();
|
| 75 |
+
setData(initialData);
|
| 76 |
+
setLastUpdated(new Date());
|
| 77 |
+
} catch (error) {
|
| 78 |
+
setError(`Failed to load data: ${error.message}`);
|
| 79 |
+
setData([]);
|
| 80 |
+
} finally {
|
| 81 |
+
setLoading(false);
|
| 82 |
+
}
|
| 83 |
};
|
| 84 |
initializeData();
|
| 85 |
}, []);
|
|
|
|
| 568 |
React.createElement('div', { style: styles.infoCard },
|
| 569 |
React.createElement('h3', { style: { fontSize: '20px', fontWeight: '600', color: 'white', marginBottom: '12px' } }, '📊 About This Evaluation'),
|
| 570 |
React.createElement('p', { style: { color: '#d1d5db', marginBottom: '12px' } }, 'This leaderboard evaluates natural language models on their ability to answer urology questions. Models must respond to multiple-choice questions about urological knowledge, demonstrating their understanding and mastery of this medical specialty.'),
|
| 571 |
+
React.createElement('p', { style: { color: '#d1d5db', marginBottom: '16px' } },
|
| 572 |
+
'Questions are from the SAS (Servicio Andaluz de Salud) for the ',
|
| 573 |
+
React.createElement('a', {
|
| 574 |
+
href: 'https://www.sspa.juntadeandalucia.es/servicioandaluzdesalud/profesionales/ofertas-de-empleo/oferta-de-empleo-publico-puestos-base/oep-extraordinaria-decreto-ley-122022-centros-sas/cuadro-de-evolucion-concurso-oposicion-centros-sas/fea-urologia',
|
| 575 |
+
target: '_blank',
|
| 576 |
+
rel: 'noopener noreferrer',
|
| 577 |
+
style: { color: '#60a5fa', textDecoration: 'none', fontWeight: '600' }
|
| 578 |
+
}, React.createElement('strong', null, 'Convocatoria Concurso Oposición')),
|
| 579 |
+
' - specialized medical examination for urology residents.'
|
| 580 |
+
),
|
| 581 |
+
React.createElement('div', { style: { display: 'flex', justifyContent: 'center', color: '#d1d5db' } },
|
| 582 |
React.createElement('span', null, 'Dataset: ', React.createElement('a', {
|
| 583 |
href: 'https://huggingface.co/datasets/SASLeaderboard/results',
|
| 584 |
style: { color: '#60a5fa', textDecoration: 'none' },
|
| 585 |
target: '_blank',
|
| 586 |
rel: 'noopener noreferrer'
|
| 587 |
+
}, 'SASLeaderboard/results'))
|
|
|
|
| 588 |
)
|
| 589 |
),
|
| 590 |
React.createElement('div', { style: styles.academicCard },
|