initial implementation of leaderboard ratings chart

This commit is contained in:
ayana 2025-06-23 12:50:38 -07:00
parent 610680ac14
commit 098c2faae9
4 changed files with 99 additions and 8 deletions

19
package-lock.json generated
View file

@ -32,6 +32,7 @@
"@xyflow/svelte": "^0.1.19", "@xyflow/svelte": "^0.1.19",
"async": "^3.2.5", "async": "^3.2.5",
"bits-ui": "^0.21.15", "bits-ui": "^0.21.15",
"chart.js": "^4.5.0",
"codemirror": "^6.0.1", "codemirror": "^6.0.1",
"codemirror-lang-elixir": "^4.0.0", "codemirror-lang-elixir": "^4.0.0",
"codemirror-lang-hcl": "^0.1.0", "codemirror-lang-hcl": "^0.1.0",
@ -1870,6 +1871,12 @@
"@jridgewell/sourcemap-codec": "^1.4.14" "@jridgewell/sourcemap-codec": "^1.4.14"
} }
}, },
"node_modules/@kurkle/color": {
"version": "0.3.4",
"resolved": "https://registry.npmjs.org/@kurkle/color/-/color-0.3.4.tgz",
"integrity": "sha512-M5UknZPHRu3DEDWoipU6sE8PdkZ6Z/S+v4dD+Ke8IaNlpdSQah50lz1KtcFBa2vsdOnwbbnxJwVM4wty6udA5w==",
"license": "MIT"
},
"node_modules/@lezer/common": { "node_modules/@lezer/common": {
"version": "1.2.1", "version": "1.2.1",
"resolved": "https://registry.npmjs.org/@lezer/common/-/common-1.2.1.tgz", "resolved": "https://registry.npmjs.org/@lezer/common/-/common-1.2.1.tgz",
@ -4723,6 +4730,18 @@
"url": "https://github.com/chalk/chalk?sponsor=1" "url": "https://github.com/chalk/chalk?sponsor=1"
} }
}, },
"node_modules/chart.js": {
"version": "4.5.0",
"resolved": "https://registry.npmjs.org/chart.js/-/chart.js-4.5.0.tgz",
"integrity": "sha512-aYeC/jDgSEx8SHWZvANYMioYMZ2KX02W6f6uVfyteuCGcadDLcYVHdfdygsTQkQ4TKn5lghoojAsPj5pu0SnvQ==",
"license": "MIT",
"dependencies": {
"@kurkle/color": "^0.3.0"
},
"engines": {
"pnpm": ">=8"
}
},
"node_modules/check-error": { "node_modules/check-error": {
"version": "1.0.3", "version": "1.0.3",
"resolved": "https://registry.npmjs.org/check-error/-/check-error-1.0.3.tgz", "resolved": "https://registry.npmjs.org/check-error/-/check-error-1.0.3.tgz",

View file

@ -76,6 +76,7 @@
"@xyflow/svelte": "^0.1.19", "@xyflow/svelte": "^0.1.19",
"async": "^3.2.5", "async": "^3.2.5",
"bits-ui": "^0.21.15", "bits-ui": "^0.21.15",
"chart.js": "^4.5.0",
"codemirror": "^6.0.1", "codemirror": "^6.0.1",
"codemirror-lang-elixir": "^4.0.0", "codemirror-lang-elixir": "^4.0.0",
"codemirror-lang-hcl": "^0.1.0", "codemirror-lang-hcl": "^0.1.0",

View file

@ -93,8 +93,10 @@
// //
////////////////////// //////////////////////
let modelRatingHistory = new Map();
const rankHandler = async (similarities: Map<string, number> = new Map()) => { const rankHandler = async (similarities: Map<string, number> = new Map()) => {
const modelStats = calculateModelStats(feedbacks, similarities); const modelStats = calculateModelStats(feedbacks, similarities, modelRatingHistory);
rankedModels = $models rankedModels = $models
.filter((m) => m?.owned_by !== 'arena' && (m?.info?.meta?.hidden ?? false) !== true) .filter((m) => m?.owned_by !== 'arena' && (m?.info?.meta?.hidden ?? false) !== true)
@ -122,7 +124,8 @@
function calculateModelStats( function calculateModelStats(
feedbacks: Feedback[], feedbacks: Feedback[],
similarities: Map<string, number> similarities: Map<string, number>,
historyMap: Map<string, Array<{ timestamp: number; rating: number }>>
): Map<string, ModelStats> { ): Map<string, ModelStats> {
const stats = new Map<string, ModelStats>(); const stats = new Map<string, ModelStats>();
const K = 32; const K = 32;
@ -131,12 +134,21 @@
return stats.get(modelId) || { rating: 1000, won: 0, lost: 0 }; return stats.get(modelId) || { rating: 1000, won: 0, lost: 0 };
} }
/**
 * Applies one match result to a model's running stats and, when a history
 * map is available, records the resulting rating at the given timestamp.
 *
 * @param modelId - id of the model whose stats are updated
 * @param ratingChange - delta added to the model's current rating
 * @param outcome - 1 counts as a win, 0 as a loss; any other value only changes the rating
 * @param timestamp - stored unchanged on the appended history entry
 */
function updateStats(
	modelId: string,
	ratingChange: number,
	outcome: number,
	timestamp: number
) {
	const currentStats = getOrDefaultStats(modelId);
	currentStats.rating += ratingChange;
	if (outcome === 1) currentStats.won++;
	else if (outcome === 0) currentStats.lost++;
	stats.set(modelId, currentStats);

	if (historyMap) {
		// Get-or-create the per-model history so the push below never
		// dereferences an undefined Map.get() result.
		let history = historyMap.get(modelId);
		if (!history) {
			history = [];
			historyMap.set(modelId, history);
		}
		// The history keeps the rounded rating; the unrounded value stays in stats.
		history.push({ timestamp, rating: Math.round(currentStats.rating) });
	}
}
function calculateEloChange( function calculateEloChange(
@ -174,8 +186,8 @@
const changeA = calculateEloChange(statsA.rating, statsB.rating, outcome, similarity); const changeA = calculateEloChange(statsA.rating, statsB.rating, outcome, similarity);
const changeB = calculateEloChange(statsB.rating, statsA.rating, 1 - outcome, similarity); const changeB = calculateEloChange(statsB.rating, statsA.rating, 1 - outcome, similarity);
updateStats(modelA, changeA, outcome); updateStats(modelA, changeA, outcome, feedback.updated_at);
updateStats(modelB, changeB, 1 - outcome); updateStats(modelB, changeB, 1 - outcome, feedback.updated_at);
}); });
}); });
@ -326,10 +338,11 @@
}); });
</script> </script>
<!-- Detail modal for the selected model; also receives the rating history map. -->
<ModelModal
	bind:show={showLeaderboardModal}
	model={selectedModel}
	{feedbacks}
	{modelRatingHistory}
	onClose={closeLeaderboardModal}
/>

View file

@ -1,8 +1,9 @@
<script lang="ts"> <script lang="ts">
import Modal from '$lib/components/common/Modal.svelte'; import Modal from '$lib/components/common/Modal.svelte';
import { getContext } from 'svelte'; import { getContext, onMount, afterUpdate } from 'svelte';
export let show = false; export let show = false;
export let model = null; export let model = null;
export let modelRatingHistory = new Map();
export let feedbacks = []; export let feedbacks = [];
export let onClose: () => void = () => {}; export let onClose: () => void = () => {};
const i18n = getContext('i18n'); const i18n = getContext('i18n');
@ -28,6 +29,50 @@
.slice(0, topN) .slice(0, topN)
.map(([tag, count]) => ({ tag, count })); .map(([tag, count]) => ({ tag, count }));
}; };
// Canvas element the chart is drawn on; assigned through `bind:this` in the template.
let chartCanvas;
// Chart.js instance created by renderChart, once one has been rendered.
let chart;
// Rating history entries for the selected model, or an empty array when no
// model is selected or the history map has no entry for the model's id.
$: chartData =
model && modelRatingHistory && modelRatingHistory.has(model.id)
? modelRatingHistory.get(model.id)
: [];
// Snapshot of what the current chart instance was built from. Lifecycle
// callbacks call renderChart repeatedly; the snapshot lets an unchanged
// chart be kept instead of being destroyed and rebuilt each time.
let renderedCanvas: unknown = null;
let renderedData: unknown = null;
let renderedKey = '';

/** Destroys the current Chart.js instance, if any, and clears the snapshot. */
function destroyChart() {
	if (chart) chart.destroy();
	chart = undefined;
	renderedCanvas = null;
	renderedData = null;
	renderedKey = '';
}

/**
 * Draws the rating history of the selected model as a line chart on the
 * bound canvas.
 *
 * Needs a canvas and at least two history points. When either is missing,
 * any existing chart is destroyed, because the template removes the canvas
 * in that case. Chart.js is loaded with a dynamic import on first use.
 */
async function renderChart() {
	if (!chartCanvas || !chartData || chartData.length < 2) {
		destroyChart();
		return;
	}

	const { Chart, registerables } = await import('chart.js');

	// The component may have updated or been torn down while the module was
	// loading, so read the canvas and data again instead of trusting the
	// values checked before the await.
	const canvas = chartCanvas;
	const points = chartData;
	if (!canvas || !points || points.length < 2) {
		destroyChart();
		return;
	}

	// Same canvas, same history array and same tail: the chart is current.
	const last = points[points.length - 1];
	const key = `${points.length}:${last.timestamp}:${last.rating}`;
	if (chart && renderedCanvas === canvas && renderedData === points && renderedKey === key) {
		return;
	}

	Chart.register(...registerables);
	// A canvas can host only one Chart.js instance at a time.
	destroyChart();
	chart = new Chart(canvas, {
		type: 'line',
		data: {
			// Timestamps are multiplied by 1000 before building the Date label.
			labels: points.map((d) => new Date(d.timestamp * 1000).toLocaleDateString()),
			datasets: [
				{
					label: 'Rating',
					data: points.map((d) => d.rating),
					borderColor: 'rgba(75,192,192,1)',
					backgroundColor: 'rgba(75,192,192,0.1)',
					tension: 0.2,
					pointRadius: 2,
					fill: false
				}
			]
		},
		options: {
			scales: {
				y: { beginAtZero: false, title: { display: true, text: 'Elo Rating' } },
				x: { title: { display: true, text: 'Date' } }
			},
			plugins: { legend: { display: false } },
			responsive: true,
			maintainAspectRatio: false
		}
	});
	renderedCanvas = canvas;
	renderedData = points;
	renderedKey = key;
}
// Runs renderChart and logs a failure (for example a failed dynamic import)
// instead of leaving the promise rejection unhandled.
function renderChartSafely() {
	renderChart().catch((err: unknown) => {
		console.error('Failed to render rating history chart', err);
	});
}

onMount(() => {
	renderChartSafely();
	// A function returned from onMount runs when the component is destroyed;
	// release the Chart.js instance so it does not outlive its canvas.
	return () => {
		if (chart) {
			chart.destroy();
			chart = undefined;
		}
	};
});
// Re-render after each component update so the chart follows the selected model.
afterUpdate(renderChartSafely);
</script> </script>
<Modal size="sm" bind:show> <Modal size="sm" bind:show>
@ -63,13 +108,26 @@
<span>-</span> <span>-</span>
{/if} {/if}
</div> </div>
<!-- Rating history: a line chart when there are at least two points, otherwise a hint. -->
<div class="my-4" style="height:150px;">
	{#if chartData.length > 1}
		<canvas bind:this={chartCanvas}></canvas>
	{:else}
		<!-- i18n is the store from getContext('i18n'); its t() is reached through $i18n. -->
		<div class="text-xs text-gray-400 text-center py-10">
			{$i18n.t('Not enough data for rating history')}
		</div>
	{/if}
</div>
<div class="flex justify-end pt-2"> <div class="flex justify-end pt-2">
<button <button
class="px-3.5 py-1.5 text-sm font-medium bg-black hover:bg-gray-900 text-white dark:bg-white dark:text-black dark:hover:bg-gray-100 transition rounded-full" class="px-3.5 py-1.5 text-sm font-medium bg-black hover:bg-gray-900 text-white dark:bg-white dark:text-black dark:hover:bg-gray-100 transition rounded-full"
type="button" type="button"
on:click={close} on:click={close}
> >
{$i18n.t('Close')}
</button> </button>
</div> </div>
</div> </div>