<?xml version="1.0" encoding="UTF-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
  <title>Benchlist - verified runs</title>
  <link href="https://benchlist.ai/feed.xml" rel="self"/>
  <link href="https://benchlist.ai/leaderboard"/>
  <updated>2026-04-23T15:15:00Z</updated>
  <id>https://benchlist.ai/feed.xml</id>
  <generator>Benchlist</generator>
  <author>
    <name>Benchlist</name>
  </author>
  <entry>
    <title>Voyage AI - BEIR - 68.1</title>
    <link href="https://benchlist.ai/verify/run-voyage-beir-001"/>
    <id>https://benchlist.ai/verify/run-voyage-beir-001</id>
    <updated>2026-04-23T15:15:00Z</updated>
    <summary>Verified run: Voyage AI scored 68.1 on BEIR (model: voyage-3-large). Aligned batch: 0xf02d151fea3bc192a3b4c5d6e7f8009a2f3a4b506182a3d4e5f60819203c4d5.</summary>
  </entry>
  <entry>
    <title>Pinecone - BEIR - 64.3</title>
    <link href="https://benchlist.ai/verify/run-pinecone-ann-002"/>
    <id>https://benchlist.ai/verify/run-pinecone-ann-002</id>
    <updated>2026-04-23T15:00:00Z</updated>
    <summary>Verified run: Pinecone scored 64.3 on BEIR (model: serverless-s1). Aligned batch: 0xef2c141ed93ab192a3b4c5d6e7f8009a2e3f4a506172a3d4e5f608192a3c4d5.</summary>
  </entry>
  <entry>
    <title>OpenAI Codex - τ-Bench - 58.2</title>
    <link href="https://benchlist.ai/verify/run-codex-tau-001"/>
    <id>https://benchlist.ai/verify/run-codex-tau-001</id>
    <updated>2026-04-23T14:45:00Z</updated>
    <summary>Verified run: OpenAI Codex scored 58.2 on τ-Bench (model: codex-pro). Aligned batch: 0xde2b131dc8339192a3b4c5d6e7f80091d2e3f4a50617283c4d5e6f708192a3c4.</summary>
  </entry>
  <entry>
    <title>Devin - τ-Bench - 61.0</title>
    <link href="https://benchlist.ai/verify/run-devin-tau-001"/>
    <id>https://benchlist.ai/verify/run-devin-tau-001</id>
    <updated>2026-04-23T14:30:00Z</updated>
    <summary>Verified run: Devin scored 61.0 on τ-Bench (model: devin-v1). Aligned batch: 0xcd2a121cb7328192a3b4c5d6e7f80091c2d3e4f50617182b3c4d5e6f7081a2c3.</summary>
  </entry>
  <entry>
    <title>REM Labs - GPQA Diamond - 69.4</title>
    <link href="https://benchlist.ai/verify/run-rem-gpqa-001"/>
    <id>https://benchlist.ai/verify/run-rem-gpqa-001</id>
    <updated>2026-04-23T14:15:00Z</updated>
    <summary>Verified run: REM Labs scored 69.4 on GPQA Diamond (model: rem-v1 @ claude-opus-4-7). Aligned batch: 0xbc29111bb6317192a3b4c5d6e7f80091b2c3d4e5f607182a3b4c5d6e7f80091b.</summary>
  </entry>
  <entry>
    <title>REM Labs - MMLU-Pro - 82.3</title>
    <link href="https://benchlist.ai/verify/run-rem-mmlupro-001"/>
    <id>https://benchlist.ai/verify/run-rem-mmlupro-001</id>
    <updated>2026-04-23T14:00:00Z</updated>
    <summary>Verified run: REM Labs scored 82.3 on MMLU-Pro (model: rem-v1 @ claude-opus-4-7). Aligned batch: 0xab28101ab5307081a2b3c4d5e6f708192a3b4c5d6e7f80812b3c4d5e6f7081.</summary>
  </entry>
  <entry>
    <title>MCP Filesystem - MCP Compatibility - 98</title>
    <link href="https://benchlist.ai/verify/run-mcp-filesystem-compat-001"/>
    <id>https://benchlist.ai/verify/run-mcp-filesystem-compat-001</id>
    <updated>2026-04-23T13:45:00Z</updated>
    <summary>Verified run: MCP Filesystem scored 98 on MCP Compatibility (model: mcp-filesystem-1.0). Aligned batch: 0x9a270f19b42f708192a3b4c5d6e7f8091a2b3c4d5e6f708192a3b4c5d6e7f8091.</summary>
  </entry>
  <entry>
    <title>MCP Notion - MCP Compatibility - 92</title>
    <link href="https://benchlist.ai/verify/run-mcp-notion-compat-001"/>
    <id>https://benchlist.ai/verify/run-mcp-notion-compat-001</id>
    <updated>2026-04-23T13:30:00Z</updated>
    <summary>Verified run: MCP Notion scored 92 on MCP Compatibility (model: mcp-notion-1.0). Aligned batch: 0x89260e18b32e708192a3b4c5d6e7f8091a2b3c4d5e6f708192a3b4c5d6e7f8091.</summary>
  </entry>
  <entry>
    <title>Runloop - Sandbox Cold Start - 890</title>
    <link href="https://benchlist.ai/verify/run-runloop-sandbox-001"/>
    <id>https://benchlist.ai/verify/run-runloop-sandbox-001</id>
    <updated>2026-04-23T13:15:00Z</updated>
    <summary>Verified run: Runloop scored 890 on Sandbox Cold Start (model: runloop-bench-v1). Aligned batch: 0x78250d17b22d708192a3b4c5d6e7f8091a2b3c4d5e6f708192a3b4c5d6e7f8091.</summary>
  </entry>
  <entry>
    <title>Vectara - FRAMES - 72.8</title>
    <link href="https://benchlist.ai/verify/run-vectara-frames-001"/>
    <id>https://benchlist.ai/verify/run-vectara-frames-001</id>
    <updated>2026-04-23T13:00:00Z</updated>
    <summary>Verified run: Vectara scored 72.8 on FRAMES (model: vectara-rag). Aligned batch: 0x67240c16b12c708192a3b4c5d6e7f8091a2b3c4d5e6f708192a3b4c5d6e7f8091.</summary>
  </entry>
  <entry>
    <title>AutoGen - AgentBench - 47.3</title>
    <link href="https://benchlist.ai/verify/run-autogen-agentbench-001"/>
    <id>https://benchlist.ai/verify/run-autogen-agentbench-001</id>
    <updated>2026-04-23T12:45:00Z</updated>
    <summary>Verified run: AutoGen scored 47.3 on AgentBench (model: autogen + gpt-5). Aligned batch: 0x56230b15b02b708192a3b4c5d6e7f8091a2b3c4d5e6f708192a3b4c5d6e7f8091.</summary>
  </entry>
  <entry>
    <title>Mastra - τ-Bench - 58.4</title>
    <link href="https://benchlist.ai/verify/run-mastra-taubench-001"/>
    <id>https://benchlist.ai/verify/run-mastra-taubench-001</id>
    <updated>2026-04-23T12:30:00Z</updated>
    <summary>Verified run: Mastra scored 58.4 on τ-Bench (model: mastra + claude-opus-4-7). Aligned batch: 0x45220a14af2a708192a3b4c5d6e7f8091a2b3c4d5e6f708192a3b4c5d6e7f8091.</summary>
  </entry>
  <entry>
    <title>Serper - FRAMES - 70.1</title>
    <link href="https://benchlist.ai/verify/run-serper-frames-001"/>
    <id>https://benchlist.ai/verify/run-serper-frames-001</id>
    <updated>2026-04-23T12:15:00Z</updated>
    <summary>Verified run: Serper scored 70.1 on FRAMES (model: serper-v1). Aligned batch: 0x34210913ae29708192a3b4c5d6e7f8091a2b3c4d5e6f708192a3b4c5d6e7f8091.</summary>
  </entry>
  <entry>
    <title>Jina Reader - FRAMES - 68.4</title>
    <link href="https://benchlist.ai/verify/run-jina-reader-frames-001"/>
    <id>https://benchlist.ai/verify/run-jina-reader-frames-001</id>
    <updated>2026-04-23T12:00:00Z</updated>
    <summary>Verified run: Jina Reader scored 68.4 on FRAMES (model: jina-reader-v2). Aligned batch: 0x23200812ad28708192a3b4c5d6e7f8091a2b3c4d5e6f708192a3b4c5d6e7f8091.</summary>
  </entry>
  <entry>
    <title>ElevenLabs - Voice Real-Time Factor - 0.19</title>
    <link href="https://benchlist.ai/verify/run-elevenlabs-rtf-001"/>
    <id>https://benchlist.ai/verify/run-elevenlabs-rtf-001</id>
    <updated>2026-04-23T11:45:00Z</updated>
    <summary>Verified run: ElevenLabs scored 0.19 on Voice Real-Time Factor (model: eleven-multilingual-v3). Aligned batch: 0x121f0711ac27708192a3b4c5d6e7f8091a2b3c4d5e6f708192a3b4c5d6e7f8091.</summary>
  </entry>
  <entry>
    <title>Deepgram - Voice Real-Time Factor - 0.14</title>
    <link href="https://benchlist.ai/verify/run-deepgram-rtf-001"/>
    <id>https://benchlist.ai/verify/run-deepgram-rtf-001</id>
    <updated>2026-04-23T11:30:00Z</updated>
    <summary>Verified run: Deepgram scored 0.14 on Voice Real-Time Factor (model: nova-3). Aligned batch: 0x011e0610ab26708192a3b4c5d6e7f8091a2b3c4d5e6f708192a3b4c5d6e7f8091.</summary>
  </entry>
  <entry>
    <title>Baseten - Inference Throughput - 220</title>
    <link href="https://benchlist.ai/verify/run-baseten-tps-001"/>
    <id>https://benchlist.ai/verify/run-baseten-tps-001</id>
    <updated>2026-04-23T11:15:00Z</updated>
    <summary>Verified run: Baseten scored 220 on Inference Throughput (model: llama-3.3-70b). Aligned batch: 0xf11d050f0a25708192a3b4c5d6e7f8091a2b3c4d5e6f708192a3b4c5d6e7f8091.</summary>
  </entry>
  <entry>
    <title>SambaNova Cloud - Inference Throughput - 480</title>
    <link href="https://benchlist.ai/verify/run-sambanova-tps-001"/>
    <id>https://benchlist.ai/verify/run-sambanova-tps-001</id>
    <updated>2026-04-23T11:00:00Z</updated>
    <summary>Verified run: SambaNova Cloud scored 480 on Inference Throughput (model: llama-3.3-70b). Aligned batch: 0xe11c040e0a24708192a3b4c5d6e7f8091a2b3c4d5e6f708192a3b4c5d6e7f8091.</summary>
  </entry>
  <entry>
    <title>Milvus - ANN-Benchmarks - 96.1</title>
    <link href="https://benchlist.ai/verify/run-milvus-ann-001"/>
    <id>https://benchlist.ai/verify/run-milvus-ann-001</id>
    <updated>2026-04-23T10:45:00Z</updated>
    <summary>Verified run: Milvus scored 96.1 on ANN-Benchmarks (model: milvus-2.5-gpu). Aligned batch: 0xd11b030d0a23708192a3b4c5d6e7f8091a2b3c4d5e6f708192a3b4c5d6e7f8091.</summary>
  </entry>
  <entry>
    <title>OpenAI Codex - SWE-bench Lite - 56.3</title>
    <link href="https://benchlist.ai/verify/run-codex-swebench-001"/>
    <id>https://benchlist.ai/verify/run-codex-swebench-001</id>
    <updated>2026-04-23T10:30:00Z</updated>
    <summary>Verified run: OpenAI Codex scored 56.3 on SWE-bench Lite (model: codex-pro). Aligned batch: 0xc11a020c0a22708192a3b4c5d6e7f8091a2b3c4d5e6f708192a3b4c5d6e7f8091.</summary>
  </entry>
  <entry>
    <title>Factory Droid - SWE-bench Lite - 52.7</title>
    <link href="https://benchlist.ai/verify/run-factory-swebench-001"/>
    <id>https://benchlist.ai/verify/run-factory-swebench-001</id>
    <updated>2026-04-23T10:15:00Z</updated>
    <summary>Verified run: Factory Droid scored 52.7 on SWE-bench Lite (model: droid-v2). Aligned batch: 0xb119010b0a21708192a3b4c5d6e7f8091a2b3c4d5e6f708192a3b4c5d6e7f8091.</summary>
  </entry>
  <entry>
    <title>Cohere Command - MBPP - 79.3</title>
    <link href="https://benchlist.ai/verify/run-cohere-command-mbpp-001"/>
    <id>https://benchlist.ai/verify/run-cohere-command-mbpp-001</id>
    <updated>2026-04-23T10:00:00Z</updated>
    <summary>Verified run: Cohere Command scored 79.3 on MBPP (model: command-r-plus). Aligned batch: 0xa118000a0a20708192a3b4c5d6e7f8091a2b3c4d5e6f708192a3b4c5d6e7f8091.</summary>
  </entry>
  <entry>
    <title>Mistral AI - LiveCodeBench - 51.9</title>
    <link href="https://benchlist.ai/verify/run-mistral-lcb-001"/>
    <id>https://benchlist.ai/verify/run-mistral-lcb-001</id>
    <updated>2026-04-23T09:45:00Z</updated>
    <summary>Verified run: Mistral AI scored 51.9 on LiveCodeBench (model: mistral-large-2). Aligned batch: 0x911700090a1f708192a3b4c5d6e7f8091a2b3c4d5e6f708192a3b4c5d6e7f8091.</summary>
  </entry>
  <entry>
    <title>Qwen - LiveCodeBench - 55.4</title>
    <link href="https://benchlist.ai/verify/run-qwen-lcb-001"/>
    <id>https://benchlist.ai/verify/run-qwen-lcb-001</id>
    <updated>2026-04-23T09:30:00Z</updated>
    <summary>Verified run: Qwen scored 55.4 on LiveCodeBench (model: qwen-2-5-72b). Aligned batch: 0x811600080a1e708192a3b4c5d6e7f8091a2b3c4d5e6f708192a3b4c5d6e7f8091.</summary>
  </entry>
  <entry>
    <title>Meta Llama - LiveCodeBench - 58.9</title>
    <link href="https://benchlist.ai/verify/run-llama-lcb-001"/>
    <id>https://benchlist.ai/verify/run-llama-lcb-001</id>
    <updated>2026-04-23T09:15:00Z</updated>
    <summary>Verified run: Meta Llama scored 58.9 on LiveCodeBench (model: llama-4-405b). Aligned batch: 0x711500070a1d708192a3b4c5d6e7f8091a2b3c4d5e6f708192a3b4c5d6e7f8091.</summary>
  </entry>
  <entry>
    <title>Moonshot Kimi - LiveCodeBench - 54.2</title>
    <link href="https://benchlist.ai/verify/run-kimi-lcb-001"/>
    <id>https://benchlist.ai/verify/run-kimi-lcb-001</id>
    <updated>2026-04-23T09:00:00Z</updated>
    <summary>Verified run: Moonshot Kimi scored 54.2 on LiveCodeBench (model: kimi-k2). Aligned batch: 0x611400060a1c708192a3b4c5d6e7f8091a2b3c4d5e6f708192a3b4c5d6e7f8091.</summary>
  </entry>
  <entry>
    <title>DeepSeek - LiveCodeBench - 57.8</title>
    <link href="https://benchlist.ai/verify/run-deepseek-lcb-001"/>
    <id>https://benchlist.ai/verify/run-deepseek-lcb-001</id>
    <updated>2026-04-23T08:45:00Z</updated>
    <summary>Verified run: DeepSeek scored 57.8 on LiveCodeBench (model: deepseek-v3). Aligned batch: 0x511300050a1b708192a3b4c5d6e7f8091a2b3c4d5e6f708192a3b4c5d6e7f8091.</summary>
  </entry>
  <entry>
    <title>xAI Grok - LiveCodeBench - 62.1</title>
    <link href="https://benchlist.ai/verify/run-xai-lcb-001"/>
    <id>https://benchlist.ai/verify/run-xai-lcb-001</id>
    <updated>2026-04-23T08:30:00Z</updated>
    <summary>Verified run: xAI Grok scored 62.1 on LiveCodeBench (model: grok-4). Aligned batch: 0x411200040a1a708192a3b4c5d6e7f8091a2b3c4d5e6f708192a3b4c5d6e7f8091.</summary>
  </entry>
  <entry>
    <title>xAI Grok - MBPP - 86.7</title>
    <link href="https://benchlist.ai/verify/run-xai-mbpp-001"/>
    <id>https://benchlist.ai/verify/run-xai-mbpp-001</id>
    <updated>2026-04-23T08:15:00Z</updated>
    <summary>Verified run: xAI Grok scored 86.7 on MBPP (model: grok-4). Aligned batch: 0x311100030a19708192a3b4c5d6e7f8091a2b3c4d5e6f708192a3b4c5d6e7f8091.</summary>
  </entry>
  <entry>
    <title>Google Gemini - MBPP - 89.6</title>
    <link href="https://benchlist.ai/verify/run-gemini-mbpp-001"/>
    <id>https://benchlist.ai/verify/run-gemini-mbpp-001</id>
    <updated>2026-04-23T08:00:00Z</updated>
    <summary>Verified run: Google Gemini scored 89.6 on MBPP (model: gemini-2-5-pro). Aligned batch: 0x210100020a18708192a3b4c5d6e7f8091a2b3c4d5e6f708192a3b4c5d6e7f8091.</summary>
  </entry>
  <entry>
    <title>Helicone - MCP Compatibility - 82</title>
    <link href="https://benchlist.ai/verify/run-helicone-eval-001"/>
    <id>https://benchlist.ai/verify/run-helicone-eval-001</id>
    <updated>2026-04-22T19:00:00Z</updated>
    <summary>Verified run: Helicone scored 82 on MCP Compatibility (model: helicone-proxy). Aligned batch: -.</summary>
  </entry>
  <entry>
    <title>Langfuse - MCP Compatibility - 88</title>
    <link href="https://benchlist.ai/verify/run-langfuse-eval-001"/>
    <id>https://benchlist.ai/verify/run-langfuse-eval-001</id>
    <updated>2026-04-22T18:00:00Z</updated>
    <summary>Verified run: Langfuse scored 88 on MCP Compatibility (model: langfuse-tracer). Aligned batch: -.</summary>
  </entry>
  <entry>
    <title>REM Labs - LongMemEval - 92.4</title>
    <link href="https://benchlist.ai/verify/run-rem-lme-002"/>
    <id>https://benchlist.ai/verify/run-rem-lme-002</id>
    <updated>2026-04-22T05:00:00Z</updated>
    <summary>Verified run: REM Labs scored 92.4 on LongMemEval (model: rem-v1 @ claude-sonnet-4-6). Aligned batch: 0x11b00f0f1a708192a3b4c5d6e7f8091a2b3c4d5e6f708192a3b4c5d6e7f8091a2b.</summary>
  </entry>
  <entry>
    <title>MCP GitHub - MCP Compatibility - 100</title>
    <link href="https://benchlist.ai/verify/run-mcp-github-compat-001"/>
    <id>https://benchlist.ai/verify/run-mcp-github-compat-001</id>
    <updated>2026-04-22T04:45:00Z</updated>
    <summary>Verified run: MCP GitHub scored 100 on MCP Compatibility (model: mcp-github-1.0). Aligned batch: 0x109f0e0e19708192a3b4c5d6e7f8091a2b3c4d5e6f708192a3b4c5d6e7f8091a2b.</summary>
  </entry>
  <entry>
    <title>REM MCP - MCP Compatibility - 100</title>
    <link href="https://benchlist.ai/verify/run-rem-mcp-compat-001"/>
    <id>https://benchlist.ai/verify/run-rem-mcp-compat-001</id>
    <updated>2026-04-22T04:30:00Z</updated>
    <summary>Verified run: REM MCP scored 100 on MCP Compatibility (model: rem-mcp-0.4). Aligned batch: 0x0f8e0d0d18708192a3b4c5d6e7f8091a2b3c4d5e6f708192a3b4c5d6e7f8091a2b.</summary>
  </entry>
  <entry>
    <title>LlamaParse - DocBench - 91.8</title>
    <link href="https://benchlist.ai/verify/run-llamaparse-doc-001"/>
    <id>https://benchlist.ai/verify/run-llamaparse-doc-001</id>
    <updated>2026-04-22T04:00:00Z</updated>
    <summary>Verified run: LlamaParse scored 91.8 on DocBench (model: llamaparse-premium). Aligned batch: 0x0e7d0c0c17708192a3b4c5d6e7f8091a2b3c4d5e6f708192a3b4c5d6e7f8091a2b.</summary>
  </entry>
  <entry>
    <title>Reducto - DocBench - 94.2</title>
    <link href="https://benchlist.ai/verify/run-reducto-doc-001"/>
    <id>https://benchlist.ai/verify/run-reducto-doc-001</id>
    <updated>2026-04-22T03:30:00Z</updated>
    <summary>Verified run: Reducto scored 94.2 on DocBench (model: reducto-1.2). Aligned batch: 0x0d6c0b0b16708192a3b4c5d6e7f8091a2b3c4d5e6f708192a3b4c5d6e7f8091a2b.</summary>
  </entry>
  <entry>
    <title>Lakera Guard - Prompt Injection Recall - 98.4</title>
    <link href="https://benchlist.ai/verify/run-lakera-injection-001"/>
    <id>https://benchlist.ai/verify/run-lakera-injection-001</id>
    <updated>2026-04-22T03:00:00Z</updated>
    <summary>Verified run: Lakera Guard scored 98.4 on Prompt Injection Recall (model: lakera-guard-v4). Aligned batch: 0x0c5b0a0a15708192a3b4c5d6e7f8091a2b3c4d5e6f708192a3b4c5d6e7f8091a2b.</summary>
  </entry>
  <entry>
    <title>Guardrails AI - Prompt Injection Recall - 97.1</title>
    <link href="https://benchlist.ai/verify/run-guardrails-injection-001"/>
    <id>https://benchlist.ai/verify/run-guardrails-injection-001</id>
    <updated>2026-04-22T02:30:00Z</updated>
    <summary>Verified run: Guardrails AI scored 97.1 on Prompt Injection Recall (model: guardrails-full-suite). Aligned batch: 0x0b4a090914708192a3b4c5d6e7f8091a2b3c4d5e6f708192a3b4c5d6e7f8091a2b.</summary>
  </entry>
  <entry>
    <title>E2B - Sandbox Cold Start - 2300</title>
    <link href="https://benchlist.ai/verify/run-e2b-sandbox-001"/>
    <id>https://benchlist.ai/verify/run-e2b-sandbox-001</id>
    <updated>2026-04-22T02:00:00Z</updated>
    <summary>Verified run: E2B scored 2300 on Sandbox Cold Start (model: firecracker-prod). Aligned batch: 0x0a39080813708192a3b4c5d6e7f8091a2b3c4d5e6f708192a3b4c5d6e7f8091a2b.</summary>
  </entry>
  <entry>
    <title>Daytona - Sandbox Cold Start - 1250</title>
    <link href="https://benchlist.ai/verify/run-daytona-sandbox-001"/>
    <id>https://benchlist.ai/verify/run-daytona-sandbox-001</id>
    <updated>2026-04-22T01:30:00Z</updated>
    <summary>Verified run: Daytona scored 1250 on Sandbox Cold Start (model: daytona-1.0). Aligned batch: 0x0928070712708192a3b4c5d6e7f8091a2b3c4d5e6f708192a3b4c5d6e7f8091a2b.</summary>
  </entry>
  <entry>
    <title>Cartesia - Time-to-First-Token - 92</title>
    <link href="https://benchlist.ai/verify/run-cartesia-ttft-001"/>
    <id>https://benchlist.ai/verify/run-cartesia-ttft-001</id>
    <updated>2026-04-22T01:00:00Z</updated>
    <summary>Verified run: Cartesia scored 92 on Time-to-First-Token (model: sonic). Aligned batch: 0x0817060611708192a3b4c5d6e7f8091a2b3c4d5e6f708192a3b4c5d6e7f8091a2b.</summary>
  </entry>
  <entry>
    <title>AssemblyAI - LibriSpeech WER - 4.4</title>
    <link href="https://benchlist.ai/verify/run-assemblyai-wer-001"/>
    <id>https://benchlist.ai/verify/run-assemblyai-wer-001</id>
    <updated>2026-04-22T00:30:00Z</updated>
    <summary>Verified run: AssemblyAI scored 4.4 on LibriSpeech WER (model: universal-v2). Aligned batch: 0x0706050510708192a3b4c5d6e7f8091a2b3c4d5e6f708192a3b4c5d6e7f8091a2b.</summary>
  </entry>
  <entry>
    <title>Replicate - Inference Throughput - 140</title>
    <link href="https://benchlist.ai/verify/run-replicate-tps-001"/>
    <id>https://benchlist.ai/verify/run-replicate-tps-001</id>
    <updated>2026-04-22T00:00:00Z</updated>
    <summary>Verified run: Replicate scored 140 on Inference Throughput (model: llama-3.3-70b). Aligned batch: 0x06f5040409708192a3b4c5d6e7f8091a2b3c4d5e6f708192a3b4c5d6e7f8091a2b.</summary>
  </entry>
  <entry>
    <title>Cerebras Inference - Inference Throughput - 2180</title>
    <link href="https://benchlist.ai/verify/run-cerebras-tps-001"/>
    <id>https://benchlist.ai/verify/run-cerebras-tps-001</id>
    <updated>2026-04-21T23:30:00Z</updated>
    <summary>Verified run: Cerebras Inference scored 2180 on Inference Throughput (model: llama-3.3-70b). Aligned batch: 0x05e4030308708192a3b4c5d6e7f8091a2b3c4d5e6f708192a3b4c5d6e7f8091a2b.</summary>
  </entry>
  <entry>
    <title>Jina Embeddings - MTEB - 66.1</title>
    <link href="https://benchlist.ai/verify/run-jina-mteb-001"/>
    <id>https://benchlist.ai/verify/run-jina-mteb-001</id>
    <updated>2026-04-21T23:00:00Z</updated>
    <summary>Verified run: Jina Embeddings scored 66.1 on MTEB (model: jina-embeddings-v3). Aligned batch: 0x04d3020207708192a3b4c5d6e7f8091a2b3c4d5e6f708192a3b4c5d6e7f8091a2b.</summary>
  </entry>
  <entry>
    <title>OpenAI Embeddings - MTEB - 64.6</title>
    <link href="https://benchlist.ai/verify/run-openai-embed-mteb-001"/>
    <id>https://benchlist.ai/verify/run-openai-embed-mteb-001</id>
    <updated>2026-04-21T22:40:00Z</updated>
    <summary>Verified run: OpenAI Embeddings scored 64.6 on MTEB (model: text-embedding-3-large). Aligned batch: 0x03c2010106708192a3b4c5d6e7f8091a2b3c4d5e6f708192a3b4c5d6e7f8091a2b.</summary>
  </entry>
  <entry>
    <title>Turbopuffer - ANN-Benchmarks - 92.8</title>
    <link href="https://benchlist.ai/verify/run-turbopuffer-ann-001"/>
    <id>https://benchlist.ai/verify/run-turbopuffer-ann-001</id>
    <updated>2026-04-21T22:00:00Z</updated>
    <summary>Verified run: Turbopuffer scored 92.8 on ANN-Benchmarks (model: turbopuffer-v1). Aligned batch: 0x02b100000508192a3b4c5d6e7f8091a2b3c4d5e6f708192a3b4c5d6e7f8091a2b.</summary>
  </entry>
  <entry>
    <title>LanceDB - ANN-Benchmarks - 93.7</title>
    <link href="https://benchlist.ai/verify/run-lancedb-ann-001"/>
    <id>https://benchlist.ai/verify/run-lancedb-ann-001</id>
    <updated>2026-04-21T21:30:00Z</updated>
    <summary>Verified run: LanceDB scored 93.7 on ANN-Benchmarks (model: lancedb-0.18). Aligned batch: 0x01a0ffff408192a3b4c5d6e7f8091a2b3c4d5e6f708192a3b4c5d6e7f8091a2b.</summary>
  </entry>
  <entry>
    <title>Chroma - ANN-Benchmarks - 91.2</title>
    <link href="https://benchlist.ai/verify/run-chroma-ann-001"/>
    <id>https://benchlist.ai/verify/run-chroma-ann-001</id>
    <updated>2026-04-21T21:00:00Z</updated>
    <summary>Verified run: Chroma scored 91.2 on ANN-Benchmarks (model: chroma-0.5). Aligned batch: 0xf89feefe307192a3b4c5d6e7f8091a2b3c4d5e6f708192a3b4c5d6e7f8091a2b.</summary>
  </entry>
</feed>
