feat: Route OpenRouter requests via Nebius with FP8 quantization
- Added OPENROUTER_PROVIDER_PREFERENCES config
- Prioritizes the Nebius provider for better availability and speed
- Uses FP8 quantization for a quality/speed balance
- Falls back to other providers if Nebius is unavailable
This commit is contained in:
@@ -4,7 +4,7 @@ import OpenAI from 'openai';
  * AI Provider configuration
  *
  * Set AI_PROVIDER in .env.local to switch:
- * - "openrouter" (default) - Uses OpenRouter with Gemma 3 27B
+ * - "openrouter" (default) - Uses OpenRouter with Gemma 3 27B via Nebius/FP8
  * - "gemini" - Uses Google Gemini 2.5 Flash
  */
/** Supported AI backends; set AI_PROVIDER in .env.local to choose one. */
export type AIProvider = 'openrouter' | 'gemini';
|
||||
@@ -38,3 +38,14 @@ export function getOpenRouterClient(): OpenAI {
|
||||
|
||||
/**
 * Default OpenRouter model for vision tasks.
 *
 * NOTE(review): the ":free" suffix appears to select OpenRouter's free tier —
 * confirm its rate limits are acceptable for production traffic.
 */
export const OPENROUTER_VISION_MODEL = 'google/gemma-3-27b-it:free';
|
||||
|
||||
/**
|
||||
* OpenRouter provider preferences
|
||||
* - Prioritize Nebius provider for better availability
|
||||
* - Request FP8 quantization for quality/speed balance
|
||||
*/
|
||||
export const OPENROUTER_PROVIDER_PREFERENCES = {
|
||||
order: ['Nebius'], // Prioritize Nebius
|
||||
quantizations: ['fp8'], // Use FP8 for quality
|
||||
allow_fallbacks: true, // Allow fallback to other providers
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user