feat: Route OpenRouter requests via Nebius with FP8 quantization
- Added OPENROUTER_PROVIDER_PREFERENCES config
- Prioritizes the Nebius provider for better availability and speed
- Uses FP8 quantization for a quality/speed balance
- Falls back to other providers if Nebius is unavailable
This commit is contained in:
@@ -4,7 +4,7 @@ import OpenAI from 'openai';
  * AI Provider configuration
  *
  * Set AI_PROVIDER in .env.local to switch:
- * - "openrouter" (default) - Uses OpenRouter with Gemma 3 27B
+ * - "openrouter" (default) - Uses OpenRouter with Gemma 3 27B via Nebius/FP8
  * - "gemini" - Uses Google Gemini 2.5 Flash
  */
/** Supported AI backends; set AI_PROVIDER in .env.local to choose one. */
export type AIProvider = 'openrouter' | 'gemini';
|
||||
@@ -38,3 +38,14 @@ export function getOpenRouterClient(): OpenAI {
|
||||
|
||||
/**
 * Default OpenRouter model for vision tasks.
 *
 * NOTE(review): the ":free" suffix appears to select OpenRouter's free tier —
 * confirm its rate limits are acceptable for production traffic.
 */
export const OPENROUTER_VISION_MODEL = 'google/gemma-3-27b-it:free';
|
||||
|
||||
/**
|
||||
* OpenRouter provider preferences
|
||||
* - Prioritize Nebius provider for better availability
|
||||
* - Request FP8 quantization for quality/speed balance
|
||||
*/
|
||||
export const OPENROUTER_PROVIDER_PREFERENCES = {
|
||||
order: ['Nebius'], // Prioritize Nebius
|
||||
quantizations: ['fp8'], // Use FP8 for quality
|
||||
allow_fallbacks: true, // Allow fallback to other providers
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user