feat: Route OpenRouter requests via Nebius with FP8 quantization
- Add OPENROUTER_PROVIDER_PREFERENCES config
- Prioritize the Nebius provider for better availability/speed
- Request FP8 quantization for a quality/speed balance
- Fall back to other providers if Nebius is unavailable
This commit is contained in:
@@ -5,7 +5,7 @@ import { BottleMetadataSchema, BottleMetadata } from '@/types/whisky';
|
|||||||
import { createClient } from '@/lib/supabase/server';
|
import { createClient } from '@/lib/supabase/server';
|
||||||
import { trackApiUsage } from '@/services/track-api-usage';
|
import { trackApiUsage } from '@/services/track-api-usage';
|
||||||
import { checkCreditBalance, deductCredits } from '@/services/credit-service';
|
import { checkCreditBalance, deductCredits } from '@/services/credit-service';
|
||||||
import { getAIProvider, getOpenRouterClient, OPENROUTER_VISION_MODEL } from '@/lib/openrouter';
|
import { getAIProvider, getOpenRouterClient, OPENROUTER_VISION_MODEL, OPENROUTER_PROVIDER_PREFERENCES } from '@/lib/openrouter';
|
||||||
|
|
||||||
// Schema for Gemini Vision extraction
|
// Schema for Gemini Vision extraction
|
||||||
const visionSchema = {
|
const visionSchema = {
|
||||||
@@ -111,6 +111,8 @@ async function analyzeWithOpenRouter(base64Data: string, mimeType: string): Prom
|
|||||||
],
|
],
|
||||||
temperature: 0.1,
|
temperature: 0.1,
|
||||||
max_tokens: 1024,
|
max_tokens: 1024,
|
||||||
|
// @ts-ignore - OpenRouter-specific field
|
||||||
|
provider: OPENROUTER_PROVIDER_PREFERENCES,
|
||||||
});
|
});
|
||||||
|
|
||||||
const endApi = performance.now();
|
const endApi = performance.now();
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ import OpenAI from 'openai';
|
|||||||
* AI Provider configuration
|
* AI Provider configuration
|
||||||
*
|
*
|
||||||
* Set AI_PROVIDER in .env.local to switch:
|
* Set AI_PROVIDER in .env.local to switch:
|
||||||
* - "openrouter" (default) - Uses OpenRouter with Gemma 3 27B
|
* - "openrouter" (default) - Uses OpenRouter with Gemma 3 27B via Nebius/FP8
|
||||||
* - "gemini" - Uses Google Gemini 2.5 Flash
|
* - "gemini" - Uses Google Gemini 2.5 Flash
|
||||||
*/
|
*/
|
||||||
export type AIProvider = 'openrouter' | 'gemini';
|
export type AIProvider = 'openrouter' | 'gemini';
|
||||||
@@ -38,3 +38,14 @@ export function getOpenRouterClient(): OpenAI {
|
|||||||
|
|
||||||
// Default OpenRouter model for vision tasks
|
// Default OpenRouter model for vision tasks
|
||||||
export const OPENROUTER_VISION_MODEL = 'google/gemma-3-27b-it:free';
|
export const OPENROUTER_VISION_MODEL = 'google/gemma-3-27b-it:free';
|
||||||
|
|
||||||
|
/**
|
||||||
|
* OpenRouter provider preferences
|
||||||
|
* - Prioritize Nebius provider for better availability
|
||||||
|
* - Request FP8 quantization for quality/speed balance
|
||||||
|
*/
|
||||||
|
export const OPENROUTER_PROVIDER_PREFERENCES = { // Sent as the `provider` field of the OpenRouter chat request
|
||||||
|
order: ['Nebius'], // Provider routing order: try Nebius first
|
||||||
|
quantizations: ['fp8'], // Request FP8 quantization (quality/speed balance). NOTE(review): presumably this also restricts which fallback providers are eligible — confirm against OpenRouter routing docs
|
||||||
|
allow_fallbacks: true, // Allow fallback to other providers when Nebius is unavailable
|
||||||
|
};
|
||||||
|
|||||||
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user