feat: improved local OCR with Strip & Match distillery detection
- Added comprehensive distillery database (200+ entries)
- Implemented Strip & Match heuristic for fuzzy matching
- Added contextual age detection from distillery lines
- Added whitespace normalization for OCR text
- Disabled local name extraction (too noisy, let Gemini handle it)
- Fixed confidence scale normalization in TastingEditor (0-1 vs 0-100)
- Improved extractName filter (60% letters required)
- Relaxed Fuse.js thresholds for partial matches
This commit is contained in:
177
src/app/actions/gemini-vision.ts
Normal file
177
src/app/actions/gemini-vision.ts
Normal file
@@ -0,0 +1,177 @@
|
||||
'use server';
|
||||
|
||||
import { GoogleGenerativeAI, SchemaType, HarmCategory, HarmBlockThreshold } from '@google/generative-ai';
|
||||
import { BottleMetadataSchema, BottleMetadata } from '@/types/whisky';
|
||||
import { createClient } from '@/lib/supabase/server';
|
||||
import { trackApiUsage } from '@/services/track-api-usage';
|
||||
import { checkCreditBalance, deductCredits } from '@/services/credit-service';
|
||||
|
||||
/** Builds a schema property the model is allowed to leave as null. */
const nullableField = (type: SchemaType, description: string) => ({
    type,
    description,
    nullable: true,
});

/** Builds a schema property the model must always fill in. */
const requiredField = (type: SchemaType, description: string) => ({
    type,
    description,
    nullable: false,
});

/**
 * Structured-output schema passed to Gemini as `responseSchema`.
 *
 * Describes the label metadata requested from the model. Only `name`,
 * `is_whisky` and `confidence` are mandatory; every other property may be null.
 */
const visionSchema = {
    description: "Whisky bottle label metadata extracted from image",
    type: SchemaType.OBJECT as const,
    properties: {
        name: requiredField(SchemaType.STRING, "Full whisky name"),
        distillery: nullableField(SchemaType.STRING, "Distillery name"),
        bottler: nullableField(SchemaType.STRING, "Independent bottler if applicable"),
        category: nullableField(SchemaType.STRING, "Whisky category (Single Malt, Blended, Bourbon, etc.)"),
        abv: nullableField(SchemaType.NUMBER, "Alcohol by volume percentage"),
        age: nullableField(SchemaType.NUMBER, "Age statement in years"),
        vintage: nullableField(SchemaType.STRING, "Vintage/distillation year"),
        cask_type: nullableField(SchemaType.STRING, "Cask type (Sherry, Bourbon, Port, etc.)"),
        distilled_at: nullableField(SchemaType.STRING, "Distillation date"),
        bottled_at: nullableField(SchemaType.STRING, "Bottling date"),
        batch_info: nullableField(SchemaType.STRING, "Batch or cask number"),
        is_whisky: requiredField(SchemaType.BOOLEAN, "Whether this is a whisky product"),
        confidence: requiredField(SchemaType.NUMBER, "Confidence score 0-1"),
    },
    required: ["name", "is_whisky", "confidence"],
};
|
||||
|
||||
/**
 * Outcome of a Gemini Vision label analysis.
 *
 * Failures are reported through `success: false` plus `error` rather than by
 * throwing, so callers can branch on `success`.
 */
export interface GeminiVisionResult {
    /** `true` when metadata was extracted and validated; `false` otherwise. */
    success: boolean;

    /** Validated bottle metadata; set on success. */
    data?: BottleMetadata;

    /** Human-readable failure reason; set when `success` is `false`. */
    error?: string;

    /** Timing measurements taken with `performance.now()` (milliseconds). */
    perf?: {
        /** Duration of the Gemini `generateContent` call alone. */
        apiCall: number;
        /** Duration measured from function entry until the result is built. */
        total: number;
    };
}
|
||||
|
||||
/** MIME type assumed when the caller passes raw base64 without a data URL prefix. */
const DEFAULT_IMAGE_MIME_TYPE = 'image/webp';

/** Instruction text sent to Gemini alongside the label image. */
const LABEL_ANALYSIS_PROMPT = `Analyze this whisky bottle label image and extract all visible metadata.
Look carefully for:
- Brand/Distillery name
- Bottle name or expression
- Age statement (e.g., "12 Years Old")
- ABV/Alcohol percentage
- Vintage year (if shown)
- Cask type (e.g., Sherry, Bourbon cask)
- Bottler name (if independent bottling)
- Category (Single Malt, Blended Malt, Bourbon, etc.)

Be precise and only include information you can clearly read from the label.
If you cannot read something clearly, leave it null.`;

/** Raw base64 image bytes together with the MIME type to declare to Gemini. */
interface ImagePayload {
    data: string;
    mimeType: string;
}

/**
 * Split an image string into raw base64 data and its MIME type.
 *
 * Accepts either bare base64 (assumed to be `image/webp`) or a base64 data URL.
 * Data URLs may carry extra parameters (e.g. `;charset=utf-8`) and whitespace
 * inside the payload; both are tolerated.
 *
 * @returns The parsed payload, or `null` when the input is a data URL that is
 *          malformed, not base64 encoded, or has an empty payload.
 */
function parseImagePayload(image: string): ImagePayload | null {
    if (!image.startsWith('data:')) {
        return { data: image, mimeType: DEFAULT_IMAGE_MIME_TYPE };
    }

    const commaIndex = image.indexOf(',');
    if (commaIndex === -1) {
        return null;
    }

    // Header looks like "image/jpeg;base64" or "image/jpeg;charset=utf-8;base64".
    const header = image.slice('data:'.length, commaIndex);
    const [declaredType = '', ...params] = header.split(';');
    const isBase64 = params.some((param) => param.trim().toLowerCase() === 'base64');
    if (!isBase64) {
        return null;
    }

    // Drop line breaks/spaces some encoders insert into long base64 strings.
    const data = image.slice(commaIndex + 1).replace(/\s+/g, '');
    if (data.length === 0) {
        return null;
    }

    return { data, mimeType: declaredType.trim() || DEFAULT_IMAGE_MIME_TYPE };
}

/**
 * Analyze a whisky bottle label image using Gemini Vision.
 *
 * Flow: validate configuration and input, require an authenticated user with
 * enough credits, call Gemini with a structured-output schema, validate the
 * response with `BottleMetadataSchema`, then record API usage and deduct
 * credits. Errors are never thrown to the caller; they are logged, tracked as
 * a failed API usage when the user is known, and returned as
 * `{ success: false, error }`.
 *
 * @param imageBase64 - Base64 encoded image, either bare or as a base64 data URL
 * @returns GeminiVisionResult with extracted metadata and timing, or an error
 */
export async function analyzeLabelWithGemini(imageBase64: string): Promise<GeminiVisionResult> {
    const startTotal = performance.now();

    const apiKey = process.env.GEMINI_API_KEY;
    if (!apiKey) {
        return { success: false, error: 'GEMINI_API_KEY is not configured.' };
    }

    // Server actions receive untrusted input, so check the runtime type too.
    // Malformed data URLs are rejected here instead of being forwarded to the
    // paid API with the "data:" prefix still attached.
    const payload =
        typeof imageBase64 !== 'string' || imageBase64.length < 100
            ? null
            : parseImagePayload(imageBase64);
    if (!payload) {
        return { success: false, error: 'Invalid image data provided.' };
    }

    // Remembered so the failure path can attribute the error without a second auth lookup.
    let userId: string | undefined;

    try {
        // Auth check
        const supabase = await createClient();
        const { data: { user } } = await supabase.auth.getUser();

        if (!user) {
            return { success: false, error: 'Not authorized.' };
        }
        userId = user.id;

        // Credit check
        const creditCheck = await checkCreditBalance(user.id, 'gemini_ai');
        if (!creditCheck.allowed) {
            return {
                success: false,
                error: `Insufficient credits. Required: ${creditCheck.cost}, Available: ${creditCheck.balance}.`
            };
        }

        // Initialize Gemini
        const genAI = new GoogleGenerativeAI(apiKey);
        const model = genAI.getGenerativeModel({
            model: 'gemini-2.5-flash',
            generationConfig: {
                responseMimeType: "application/json",
                responseSchema: visionSchema as any,
                temperature: 0.1,
            },
            safetySettings: [
                { category: HarmCategory.HARM_CATEGORY_HARASSMENT, threshold: HarmBlockThreshold.BLOCK_NONE },
                { category: HarmCategory.HARM_CATEGORY_HATE_SPEECH, threshold: HarmBlockThreshold.BLOCK_NONE },
                { category: HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT, threshold: HarmBlockThreshold.BLOCK_NONE },
                { category: HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT, threshold: HarmBlockThreshold.BLOCK_NONE },
            ] as any,
        });

        // API call with timing
        const startApi = performance.now();
        const result = await model.generateContent([
            { inlineData: { data: payload.data, mimeType: payload.mimeType } },
            { text: LABEL_ANALYSIS_PROMPT },
        ]);
        const endApi = performance.now();

        // Parse response; kept as unknown until the Zod schema has validated it
        const jsonData: unknown = JSON.parse(result.response.text());
        const validatedData = BottleMetadataSchema.parse(jsonData);

        // Track usage and deduct credits
        await trackApiUsage({
            userId: user.id,
            apiType: 'gemini_ai',
            endpoint: 'analyzeLabelWithGemini',
            success: true
        });
        await deductCredits(user.id, 'gemini_ai', 'Vision label analysis');

        return {
            success: true,
            data: validatedData,
            perf: {
                apiCall: endApi - startApi,
                total: performance.now() - startTotal,
            }
        };
    } catch (error: unknown) {
        console.error('[GeminiVision] Analysis failed:', error);

        const errorMessage = error instanceof Error ? error.message : String(error);

        // Best-effort failure tracking; a tracking error must not mask the original one.
        if (userId) {
            try {
                await trackApiUsage({
                    userId,
                    apiType: 'gemini_ai',
                    endpoint: 'analyzeLabelWithGemini',
                    success: false,
                    errorMessage
                });
            } catch (trackError) {
                console.warn('[GeminiVision] Failed to track error:', trackError);
            }
        }

        return {
            success: false,
            error: (error instanceof Error && error.message) || 'Vision analysis failed.'
        };
    }
}
|
||||
Reference in New Issue
Block a user