feat: Add Fuse.js distillery name normalization

New: src/lib/distillery-matcher.ts
- normalizeDistillery(): Fuzzy matches AI responses against distilleries.json
- cleanBottleName(): Removes distillery from bottle name to avoid duplication
- normalizeWhiskyData(): Combined helper for both operations

Example transformations:
- 'ARDNAHOE DISTILLERY CO LTD' → 'Ardnahoe'
- 'Laphroaig 10 Year Old' → '10 Year Old' (with distillery in separate field)

Integration:
- gemini-vision.ts now normalizes results after AI response
- Enables consistent distillery names for enrichment cache
This commit is contained in:
2025-12-26 22:20:31 +01:00
parent daf6c86633
commit 883b2b61b4
3 changed files with 188 additions and 2 deletions

View File

@@ -6,6 +6,7 @@ import { createClient } from '@/lib/supabase/server';
import { trackApiUsage } from '@/services/track-api-usage';
import { checkCreditBalance, deductCredits } from '@/services/credit-service';
import { getAIProvider, getOpenRouterClient, OPENROUTER_VISION_MODEL, OPENROUTER_PROVIDER_PREFERENCES } from '@/lib/openrouter';
import { normalizeWhiskyData } from '@/lib/distillery-matcher';
// Schema for Gemini Vision extraction
const visionSchema = {
@@ -253,6 +254,23 @@ export async function analyzeLabelWithGemini(imageBase64: string): Promise<Gemin
// Validate with Zod schema
const validatedData = BottleMetadataSchema.parse(result.data);
// ========================================
// NORMALIZE DISTILLERY NAME
// ========================================
const normalized = normalizeWhiskyData(
validatedData.name || '',
validatedData.distillery || ''
);
// Apply normalized values
const finalData = {
...validatedData,
name: normalized.name || validatedData.name,
distillery: normalized.distillery || validatedData.distillery,
};
console.log(`[Vision] Normalized: distillery="${normalized.distillery}", name="${normalized.name}"`);
// Track usage and deduct credits
await trackApiUsage({
userId: user.id,
@@ -264,7 +282,7 @@ export async function analyzeLabelWithGemini(imageBase64: string): Promise<Gemin
return {
success: true,
data: validatedData,
data: finalData,
provider,
perf: {
apiCall: result.apiTime,

View File

@@ -0,0 +1,168 @@
/**
* Distillery Name Normalizer
* Uses Fuse.js fuzzy matching to normalize AI-detected distillery names
* against the known distilleries database.
*/
import Fuse from 'fuse.js';
import distilleries from '@/data/distilleries.json';
/** Shape this module expects for each record loaded from the distilleries dataset. */
type Distillery = {
  /** Canonical distillery name; the field that is searched and returned on a match. */
  name: string;
  /** Region string handed back alongside a matched name. */
  region: string;
};
// Generic words and phrases stripped from a raw name before matching.
// Entry order is kept as-is because the words are removed one after another.
const STOPWORDS: string[] = [
  // Producer words
  'distillery',
  'distillers',
  'distilleries',
  // Company suffixes
  'ltd',
  'limited',
  'co',
  'company',
  'inc',
  // Place fillers and articles
  'isle of',
  'island',
  'the',
  // Product category words
  'single malt',
  'scotch',
  'whisky',
  'whiskey',
  // Region names
  'highland',
  'lowland',
  'speyside',
  'islay',
  'campbeltown',
];
// Shared Fuse.js index over the known distilleries, created once when the module loads.
const fuse = new Fuse<Distillery>(distilleries as Distillery[], {
  // Only the distillery name is searched.
  keys: ['name'],
  // Attach a score to every hit so callers can judge match quality.
  includeScore: true,
  // Fuzzy tolerance (0 = exact, 1 = match anything)
  threshold: 0.4,
  // Do not take the position of the match inside the name into account.
  ignoreLocation: true,
  // Disregard matched runs shorter than three characters.
  minMatchCharLength: 3,
});
/**
 * Reduce a raw distillery string to its distinctive words before matching.
 *
 * The input is lower-cased and trimmed, every STOPWORDS entry that appears as
 * a whole word is replaced by a space, and whitespace runs are then collapsed.
 *
 * @param raw - Distillery name as received from the caller
 * @returns The lower-cased name without stopwords; may be empty
 */
function preprocessName(raw: string): string {
  const lowered = raw.toLowerCase().trim();

  // Strip the stopwords one after another, in list order.
  const withoutStopwords = STOPWORDS.reduce(
    (text, stopword) => text.replace(new RegExp(`\\b${stopword}\\b`, 'gi'), ' '),
    lowered,
  );

  // Collapse the gaps left behind by the removals.
  return withoutStopwords.replace(/\s+/g, ' ').trim();
}
/**
 * Map a raw distillery name onto its canonical entry in the distilleries list.
 *
 * Lookup order:
 * 1. case-insensitive equality against either the preprocessed or the raw name,
 * 2. the best Fuse.js hit, accepted only when its score is below 0.5,
 * 3. otherwise the raw name with each space-separated word capitalised.
 *
 * @param rawName - The raw distillery name from AI (e.g., "ARDNAHOE DISTILLERY CO LTD")
 * @returns The resolved name and region (`null` when unmatched), whether a known
 *          distillery was found, and a score (0 for exact, 1 for no match)
 */
export function normalizeDistillery(rawName: string): {
  name: string;
  region: string | null;
  matched: boolean;
  score: number;
} {
  // Blank or missing input: nothing to normalise.
  if (!rawName?.trim()) {
    return { name: '', region: null, matched: false, score: 1 };
  }

  const preprocessed = preprocessName(rawName);
  const rawLower = rawName.toLowerCase().trim();
  const known = distilleries as Distillery[];

  // Step 1: exact, case-insensitive comparison.
  const exact = known.find(candidate => {
    const candidateLower = candidate.name.toLowerCase();
    return candidateLower === preprocessed || candidateLower === rawLower;
  });
  if (exact) {
    console.log(`[DistilleryMatcher] Exact match: "${rawName}" → "${exact.name}"`);
    return { name: exact.name, region: exact.region, matched: true, score: 0 };
  }

  // Step 2: fuzzy search; only the top-ranked hit is considered.
  const best = fuse.search(preprocessed)[0];
  const bestScore = best?.score;
  if (best !== undefined && bestScore !== undefined && bestScore < 0.5) {
    const hit = best.item;
    console.log(`[DistilleryMatcher] Fuzzy match: "${rawName}" → "${hit.name}" (score: ${bestScore.toFixed(3)})`);
    return { name: hit.name, region: hit.region, matched: true, score: bestScore };
  }

  // Step 3: unknown distillery, so return the input in tidied capitalisation.
  console.log(`[DistilleryMatcher] No match for: "${rawName}"`);
  const capitalized = rawName
    .trim()
    .split(' ')
    .map(word => `${word.charAt(0).toUpperCase()}${word.slice(1).toLowerCase()}`)
    .join(' ');

  return { name: capitalized, region: null, matched: false, score: 1 };
}
/**
 * Remove the distillery name from a bottle name to avoid duplication.
 *
 * The distillery is first looked for at the start of the (trimmed) bottle name,
 * optionally followed by a dash or colon separator. If it is not a prefix and
 * the bottle name is more than five characters longer than the distillery, the
 * first whole-word occurrence anywhere in the name is removed instead.
 * Matching is case-insensitive and only ever removes whole words, so a
 * distillery of "Glen" does not truncate "Glenfiddich 12".
 *
 * @param bottleName - The full bottle name (e.g., "Laphroaig 10 Year Old")
 * @param distillery - The distillery name (e.g., "Laphroaig")
 * @returns The bottle name without the distillery (e.g., "10 Year Old"). The
 *          original `bottleName` is returned when either input is blank or
 *          when stripping would leave nothing.
 */
export function cleanBottleName(bottleName: string, distillery: string): string {
  const fullName = bottleName?.trim();
  const distilleryName = distillery?.trim();
  if (!fullName || !distilleryName) {
    return bottleName || '';
  }

  // Escape regex metacharacters so the distillery is matched literally.
  const escaped = distilleryName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  // Optional separator between the distillery and the rest of the name.
  const separator = '\\s*[-–—:]?\\s*';

  // Prefix match. The lookahead rejects a letter directly after the distillery
  // so that only a complete word is stripped. The `u` flag enables \p{L}.
  const prefixRegex = new RegExp(`^${escaped}(?!\\p{L})${separator}`, 'iu');
  let cleaned = fullName.replace(prefixRegex, '').trim();

  // Fallback: remove the distillery anywhere if it appears redundantly, but
  // only if the name is significantly longer than the distillery. Both sides
  // of the comparison are trimmed, so surrounding whitespace in the input no
  // longer suppresses this pass.
  if (cleaned === fullName && fullName.length > distilleryName.length + 5) {
    // Unicode-aware word boundaries; `\b` only understands ASCII word characters.
    const anywhereRegex = new RegExp(
      `(?<![\\p{L}\\p{N}])${escaped}(?![\\p{L}\\p{N}])${separator}`,
      'iu',
    );
    cleaned = fullName.replace(anywhereRegex, '').trim();
  }

  // Clean up any resulting leading/trailing punctuation.
  cleaned = cleaned.replace(/^[-–—:\s]+/, '').replace(/[-–—:\s]+$/, '').trim();

  // If we removed everything, return the original.
  if (!cleaned) {
    return bottleName;
  }

  // Only report names that were actually altered.
  if (cleaned !== bottleName) {
    console.log(`[DistilleryMatcher] Cleaned name: "${bottleName}" → "${cleaned}"`);
  }
  return cleaned;
}
/**
 * Convenience wrapper: normalise the distillery, then strip that normalised
 * distillery name out of the bottle name.
 *
 * @param rawName - Bottle name as extracted
 * @param rawDistillery - Distillery name as extracted
 * @returns The cleaned bottle name, the normalised distillery and its region
 *          (`null` when unknown), and whether a known distillery was matched
 */
export function normalizeWhiskyData(rawName: string, rawDistillery: string): {
  name: string;
  distillery: string;
  region: string | null;
  distilleryMatched: boolean;
} {
  const {
    name: distillery,
    region,
    matched: distilleryMatched,
  } = normalizeDistillery(rawDistillery);

  return {
    name: cleanBottleName(rawName, distillery),
    distillery,
    region,
    distilleryMatched,
  };
}

File diff suppressed because one or more lines are too long