feat: Add Fuse.js distillery name normalization
New: src/lib/distillery-matcher.ts
- normalizeDistillery(): Fuzzy-matches AI responses against distilleries.json
- cleanBottleName(): Removes the distillery from the bottle name to avoid duplication
- normalizeWhiskyData(): Combined helper for both operations

Example transformations:
- 'ARDNAHOE DISTILLERY CO LTD' → 'Ardnahoe'
- 'Laphroaig 10 Year Old' → '10 Year Old' (with the distillery in a separate field)

Integration:
- gemini-vision.ts now normalizes results after the AI response
- Enables consistent distillery names for the enrichment cache
This commit is contained in:
@@ -6,6 +6,7 @@ import { createClient } from '@/lib/supabase/server';
|
||||
import { trackApiUsage } from '@/services/track-api-usage';
|
||||
import { checkCreditBalance, deductCredits } from '@/services/credit-service';
|
||||
import { getAIProvider, getOpenRouterClient, OPENROUTER_VISION_MODEL, OPENROUTER_PROVIDER_PREFERENCES } from '@/lib/openrouter';
|
||||
import { normalizeWhiskyData } from '@/lib/distillery-matcher';
|
||||
|
||||
// Schema for Gemini Vision extraction
|
||||
const visionSchema = {
|
||||
@@ -253,6 +254,23 @@ export async function analyzeLabelWithGemini(imageBase64: string): Promise<Gemin
|
||||
// Validate with Zod schema
|
||||
const validatedData = BottleMetadataSchema.parse(result.data);
|
||||
|
||||
// ========================================
|
||||
// NORMALIZE DISTILLERY NAME
|
||||
// ========================================
|
||||
const normalized = normalizeWhiskyData(
|
||||
validatedData.name || '',
|
||||
validatedData.distillery || ''
|
||||
);
|
||||
|
||||
// Apply normalized values
|
||||
const finalData = {
|
||||
...validatedData,
|
||||
name: normalized.name || validatedData.name,
|
||||
distillery: normalized.distillery || validatedData.distillery,
|
||||
};
|
||||
|
||||
console.log(`[Vision] Normalized: distillery="${normalized.distillery}", name="${normalized.name}"`);
|
||||
|
||||
// Track usage and deduct credits
|
||||
await trackApiUsage({
|
||||
userId: user.id,
|
||||
@@ -264,7 +282,7 @@ export async function analyzeLabelWithGemini(imageBase64: string): Promise<Gemin
|
||||
|
||||
return {
|
||||
success: true,
|
||||
data: validatedData,
|
||||
data: finalData,
|
||||
provider,
|
||||
perf: {
|
||||
apiCall: result.apiTime,
|
||||
|
||||
168
src/lib/distillery-matcher.ts
Normal file
168
src/lib/distillery-matcher.ts
Normal file
@@ -0,0 +1,168 @@
|
||||
/**
|
||||
* Distillery Name Normalizer
|
||||
* Uses Fuse.js fuzzy matching to normalize AI-detected distillery names
|
||||
* against the known distilleries database.
|
||||
*/
|
||||
|
||||
import Fuse from 'fuse.js';
|
||||
import distilleries from '@/data/distilleries.json';
|
||||
|
||||
/**
 * Shape of one entry in the known-distilleries list loaded from
 * `@/data/distilleries.json`.
 */
interface Distillery {
  /** Canonical distillery name; this is the field Fuse.js searches on. */
  name: string;
  /** Region value stored alongside the name in the data file. */
  region: string;
}
|
||||
|
||||
/**
 * Words and phrases stripped from a raw distillery name before matching.
 * Entries are lower-case and may span several words (e.g. 'isle of').
 * Declared readonly so the shared module-level list cannot be mutated by accident.
 */
const STOPWORDS: readonly string[] = [
  // Generic producer words
  'distillery', 'distillers', 'distilleries',
  // Corporate suffixes
  'ltd', 'limited', 'co', 'company', 'inc',
  // Geographic filler and articles
  'isle of', 'island', 'the',
  // Product descriptors
  'single malt', 'scotch', 'whisky', 'whiskey',
  // Region names
  'highland', 'lowland', 'speyside', 'islay', 'campbeltown',
];
|
||||
|
||||
// Search settings for the distillery index.
const fuseOptions = {
  // Only the distillery name is searched.
  keys: ['name'],
  // Fuzzy tolerance (0 = exact, 1 = match anything)
  threshold: 0.4,
  // Expose the match score so callers can apply their own cut-off.
  includeScore: true,
  // Position of the match inside the name does not affect the score.
  ignoreLocation: true,
  minMatchCharLength: 3,
};

// Shared Fuse.js index over the known distilleries, built once at module load.
const fuse = new Fuse<Distillery>(distilleries as Distillery[], fuseOptions);
|
||||
|
||||
/**
 * Preprocess a raw distillery name for better matching.
 *
 * The name is lower-cased, periods and commas are treated as spaces (so
 * "CO. LTD." reduces to plain stopwords), whitespace runs are collapsed, and
 * every entry of STOPWORDS is removed as a whole word or phrase.
 *
 * @param raw - The raw distillery name as returned by the AI
 * @returns The stripped, lower-case name. If stripping would leave nothing
 *          (the name consisted only of stopwords, e.g. "Speyside Distillery"),
 *          the lower-cased, whitespace-normalized name is returned instead so
 *          the caller still has something to match on.
 */
function preprocessName(raw: string): string {
  // Normalize punctuation and spacing first so multi-word stopwords such as
  // 'isle of' still match when the input contains irregular whitespace.
  const base = raw
    .toLowerCase()
    .replace(/[.,]/g, ' ')
    .replace(/\s+/g, ' ')
    .trim();

  let stripped = base;
  for (const word of STOPWORDS) {
    // Escape the entry so a future stopword containing regex syntax stays literal.
    const pattern = word.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    stripped = stripped.replace(new RegExp(`\\b${pattern}\\b`, 'gi'), ' ');
  }

  // Removing words leaves gaps behind; collapse them again.
  stripped = stripped.replace(/\s+/g, ' ').trim();

  // Never hand back an empty query when the input itself was not empty.
  return stripped || base;
}
|
||||
|
||||
/**
 * Normalize a raw distillery name to a canonical form.
 *
 * Resolution order:
 *  1. Case-insensitive exact match of either the preprocessed or the trimmed
 *     raw name against the known distilleries (score 0).
 *  2. Best Fuse.js fuzzy match, accepted only when its score is below 0.5.
 *  3. No match: the raw name is returned with each word capitalized.
 *
 * @param rawName - The raw distillery name from AI (e.g., "ARDNAHOE DISTILLERY CO LTD")
 * @returns Normalized distillery info: `name`, `region` (null when unmatched),
 *          `matched`, and `score` (0 = exact, 1 = no match or empty input)
 */
export function normalizeDistillery(rawName: string): {
  name: string;
  region: string | null;
  matched: boolean;
  score: number;
} {
  if (!rawName?.trim()) {
    return { name: '', region: null, matched: false, score: 1 };
  }

  const preprocessed = preprocessName(rawName);
  // Computed once instead of once per candidate inside the lookup below.
  const rawLower = rawName.toLowerCase().trim();

  // Try exact match first (case-insensitive)
  const exactMatch = (distilleries as Distillery[]).find(d => {
    const candidate = d.name.toLowerCase();
    return candidate === preprocessed || candidate === rawLower;
  });

  if (exactMatch) {
    console.log(`[DistilleryMatcher] Exact match: "${rawName}" → "${exactMatch.name}"`);
    return {
      name: exactMatch.name,
      region: exactMatch.region,
      matched: true,
      score: 0
    };
  }

  // Fuzzy match with Fuse.js. An empty query (name made only of stopwords)
  // is not sent to the index. The best hit is read once, so the code stays
  // sound under noUncheckedIndexedAccess.
  const best = preprocessed ? fuse.search(preprocessed)[0] : undefined;

  if (best?.score !== undefined && best.score < 0.5) {
    const match = best.item;
    console.log(`[DistilleryMatcher] Fuzzy match: "${rawName}" → "${match.name}" (score: ${best.score.toFixed(3)})`);
    return {
      name: match.name,
      region: match.region,
      matched: true,
      score: best.score
    };
  }

  // No match - return cleaned up original
  console.log(`[DistilleryMatcher] No match for: "${rawName}"`);

  // At least capitalize properly. Splitting on whitespace runs also collapses
  // repeated spaces inside the name.
  const capitalized = rawName.trim()
    .split(/\s+/)
    .map(word => word.charAt(0).toUpperCase() + word.slice(1).toLowerCase())
    .join(' ');

  return {
    name: capitalized,
    region: null,
    matched: false,
    score: 1
  };
}
|
||||
|
||||
/**
 * Remove distillery name from bottle name to avoid duplication.
 *
 * The distillery is removed only where it appears as a whole word or phrase:
 * first as a prefix (optionally followed by a dash or colon separator), and,
 * when no prefix was found and the bottle name is more than five characters
 * longer than the distillery, at its first occurrence anywhere in the name.
 * Matching is case-insensitive. Both inputs are trimmed before matching.
 *
 * @param bottleName - The full bottle name (e.g., "Laphroaig 10 Year Old")
 * @param distillery - The distillery name (e.g., "Laphroaig")
 * @returns Cleaned bottle name without distillery (e.g., "10 Year Old"). The
 *          original `bottleName` is returned when either input is blank or
 *          when removing the distillery would leave nothing.
 */
export function cleanBottleName(bottleName: string, distillery: string): string {
  const name = bottleName?.trim();
  const maker = distillery?.trim();
  if (!name || !maker) {
    return bottleName || '';
  }

  const escaped = maker.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  // Unicode-aware word boundaries: without them "Glen" would be cut out of
  // "Glenfiddich". Lookarounds are used instead of \b so that accented letters
  // and distillery names ending in punctuation are handled as well.
  const notPrecededByWord = '(?<![\\p{L}\\p{N}])';
  const notFollowedByWord = '(?![\\p{L}\\p{N}])';
  const separator = '\\s*[-–—:]?\\s*';

  // Match distillery at start of name (case-insensitive)
  const prefixRegex = new RegExp(`^${escaped}${notFollowedByWord}${separator}`, 'iu');
  let cleaned = name.replace(prefixRegex, '').trim();

  // Also try matching distillery anywhere if it appears redundantly,
  // but only if the name is significantly longer than the distillery.
  // The comparison uses the trimmed name so padded input still gets here.
  if (cleaned === name && name.length > maker.length + 5) {
    const anywhereRegex = new RegExp(
      `${notPrecededByWord}${escaped}${notFollowedByWord}${separator}`,
      'iu'
    );
    cleaned = name.replace(anywhereRegex, '').trim();
  }

  // Clean up any resulting leading/trailing punctuation
  cleaned = cleaned.replace(/^[-–—:\s]+/, '').replace(/[-–—:\s]+$/, '').trim();

  // If we removed everything, return original
  if (!cleaned) {
    return bottleName;
  }

  console.log(`[DistilleryMatcher] Cleaned name: "${bottleName}" → "${cleaned}"`);
  return cleaned;
}
|
||||
|
||||
/**
 * Run distillery normalization and bottle-name cleaning in a single step.
 *
 * The distillery is normalized first; the resulting canonical name is then
 * the one removed from the bottle name.
 *
 * @param rawName - The bottle name as returned by the AI
 * @param rawDistillery - The distillery name as returned by the AI
 * @returns The cleaned bottle name, the normalized distillery name, its region
 *          (null when unmatched), and whether the distillery was matched
 */
export function normalizeWhiskyData(rawName: string, rawDistillery: string): {
  name: string;
  distillery: string;
  region: string | null;
  distilleryMatched: boolean;
} {
  const {
    name: distillery,
    region,
    matched: distilleryMatched,
  } = normalizeDistillery(rawDistillery);

  const name = cleanBottleName(rawName, distillery);

  return { name, distillery, region, distilleryMatched };
}
|
||||
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user