feat: Add Fuse.js distillery name normalization
New: src/lib/distillery-matcher.ts
- normalizeDistillery(): Fuzzy-matches AI responses against distilleries.json
- cleanBottleName(): Removes the distillery from the bottle name to avoid duplication
- normalizeWhiskyData(): Combined helper for both operations

Example transformations:
- 'ARDNAHOE DISTILLERY CO LTD' → 'Ardnahoe'
- 'Laphroaig 10 Year Old' → '10 Year Old' (with the distillery in a separate field)

Integration:
- gemini-vision.ts now normalizes results after the AI response
- Enables consistent distillery names for the enrichment cache
This commit is contained in:
@@ -6,6 +6,7 @@ import { createClient } from '@/lib/supabase/server';
|
|||||||
import { trackApiUsage } from '@/services/track-api-usage';
|
import { trackApiUsage } from '@/services/track-api-usage';
|
||||||
import { checkCreditBalance, deductCredits } from '@/services/credit-service';
|
import { checkCreditBalance, deductCredits } from '@/services/credit-service';
|
||||||
import { getAIProvider, getOpenRouterClient, OPENROUTER_VISION_MODEL, OPENROUTER_PROVIDER_PREFERENCES } from '@/lib/openrouter';
|
import { getAIProvider, getOpenRouterClient, OPENROUTER_VISION_MODEL, OPENROUTER_PROVIDER_PREFERENCES } from '@/lib/openrouter';
|
||||||
|
import { normalizeWhiskyData } from '@/lib/distillery-matcher';
|
||||||
|
|
||||||
// Schema for Gemini Vision extraction
|
// Schema for Gemini Vision extraction
|
||||||
const visionSchema = {
|
const visionSchema = {
|
||||||
@@ -253,6 +254,23 @@ export async function analyzeLabelWithGemini(imageBase64: string): Promise<Gemin
|
|||||||
// Validate with Zod schema
|
// Validate with Zod schema
|
||||||
const validatedData = BottleMetadataSchema.parse(result.data);
|
const validatedData = BottleMetadataSchema.parse(result.data);
|
||||||
|
|
||||||
|
// ========================================
|
||||||
|
// NORMALIZE DISTILLERY NAME
|
||||||
|
// ========================================
|
||||||
|
const normalized = normalizeWhiskyData(
|
||||||
|
validatedData.name || '',
|
||||||
|
validatedData.distillery || ''
|
||||||
|
);
|
||||||
|
|
||||||
|
// Apply normalized values
|
||||||
|
const finalData = {
|
||||||
|
...validatedData,
|
||||||
|
name: normalized.name || validatedData.name,
|
||||||
|
distillery: normalized.distillery || validatedData.distillery,
|
||||||
|
};
|
||||||
|
|
||||||
|
console.log(`[Vision] Normalized: distillery="${normalized.distillery}", name="${normalized.name}"`);
|
||||||
|
|
||||||
// Track usage and deduct credits
|
// Track usage and deduct credits
|
||||||
await trackApiUsage({
|
await trackApiUsage({
|
||||||
userId: user.id,
|
userId: user.id,
|
||||||
@@ -264,7 +282,7 @@ export async function analyzeLabelWithGemini(imageBase64: string): Promise<Gemin
|
|||||||
|
|
||||||
return {
|
return {
|
||||||
success: true,
|
success: true,
|
||||||
data: validatedData,
|
data: finalData,
|
||||||
provider,
|
provider,
|
||||||
perf: {
|
perf: {
|
||||||
apiCall: result.apiTime,
|
apiCall: result.apiTime,
|
||||||
|
|||||||
168
src/lib/distillery-matcher.ts
Normal file
168
src/lib/distillery-matcher.ts
Normal file
@@ -0,0 +1,168 @@
|
|||||||
|
/**
|
||||||
|
* Distillery Name Normalizer
|
||||||
|
* Uses Fuse.js fuzzy matching to normalize AI-detected distillery names
|
||||||
|
* against the known distilleries database.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import Fuse from 'fuse.js';
|
||||||
|
import distilleries from '@/data/distilleries.json';
|
||||||
|
|
||||||
|
/**
 * Shape of one entry in the known-distilleries dataset as used by this module.
 */
interface Distillery {
  /** Canonical distillery name; this is the field the fuzzy index is keyed on. */
  name: string;
  /** Region recorded for the distillery; handed back alongside a successful match. */
  region: string;
}
|
||||||
|
|
||||||
|
// Noise terms stripped (as whole words/phrases) from a raw name before matching.
// Order is preserved from the original list because the terms are removed sequentially.
const STOPWORDS = [
  // Producer words
  'distillery',
  'distillers',
  'distilleries',
  // Company / legal suffixes
  'ltd',
  'limited',
  'co',
  'company',
  'inc',
  // Geographic filler and articles
  'isle of',
  'island',
  'the',
  // Product descriptors
  'single malt',
  'scotch',
  'whisky',
  'whiskey',
  // Region names
  'highland',
  'lowland',
  'speyside',
  'islay',
  'campbeltown',
];
|
||||||
|
|
||||||
|
// Shared Fuse.js index over the known distilleries, constructed once when the module loads.
const fuse = new Fuse<Distillery>(distilleries as Distillery[], {
  // Only the distillery name takes part in the search.
  keys: ['name'],
  // Ignore patterns/matches shorter than three characters.
  minMatchCharLength: 3,
  // Do not penalise a match for where in the name it occurs.
  ignoreLocation: true,
  // Attach the match score to each result so callers can apply their own cut-off.
  includeScore: true,
  // Fuzzy tolerance (0 = exact, 1 = match anything)
  threshold: 0.4,
});
|
||||||
|
|
||||||
|
/**
 * Reduce a raw distillery name to its distinctive part before matching.
 *
 * The input is lower-cased and trimmed, every STOPWORDS entry is replaced
 * (as a whole word/phrase) by a space, and runs of whitespace are collapsed.
 *
 * @param raw - Name as received from the caller.
 * @returns The lower-cased, stopword-free, single-spaced name (may be empty).
 */
function preprocessName(raw: string): string {
  const lowered = raw.toLowerCase().trim();

  // Strip each stopword in list order; a space is left behind so neighbours do not fuse.
  const withoutStopwords = STOPWORDS.reduce(
    (text, stopword) => text.replace(new RegExp(`\\b${stopword}\\b`, 'gi'), ' '),
    lowered,
  );

  // Collapse the gaps left by the removals.
  return withoutStopwords.replace(/\s+/g, ' ').trim();
}
|
||||||
|
|
||||||
|
/**
 * Map a raw distillery name onto its canonical entry in the distilleries dataset.
 *
 * Resolution order:
 *  1. blank input → empty, unmatched result;
 *  2. case-insensitive exact match on either the preprocessed or the raw name;
 *  3. best Fuse.js fuzzy hit whose score is below 0.5;
 *  4. otherwise the raw name with each space-separated word capitalised.
 *
 * @param rawName - The raw distillery name from AI (e.g., "ARDNAHOE DISTILLERY CO LTD")
 * @returns The resolved name, its region (null when unmatched), whether a dataset
 *          entry was found, and the match score (0 for exact, 1 for no match).
 */
export function normalizeDistillery(rawName: string): {
  name: string;
  region: string | null;
  matched: boolean;
  score: number;
} {
  // Nothing usable was supplied.
  if (!rawName?.trim()) {
    return { name: '', region: null, matched: false, score: 1 };
  }

  const preprocessed = preprocessName(rawName);
  const rawLowered = rawName.toLowerCase().trim();

  // Step 1: exact, case-insensitive comparison against both forms of the input.
  const exactMatch = (distilleries as Distillery[]).find((candidate) => {
    const candidateName = candidate.name.toLowerCase();
    return candidateName === preprocessed || candidateName === rawLowered;
  });

  if (exactMatch) {
    console.log(`[DistilleryMatcher] Exact match: "${rawName}" → "${exactMatch.name}"`);
    return { name: exactMatch.name, region: exactMatch.region, matched: true, score: 0 };
  }

  // Step 2: fuzzy lookup; only the top-ranked hit is considered.
  const [best] = fuse.search(preprocessed);

  if (best !== undefined && best.score !== undefined && best.score < 0.5) {
    console.log(`[DistilleryMatcher] Fuzzy match: "${rawName}" → "${best.item.name}" (score: ${best.score.toFixed(3)})`);
    return {
      name: best.item.name,
      region: best.item.region,
      matched: true,
      score: best.score,
    };
  }

  // Step 3: no dataset entry — fall back to a tidied version of the input.
  console.log(`[DistilleryMatcher] No match for: "${rawName}"`);

  const titleCased: string[] = [];
  for (const word of rawName.trim().split(' ')) {
    titleCased.push(word.charAt(0).toUpperCase() + word.slice(1).toLowerCase());
  }

  return { name: titleCased.join(' '), region: null, matched: false, score: 1 };
}
|
||||||
|
|
||||||
|
/**
 * Remove the distillery name from a bottle name to avoid duplication.
 *
 * The distillery is first looked for at the start of the name; if it is not
 * there and the name is clearly longer than the distillery, its first
 * occurrence anywhere in the name is removed instead. An optional separator
 * (`-`, `–`, `—`, `:`) following the distillery is removed with it.
 *
 * The distillery is only removed when it appears as a whole word: it must not
 * be followed (or, in the "anywhere" case, preceded) by a letter or digit, and
 * must not be followed by an apostrophe. This keeps "Glen" from mangling
 * "Glenfiddich 12" and "Laphroaig" from turning "Laphroaig's Finest" into
 * "'s Finest".
 *
 * @param bottleName - The full bottle name (e.g., "Laphroaig 10 Year Old")
 * @param distillery - The distillery name (e.g., "Laphroaig")
 * @returns Cleaned bottle name without distillery (e.g., "10 Year Old"). If
 *          either argument is blank the bottle name is returned as given
 *          (or '' when missing); if removal would leave nothing, the original
 *          bottle name is returned.
 */
export function cleanBottleName(bottleName: string, distillery: string): string {
  if (!bottleName?.trim() || !distillery?.trim()) {
    return bottleName || '';
  }

  // Work on trimmed copies so surrounding whitespace cannot defeat the
  // start-anchored match or the "nothing was removed" comparison below.
  const trimmedName = bottleName.trim();
  const trimmedDistillery = distillery.trim();

  // Escape regex metacharacters so the distillery is matched literally.
  const escaped = trimmedDistillery.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

  // Unicode-aware word boundaries (plain \b misbehaves around accented letters
  // and around names that start or end with punctuation).
  const notPrecededByWordChar = '(?<![\\p{L}\\p{N}])';
  const notFollowedByWordChar = "(?![\\p{L}\\p{N}'’])";
  const optionalSeparator = '\\s*[-–—:]?\\s*';

  // Preferred case: distillery at the very start of the name.
  const leadingRegex = new RegExp(`^${escaped}${notFollowedByWordChar}${optionalSeparator}`, 'iu');
  let cleaned = trimmedName.replace(leadingRegex, '');

  // Also try matching the distillery anywhere if it appears redundantly,
  // but only if the name is significantly longer than the distillery.
  if (cleaned === trimmedName && trimmedName.length > trimmedDistillery.length + 5) {
    const anywhereRegex = new RegExp(
      `${notPrecededByWordChar}${escaped}${notFollowedByWordChar}${optionalSeparator}`,
      'iu',
    );
    cleaned = trimmedName.replace(anywhereRegex, '');
  }

  // Clean up any resulting leading/trailing punctuation.
  cleaned = cleaned.replace(/^[-–—:\s]+/, '').replace(/[-–—:\s]+$/, '');

  // If we removed everything, return the original.
  if (!cleaned) {
    return bottleName;
  }

  // Only report when the name actually changed, to keep the log meaningful.
  if (cleaned !== trimmedName) {
    console.log(`[DistilleryMatcher] Cleaned name: "${bottleName}" → "${cleaned}"`);
  }
  return cleaned;
}
|
||||||
|
|
||||||
|
/**
 * One-call helper: normalize the distillery, then strip that normalized
 * distillery name out of the bottle name.
 *
 * @param rawName - Bottle name as received.
 * @param rawDistillery - Distillery name as received.
 * @returns The cleaned bottle name, the normalized distillery name, the
 *          distillery's region (null when unmatched) and whether the
 *          distillery was found in the dataset.
 */
export function normalizeWhiskyData(rawName: string, rawDistillery: string): {
  name: string;
  distillery: string;
  region: string | null;
  distilleryMatched: boolean;
} {
  const {
    name: distillery,
    region,
    matched: distilleryMatched,
  } = normalizeDistillery(rawDistillery);

  return {
    name: cleanBottleName(rawName, distillery),
    distillery,
    region,
    distilleryMatched,
  };
}
|
||||||
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user