/**
 * Scanner Utilities
 * Cache checking and helper functions for client-side OCR
 */

/**
 * Check if Tesseract.js is ready to run.
 *
 * When online, tesseract will auto-download from the CDN, so this returns true.
 * When offline, the Cache API is checked for the core WASM and the English
 * language data.
 *
 * @returns true if OCR can run; false outside a browser, when offline without
 *          the Cache API, when a required file is not cached, or when the
 *          cache lookup itself fails
 */
export async function isTesseractReady(): Promise<boolean> {
  if (typeof window === 'undefined') {
    return false;
  }

  // If online, tesseract.js will auto-download what it needs
  if (navigator.onLine) {
    console.log('[Scanner] Online - tesseract will use CDN');
    return true;
  }

  // If offline, check cache
  if (!('caches' in window)) {
    console.log('[Scanner] Offline + no cache API - tesseract not ready');
    return false;
  }

  try {
    // Check for the core files in cache (matching actual file names in /public/tessdata).
    // The two lookups are independent, so run them concurrently.
    const [wasmMatch, langMatch] = await Promise.all([
      window.caches.match('/tessdata/tesseract-core-simd.wasm'),
      window.caches.match('/tessdata/eng.traineddata'),
    ]);
    const ready = !!(wasmMatch && langMatch);
    console.log('[Scanner] Offline cache check:', {
      wasmMatch: !!wasmMatch,
      langMatch: !!langMatch,
      ready,
    });
    return ready;
  } catch (error) {
    console.warn('[Scanner] Cache check failed:', error);
    return false;
  }
}

/**
 * Numeric values extracted from OCR text.
 * Each field is null when no plausible value was found.
 */
export interface ExtractedNumbers {
  abv: number | null;
  age: number | null;
  vintage: string | null;
}

/**
 * Extract numeric values (ABV, age statement, vintage year) from OCR text
 * using regex patterns.
 *
 * @param text - Raw OCR output; an empty string yields all-null fields
 * @returns The first in-range value found for each field, or null
 */
export function extractNumbers(text: string): ExtractedNumbers {
  const result: ExtractedNumbers = { abv: null, age: null, vintage: null };
  if (!text) return result;

  // OCR-corrected variant: digits are often misread as letters (O -> 0, l/I -> 1).
  const ocrCorrectedText = text
    .replace(/[oO]/g, '0') // Common OCR mistake: O -> 0
    .replace(/[lI]/g, '1') // Common OCR mistake: l/I -> 1
    .toLowerCase();

  // The correction above also rewrites letters inside keywords ("vol" becomes
  // "v01"), so keyword-based patterns can only match the uncorrected text.
  // Try the corrected text first, then fall back to the plain lower-cased text.
  const abvCandidates = [ocrCorrectedText, text.toLowerCase()];

  // ABV patterns: "43%", "43.5%", "43,5 %", "ABV 43", "vol. 43"
  const abvPatterns = [
    /(\d{2}[.,]\d{1,2})\s*%/, // 43.5% or 43,5 %
    /(\d{2})\s*%/, // 43%
    /abv[:\s]*(\d{2}[.,]?\d{0,2})/i, // ABV: 43 or ABV 43.5
    /vol[.\s]*(\d{2}[.,]?\d{0,2})/i, // vol. 43
    /(\d{2}[.,]\d{1,2})\s*vol/i, // 43.5 vol
  ];

  abvSearch: for (const pattern of abvPatterns) {
    for (const candidate of abvCandidates) {
      const match = candidate.match(pattern);
      if (!match) continue;
      const value = parseFloat(match[1].replace(',', '.'));
      if (value >= 35 && value <= 75) {
        // Reasonable whisky ABV range
        result.abv = value;
        break abvSearch;
      }
    }
  }

  // Age patterns: "12 years", "12 year old", "12 YO", "aged 12"
  const agePatterns = [
    /(\d{1,2})\s*(?:years?|yrs?|y\.?o\.?|jahre?)/i,
    /aged\s*(\d{1,2})/i,
    /(\d{1,2})\s*year\s*old/i,
  ];

  for (const pattern of agePatterns) {
    const match = text.match(pattern);
    if (match) {
      const value = parseInt(match[1], 10);
      if (value >= 3 && value <= 60) {
        // Reasonable whisky age range
        result.age = value;
        break;
      }
    }
  }

  // Vintage patterns: "1990", "Vintage 1990", "Distilled 1990"
  const vintagePatterns = [
    /(?:vintage|distilled|dist\.?)\s*(19\d{2}|20[0-2]\d)/i,
    /\b(19[789]\d|20[0-2]\d)\b/, // Years 1970-2029
  ];

  const currentYear = new Date().getFullYear();
  for (const pattern of vintagePatterns) {
    const match = text.match(pattern);
    if (match) {
      const year = parseInt(match[1], 10);
      if (year >= 1970 && year <= currentYear) {
        result.vintage = match[1];
        break;
      }
    }
  }

  return result;
}

/**
 * Convert an image blob to a base64 data URL string.
 *
 * @param blob - Image data to encode
 * @returns The data URL produced by FileReader.readAsDataURL
 * @throws Rejects when the read fails or does not produce a string
 */
export function imageToBase64(blob: Blob): Promise<string> {
  return new Promise<string>((resolve, reject) => {
    const reader = new FileReader();
    reader.onload = () => {
      if (typeof reader.result === 'string') {
        resolve(reader.result);
      } else {
        reject(new Error('Failed to convert image to base64'));
      }
    };
    // Reject with the underlying read error rather than the raw ProgressEvent.
    reader.onerror = () => {
      reject(reader.error || new Error('Failed to read image blob'));
    };
    reader.readAsDataURL(blob);
  });
}

/**
 * Check if the browser is online
 */
export function isOnline(): boolean {
  return typeof navigator !== 'undefined' && navigator.onLine;
}

/**
 * Options for image preprocessing
 */
export interface PreprocessOptions {
  /** Crop left/right edges (0-0.25) to remove bottle curves. Default: 0.05 */
  edgeCrop?: number;
  /** Target height for resizing. Default: 1200 */
  targetHeight?: number;
  /** Apply binarization (hard black/white). Default: false */
  binarize?: boolean;
  /** Contrast boost factor (1.0 = no change). Default: 1.3 */
  contrastBoost?: number;
  /** Apply sharpening. Default: false */
  sharpen?: boolean;
}

/**
 * Preprocess an image for better OCR results
 *
 * Applies:
 * 1. Center crop (removes curved bottle edges)
 * 2. Resize to the target height, keeping the aspect ratio
 * 3. Grayscale conversion
 * 4. Optional sharpening (off by default)
 * 5. Contrast enhancement
 * 6. Optional binarization
 *
 * @param imageSource - File, Blob, or HTMLImageElement
 * @param options - Preprocessing options
 * @returns Preprocessed image as a PNG data URL
 * @throws Error when the image has no pixel dimensions, `targetHeight` is not
 *         positive, or a 2D canvas context cannot be created
 */
export async function preprocessImageForOCR(
  imageSource: File | Blob | HTMLImageElement,
  options: PreprocessOptions = {}
): Promise<string> {
  const {
    edgeCrop = 0.05, // Remove 5% from each edge (minimal)
    targetHeight = 1200, // High resolution
    binarize = false, // Don't binarize by default
    contrastBoost = 1.3, // 30% contrast boost
    sharpen = false, // Disabled - creates noise on photos
  } = options;

  if (!(targetHeight > 0)) {
    throw new Error('targetHeight must be a positive number');
  }

  // Load image into an HTMLImageElement if not already
  let img: HTMLImageElement;
  if (imageSource instanceof HTMLImageElement) {
    img = imageSource;
  } else {
    img = await loadImageFromBlob(imageSource);
  }

  // drawImage source rectangles are expressed in the image's intrinsic pixels,
  // so prefer naturalWidth/naturalHeight over the layout-affected width/height.
  const sourceWidth = img.naturalWidth || img.width;
  const sourceHeight = img.naturalHeight || img.height;
  if (sourceWidth <= 0 || sourceHeight <= 0) {
    throw new Error('Cannot preprocess an image with no pixel dimensions');
  }

  // Keep the crop inside the documented 0-0.25 range so the remaining width
  // can never become zero or negative; a non-finite value means "no crop".
  const safeEdgeCrop = Number.isFinite(edgeCrop)
    ? Math.min(0.25, Math.max(0, edgeCrop))
    : 0;

  // Create canvas
  const canvas = document.createElement('canvas');
  const ctx = canvas.getContext('2d');
  if (!ctx) {
    throw new Error('Canvas 2D context is not available');
  }

  // Calculate crop dimensions (remove edges to focus on center)
  const cropX = Math.floor(sourceWidth * safeEdgeCrop);
  const cropWidth = sourceWidth - cropX * 2;
  const cropHeight = sourceHeight;

  // Calculate resize dimensions (maintain aspect ratio); never below 1px
  const newHeight = Math.max(1, Math.round(targetHeight));
  const scale = newHeight / cropHeight;
  const newWidth = Math.max(1, Math.floor(cropWidth * scale));

  canvas.width = newWidth;
  canvas.height = newHeight;

  // Draw cropped & resized image
  ctx.drawImage(
    img,
    cropX, 0, cropWidth, cropHeight, // Source: center crop
    0, 0, newWidth, newHeight // Destination: full canvas
  );

  // Get pixel data for processing
  const imageData = ctx.getImageData(0, 0, newWidth, newHeight);
  const data = imageData.data;

  // First pass: Convert to grayscale (Rec. 709 luma weights)
  for (let i = 0; i < data.length; i += 4) {
    const r = data[i];
    const g = data[i + 1];
    const b = data[i + 2];
    const gray = 0.2126 * r + 0.7152 * g + 0.0722 * b;
    data[i] = data[i + 1] = data[i + 2] = gray;
  }

  // Apply sharpening using a 3x3 kernel
  if (sharpen) {
    // Read from a snapshot so already-sharpened pixels do not feed the kernel
    const tempData = new Uint8ClampedArray(data);
    // Sharpen kernel: enhances edges
    // [ 0, -1,  0]
    // [-1,  5, -1]
    // [ 0, -1,  0]
    const kernel = [0, -1, 0, -1, 5, -1, 0, -1, 0];

    // The one-pixel border is skipped because the kernel needs all 8 neighbours
    for (let y = 1; y < newHeight - 1; y++) {
      for (let x = 1; x < newWidth - 1; x++) {
        let sum = 0;
        for (let ky = -1; ky <= 1; ky++) {
          for (let kx = -1; kx <= 1; kx++) {
            const idx = ((y + ky) * newWidth + (x + kx)) * 4;
            const ki = (ky + 1) * 3 + (kx + 1);
            sum += tempData[idx] * kernel[ki];
          }
        }
        const idx = (y * newWidth + x) * 4;
        const clamped = Math.min(255, Math.max(0, sum));
        data[idx] = data[idx + 1] = data[idx + 2] = clamped;
      }
    }
  }

  // Second pass: Apply contrast enhancement (stretch around mid-gray 128)
  for (let i = 0; i < data.length; i += 4) {
    let gray = data[i];
    gray = (gray - 128) * contrastBoost + 128;
    gray = Math.min(255, Math.max(0, gray));
    if (binarize) {
      gray = gray >= 128 ? 255 : 0;
    }
    data[i] = data[i + 1] = data[i + 2] = gray;
  }

  // Put processed data back
  ctx.putImageData(imageData, 0, 0);

  console.log('[PreprocessOCR] Image preprocessed:', {
    original: `${sourceWidth}x${sourceHeight}`,
    cropped: `${cropWidth}x${cropHeight} (${(safeEdgeCrop * 100).toFixed(0)}% edge crop)`,
    final: `${newWidth}x${newHeight}`,
    sharpen,
    contrastBoost,
    mode: binarize ? 'binarized' : 'grayscale',
  });

  return canvas.toDataURL('image/png');
}

/**
 * Load an image from a Blob/File into an HTMLImageElement.
 * The temporary object URL is revoked on both success and failure.
 *
 * @param blob - Image data to decode
 * @returns The loaded image element
 * @throws Rejects with an Error when the image fails to load
 */
function loadImageFromBlob(blob: Blob): Promise<HTMLImageElement> {
  return new Promise<HTMLImageElement>((resolve, reject) => {
    const img = new Image();
    const url = URL.createObjectURL(blob);
    img.onload = () => {
      URL.revokeObjectURL(url);
      resolve(img);
    };
    img.onerror = () => {
      URL.revokeObjectURL(url);
      reject(new Error('Failed to load image'));
    };
    img.src = url;
  });
}