perf: Remove Tesseract OCR - saves ~45MB on mobile

- Removed Tesseract.js files from precache (~45MB)
- Scanner now uses only Gemini AI (more accurate, less data)
- Offline scans are queued for later processing when online
- App download reduced from ~50MB to ~5MB

BREAKING: Local offline OCR is no longer available. Use Gemini AI instead (requires network for scanning).
2025-12-25 23:39:08 +01:00
parent 462d27ea7b
commit f0f36e9c03
17 changed files with 55 additions and 2190 deletions
--- a/src/lib/ocr/local-engine.ts
+++ b/src/lib/ocr/local-engine.ts
@@ -1,341 +0,0 @@
-/**
- * Local OCR Engine
- * Client-side OCR using Tesseract.js with Fuse.js fuzzy matching
- * 
- * Optimized for whisky label scanning with:
- * - Image preprocessing (grayscale, binarization, center crop)
- * - PSM 11 (Sparse text mode)
- * - Character whitelisting
- * - Bag-of-words fuzzy matching
- */
-
-import Tesseract from 'tesseract.js';
-import Fuse from 'fuse.js';
-import { extractNumbers, ExtractedNumbers, preprocessImageForOCR } from './scanner-utils';
-import distilleries from '@/data/distilleries.json';
-
-export interface LocalOcrResult {
-    distillery: string | null;
-    distilleryRegion: string | null;
-    name: string | null;
-    age: number | null;
-    abv: number | null;
-    vintage: string | null;
-    rawText: string;
-    confidence: number;
-}
-
-// Fuse.js configuration for fuzzy matching distillery names
-// Balanced matching to catch partial OCR errors while avoiding false positives
-const fuseOptions = {
-    keys: ['name'],
-    threshold: 0.35,        // 0 = exact match, 0.35 = allow some fuzziness
-    distance: 50,           // Characters between matched chars
-    includeScore: true,
-    minMatchCharLength: 4,  // Minimum chars to match
-};
-
-const distilleryFuse = new Fuse(distilleries, fuseOptions);
-
-// Tesseract worker singleton (reused across scans)
-let tesseractWorker: Tesseract.Worker | null = null;
-
-// Character whitelist for whisky labels ("Pattern Hack")
-// Restricts Tesseract to only whisky-relevant characters:
-// - Letters: A-Z, a-z
-// - Numbers: 0-9
-// - Essential punctuation: .,%&-/ (for ABV "46.5%", names like "No. 1")
-// - Space: for word separation
-// This prevents garbage like ~, _, ^, {, § from appearing
-const CHAR_WHITELIST = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789.,%&-/ ';
-
-/**
- * Initialize or get the Tesseract worker
- * Uses local files from /public/tessdata for offline capability
- */
-async function getWorker(): Promise<Tesseract.Worker> {
-    if (tesseractWorker) {
-        return tesseractWorker;
-    }
-
-    console.log('[LocalOCR] Initializing Tesseract worker with local files...');
-
-    // Use local files from /public/tessdata for full offline support
-    tesseractWorker = await Tesseract.createWorker('eng', Tesseract.OEM.LSTM_ONLY, {
-        workerPath: '/tessdata/worker.min.js',  // Local worker for offline
-        corePath: '/tessdata/',
-        langPath: '/tessdata/',
-        logger: (m) => {
-            if (m.status === 'recognizing text') {
-                console.log(`[LocalOCR] Progress: ${Math.round(m.progress * 100)}%`);
-            } else {
-                console.log(`[LocalOCR] ${m.status}`);
-            }
-        },
-    });
-
-    // Configure Tesseract for whisky label OCR
-    await tesseractWorker.setParameters({
-        tessedit_pageseg_mode: Tesseract.PSM.SINGLE_BLOCK, // PSM 6 - treat as single block of text
-        tessedit_char_whitelist: CHAR_WHITELIST,
-        preserve_interword_spaces: '1',  // Keep word spacing
-    });
-
-    console.log('[LocalOCR] Tesseract worker ready (PSM: SINGLE_BLOCK, Whitelist enabled)');
-    return tesseractWorker;
-}
-
-/**
- * Run OCR on an image and extract whisky metadata
- * 
- * @param imageSource - File, Blob, or base64 string of the image
- * @param timeoutMs - Maximum time to wait for OCR (default 10000ms)
- * @returns LocalOcrResult with extracted metadata
- */
-export async function analyzeLocalOcr(
-    imageSource: File | Blob | string,
-    timeoutMs: number = 10000
-): Promise<LocalOcrResult> {
-    const result: LocalOcrResult = {
-        distillery: null,
-        distilleryRegion: null,
-        name: null,
-        age: null,
-        abv: null,
-        vintage: null,
-        rawText: '',
-        confidence: 0,
-    };
-
-    try {
-        // Step 1: Preprocess the image for better OCR
-        let processedImage: string;
-        if (typeof imageSource === 'string') {
-            // Already a data URL, use as-is (can't preprocess string)
-            processedImage = imageSource;
-            console.log('[LocalOCR] Using raw image (string input)');
-        } else {
-            // Preprocess File/Blob: grayscale + sharpen + contrast boost
-            console.log('[LocalOCR] Preprocessing image...');
-            processedImage = await preprocessImageForOCR(imageSource);
-            // Uses defaults: 5% edge crop, 1200px height, sharpen=true, 1.3x contrast
-        }
-
-        // Create a timeout promise
-        const timeoutPromise = new Promise<never>((_, reject) => {
-            setTimeout(() => reject(new Error('OCR timeout')), timeoutMs);
-        });
-
-        // Race OCR against timeout
-        const worker = await getWorker();
-        const ocrResult = await Promise.race([
-            worker.recognize(processedImage),
-            timeoutPromise,
-        ]);
-
-        result.rawText = ocrResult.data.text;
-        result.confidence = ocrResult.data.confidence / 100; // Normalize to 0-1
-
-        // Extract numbers using regex (this works reliably)
-        const numbers = extractNumbers(result.rawText);
-        result.abv = numbers.abv;
-        result.age = numbers.age;
-        result.vintage = numbers.vintage;
-
-        // NOTE: Distillery fuzzy matching disabled - causes too many false positives
-        // with noisy OCR text. Let Gemini Vision handle distillery identification.
-        // const distilleryMatch = findDistillery(result.rawText);
-        // if (distilleryMatch) {
-        //     result.distillery = distilleryMatch.name;
-        //     result.distilleryRegion = distilleryMatch.region;
-        // }
-
-        // Fuzzy match distillery (new algorithm with sanity checks)
-        const distilleryMatch = findDistillery(result.rawText);
-        if (distilleryMatch) {
-            result.distillery = distilleryMatch.name;
-            result.distilleryRegion = distilleryMatch.region;
-
-            // Use contextual age if regex extraction failed
-            if (!result.age && distilleryMatch.contextualAge) {
-                result.age = distilleryMatch.contextualAge;
-                console.log(`[LocalOCR] Using contextual age: ${result.age}`);
-            }
-        }
-
-        // NOTE: Name extraction disabled - Tesseract too noisy for full bottle names
-        // Let Gemini Vision handle the name field
-        // result.name = extractName(result.rawText, result.distillery);
-        result.name = null;
-
-        // Detailed logging for debugging
-        const cleanedText = result.rawText
-            .split('\n')
-            .map(line => line.trim())
-            .filter(line => line.length > 0)
-            .join(' | ');
-
-        console.log('[LocalOCR] ========== OCR RESULTS ==========');
-        console.log('[LocalOCR] Raw Text:\n', result.rawText);
-        console.log('[LocalOCR] Cleaned Text:', cleanedText);
-        console.log('[LocalOCR] Confidence:', (result.confidence * 100).toFixed(1) + '%');
-        console.log('[LocalOCR] Extracted Data:', {
-            distillery: result.distillery,
-            distilleryRegion: result.distilleryRegion,
-            name: result.name,
-            age: result.age,
-            abv: result.abv,
-            vintage: result.vintage,
-        });
-        console.log('[LocalOCR] ===================================');
-
-        return result;
-
-    } catch (error) {
-        console.warn('[LocalOCR] Analysis failed:', error);
-        return result; // Return partial/empty result
-    }
-}
-
-/**
- * Find a distillery name in OCR text using fuzzy matching
- * 
- * Strategy:
- * 1. Normalize whitespace (fix Tesseract's formatting gaps)
- * 2. Split OCR text into lines, filter garbage
- * 3. "Strip & Match": Remove numbers before Fuse matching (helps with "N NEVIS 27")
- * 4. Sanity check: match length must be reasonable
- * 5. Contextual age: if distillery found, look for age in original line
- */
-function findDistillery(text: string): { name: string; region: string; contextualAge?: number } | null {
-    // Split into lines, normalize whitespace, and filter garbage
-    const lines = text
-        .split('\n')
-        .map(line => line.trim().replace(/\s+/g, ' ')) // Normalize whitespace
-        .filter(line => {
-            // Minimum 4 characters
-            if (line.length < 4) return false;
-            // Must have at least 40% letters (lowered to allow lines with numbers)
-            const letters = line.replace(/[^a-zA-Z]/g, '');
-            return letters.length >= line.length * 0.4;
-        });
-
-    console.log('[LocalOCR] Lines for distillery matching:', lines.length);
-
-    // Blacklist common whisky words that shouldn't match distillery names
-    const blacklistedWords = new Set([
-        'reserve', 'malt', 'single', 'whisky', 'whiskey', 'scotch', 'bourbon',
-        'blended', 'irish', 'aged', 'years', 'edition', 'cask', 'barrel',
-        'distillery', 'vintage', 'special', 'limited', 'rare', 'old', 'gold',
-        'spirit', 'spirits', 'proof', 'strength', 'batch', 'select', 'finish'
-    ]);
-
-    // Try to match each line using sliding word windows
-    for (const originalLine of lines) {
-        // STRIP & MATCH: Remove numbers for cleaner Fuse matching
-        const textOnlyLine = originalLine.replace(/[0-9]/g, '').replace(/\s+/g, ' ').trim();
-
-        if (textOnlyLine.length < 4) continue;
-
-        // Split into words for window matching
-        const words = textOnlyLine.split(' ').filter(w => w.length >= 2);
-
-        // Try different window sizes (1-3 words) to find distillery within garbage
-        // E.g., "ge OO BEN NEVIS" → try "BEN NEVIS", "OO BEN", "BEN", etc.
-        for (let windowSize = Math.min(3, words.length); windowSize >= 1; windowSize--) {
-            for (let i = 0; i <= words.length - windowSize; i++) {
-                const phrase = words.slice(i, i + windowSize).join(' ');
-
-                if (phrase.length < 4) continue;
-
-                // Skip blacklisted common words
-                if (blacklistedWords.has(phrase.toLowerCase())) {
-                    continue;
-                }
-
-                const results = distilleryFuse.search(phrase);
-
-                if (results.length > 0 && results[0].score !== undefined && results[0].score < 0.3) {
-                    const match = results[0].item;
-                    const matchScore = results[0].score;
-
-                    // SANITY CHECK: Length ratio should be reasonable (0.6 - 1.5)
-                    const lengthRatio = phrase.length / match.name.length;
-                    if (lengthRatio < 0.6 || lengthRatio > 1.5) {
-                        continue;
-                    }
-
-                    // CONTEXTUAL AGE DETECTION: Look for 2-digit number (3-60) in ORIGINAL line
-                    let contextualAge: number | undefined;
-                    const ageMatch = originalLine.match(/\b(\d{1,2})\b/);
-                    if (ageMatch) {
-                        const potentialAge = parseInt(ageMatch[1], 10);
-                        if (potentialAge >= 3 && potentialAge <= 60) {
-                            contextualAge = potentialAge;
-                            console.log(`[LocalOCR] Contextual age detected: ${potentialAge} years`);
-                        }
-                    }
-
-                    console.log(`[LocalOCR] Distillery match: "${phrase}" → ${match.name} (score: ${matchScore.toFixed(3)}, original: "${originalLine}")`);
-                    return {
-                        name: match.name,
-                        region: match.region,
-                        contextualAge,
-                    };
-                }
-            }
-        }
-    }
-
-    return null;
-}
-
-/**
- * Extract a potential bottle name from OCR text
- */
-function extractName(text: string, distillery: string | null): string | null {
-    const lines = text
-        .split('\n')
-        .map(l => l.trim())
-        .filter(line => {
-            // Minimum 5 characters
-            if (line.length < 5) return false;
-            // Must have at least 60% letters (filter out garbage like "ee" or "4 . .")
-            const letters = line.replace(/[^a-zA-Z]/g, '');
-            if (letters.length < line.length * 0.6) return false;
-            // Skip lines that are just punctuation/numbers
-            if (/^[\d\s.,\-'"]+$/.test(line)) return false;
-            return true;
-        });
-
-    // Skip lines that are just the distillery name
-    const candidates = lines.filter(line => {
-        if (distillery && line.toLowerCase().includes(distillery.toLowerCase())) {
-            // Only skip if the line IS the distillery name (not contains more)
-            return line.length > distillery.length + 5;
-        }
-        return true;
-    });
-
-    // Return the first substantial line (likely the bottle name)
-    for (const line of candidates) {
-        // Skip lines that look like numbers only
-        if (/^\d+[\s%]+/.test(line)) continue;
-        // Skip lines that are just common whisky words
-        if (/^(single|malt|scotch|whisky|whiskey|aged|years?|proof|edition|distilled|distillery)$/i.test(line)) continue;
-
-        return line;
-    }
-
-    return null;
-}
-
-/**
- * Terminate the Tesseract worker (call on cleanup)
- */
-export async function terminateOcrWorker(): Promise<void> {
-    if (tesseractWorker) {
-        await tesseractWorker.terminate();
-        tesseractWorker = null;
-    }
-}
--- a/src/lib/ocr/scanner-utils.ts
+++ b/src/lib/ocr/scanner-utils.ts
@@ -1,440 +0,0 @@
-/**
- * Scanner Utilities
- * Cache checking and helper functions for client-side OCR
- */
-
-/**
- * Check if Tesseract.js is ready to run
- * When online, tesseract will auto-download from CDN, so return true
- * When offline, check if files are cached
- * @returns Promise<boolean> - true if OCR can run
- */
-export async function isTesseractReady(): Promise<boolean> {
-    if (typeof window === 'undefined') {
-        return false;
-    }
-
-    // If online, tesseract.js will auto-download what it needs
-    if (navigator.onLine) {
-        console.log('[Scanner] Online - tesseract will use CDN');
-        return true;
-    }
-
-    // If offline, check cache
-    if (!('caches' in window)) {
-        console.log('[Scanner] Offline + no cache API - tesseract not ready');
-        return false;
-    }
-
-    try {
-        // Check for the core files in cache
-        // Try to find files in any cache (not just default)
-        const cacheNames = await caches.keys();
-        console.log('[Scanner] Available caches:', cacheNames);
-
-        let wasmMatch = false;
-        let langMatch = false;
-
-        for (const cacheName of cacheNames) {
-            const cache = await caches.open(cacheName);
-            const keys = await cache.keys();
-
-            for (const request of keys) {
-                const url = request.url;
-                if (url.includes('tesseract-core') && url.includes('.wasm')) {
-                    wasmMatch = true;
-                }
-                if (url.includes('eng.traineddata')) {
-                    langMatch = true;
-                }
-            }
-        }
-
-        const ready = wasmMatch && langMatch;
-        console.log('[Scanner] Offline cache check:', { wasmMatch, langMatch, ready, cacheCount: cacheNames.length });
-        return ready;
-    } catch (error) {
-        console.warn('[Scanner] Cache check failed:', error);
-        return false;
-    }
-}
-
-/**
- * Extract numeric values from OCR text using regex patterns
- */
-export interface ExtractedNumbers {
-    abv: number | null;
-    age: number | null;
-    vintage: string | null;
-}
-
-export function extractNumbers(text: string): ExtractedNumbers {
-    const result: ExtractedNumbers = {
-        abv: null,
-        age: null,
-        vintage: null
-    };
-
-    if (!text) return result;
-
-    // ========== ABV EXTRACTION (Enhanced) ==========
-    // Step 1: Normalize text for common Tesseract OCR mistakes
-    let normalizedText = text
-        // Fix % misread as numbers or text
-        .replace(/96/g, '%')           // Tesseract often reads % as 96
-        .replace(/o\/o/gi, '%')         // o/o → %
-        .replace(/°\/o/gi, '%')         // °/o → %
-        .replace(/0\/0/g, '%')          // 0/0 → %
-        // Fix common letter/number confusions
-        .replace(/[oO](?=\d)/g, '0')    // O before digit → 0 (e.g., "O5" → "05")
-        .replace(/(?<=\d)[oO]/g, '0')   // O after digit → 0 (e.g., "5O" → "50")
-        .replace(/[lI](?=\d)/g, '1')    // l/I before digit → 1
-        .replace(/(?<=\d)[lI]/g, '1')   // l/I after digit → 1
-        // Normalize decimal separators
-        .replace(/,/g, '.');
-
-    // Step 2: ABV patterns - looking for number before % or Vol
-    const abvPatterns = [
-        /(\d{2}\.?\d{0,2})\s*%/,                      // 43%, 43.5%, 57.1%
-        /(\d{2}\.?\d{0,2})\s*(?:vol|alc)/i,           // 43 vol, 43.5 alc
-        /(?:abv|alc|vol)[:\s]*(\d{2}\.?\d{0,2})/i,    // ABV: 43, vol. 43.5
-        /(\d{2}\.?\d{0,2})\s*(?:percent|prozent)/i,   // 43 percent/prozent
-    ];
-
-    for (const pattern of abvPatterns) {
-        const match = normalizedText.match(pattern);
-        if (match) {
-            const value = parseFloat(match[1]);
-            // STRICT RANGE GUARD: Only accept 35.0 - 75.0
-            // This prevents misidentifying years (1996) or volumes (700ml)
-            if (value >= 35.0 && value <= 75.0) {
-                result.abv = value;
-                console.log(`[ABV] Detected: ${value}% from pattern: ${pattern.source}`);
-                break;
-            } else {
-                console.log(`[ABV] Rejected ${value} - outside 35-75 range`);
-            }
-        }
-    }
-
-    // ========== AGE & VINTAGE (unchanged but use normalized text) ==========
-
-    // Age patterns: "12 years", "12 year old", "12 YO", "aged 12"
-    const agePatterns = [
-        /(\d{1,2})\s*(?:years?|yrs?|y\.?o\.?|jahre?)/i,
-        /aged\s*(\d{1,2})/i,
-        /(\d{1,2})\s*year\s*old/i,
-    ];
-
-    for (const pattern of agePatterns) {
-        const match = text.match(pattern);
-        if (match) {
-            const value = parseInt(match[1], 10);
-            if (value >= 3 && value <= 60) {  // Reasonable whisky age range
-                result.age = value;
-                break;
-            }
-        }
-    }
-
-    // Vintage patterns: "1990", "Vintage 1990", "Distilled 1990"
-    const vintagePatterns = [
-        /(?:vintage|distilled|dist\.?)\s*(19\d{2}|20[0-2]\d)/i,
-        /\b(19[789]\d|20[0-2]\d)\b/,  // Years 1970-2029
-    ];
-
-    for (const pattern of vintagePatterns) {
-        const match = text.match(pattern);
-        if (match) {
-            const year = parseInt(match[1], 10);
-            const currentYear = new Date().getFullYear();
-            if (year >= 1970 && year <= currentYear) {
-                result.vintage = match[1];
-                break;
-            }
-        }
-    }
-
-    return result;
-}
-
-/**
- * Convert an image blob to base64 string
- */
-export function imageToBase64(blob: Blob): Promise<string> {
-    return new Promise((resolve, reject) => {
-        const reader = new FileReader();
-        reader.onload = () => {
-            if (typeof reader.result === 'string') {
-                resolve(reader.result);
-            } else {
-                reject(new Error('Failed to convert image to base64'));
-            }
-        };
-        reader.onerror = reject;
-        reader.readAsDataURL(blob);
-    });
-}
-
-/**
- * Check if the browser is online
- */
-export function isOnline(): boolean {
-    return typeof navigator !== 'undefined' && navigator.onLine;
-}
-
-/**
- * Options for image preprocessing
- */
-export interface PreprocessOptions {
-    /** Crop left/right edges (0-0.25) to remove bottle curves. Default: 0.05 */
-    edgeCrop?: number;
-    /** Target height for resizing. Default: 1200 */
-    targetHeight?: number;
-    /** Apply simple binarization (hard black/white). Default: false */
-    binarize?: boolean;
-    /** Apply adaptive thresholding (better for uneven lighting). Default: true */
-    adaptiveThreshold?: boolean;
-    /** Contrast boost factor (1.0 = no change). Default: 1.3 */
-    contrastBoost?: number;
-    /** Apply sharpening. Default: false */
-    sharpen?: boolean;
-}
-
-/**
- * Preprocess an image for better OCR results
- * 
- * Applies:
- * 1. Center crop (removes curved bottle edges)
- * 2. Resize to optimal OCR size
- * 3. Grayscale conversion
- * 4. Sharpening (helps with blurry text)
- * 5. Contrast enhancement
- * 6. Optional binarization
- * 
- * @param imageSource - File, Blob, or HTMLImageElement
- * @param options - Preprocessing options
- * @returns Promise<string> - Preprocessed image as data URL
- */
-export async function preprocessImageForOCR(
-    imageSource: File | Blob | HTMLImageElement,
-    options: PreprocessOptions = {}
-): Promise<string> {
-    const {
-        edgeCrop = 0.05,        // Remove 5% from each edge (minimal)
-        targetHeight = 1200,    // High resolution
-        binarize = false,       // Simple binarization (global threshold)
-        adaptiveThreshold = true, // Adaptive thresholding (local threshold) - better for uneven lighting
-        contrastBoost = 1.3,    // 30% contrast boost (only if not using adaptive)
-        sharpen = false,        // Disabled - creates noise on photos
-    } = options;
-
-    // Load image into an HTMLImageElement if not already
-    let img: HTMLImageElement;
-
-    if (imageSource instanceof HTMLImageElement) {
-        img = imageSource;
-    } else {
-        img = await loadImageFromBlob(imageSource as Blob);
-    }
-
-    // Create canvas
-    const canvas = document.createElement('canvas');
-    const ctx = canvas.getContext('2d')!;
-
-    // Calculate crop dimensions (remove edges to focus on center)
-    const cropX = Math.floor(img.width * edgeCrop);
-    const cropWidth = img.width - (cropX * 2);
-    const cropHeight = img.height;
-
-    // Calculate resize dimensions (maintain aspect ratio)
-    const scale = targetHeight / cropHeight;
-    const newWidth = Math.floor(cropWidth * scale);
-    const newHeight = targetHeight;
-
-    canvas.width = newWidth;
-    canvas.height = newHeight;
-
-    // Draw cropped & resized image
-    ctx.drawImage(
-        img,
-        cropX, 0, cropWidth, cropHeight,  // Source: center crop
-        0, 0, newWidth, newHeight          // Destination: full canvas
-    );
-
-    // Get pixel data for processing
-    const imageData = ctx.getImageData(0, 0, newWidth, newHeight);
-    const data = imageData.data;
-
-    // First pass: Convert to grayscale
-    for (let i = 0; i < data.length; i += 4) {
-        const r = data[i];
-        const g = data[i + 1];
-        const b = data[i + 2];
-        const gray = 0.2126 * r + 0.7152 * g + 0.0722 * b;
-        data[i] = data[i + 1] = data[i + 2] = gray;
-    }
-
-    // Apply sharpening using a 3x3 kernel
-    if (sharpen) {
-        const tempData = new Uint8ClampedArray(data);
-        // Sharpen kernel: enhances edges
-        // [ 0, -1,  0]
-        // [-1,  5, -1]
-        // [ 0, -1,  0]
-        const kernel = [0, -1, 0, -1, 5, -1, 0, -1, 0];
-
-        for (let y = 1; y < newHeight - 1; y++) {
-            for (let x = 1; x < newWidth - 1; x++) {
-                let sum = 0;
-                for (let ky = -1; ky <= 1; ky++) {
-                    for (let kx = -1; kx <= 1; kx++) {
-                        const idx = ((y + ky) * newWidth + (x + kx)) * 4;
-                        const ki = (ky + 1) * 3 + (kx + 1);
-                        sum += tempData[idx] * kernel[ki];
-                    }
-                }
-                const idx = (y * newWidth + x) * 4;
-                const clamped = Math.min(255, Math.max(0, sum));
-                data[idx] = data[idx + 1] = data[idx + 2] = clamped;
-            }
-        }
-    }
-
-    // Put processed data back (after grayscale conversion)
-    ctx.putImageData(imageData, 0, 0);
-
-    // Apply adaptive or simple binarization/contrast
-    if (adaptiveThreshold) {
-        // ========== ADAPTIVE THRESHOLDING ==========
-        // Uses integral image for efficient local mean calculation
-        // Better for uneven lighting on curved bottles
-        const adaptiveData = ctx.getImageData(0, 0, newWidth, newHeight);
-        const pixels = adaptiveData.data;
-
-        // Window size: ~1/20th of image width, minimum 11, must be odd
-        let windowSize = Math.max(11, Math.floor(newWidth / 20));
-        if (windowSize % 2 === 0) windowSize++;
-        const halfWindow = Math.floor(windowSize / 2);
-
-        // Sauvola-style constant: lower = more sensitive to text
-        const k = 0.15;
-
-        // Build integral image for fast local sum calculation
-        const integral = new Float64Array((newWidth + 1) * (newHeight + 1));
-        const integralSq = new Float64Array((newWidth + 1) * (newHeight + 1));
-
-        for (let y = 0; y < newHeight; y++) {
-            let rowSum = 0;
-            let rowSumSq = 0;
-            for (let x = 0; x < newWidth; x++) {
-                const idx = (y * newWidth + x) * 4;
-                const gray = pixels[idx];
-                rowSum += gray;
-                rowSumSq += gray * gray;
-
-                const iIdx = (y + 1) * (newWidth + 1) + (x + 1);
-                const iIdxAbove = y * (newWidth + 1) + (x + 1);
-                integral[iIdx] = rowSum + integral[iIdxAbove];
-                integralSq[iIdx] = rowSumSq + integralSq[iIdxAbove];
-            }
-        }
-
-        // Apply adaptive threshold
-        const output = new Uint8ClampedArray(pixels.length);
-        for (let y = 0; y < newHeight; y++) {
-            for (let x = 0; x < newWidth; x++) {
-                // Calculate local window bounds
-                const x1 = Math.max(0, x - halfWindow);
-                const y1 = Math.max(0, y - halfWindow);
-                const x2 = Math.min(newWidth - 1, x + halfWindow);
-                const y2 = Math.min(newHeight - 1, y + halfWindow);
-                const count = (x2 - x1 + 1) * (y2 - y1 + 1);
-
-                // Get local sum and sum of squares using integral image
-                const i11 = y1 * (newWidth + 1) + x1;
-                const i12 = y1 * (newWidth + 1) + (x2 + 1);
-                const i21 = (y2 + 1) * (newWidth + 1) + x1;
-                const i22 = (y2 + 1) * (newWidth + 1) + (x2 + 1);
-
-                const sum = integral[i22] - integral[i21] - integral[i12] + integral[i11];
-                const sumSq = integralSq[i22] - integralSq[i21] - integralSq[i12] + integralSq[i11];
-
-                const mean = sum / count;
-                const variance = (sumSq / count) - (mean * mean);
-                const stddev = Math.sqrt(Math.max(0, variance));
-
-                // Sauvola threshold: T = mean * (1 + k * (stddev/R - 1))
-                // R = dynamic range = 128 for grayscale
-                const threshold = mean * (1 + k * (stddev / 128 - 1));
-
-                const idx = (y * newWidth + x) * 4;
-                const pixel = pixels[idx];
-                const binaryValue = pixel < threshold ? 0 : 255;
-
-                output[idx] = output[idx + 1] = output[idx + 2] = binaryValue;
-                output[idx + 3] = 255;
-            }
-        }
-
-        // Copy output back
-        for (let i = 0; i < pixels.length; i++) {
-            pixels[i] = output[i];
-        }
-        ctx.putImageData(adaptiveData, 0, 0);
-
-        console.log('[PreprocessOCR] Adaptive thresholding applied:', {
-            windowSize,
-            k,
-            imageSize: `${newWidth}x${newHeight}`,
-        });
-    } else {
-        // Simple contrast enhancement + optional global binarization
-        const simpleData = ctx.getImageData(0, 0, newWidth, newHeight);
-        const pixels = simpleData.data;
-
-        for (let i = 0; i < pixels.length; i += 4) {
-            let gray = pixels[i];
-            gray = ((gray - 128) * contrastBoost) + 128;
-            gray = Math.min(255, Math.max(0, gray));
-
-            if (binarize) {
-                gray = gray >= 128 ? 255 : 0;
-            }
-
-            pixels[i] = pixels[i + 1] = pixels[i + 2] = gray;
-        }
-
-        ctx.putImageData(simpleData, 0, 0);
-    }
-
-    console.log('[PreprocessOCR] Image preprocessed:', {
-        original: `${img.width}x${img.height}`,
-        cropped: `${cropWidth}x${cropHeight} (${(edgeCrop * 100).toFixed(0)}% edge crop)`,
-        final: `${newWidth}x${newHeight}`,
-        sharpen,
-        mode: adaptiveThreshold ? 'adaptive-threshold' : (binarize ? 'binarized' : 'grayscale+contrast'),
-    });
-
-    return canvas.toDataURL('image/png');
-}
-
-/**
- * Load an image from a Blob/File into an HTMLImageElement
- */
-function loadImageFromBlob(blob: Blob): Promise<HTMLImageElement> {
-    return new Promise((resolve, reject) => {
-        const img = new Image();
-        const url = URL.createObjectURL(blob);
-
-        img.onload = () => {
-            URL.revokeObjectURL(url);
-            resolve(img);
-        };
-        img.onerror = () => {
-            URL.revokeObjectURL(url);
-            reject(new Error('Failed to load image'));
-        };
-        img.src = url;
-    });
-}