// Dramlog-Prod/src/lib/ocr/scanner-utils.ts

/**
 * Scanner Utilities
 * Cache checking and helper functions for client-side OCR
 */

/**
 * Report whether Tesseract.js can be expected to run right now.
 *
 * - Without a `window` object the answer is always `false`.
 * - While the browser reports being online the answer is `true`
 *   (tesseract.js will fetch what it needs from the CDN).
 * - While offline, both the WASM core and the English language data must
 *   be present in the Cache API.
 *
 * @returns Promise<boolean> - true if OCR can run
 */
export async function isTesseractReady(): Promise<boolean> {
    const hasWindow = typeof window !== 'undefined';
    if (!hasWindow) return false;

    // Online: nothing to verify locally, tesseract.js downloads on demand
    if (navigator.onLine) {
        console.log('[Scanner] Online - tesseract will use CDN');
        return true;
    }

    // Offline: the Cache API is the only place the files could come from
    const hasCacheApi = 'caches' in window;
    if (!hasCacheApi) {
        console.log('[Scanner] Offline + no cache API - tesseract not ready');
        return false;
    }

    // Paths match the actual file names in /public/tessdata
    const wasmPath = '/tessdata/tesseract-core-simd.wasm';
    const langPath = '/tessdata/eng.traineddata';

    try {
        const wasmResponse = await window.caches.match(wasmPath);
        const langResponse = await window.caches.match(langPath);

        const wasmCached = Boolean(wasmResponse);
        const langCached = Boolean(langResponse);
        const ready = wasmCached && langCached;

        console.log('[Scanner] Offline cache check:', { wasmMatch: wasmCached, langMatch: langCached, ready });
        return ready;
    } catch (cacheError) {
        // A failing cache lookup is treated the same as "not cached"
        console.warn('[Scanner] Cache check failed:', cacheError);
        return false;
    }
}

/**
 * Numeric label details recovered from OCR text.
 * Each field is `null` when no plausible value was found.
 */
export interface ExtractedNumbers {
    /** Alcohol by volume in percent; only values from 35 to 75 are accepted. */
    abv: number | null;
    /** Age statement in years; only values from 3 to 60 are accepted. */
    age: number | null;
    /** Four-digit year as it appeared in the text; only 1970 up to the current year is accepted. */
    vintage: string | null;
}

/**
 * Extract numeric values (ABV, age statement, vintage) from OCR text using regex patterns.
 *
 * For each field the patterns are tried in order; the first match of a pattern is
 * range-checked and the first value that passes wins.
 *
 * @param text - Raw OCR output; an empty string yields all-null fields
 * @returns The extracted values, with `null` for anything not found
 */
export function extractNumbers(text: string): ExtractedNumbers {
    const result: ExtractedNumbers = {
        abv: null,
        age: null,
        vintage: null
    };

    if (!text) return result;

    // Normalize text for ABV matching: map digit look-alikes to digits, then lowercase.
    // This is applied to every character, so letters inside keywords are rewritten too
    // ("vol" becomes "v01"); the keyword patterns below account for that.
    const normalizedText = text
        .replace(/[oO]/g, '0')  // Common OCR mistake: O -> 0
        .replace(/[lI]/g, '1')  // Common OCR mistake: l/I -> 1
        .toLowerCase();

    // ABV patterns: "43%", "43.5%", "43,5 %", "ABV 43", "vol. 43"
    const abvPatterns = [
        /(\d{2}[.,]\d{1,2})\s*%/,                   // 43.5% or 43,5 %
        /(\d{2})\s*%/,                                // 43%
        /abv[:\s]*(\d{2}[.,]?\d{0,2})/i,              // ABV: 43 or ABV 43.5
        // "vol" is matched as v[o0][l1] because normalization turns it into "v01"
        // (or "v0l" for an uppercase "VOL", since "L" is not rewritten)
        /v[o0][l1][.\s]*(\d{2}[.,]?\d{0,2})/i,        // vol. 43
        /(\d{2}[.,]\d{1,2})\s*v[o0][l1]/i,            // 43.5 vol
    ];

    for (const pattern of abvPatterns) {
        const match = normalizedText.match(pattern);
        if (match) {
            const value = parseFloat(match[1].replace(',', '.'));
            if (value >= 35 && value <= 75) {  // Reasonable whisky ABV range
                result.abv = value;
                break;
            }
        }
    }

    // Age patterns: "12 years", "12 year old", "12 YO", "aged 12"
    // Matched against the original text: normalization would corrupt "yo" / "old".
    const agePatterns = [
        /(\d{1,2})\s*(?:years?|yrs?|y\.?o\.?|jahre?)/i,
        /aged\s*(\d{1,2})/i,
        /(\d{1,2})\s*year\s*old/i,
    ];

    for (const pattern of agePatterns) {
        const match = text.match(pattern);
        if (match) {
            const value = parseInt(match[1], 10);
            if (value >= 3 && value <= 60) {  // Reasonable whisky age range
                result.age = value;
                break;
            }
        }
    }

    // Vintage patterns: "1990", "Vintage 1990", "Distilled 1990"
    const vintagePatterns = [
        /(?:vintage|distilled|dist\.?)\s*(19\d{2}|20[0-2]\d)/i,
        /\b(19[789]\d|20[0-2]\d)\b/,  // Years 1970-2029
    ];

    // A vintage cannot lie in the future
    const currentYear = new Date().getFullYear();

    for (const pattern of vintagePatterns) {
        const match = text.match(pattern);
        if (match) {
            const year = parseInt(match[1], 10);
            if (year >= 1970 && year <= currentYear) {
                result.vintage = match[1];
                break;
            }
        }
    }

    return result;
}

/**
 * Read an image blob and resolve with its contents as a data URL string.
 *
 * Rejects with an Error if the reader finishes without a string result, and
 * with whatever the reader passes to its error handler if reading fails.
 */
export function imageToBase64(blob: Blob): Promise<string> {
    return new Promise<string>((resolve, reject) => {
        const fileReader = new FileReader();

        fileReader.onerror = reject;
        fileReader.onload = () => {
            const dataUrl = fileReader.result;
            if (typeof dataUrl !== 'string') {
                reject(new Error('Failed to convert image to base64'));
                return;
            }
            resolve(dataUrl);
        };

        fileReader.readAsDataURL(blob);
    });
}

/**
 * Report the browser's online flag.
 * Returns false when no `navigator` object exists.
 */
export function isOnline(): boolean {
    if (typeof navigator === 'undefined') {
        return false;
    }
    return navigator.onLine;
}

/**
 * Options for image preprocessing.
 * Every field is optional; the defaults listed are the ones
 * preprocessImageForOCR applies when a field is omitted.
 */
export interface PreprocessOptions {
    /** Fraction of the image width cropped from each of the left and right edges (0-0.25) to remove bottle curves. Default: 0.05 */
    edgeCrop?: number;
    /** Target height in pixels for resizing; the width is scaled by the same factor. Default: 1200 */
    targetHeight?: number;
    /** Apply binarization (hard black/white). Default: false */
    binarize?: boolean;
    /** Contrast boost factor (1.0 = no change). Default: 1.3 */
    contrastBoost?: number;
    /** Apply sharpening. Default: false (preprocessImageForOCR disables it because it creates noise on photos) */
    sharpen?: boolean;
}

/**
 * Preprocess an image for better OCR results
 *
 * Applies, in order:
 * 1. Center crop (removes `edgeCrop` of the width from the left and right edges)
 * 2. Resize to `targetHeight`, keeping the aspect ratio
 * 3. Grayscale conversion
 * 4. Optional 3x3 sharpening (off by default - it creates noise on photos)
 * 5. Contrast enhancement around mid-gray
 * 6. Optional binarization (threshold at 128)
 *
 * @param imageSource - File, Blob, or HTMLImageElement
 * @param options - Preprocessing options
 * @returns Promise<string> - Preprocessed image as PNG data URL
 * @throws RangeError if `edgeCrop` is not a finite number in [0, 0.5) or
 *         `targetHeight` is not a finite number >= 1
 * @throws Error if the image has no pixels, the crop leaves nothing to draw,
 *         or a 2D canvas context cannot be obtained
 */
export async function preprocessImageForOCR(
    imageSource: File | Blob | HTMLImageElement,
    options: PreprocessOptions = {}
): Promise<string> {
    const {
        edgeCrop = 0.05,        // Remove 5% from each edge (minimal)
        targetHeight = 1200,    // High resolution
        binarize = false,       // Don't binarize by default
        contrastBoost = 1.3,    // 30% contrast boost
        sharpen = false,        // Disabled - creates noise on photos
    } = options;

    // Reject option values that can only produce an empty or garbage canvas
    if (!Number.isFinite(edgeCrop) || edgeCrop < 0 || edgeCrop >= 0.5) {
        throw new RangeError(`edgeCrop must be a number in [0, 0.5), got ${edgeCrop}`);
    }
    if (!Number.isFinite(targetHeight) || targetHeight < 1) {
        throw new RangeError(`targetHeight must be a number >= 1, got ${targetHeight}`);
    }

    // Load image into an HTMLImageElement if not already
    const img = imageSource instanceof HTMLImageElement
        ? imageSource
        : await loadImageFromBlob(imageSource);

    // drawImage() source rectangles are expressed in the image's intrinsic pixels.
    // img.width / img.height report the layout size when the element is rendered,
    // so prefer the natural dimensions and only fall back to width/height.
    const sourceWidth = img.naturalWidth || img.width;
    const sourceHeight = img.naturalHeight || img.height;
    if (sourceWidth <= 0 || sourceHeight <= 0) {
        throw new Error('Cannot preprocess an image without pixel data');
    }

    // Calculate crop dimensions (remove edges to focus on center)
    const cropX = Math.floor(sourceWidth * edgeCrop);
    const cropWidth = sourceWidth - (cropX * 2);
    const cropHeight = sourceHeight;
    if (cropWidth < 1) {
        throw new Error('edgeCrop leaves no image area to process');
    }

    // Calculate resize dimensions (maintain aspect ratio)
    const scale = targetHeight / cropHeight;
    // Never let an extremely tall, narrow image collapse to a zero-width canvas
    const newWidth = Math.max(1, Math.floor(cropWidth * scale));
    const newHeight = targetHeight;

    // Create canvas
    const canvas = document.createElement('canvas');
    const ctx = canvas.getContext('2d');
    if (!ctx) {
        // getContext returns null when a 2D context cannot be provided
        throw new Error('2D canvas context is not available');
    }

    canvas.width = newWidth;
    canvas.height = newHeight;

    // Draw cropped & resized image
    ctx.drawImage(
        img,
        cropX, 0, cropWidth, cropHeight,  // Source: center crop
        0, 0, newWidth, newHeight          // Destination: full canvas
    );

    // Get pixel data for processing (RGBA, 4 bytes per pixel)
    const imageData = ctx.getImageData(0, 0, newWidth, newHeight);
    const data = imageData.data;

    // First pass: Convert to grayscale (weighted sum of R, G, B; alpha untouched)
    for (let i = 0; i < data.length; i += 4) {
        const r = data[i];
        const g = data[i + 1];
        const b = data[i + 2];
        const gray = 0.2126 * r + 0.7152 * g + 0.0722 * b;
        data[i] = data[i + 1] = data[i + 2] = gray;
    }

    // Apply sharpening using a 3x3 kernel
    if (sharpen) {
        // Read from a snapshot so already-sharpened neighbours do not feed back in
        const tempData = new Uint8ClampedArray(data);
        // Sharpen kernel: enhances edges
        // [ 0, -1,  0]
        // [-1,  5, -1]
        // [ 0, -1,  0]
        const kernel = [0, -1, 0, -1, 5, -1, 0, -1, 0];

        // The one-pixel border is skipped because the kernel needs all 8 neighbours
        for (let y = 1; y < newHeight - 1; y++) {
            for (let x = 1; x < newWidth - 1; x++) {
                let sum = 0;
                for (let ky = -1; ky <= 1; ky++) {
                    for (let kx = -1; kx <= 1; kx++) {
                        const idx = ((y + ky) * newWidth + (x + kx)) * 4;
                        const ki = (ky + 1) * 3 + (kx + 1);
                        sum += tempData[idx] * kernel[ki];
                    }
                }
                const idx = (y * newWidth + x) * 4;
                const clamped = Math.min(255, Math.max(0, sum));
                data[idx] = data[idx + 1] = data[idx + 2] = clamped;
            }
        }
    }

    // Second pass: Apply contrast enhancement (stretch values away from mid-gray 128)
    for (let i = 0; i < data.length; i += 4) {
        let gray = data[i];
        gray = ((gray - 128) * contrastBoost) + 128;
        gray = Math.min(255, Math.max(0, gray));

        if (binarize) {
            gray = gray >= 128 ? 255 : 0;
        }

        data[i] = data[i + 1] = data[i + 2] = gray;
    }

    // Put processed data back
    ctx.putImageData(imageData, 0, 0);

    console.log('[PreprocessOCR] Image preprocessed:', {
        original: `${sourceWidth}x${sourceHeight}`,
        cropped: `${cropWidth}x${cropHeight} (${(edgeCrop * 100).toFixed(0)}% edge crop)`,
        final: `${newWidth}x${newHeight}`,
        sharpen,
        contrastBoost,
        mode: binarize ? 'binarized' : 'grayscale',
    });

    return canvas.toDataURL('image/png');
}

/**
 * Decode a Blob/File into an HTMLImageElement via a temporary object URL.
 *
 * The object URL is revoked as soon as the image either loads or fails.
 * Rejects with `Error('Failed to load image')` when the image cannot be loaded.
 */
function loadImageFromBlob(blob: Blob): Promise<HTMLImageElement> {
    return new Promise<HTMLImageElement>((resolve, reject) => {
        const element = new Image();
        const objectUrl = URL.createObjectURL(blob);

        // The URL is only needed until the load attempt settles
        const release = (): void => {
            URL.revokeObjectURL(objectUrl);
        };

        element.onload = () => {
            release();
            resolve(element);
        };
        element.onerror = () => {
            release();
            reject(new Error('Failed to load image'));
        };

        // Handlers are attached first so a fast load cannot be missed
        element.src = objectUrl;
    });
}