perf: Remove Tesseract OCR - saves ~45MB on mobile

- Removed Tesseract.js files from precache (~45MB)
- Scanner now uses only Gemini AI (more accurate, less data)
- Offline scans queued for later processing when online
- App download size reduced from ~50MB to ~5MB

BREAKING: Local offline OCR no longer available
Use Gemini AI instead (requires network for scanning)
This commit is contained in:
2025-12-25 23:39:08 +01:00
parent 462d27ea7b
commit f0f36e9c03
17 changed files with 55 additions and 2190 deletions

View File

@@ -1,341 +0,0 @@
/**
* Local OCR Engine
* Client-side OCR using Tesseract.js with Fuse.js fuzzy matching
*
* Optimized for whisky label scanning with:
* - Image preprocessing (grayscale, binarization, center crop)
* - PSM 6 (single block of text mode)
* - Character whitelisting
* - Bag-of-words fuzzy matching
*/
import Tesseract from 'tesseract.js';
import Fuse from 'fuse.js';
import { extractNumbers, ExtractedNumbers, preprocessImageForOCR } from './scanner-utils';
import distilleries from '@/data/distilleries.json';
/**
 * Metadata extracted from a label image by analyzeLocalOcr.
 * Every field starts out as null / '' / 0 and is only filled in when the
 * corresponding extraction step succeeds.
 */
export interface LocalOcrResult {
// Name of the fuzzy-matched distillery, or null when nothing matched
distillery: string | null;
// Region of the matched distillery entry, or null when nothing matched
distilleryRegion: string | null;
// Bottle name; analyzeLocalOcr currently always sets this to null (name extraction is disabled)
name: string | null;
// Age statement in years, from the regex extraction or from the line the distillery was matched on
age: number | null;
// Alcohol by volume as returned by extractNumbers
abv: number | null;
// Vintage year kept as a string, as returned by extractNumbers
vintage: string | null;
// Unmodified text returned by the OCR engine ('' when OCR did not run or failed)
rawText: string;
// OCR engine confidence divided by 100 (the engine value is treated as a percentage)
confidence: number;
}
// Fuse.js configuration for fuzzy matching distillery names.
// Balanced matching to catch partial OCR errors while avoiding false positives.
// Note: findDistillery additionally requires a score below 0.3, which is
// stricter than the threshold configured here.
const fuseOptions = {
keys: ['name'],
threshold: 0.35, // 0 = exact match, 0.35 = allow some fuzziness
distance: 50, // Fuse.js `distance` option — NOTE(review): confirm exact semantics against the Fuse.js docs
includeScore: true, // findDistillery reads the score of the best result
minMatchCharLength: 4, // Minimum chars to match
};
// Search index over the bundled distillery list; queried by findDistillery
const distilleryFuse = new Fuse(distilleries, fuseOptions);
// Tesseract worker singleton (reused across scans).
// Assigned by getWorker() and reset to null by terminateOcrWorker().
let tesseractWorker: Tesseract.Worker | null = null;
// Character whitelist for whisky labels ("Pattern Hack")
// Restricts Tesseract to only whisky-relevant characters:
// - Letters: A-Z, a-z
// - Numbers: 0-9
// - Essential punctuation: .,%&-/ (for ABV "46.5%", names like "No. 1")
// - Space: for word separation
// This prevents garbage like ~, _, ^, {, § from appearing.
// Passed to the worker as `tessedit_char_whitelist` in getWorker().
const CHAR_WHITELIST = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789.,%&-/ ';
/**
 * In-flight worker initialization. Shared between concurrent getWorker()
 * calls so that only one Tesseract worker is ever created at a time.
 */
let workerInitPromise: Promise<Tesseract.Worker> | null = null;

/**
 * Create a Tesseract worker from the files served under /tessdata and apply
 * the label-specific recognition parameters.
 *
 * If configuring the worker fails, the half-initialized worker is terminated
 * before the error is re-thrown so it is not leaked.
 */
async function createConfiguredWorker(): Promise<Tesseract.Worker> {
  console.log('[LocalOCR] Initializing Tesseract worker with local files...');
  // Use local files from /public/tessdata for full offline support
  const worker = await Tesseract.createWorker('eng', Tesseract.OEM.LSTM_ONLY, {
    workerPath: '/tessdata/worker.min.js', // Local worker for offline
    corePath: '/tessdata/',
    langPath: '/tessdata/',
    logger: (m) => {
      if (m.status === 'recognizing text') {
        console.log(`[LocalOCR] Progress: ${Math.round(m.progress * 100)}%`);
      } else {
        console.log(`[LocalOCR] ${m.status}`);
      }
    },
  });

  try {
    // Configure Tesseract for whisky label OCR
    await worker.setParameters({
      tessedit_pageseg_mode: Tesseract.PSM.SINGLE_BLOCK, // PSM 6 - treat as single block of text
      tessedit_char_whitelist: CHAR_WHITELIST,
      preserve_interword_spaces: '1', // Keep word spacing
    });
  } catch (error) {
    // Never hand out (or cache) a worker that is missing its configuration
    try {
      await worker.terminate();
    } catch (terminateError) {
      console.warn('[LocalOCR] Failed to terminate unconfigured worker:', terminateError);
    }
    throw error;
  }

  console.log('[LocalOCR] Tesseract worker ready (PSM: SINGLE_BLOCK, Whitelist enabled)');
  return worker;
}

/**
 * Initialize or get the Tesseract worker.
 * Uses local files from /public/tessdata for offline capability.
 *
 * The worker is a module-level singleton. Calls that arrive while the first
 * initialization is still running wait for that same initialization instead
 * of creating additional workers. A failed initialization is not cached, so
 * the next call retries from scratch.
 *
 * @returns The ready-to-use, configured worker
 * @throws Whatever Tesseract throws while creating or configuring the worker
 */
async function getWorker(): Promise<Tesseract.Worker> {
  if (tesseractWorker) {
    return tesseractWorker;
  }
  if (workerInitPromise) {
    return workerInitPromise;
  }

  const pending = createConfiguredWorker();
  workerInitPromise = pending;
  try {
    const worker = await pending;
    tesseractWorker = worker;
    return worker;
  } finally {
    // Clear the in-flight marker on success and on failure alike
    if (workerInitPromise === pending) {
      workerInitPromise = null;
    }
  }
}
/**
 * Run OCR on an image and extract whisky metadata.
 *
 * This function never rejects: any failure (preprocessing, worker start-up,
 * recognition, timeout) is logged and the result collected so far is returned.
 *
 * @param imageSource - File or Blob (preprocessed before OCR), or a string
 *   which is handed to the OCR engine unchanged
 * @param timeoutMs - Maximum time in milliseconds to wait for worker start-up
 *   plus recognition (default 10000ms)
 * @returns LocalOcrResult with extracted metadata
 */
export async function analyzeLocalOcr(
  imageSource: File | Blob | string,
  timeoutMs: number = 10000
): Promise<LocalOcrResult> {
  const result: LocalOcrResult = {
    distillery: null,
    distilleryRegion: null,
    name: null,
    age: null,
    abv: null,
    vintage: null,
    rawText: '',
    confidence: 0,
  };

  let timeoutId: ReturnType<typeof setTimeout> | undefined;

  try {
    // Step 1: Preprocess the image for better OCR
    let processedImage: string;
    if (typeof imageSource === 'string') {
      // Already a data URL, use as-is (can't preprocess string)
      processedImage = imageSource;
      console.log('[LocalOCR] Using raw image (string input)');
    } else {
      // Preprocess File/Blob using preprocessImageForOCR's default options
      console.log('[LocalOCR] Preprocessing image...');
      processedImage = await preprocessImageForOCR(imageSource);
    }
    const ocrInput = processedImage;

    // Step 2: Race (worker start-up + recognition) against the timeout.
    // Worker start-up is part of the race so the timeout promise always has a
    // handler attached when it fires, and so the caller is released on time
    // even when loading the worker is the slow part.
    const timeoutPromise = new Promise<never>((_, reject) => {
      timeoutId = setTimeout(() => reject(new Error('OCR timeout')), timeoutMs);
    });
    const recognition = (async () => {
      const worker = await getWorker();
      return worker.recognize(ocrInput);
    })();
    const ocrResult = await Promise.race([recognition, timeoutPromise]);

    result.rawText = ocrResult.data.text;
    result.confidence = ocrResult.data.confidence / 100; // Normalize to 0-1

    // Step 3: Extract numbers using regex (this works reliably)
    const numbers = extractNumbers(result.rawText);
    result.abv = numbers.abv;
    result.age = numbers.age;
    result.vintage = numbers.vintage;

    // Step 4: Fuzzy match distillery (algorithm with sanity checks)
    const distilleryMatch = findDistillery(result.rawText);
    if (distilleryMatch) {
      result.distillery = distilleryMatch.name;
      result.distilleryRegion = distilleryMatch.region;
      // Use contextual age if regex extraction failed
      if (!result.age && distilleryMatch.contextualAge) {
        result.age = distilleryMatch.contextualAge;
        console.log(`[LocalOCR] Using contextual age: ${result.age}`);
      }
    }

    // NOTE: Name extraction disabled - Tesseract too noisy for full bottle names
    // Let Gemini Vision handle the name field
    // result.name = extractName(result.rawText, result.distillery);
    result.name = null;

    // Detailed logging for debugging
    const cleanedText = result.rawText
      .split('\n')
      .map(line => line.trim())
      .filter(line => line.length > 0)
      .join(' | ');
    console.log('[LocalOCR] ========== OCR RESULTS ==========');
    console.log('[LocalOCR] Raw Text:\n', result.rawText);
    console.log('[LocalOCR] Cleaned Text:', cleanedText);
    console.log('[LocalOCR] Confidence:', (result.confidence * 100).toFixed(1) + '%');
    console.log('[LocalOCR] Extracted Data:', {
      distillery: result.distillery,
      distilleryRegion: result.distilleryRegion,
      name: result.name,
      age: result.age,
      abv: result.abv,
      vintage: result.vintage,
    });
    console.log('[LocalOCR] ===================================');
    return result;
  } catch (error) {
    console.warn('[LocalOCR] Analysis failed:', error);
    return result; // Return partial/empty result
  } finally {
    // Do not leave the timeout timer pending once the scan has settled
    if (timeoutId !== undefined) {
      clearTimeout(timeoutId);
    }
  }
}
/**
 * Locate a known distillery inside raw OCR text via fuzzy search.
 *
 * How it works:
 * 1. Each line is trimmed and its whitespace collapsed; lines shorter than
 *    4 characters or with fewer than 40% letters are discarded.
 * 2. Digits are stripped from the line before matching (helps with text such
 *    as "N NEVIS 27").
 * 3. Windows of 3, 2 and then 1 consecutive words are searched, skipping
 *    phrases shorter than 4 characters and common whisky vocabulary.
 * 4. A hit is accepted when its score is below 0.3 and the phrase/name length
 *    ratio lies within 0.6 - 1.5.
 * 5. On acceptance, the first standalone 1-2 digit number of the original
 *    line is reported as contextual age when it lies within 3 - 60.
 *
 * @param text - Raw OCR output
 * @returns The first accepted match, or null when no line produced one
 */
function findDistillery(text: string): { name: string; region: string; contextualAge?: number } | null {
  // Collect the lines that are worth matching at all
  const candidateLines: string[] = [];
  for (const rawLine of text.split('\n')) {
    const collapsed = rawLine.trim().replace(/\s+/g, ' ');
    if (collapsed.length < 4) continue;
    const letterCount = collapsed.replace(/[^a-zA-Z]/g, '').length;
    if (letterCount >= collapsed.length * 0.4) {
      candidateLines.push(collapsed);
    }
  }
  console.log('[LocalOCR] Lines for distillery matching:', candidateLines.length);

  // Generic label vocabulary that must never be treated as a distillery name
  const blacklistedWords = new Set([
    'reserve', 'malt', 'single', 'whisky', 'whiskey', 'scotch', 'bourbon',
    'blended', 'irish', 'aged', 'years', 'edition', 'cask', 'barrel',
    'distillery', 'vintage', 'special', 'limited', 'rare', 'old', 'gold',
    'spirit', 'spirits', 'proof', 'strength', 'batch', 'select', 'finish'
  ]);

  for (const originalLine of candidateLines) {
    // Digits only confuse the fuzzy search, so match on the text without them
    const digitFreeLine = originalLine.replace(/[0-9]/g, '').replace(/\s+/g, ' ').trim();
    if (digitFreeLine.length < 4) continue;

    const tokens = digitFreeLine.split(' ').filter(token => token.length >= 2);

    // Widest window first, e.g. "ge OO BEN NEVIS" → "ge OO BEN", "OO BEN NEVIS", ...
    for (let span = Math.min(3, tokens.length); span >= 1; span--) {
      const lastStart = tokens.length - span;
      for (let start = 0; start <= lastStart; start++) {
        const phrase = tokens.slice(start, start + span).join(' ');
        if (phrase.length < 4 || blacklistedWords.has(phrase.toLowerCase())) continue;

        const [best] = distilleryFuse.search(phrase);
        if (!best) continue;
        const score = best.score;
        if (score === undefined || !(score < 0.3)) continue;

        const candidate = best.item;

        // Reject matches whose length is implausible compared to the real name
        const lengthRatio = phrase.length / candidate.name.length;
        if (lengthRatio < 0.6 || lengthRatio > 1.5) continue;

        // Age hint: first standalone 1-2 digit number on the untouched line
        let contextualAge: number | undefined;
        const numberHit = /\b(\d{1,2})\b/.exec(originalLine);
        if (numberHit) {
          const candidateAge = parseInt(numberHit[1], 10);
          if (candidateAge >= 3 && candidateAge <= 60) {
            contextualAge = candidateAge;
            console.log(`[LocalOCR] Contextual age detected: ${candidateAge} years`);
          }
        }

        console.log(`[LocalOCR] Distillery match: "${phrase}" → ${candidate.name} (score: ${score.toFixed(3)}, original: "${originalLine}")`);
        return {
          name: candidate.name,
          region: candidate.region,
          contextualAge,
        };
      }
    }
  }

  return null;
}
/**
 * Pick the most likely bottle name out of OCR text.
 *
 * Returns the first line (in reading order) that survives all filters:
 * at least 5 characters, at least 60% letters, not purely digits/punctuation,
 * not merely the distillery name (a line containing the distillery must be
 * more than 5 characters longer than it), not starting with a number followed
 * by whitespace or '%', and not a lone common whisky word.
 *
 * @param text - Raw OCR output
 * @param distillery - Already identified distillery name, or null
 * @returns The candidate name, or null when no line qualifies
 */
function extractName(text: string, distillery: string | null): string | null {
  const commonWordOnly = /^(single|malt|scotch|whisky|whiskey|aged|years?|proof|edition|distilled|distillery)$/i;
  const distilleryInfo = distillery
    ? { lower: distillery.toLowerCase(), minLength: distillery.length + 5 }
    : null;

  for (const rawLine of text.split('\n')) {
    const line = rawLine.trim();

    // Too short to be a name
    if (line.length < 5) continue;

    // Mostly non-letters: OCR garbage such as "ee" or "4 . ."
    const letterCount = line.replace(/[^a-zA-Z]/g, '').length;
    if (letterCount < line.length * 0.6) continue;

    // Nothing but punctuation/numbers
    if (/^[\d\s.,\-'"]+$/.test(line)) continue;

    // The line is (little more than) the distillery name itself
    if (
      distilleryInfo !== null &&
      line.toLowerCase().includes(distilleryInfo.lower) &&
      !(line.length > distilleryInfo.minLength)
    ) {
      continue;
    }

    // Starts with a number followed by whitespace or a percent sign
    if (/^\d+[\s%]+/.test(line)) continue;

    // A lone common whisky word
    if (commonWordOnly.test(line)) continue;

    return line;
  }

  return null;
}
/**
 * Terminate the Tesseract worker (call on cleanup).
 *
 * The singleton reference is cleared before the worker is shut down, so that
 * code asking for a worker while termination is in progress never receives
 * the worker being torn down, and so that a failing terminate() does not
 * leave a dead worker cached. Does nothing when no worker exists.
 *
 * @throws Whatever the worker's terminate() throws
 */
export async function terminateOcrWorker(): Promise<void> {
  const worker = tesseractWorker;
  if (!worker) {
    return;
  }
  tesseractWorker = null;
  await worker.terminate();
}

View File

@@ -1,440 +0,0 @@
/**
* Scanner Utilities
* Cache checking and helper functions for client-side OCR
*/
/**
 * Report whether OCR can be attempted right now.
 *
 * - Outside a browser (no `window`): never ready.
 * - Browser reports online: treated as ready without inspecting any cache.
 * - Offline: ready only if, across all Cache Storage caches, there is both a
 *   `tesseract-core` `.wasm` entry and an `eng.traineddata` entry.
 *
 * Cache inspection errors are logged and reported as "not ready".
 *
 * @returns Promise<boolean> - true if OCR can run
 */
export async function isTesseractReady(): Promise<boolean> {
  const inBrowser = typeof window !== 'undefined';
  if (!inBrowser) {
    return false;
  }

  // Online: no cache inspection needed
  if (navigator.onLine) {
    console.log('[Scanner] Online - tesseract will use CDN');
    return true;
  }

  // Offline without Cache Storage: nothing could have been stored
  const cacheApiAvailable = 'caches' in window;
  if (!cacheApiAvailable) {
    console.log('[Scanner] Offline + no cache API - tesseract not ready');
    return false;
  }

  try {
    // The files may live in any cache, so every cache is inspected
    const cacheNames = await caches.keys();
    console.log('[Scanner] Available caches:', cacheNames);

    let foundWasm = false;
    let foundLang = false;
    for (const cacheName of cacheNames) {
      const cache = await caches.open(cacheName);
      const urls = (await cache.keys()).map((request) => request.url);
      foundWasm = foundWasm || urls.some((url) => url.includes('tesseract-core') && url.includes('.wasm'));
      foundLang = foundLang || urls.some((url) => url.includes('eng.traineddata'));
    }

    const ready = foundWasm && foundLang;
    console.log('[Scanner] Offline cache check:', {
      wasmMatch: foundWasm,
      langMatch: foundLang,
      ready,
      cacheCount: cacheNames.length,
    });
    return ready;
  } catch (error) {
    console.warn('[Scanner] Cache check failed:', error);
    return false;
  }
}
/**
 * Numeric facts pulled out of OCR text by extractNumbers.
 */
export interface ExtractedNumbers {
  /** Alcohol by volume in percent (35 - 75), or null when none was found */
  abv: number | null;
  /** Age statement in years (3 - 60), or null when none was found */
  age: number | null;
  /** Vintage year (1970 - current year) exactly as captured, or null */
  vintage: string | null;
}

/**
 * Scan `text` with each pattern in order and return the first captured value
 * (capture group 1) that `accept` does not reject.
 *
 * Unlike a plain `text.match(pattern)`, every occurrence of a pattern is
 * examined, so an implausible early hit cannot hide a valid later one.
 */
function firstAcceptedCapture<T>(
  text: string,
  patterns: readonly RegExp[],
  accept: (captured: string, pattern: RegExp) => T | null
): T | null {
  for (const pattern of patterns) {
    const flags = pattern.flags.includes('g') ? pattern.flags : `${pattern.flags}g`;
    const scanner = new RegExp(pattern.source, flags);
    let match: RegExpExecArray | null;
    while ((match = scanner.exec(text)) !== null) {
      if (match[0].length === 0) {
        // Defensive: never loop forever on an empty match
        scanner.lastIndex++;
        continue;
      }
      const captured = match[1];
      if (captured === undefined) continue;
      const accepted = accept(captured, pattern);
      if (accepted !== null) {
        return accepted;
      }
    }
  }
  return null;
}

/**
 * Extract numeric values (ABV, age, vintage) from OCR text using regex patterns.
 *
 * ABV is searched in a copy of the text that has common OCR misreads repaired;
 * age and vintage are searched in the text exactly as given.
 *
 * @param text - Raw OCR output; an empty string yields an all-null result
 * @returns The values found; each field is null when nothing plausible was found
 */
export function extractNumbers(text: string): ExtractedNumbers {
  const result: ExtractedNumbers = {
    abv: null,
    age: null,
    vintage: null
  };
  if (!text) return result;

  // ========== ABV EXTRACTION ==========
  // Step 1: Normalize text for common Tesseract OCR mistakes
  const normalizedText = text
    // Fix % misread as text
    .replace(/o\/o/gi, '%') // o/o → %
    .replace(/°\/o/gi, '%') // °/o → %
    .replace(/0\/0/g, '%') // 0/0 → %
    // Fix common letter/number confusions
    .replace(/[oO](?=\d)/g, '0') // O before digit → 0 (e.g., "O5" → "05")
    .replace(/(?<=\d)[oO]/g, '0') // O after digit → 0 (e.g., "5O" → "50")
    .replace(/[lI](?=\d)/g, '1') // l/I before digit → 1
    .replace(/(?<=\d)[lI]/g, '1') // l/I after digit → 1
    // Normalize decimal separators
    .replace(/,/g, '.')
    // Tesseract often reads % as 96 ("43%" → "4396"). Only undo that when the
    // digits in front form a plausible ABV; a blanket 96 → % rewrite would
    // turn years such as "1996" into "19%".
    .replace(/(?<!\d)(\d{2}(?:\.\d{1,2})?)\s?96(?!\d)/g, (whole: string, num: string) => {
      const value = parseFloat(num);
      return value >= 35.0 && value <= 75.0 ? `${num}%` : whole;
    });

  // Step 2: ABV patterns - looking for number before % or Vol
  const abvPatterns = [
    /(\d{2}\.?\d{0,2})\s*%/, // 43%, 43.5%, 57.1%
    /(\d{2}\.?\d{0,2})\s*(?:vol|alc)/i, // 43 vol, 43.5 alc
    /(?:abv|alc|vol)[:\s]*(\d{2}\.?\d{0,2})/i, // ABV: 43, vol. 43.5
    /(\d{2}\.?\d{0,2})\s*(?:percent|prozent)/i, // 43 percent/prozent
  ];
  result.abv = firstAcceptedCapture(normalizedText, abvPatterns, (captured, pattern) => {
    const value = parseFloat(captured);
    // STRICT RANGE GUARD: Only accept 35.0 - 75.0
    // This prevents misidentifying years or volumes (700ml) as ABV
    if (value >= 35.0 && value <= 75.0) {
      console.log(`[ABV] Detected: ${value}% from pattern: ${pattern.source}`);
      return value;
    }
    console.log(`[ABV] Rejected ${value} - outside 35-75 range`);
    return null;
  });

  // ========== AGE & VINTAGE (searched in the original, un-normalized text) ==========
  // Age patterns: "12 years", "12 year old", "12 YO", "aged 12"
  const agePatterns = [
    /(\d{1,2})\s*(?:years?|yrs?|y\.?o\.?|jahre?)/i,
    /aged\s*(\d{1,2})/i,
    /(\d{1,2})\s*year\s*old/i,
  ];
  result.age = firstAcceptedCapture(text, agePatterns, (captured) => {
    const value = parseInt(captured, 10);
    // Reasonable whisky age range
    return value >= 3 && value <= 60 ? value : null;
  });

  // Vintage patterns: "1990", "Vintage 1990", "Distilled 1990"
  const vintagePatterns = [
    /(?:vintage|distilled|dist\.?)\s*(19\d{2}|20[0-2]\d)/i,
    /\b(19[789]\d|20[0-2]\d)\b/, // Years 1970-2029
  ];
  const currentYear = new Date().getFullYear();
  result.vintage = firstAcceptedCapture(text, vintagePatterns, (captured) => {
    const year = parseInt(captured, 10);
    return year >= 1970 && year <= currentYear ? captured : null;
  });

  return result;
}
/**
 * Read a Blob with a FileReader and resolve with its data URL
 * (base64-encoded contents).
 *
 * @param blob - Image data to encode
 * @returns Promise resolving to the data URL string; rejects when the reader
 *   reports an error or produces a non-string result
 */
export function imageToBase64(blob: Blob): Promise<string> {
  return new Promise<string>((resolve, reject) => {
    const reader = new FileReader();

    reader.onload = () => {
      const { result } = reader;
      if (typeof result !== 'string') {
        reject(new Error('Failed to convert image to base64'));
        return;
      }
      resolve(result);
    };
    reader.onerror = reject;

    reader.readAsDataURL(blob);
  });
}
/**
 * Tell whether the browser currently reports a network connection.
 * Environments without a `navigator` object count as offline.
 */
export function isOnline(): boolean {
  if (typeof navigator === 'undefined') {
    return false;
  }
  return navigator.onLine;
}
/**
* Options for image preprocessing (all optional; see preprocessImageForOCR).
* Note: when `adaptiveThreshold` is enabled, `binarize` and `contrastBoost`
* are not applied — they only take effect on the non-adaptive path.
*/
export interface PreprocessOptions {
/**
* Fraction of the image width removed from BOTH the left and right edge
* (intended range 0-0.25) to remove bottle curves. Default: 0.05.
* The range is not clamped by preprocessImageForOCR.
*/
edgeCrop?: number;
/** Target height in pixels for resizing; width follows the aspect ratio. Default: 1200 */
targetHeight?: number;
/** Apply simple binarization (hard black/white at gray level 128). Only used when adaptiveThreshold is false. Default: false */
binarize?: boolean;
/** Apply adaptive thresholding (better for uneven lighting). Default: true */
adaptiveThreshold?: boolean;
/** Contrast boost factor (1.0 = no change). Only used when adaptiveThreshold is false. Default: 1.3 */
contrastBoost?: number;
/** Apply 3x3 sharpening after the grayscale conversion. Default: false */
sharpen?: boolean;
}
/**
 * Preprocess an image for better OCR results.
 *
 * Pipeline:
 * 1. Center crop (removes `edgeCrop` of the width from both sides)
 * 2. Resize to `targetHeight`, keeping the aspect ratio
 * 3. Grayscale conversion
 * 4. Optional 3x3 sharpening
 * 5. Either adaptive (Sauvola-style) thresholding, or - when
 *    `adaptiveThreshold` is false - contrast boost plus optional global
 *    binarization
 *
 * Must run in a browser: it relies on `document`, canvas and `HTMLImageElement`.
 *
 * @param imageSource - File, Blob, or HTMLImageElement
 * @param options - Preprocessing options
 * @returns Promise<string> - Preprocessed image as PNG data URL
 * @throws Error when the image cannot be loaded, when the crop/resize would
 *   produce an empty or non-finite image size, or when no 2D canvas context
 *   is available
 */
export async function preprocessImageForOCR(
  imageSource: File | Blob | HTMLImageElement,
  options: PreprocessOptions = {}
): Promise<string> {
  const {
    edgeCrop = 0.05, // Remove 5% from each edge (minimal)
    targetHeight = 1200, // High resolution
    binarize = false, // Simple binarization (global threshold)
    adaptiveThreshold = true, // Adaptive thresholding (local threshold) - better for uneven lighting
    contrastBoost = 1.3, // 30% contrast boost (only if not using adaptive)
    sharpen = false, // Disabled - creates noise on photos
  } = options;

  // Load image into an HTMLImageElement if not already
  const img =
    imageSource instanceof HTMLImageElement ? imageSource : await loadImageFromBlob(imageSource);

  // drawImage() source coordinates are expressed in the image's natural pixel
  // size. `width`/`height` reflect layout/attributes for an <img> element, so
  // prefer the natural dimensions and only fall back when they are unavailable.
  const sourceWidth = img.naturalWidth || img.width;
  const sourceHeight = img.naturalHeight || img.height;

  // Calculate crop dimensions (remove edges to focus on center)
  const cropX = Math.floor(sourceWidth * edgeCrop);
  const cropWidth = sourceWidth - (cropX * 2);
  const cropHeight = sourceHeight;

  // Calculate resize dimensions (maintain aspect ratio)
  const scale = targetHeight / cropHeight;
  const newWidth = Math.floor(cropWidth * scale);
  const newHeight = targetHeight;

  // Fail with a readable message instead of an opaque canvas error
  const validSize =
    Number.isFinite(newWidth) && newWidth > 0 && Number.isFinite(newHeight) && newHeight > 0;
  if (!validSize) {
    throw new Error(
      `Cannot preprocess image: invalid dimensions (source ${sourceWidth}x${sourceHeight}, edgeCrop ${edgeCrop}, targetHeight ${targetHeight})`
    );
  }

  // Create canvas
  const canvas = document.createElement('canvas');
  const ctx = canvas.getContext('2d');
  if (!ctx) {
    throw new Error('Cannot preprocess image: 2D canvas context is not available');
  }
  canvas.width = newWidth;
  canvas.height = newHeight;

  // Draw cropped & resized image
  ctx.drawImage(
    img,
    cropX, 0, cropWidth, cropHeight, // Source: center crop
    0, 0, newWidth, newHeight // Destination: full canvas
  );

  // Get pixel data for processing
  const imageData = ctx.getImageData(0, 0, newWidth, newHeight);
  const data = imageData.data;

  // First pass: Convert to grayscale
  for (let i = 0; i < data.length; i += 4) {
    const r = data[i];
    const g = data[i + 1];
    const b = data[i + 2];
    const gray = 0.2126 * r + 0.7152 * g + 0.0722 * b;
    data[i] = data[i + 1] = data[i + 2] = gray;
  }

  // Apply sharpening using a 3x3 kernel (border pixels are left untouched)
  if (sharpen) {
    const tempData = new Uint8ClampedArray(data);
    // Sharpen kernel: enhances edges
    // [ 0, -1, 0]
    // [-1, 5, -1]
    // [ 0, -1, 0]
    const kernel = [0, -1, 0, -1, 5, -1, 0, -1, 0];
    for (let y = 1; y < newHeight - 1; y++) {
      for (let x = 1; x < newWidth - 1; x++) {
        let sum = 0;
        for (let ky = -1; ky <= 1; ky++) {
          for (let kx = -1; kx <= 1; kx++) {
            const idx = ((y + ky) * newWidth + (x + kx)) * 4;
            const ki = (ky + 1) * 3 + (kx + 1);
            sum += tempData[idx] * kernel[ki];
          }
        }
        const idx = (y * newWidth + x) * 4;
        const clamped = Math.min(255, Math.max(0, sum));
        data[idx] = data[idx + 1] = data[idx + 2] = clamped;
      }
    }
  }

  // Put processed data back (after grayscale conversion / sharpening)
  ctx.putImageData(imageData, 0, 0);

  // Apply adaptive or simple binarization/contrast
  if (adaptiveThreshold) {
    // ========== ADAPTIVE THRESHOLDING ==========
    // Uses integral image for efficient local mean calculation
    // Better for uneven lighting on curved bottles
    const adaptiveData = ctx.getImageData(0, 0, newWidth, newHeight);
    const pixels = adaptiveData.data;

    // Window size: ~1/20th of image width, minimum 11, must be odd
    let windowSize = Math.max(11, Math.floor(newWidth / 20));
    if (windowSize % 2 === 0) windowSize++;
    const halfWindow = Math.floor(windowSize / 2);

    // Sauvola-style constant: lower = more sensitive to text
    const k = 0.15;

    // Build integral images (sum and sum of squares) for fast local statistics.
    // Both have one extra leading row/column of zeros.
    const integral = new Float64Array((newWidth + 1) * (newHeight + 1));
    const integralSq = new Float64Array((newWidth + 1) * (newHeight + 1));
    for (let y = 0; y < newHeight; y++) {
      let rowSum = 0;
      let rowSumSq = 0;
      for (let x = 0; x < newWidth; x++) {
        const idx = (y * newWidth + x) * 4;
        const gray = pixels[idx];
        rowSum += gray;
        rowSumSq += gray * gray;
        const iIdx = (y + 1) * (newWidth + 1) + (x + 1);
        const iIdxAbove = y * (newWidth + 1) + (x + 1);
        integral[iIdx] = rowSum + integral[iIdxAbove];
        integralSq[iIdx] = rowSumSq + integralSq[iIdxAbove];
      }
    }

    // Apply adaptive threshold
    const output = new Uint8ClampedArray(pixels.length);
    for (let y = 0; y < newHeight; y++) {
      for (let x = 0; x < newWidth; x++) {
        // Calculate local window bounds (clipped to the image)
        const x1 = Math.max(0, x - halfWindow);
        const y1 = Math.max(0, y - halfWindow);
        const x2 = Math.min(newWidth - 1, x + halfWindow);
        const y2 = Math.min(newHeight - 1, y + halfWindow);
        const count = (x2 - x1 + 1) * (y2 - y1 + 1);

        // Get local sum and sum of squares using integral image
        const i11 = y1 * (newWidth + 1) + x1;
        const i12 = y1 * (newWidth + 1) + (x2 + 1);
        const i21 = (y2 + 1) * (newWidth + 1) + x1;
        const i22 = (y2 + 1) * (newWidth + 1) + (x2 + 1);
        const sum = integral[i22] - integral[i21] - integral[i12] + integral[i11];
        const sumSq = integralSq[i22] - integralSq[i21] - integralSq[i12] + integralSq[i11];

        const mean = sum / count;
        const variance = (sumSq / count) - (mean * mean);
        const stddev = Math.sqrt(Math.max(0, variance));

        // Sauvola threshold: T = mean * (1 + k * (stddev/R - 1))
        // R = dynamic range = 128 for grayscale
        const threshold = mean * (1 + k * (stddev / 128 - 1));

        const idx = (y * newWidth + x) * 4;
        const pixel = pixels[idx];
        const binaryValue = pixel < threshold ? 0 : 255;
        output[idx] = output[idx + 1] = output[idx + 2] = binaryValue;
        output[idx + 3] = 255;
      }
    }

    // Copy output back
    for (let i = 0; i < pixels.length; i++) {
      pixels[i] = output[i];
    }
    ctx.putImageData(adaptiveData, 0, 0);
    console.log('[PreprocessOCR] Adaptive thresholding applied:', {
      windowSize,
      k,
      imageSize: `${newWidth}x${newHeight}`,
    });
  } else {
    // Simple contrast enhancement + optional global binarization
    const simpleData = ctx.getImageData(0, 0, newWidth, newHeight);
    const pixels = simpleData.data;
    for (let i = 0; i < pixels.length; i += 4) {
      let gray = pixels[i];
      gray = ((gray - 128) * contrastBoost) + 128;
      gray = Math.min(255, Math.max(0, gray));
      if (binarize) {
        gray = gray >= 128 ? 255 : 0;
      }
      pixels[i] = pixels[i + 1] = pixels[i + 2] = gray;
    }
    ctx.putImageData(simpleData, 0, 0);
  }

  console.log('[PreprocessOCR] Image preprocessed:', {
    original: `${sourceWidth}x${sourceHeight}`,
    cropped: `${cropWidth}x${cropHeight} (${(edgeCrop * 100).toFixed(0)}% edge crop)`,
    final: `${newWidth}x${newHeight}`,
    sharpen,
    mode: adaptiveThreshold ? 'adaptive-threshold' : (binarize ? 'binarized' : 'grayscale+contrast'),
  });

  return canvas.toDataURL('image/png');
}
/**
 * Decode a Blob/File into an HTMLImageElement via a temporary object URL.
 * The object URL is revoked as soon as loading succeeds or fails.
 *
 * @param blob - Image data to decode
 * @returns Promise resolving to the loaded image element; rejects with
 *   'Failed to load image' when the image reports an error
 */
function loadImageFromBlob(blob: Blob): Promise<HTMLImageElement> {
  return new Promise<HTMLImageElement>((resolve, reject) => {
    const image = new Image();
    const objectUrl = URL.createObjectURL(blob);
    const releaseUrl = (): void => {
      URL.revokeObjectURL(objectUrl);
    };

    image.onload = () => {
      releaseUrl();
      resolve(image);
    };
    image.onerror = () => {
      releaseUrl();
      reject(new Error('Failed to load image'));
    };

    // Assigning src starts the load; handlers are already in place
    image.src = objectUrl;
  });
}