feat: improved local OCR with Strip & Match distillery detection

- Added comprehensive distillery database (200+ entries)
- Implemented Strip & Match heuristic for fuzzy matching
- Added contextual age detection from distillery lines
- Added whitespace normalization for OCR text
- Disabled local name extraction (too noisy, let Gemini handle it)
- Fixed confidence scale normalization in TastingEditor (0-1 vs 0-100)
- Improved extractName filter (60% letters required)
- Relaxed Fuse.js thresholds for partial matches
This commit is contained in:
2025-12-25 13:14:08 +01:00
parent a1a91795d1
commit afe9197776
17 changed files with 3642 additions and 262 deletions

101
pnpm-lock.yaml generated
View File

@@ -44,6 +44,9 @@ importers:
framer-motion:
specifier: ^12.23.26
version: 12.23.26(react-dom@19.2.3(react@19.2.3))(react@19.2.3)
fuse.js:
specifier: ^7.1.0
version: 7.1.0
heic2any:
specifier: ^0.0.4
version: 0.0.4
@@ -68,6 +71,9 @@ importers:
sharp:
specifier: ^0.34.5
version: 0.34.5
tesseract.js:
specifier: ^7.0.0
version: 7.0.0
uuid:
specifier: ^13.0.0
version: 13.0.0
@@ -1333,6 +1339,9 @@ packages:
resolution: {integrity: sha512-Ceh+7ox5qe7LJuLHoY0feh3pHuUDHAcRUeyL2VYghZwfpkNIy/+8Ocg0a3UuSoYzavmylwuLWQOf3hl0jjMMIw==}
engines: {node: '>=8'}
bmp-js@0.1.0:
resolution: {integrity: sha512-vHdS19CnY3hwiNdkaqk93DvjVLfbEcI8mys4UjuWrlX1haDmroo8o4xCzh4wD6DGV6HxRCyauwhHRqMTfERtjw==}
brace-expansion@1.1.12:
resolution: {integrity: sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==}
@@ -1858,6 +1867,10 @@ packages:
functions-have-names@1.2.3:
resolution: {integrity: sha512-xckBUXyTIqT97tq2x2AMb+g163b5JFysYk0x4qxNFwbfQkmNZoiRHb6sPzI9/QV33WeuvVYBUIiD4NzNIyqaRQ==}
fuse.js@7.1.0:
resolution: {integrity: sha512-trLf4SzuuUxfusZADLINj+dE8clK1frKdmqiJNb1Es75fmI5oY6X2mxLVUciLLjxqw/xr72Dhy+lER6dGd02FQ==}
engines: {node: '>=10'}
generator-function@2.0.1:
resolution: {integrity: sha512-SFdFmIJi+ybC0vjlHN0ZGVGHc3lgE0DxPAT0djjVg+kjOnSqclqmj0KQ7ykTOLP6YxoqOvuAODGdcHJn+43q3g==}
engines: {node: '>= 0.4'}
@@ -1968,6 +1981,9 @@ packages:
resolution: {integrity: sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==}
engines: {node: '>=0.10.0'}
idb-keyval@6.2.2:
resolution: {integrity: sha512-yjD9nARJ/jb1g+CvD0tlhUHOrJ9Sy0P8T9MF3YaLlHnSRpwPfpTX0XIvpmw3gAJUmEu3FiICLBDPXVwyEvrleg==}
ignore@5.3.2:
resolution: {integrity: sha512-hsBTNUqQTDwkWtcdYI2i06Y/nUBEsNEDJKjWdigLvegy8kDuJAS8uRlpkkcQpyEXL0Z/pjDy5HBmMjRCJ2gq+g==}
engines: {node: '>= 4'}
@@ -2111,6 +2127,9 @@ packages:
resolution: {integrity: sha512-p3EcsicXjit7SaskXHs1hA91QxgTw46Fv6EFKKGS5DRFLD8yKnohjF3hxoju94b/OcMZoQukzpPpBE9uLVKzgQ==}
engines: {node: '>= 0.4'}
is-url@1.2.4:
resolution: {integrity: sha512-ITvGim8FhRiYe4IQ5uHSkj7pVaPDrCTkNd3yq3cV7iZAcJdHTUMPMEHcqSOy9xZ9qFenQCvi+2wjH9a1nXqHww==}
is-weakmap@2.0.2:
resolution: {integrity: sha512-K5pXYOm9wqY1RgjpL3YTkF39tni1XajUIkawTLUo9EZEVUFga5gSQJF8nNS7ZwJQ02y+1YCNYcMh+HIf1ZqE+w==}
engines: {node: '>= 0.4'}
@@ -2309,6 +2328,15 @@ packages:
sass:
optional: true
node-fetch@2.7.0:
resolution: {integrity: sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==}
engines: {node: 4.x || >=6.0.0}
peerDependencies:
encoding: ^0.1.0
peerDependenciesMeta:
encoding:
optional: true
node-releases@2.0.27:
resolution: {integrity: sha512-nmh3lCkYZ3grZvqcCH+fjmQ7X+H0OeZgP40OierEaAptX4XofMh5kwNbWh7lBduUzCcV/8kZ+NDLCwm2iorIlA==}
@@ -2370,6 +2398,10 @@ packages:
zod:
optional: true
opencollective-postinstall@2.0.3:
resolution: {integrity: sha512-8AV/sCtuzUeTo8gQK5qDZzARrulB3egtLzFgteqB2tcT4Mw7B8Kt7JcDHmltjz6FOAHsvTevk70gZEbhM4ZS9Q==}
hasBin: true
optionator@0.9.4:
resolution: {integrity: sha512-6IpQ7mKUxRcZNLIObR0hz7lxsapSSIYNZJwXPGeF0mTVqGKFIXj1DQcMoT22S3ROcLyY/rz0PWaWZ9ayWmad9g==}
engines: {node: '>= 0.8.0'}
@@ -2575,6 +2607,9 @@ packages:
resolution: {integrity: sha512-00o4I+DVrefhv+nX0ulyi3biSHCPDe+yLv5o/p6d/UVlirijB8E16FtfwSAi4g3tcqrQ4lRAqQSoFEZJehYEcw==}
engines: {node: '>= 0.4'}
regenerator-runtime@0.13.11:
resolution: {integrity: sha512-kY1AZVr2Ra+t+piVaJ4gxaFaReZVH40AKNo7UCX6W+dEwBo/2oZJzqfuN1qLq1oL45o56cPaTXELwrTh8Fpggg==}
regexp.prototype.flags@1.5.4:
resolution: {integrity: sha512-dYqgNSZbDwkaJ2ceRd9ojCGjBq+mOm9LmtXnAnEGyHhN/5R7iDW2TRw3h+o/jCFxus3P2LfWIIiwowAjANm7IA==}
engines: {node: '>= 0.4'}
@@ -2783,6 +2818,12 @@ packages:
engines: {node: '>=14.0.0'}
hasBin: true
tesseract.js-core@7.0.0:
resolution: {integrity: sha512-WnNH518NzmbSq9zgTPeoF8c+xmilS8rFIl1YKbk/ptuuc7p6cLNELNuPAzcmsYw450ca6bLa8j3t0VAtq435Vw==}
tesseract.js@7.0.0:
resolution: {integrity: sha512-exPBkd+z+wM1BuMkx/Bjv43OeLBxhL5kKWsz/9JY+DXcXdiBjiAch0V49QR3oAJqCaL5qURE0vx9Eo+G5YE7mA==}
text-table@0.2.0:
resolution: {integrity: sha512-N+8UisAXDGk8PFXP4HAzVR9nbfmVJ3zYLAWiTIoqC5v5isinhr+r5uaO8+7r3BMfuNIufIsA7RdpVgacC2cSpw==}
@@ -2826,6 +2867,9 @@ packages:
resolution: {integrity: sha512-kXuRi1mtaKMrsLUxz3sQYvVl37B0Ns6MzfrtV5DvJceE9bPyspOqk9xxv7XbZWcfLWbFmm997vl83qUWVJA64w==}
engines: {node: '>=16'}
tr46@0.0.3:
resolution: {integrity: sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==}
tr46@6.0.0:
resolution: {integrity: sha512-bLVMLPtstlZ4iMQHpFHTR7GAGj2jxi8Dg0s2h2MafAE4uSWF98FC/3MomU51iQAMf8/qDUbKWf5GxuvvVcXEhw==}
engines: {node: '>=20'}
@@ -2996,6 +3040,12 @@ packages:
resolution: {integrity: sha512-o8qghlI8NZHU1lLPrpi2+Uq7abh4GGPpYANlalzWxyWteJOCsr/P+oPBA49TOLu5FTZO4d3F9MnWJfiMo4BkmA==}
engines: {node: '>=18'}
wasm-feature-detect@1.8.0:
resolution: {integrity: sha512-zksaLKM2fVlnB5jQQDqKXXwYHLQUVH9es+5TOOHwGOVJOCeRBCiPjwSg+3tN2AdTCzjgli4jijCH290kXb/zWQ==}
webidl-conversions@3.0.1:
resolution: {integrity: sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==}
webidl-conversions@8.0.0:
resolution: {integrity: sha512-n4W4YFyz5JzOfQeA8oN7dUYpR+MBP3PIUsn2jLjWXwK5ASUzt0Jc/A5sAUZoCYFJRGF0FBKJ+1JjN43rNdsQzA==}
engines: {node: '>=20'}
@@ -3012,6 +3062,9 @@ packages:
resolution: {integrity: sha512-2ytDk0kiEj/yu90JOAp44PVPUkO9+jVhyf+SybKlRHSDlvOOZhdPIrr7xTH64l4WixO2cP+wQIcgujkGBPPz6g==}
engines: {node: '>=20'}
whatwg-url@5.0.0:
resolution: {integrity: sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==}
which-boxed-primitive@1.1.1:
resolution: {integrity: sha512-TbX3mj8n0odCBFVlY8AxkqcHASw3L60jIuF8jFP78az3C2YhmGvqbHBpAjTRH2/xqYunrJ9g1jSyjCjpoWzIAA==}
engines: {node: '>= 0.4'}
@@ -3071,6 +3124,9 @@ packages:
resolution: {integrity: sha512-rVksvsnNCdJ/ohGc6xgPwyN8eheCxsiLM8mxuE/t/mOVqJewPuO1miLpTHQiRgTKCLexL4MeAFVagts7HmNZ2Q==}
engines: {node: '>=10'}
zlibjs@0.3.1:
resolution: {integrity: sha512-+J9RrgTKOmlxFSDHo0pI1xM6BLVUv+o0ZT9ANtCxGkjIVCCUdx9alUF8Gm+dGLKbkkkidWIHFDZHDMpfITt4+w==}
zod-to-json-schema@3.25.0:
resolution: {integrity: sha512-HvWtU2UG41LALjajJrML6uQejQhNJx+JBO9IflpSja4R03iNWfKXrj6W2h7ljuLyc1nKS+9yDyL/9tD1U/yBnQ==}
peerDependencies:
@@ -4190,6 +4246,8 @@ snapshots:
binary-extensions@2.3.0: {}
bmp-js@0.1.0: {}
brace-expansion@1.1.12:
dependencies:
balanced-match: 1.0.2
@@ -4869,6 +4927,8 @@ snapshots:
functions-have-names@1.2.3: {}
fuse.js@7.1.0: {}
generator-function@2.0.1: {}
gensync@1.0.0-beta.2: {}
@@ -4987,6 +5047,8 @@ snapshots:
dependencies:
safer-buffer: 2.1.2
idb-keyval@6.2.2: {}
ignore@5.3.2: {}
ignore@7.0.5: {}
@@ -5128,6 +5190,8 @@ snapshots:
dependencies:
which-typed-array: 1.1.19
is-url@1.2.4: {}
is-weakmap@2.0.2: {}
is-weakref@1.1.1:
@@ -5324,6 +5388,10 @@ snapshots:
- '@babel/core'
- babel-plugin-macros
node-fetch@2.7.0:
dependencies:
whatwg-url: 5.0.0
node-releases@2.0.27: {}
normalize-path@3.0.0: {}
@@ -5383,6 +5451,8 @@ snapshots:
ws: 8.18.3
zod: 3.25.76
opencollective-postinstall@2.0.3: {}
optionator@0.9.4:
dependencies:
deep-is: 0.1.4
@@ -5577,6 +5647,8 @@ snapshots:
get-proto: 1.0.1
which-builtin-type: 1.2.1
regenerator-runtime@0.13.11: {}
regexp.prototype.flags@1.5.4:
dependencies:
call-bind: 1.0.8
@@ -5892,6 +5964,22 @@ snapshots:
- tsx
- yaml
tesseract.js-core@7.0.0: {}
tesseract.js@7.0.0:
dependencies:
bmp-js: 0.1.0
idb-keyval: 6.2.2
is-url: 1.2.4
node-fetch: 2.7.0
opencollective-postinstall: 2.0.3
regenerator-runtime: 0.13.11
tesseract.js-core: 7.0.0
wasm-feature-detect: 1.8.0
zlibjs: 0.3.1
transitivePeerDependencies:
- encoding
text-table@0.2.0: {}
thenify-all@1.6.0:
@@ -5929,6 +6017,8 @@ snapshots:
dependencies:
tldts: 7.0.19
tr46@0.0.3: {}
tr46@6.0.0:
dependencies:
punycode: 2.3.1
@@ -6126,6 +6216,10 @@ snapshots:
dependencies:
xml-name-validator: 5.0.0
wasm-feature-detect@1.8.0: {}
webidl-conversions@3.0.1: {}
webidl-conversions@8.0.0: {}
whatwg-encoding@3.1.1:
@@ -6139,6 +6233,11 @@ snapshots:
tr46: 6.0.0
webidl-conversions: 8.0.0
whatwg-url@5.0.0:
dependencies:
tr46: 0.0.3
webidl-conversions: 3.0.1
which-boxed-primitive@1.1.1:
dependencies:
is-bigint: 1.1.0
@@ -6203,6 +6302,8 @@ snapshots:
yocto-queue@0.1.0: {}
zlibjs@0.3.1: {}
zod-to-json-schema@3.25.0(zod@3.25.76):
dependencies:
zod: 3.25.76