2025-10-08 12:30:58 +02:00
{
"cells": [
{
"cell_type": "markdown",
"id": "be3c1872",
"metadata": {},
"source": [
"# AI-based OCR Benchmark Notebook\n",
"\n",
"This notebook benchmarks **AI-based OCR models** on scanned Spanish-language PDF documents and images.\n",
"It excludes traditional OCR engines, such as Tesseract, that require external installations."
]
},
{
"cell_type": "code",
2025-10-11 17:25:29 +02:00
"execution_count": null,
2025-10-08 12:30:58 +02:00
"id": "6a1e98fe",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com\n",
"Requirement already satisfied: pip in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (25.2)\n",
"Note: you may need to restart the kernel to use updated packages.\n",
"Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com\n",
"Requirement already satisfied: jupyter in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (1.1.1)\n",
"Requirement already satisfied: notebook in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyter) (7.4.7)\n",
"Requirement already satisfied: jupyter-console in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyter) (6.6.3)\n",
"Requirement already satisfied: nbconvert in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyter) (7.16.6)\n",
"Requirement already satisfied: ipykernel in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyter) (6.30.1)\n",
"Requirement already satisfied: ipywidgets in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyter) (8.1.7)\n",
"Requirement already satisfied: jupyterlab in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyter) (4.4.9)\n",
"Requirement already satisfied: comm>=0.1.1 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipykernel->jupyter) (0.2.3)\n",
"Requirement already satisfied: debugpy>=1.6.5 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipykernel->jupyter) (1.8.17)\n",
"Requirement already satisfied: ipython>=7.23.1 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipykernel->jupyter) (9.6.0)\n",
"Requirement already satisfied: jupyter-client>=8.0.0 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipykernel->jupyter) (8.6.3)\n",
"Requirement already satisfied: jupyter-core!=5.0.*,>=4.12 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipykernel->jupyter) (5.8.1)\n",
"Requirement already satisfied: matplotlib-inline>=0.1 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipykernel->jupyter) (0.1.7)\n",
"Requirement already satisfied: nest-asyncio>=1.4 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipykernel->jupyter) (1.6.0)\n",
"Requirement already satisfied: packaging>=22 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipykernel->jupyter) (25.0)\n",
"Requirement already satisfied: psutil>=5.7 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipykernel->jupyter) (7.1.0)\n",
"Requirement already satisfied: pyzmq>=25 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipykernel->jupyter) (27.1.0)\n",
"Requirement already satisfied: tornado>=6.2 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipykernel->jupyter) (6.5.2)\n",
"Requirement already satisfied: traitlets>=5.4.0 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipykernel->jupyter) (5.14.3)\n",
"Requirement already satisfied: colorama in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipython>=7.23.1->ipykernel->jupyter) (0.4.6)\n",
"Requirement already satisfied: decorator in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipython>=7.23.1->ipykernel->jupyter) (5.2.1)\n",
"Requirement already satisfied: ipython-pygments-lexers in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipython>=7.23.1->ipykernel->jupyter) (1.1.1)\n",
"Requirement already satisfied: jedi>=0.16 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipython>=7.23.1->ipykernel->jupyter) (0.19.2)\n",
"Requirement already satisfied: prompt_toolkit<3.1.0,>=3.0.41 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipython>=7.23.1->ipykernel->jupyter) (3.0.52)\n",
"Requirement already satisfied: pygments>=2.4.0 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipython>=7.23.1->ipykernel->jupyter) (2.19.2)\n",
"Requirement already satisfied: stack_data in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipython>=7.23.1->ipykernel->jupyter) (0.6.3)\n",
"Requirement already satisfied: wcwidth in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from prompt_toolkit<3.1.0,>=3.0.41->ipython>=7.23.1->ipykernel->jupyter) (0.2.14)\n",
"Requirement already satisfied: parso<0.9.0,>=0.8.4 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jedi>=0.16->ipython>=7.23.1->ipykernel->jupyter) (0.8.5)\n",
"Requirement already satisfied: python-dateutil>=2.8.2 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyter-client>=8.0.0->ipykernel->jupyter) (2.9.0.post0)\n",
"Requirement already satisfied: platformdirs>=2.5 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyter-core!=5.0.*,>=4.12->ipykernel->jupyter) (4.4.0)\n",
"Requirement already satisfied: pywin32>=300 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyter-core!=5.0.*,>=4.12->ipykernel->jupyter) (311)\n",
"Requirement already satisfied: six>=1.5 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from python-dateutil>=2.8.2->jupyter-client>=8.0.0->ipykernel->jupyter) (1.17.0)\n",
"Requirement already satisfied: widgetsnbextension~=4.0.14 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipywidgets->jupyter) (4.0.14)\n",
"Requirement already satisfied: jupyterlab_widgets~=3.0.15 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipywidgets->jupyter) (3.0.15)\n",
"Requirement already satisfied: async-lru>=1.0.0 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyterlab->jupyter) (2.0.5)\n",
"Requirement already satisfied: httpx<1,>=0.25.0 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyterlab->jupyter) (0.28.1)\n",
"Requirement already satisfied: jinja2>=3.0.3 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyterlab->jupyter) (3.1.6)\n",
"Requirement already satisfied: jupyter-lsp>=2.0.0 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyterlab->jupyter) (2.3.0)\n",
"Requirement already satisfied: jupyter-server<3,>=2.4.0 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyterlab->jupyter) (2.17.0)\n",
"Requirement already satisfied: jupyterlab-server<3,>=2.27.1 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyterlab->jupyter) (2.27.3)\n",
"Requirement already satisfied: notebook-shim>=0.2 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyterlab->jupyter) (0.2.4)\n",
"Requirement already satisfied: setuptools>=41.1.0 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyterlab->jupyter) (80.9.0)\n",
"Requirement already satisfied: anyio in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from httpx<1,>=0.25.0->jupyterlab->jupyter) (4.11.0)\n",
"Requirement already satisfied: certifi in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from httpx<1,>=0.25.0->jupyterlab->jupyter) (2025.10.5)\n",
"Requirement already satisfied: httpcore==1.* in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from httpx<1,>=0.25.0->jupyterlab->jupyter) (1.0.9)\n",
"Requirement already satisfied: idna in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from httpx<1,>=0.25.0->jupyterlab->jupyter) (3.10)\n",
"Requirement already satisfied: h11>=0.16 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from httpcore==1.*->httpx<1,>=0.25.0->jupyterlab->jupyter) (0.16.0)\n",
"Requirement already satisfied: argon2-cffi>=21.1 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (25.1.0)\n",
"Requirement already satisfied: jupyter-events>=0.11.0 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (0.12.0)\n",
"Requirement already satisfied: jupyter-server-terminals>=0.4.4 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (0.5.3)\n",
"Requirement already satisfied: nbformat>=5.3.0 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (5.10.4)\n",
"Requirement already satisfied: prometheus-client>=0.9 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (0.23.1)\n",
"Requirement already satisfied: pywinpty>=2.0.1 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (3.0.2)\n",
"Requirement already satisfied: send2trash>=1.8.2 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (1.8.3)\n",
"Requirement already satisfied: terminado>=0.8.3 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (0.18.1)\n",
"Requirement already satisfied: websocket-client>=1.7 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (1.9.0)\n",
"Requirement already satisfied: babel>=2.10 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyterlab-server<3,>=2.27.1->jupyterlab->jupyter) (2.17.0)\n",
"Requirement already satisfied: json5>=0.9.0 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyterlab-server<3,>=2.27.1->jupyterlab->jupyter) (0.12.1)\n",
"Requirement already satisfied: jsonschema>=4.18.0 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyterlab-server<3,>=2.27.1->jupyterlab->jupyter) (4.25.1)\n",
"Requirement already satisfied: requests>=2.31 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyterlab-server<3,>=2.27.1->jupyterlab->jupyter) (2.32.5)\n",
"Requirement already satisfied: sniffio>=1.1 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from anyio->httpx<1,>=0.25.0->jupyterlab->jupyter) (1.3.1)\n",
"Requirement already satisfied: argon2-cffi-bindings in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from argon2-cffi>=21.1->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (25.1.0)\n",
"Requirement already satisfied: MarkupSafe>=2.0 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jinja2>=3.0.3->jupyterlab->jupyter) (3.0.3)\n",
"Requirement already satisfied: attrs>=22.2.0 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jsonschema>=4.18.0->jupyterlab-server<3,>=2.27.1->jupyterlab->jupyter) (25.4.0)\n",
"Requirement already satisfied: jsonschema-specifications>=2023.03.6 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jsonschema>=4.18.0->jupyterlab-server<3,>=2.27.1->jupyterlab->jupyter) (2025.9.1)\n",
"Requirement already satisfied: referencing>=0.28.4 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jsonschema>=4.18.0->jupyterlab-server<3,>=2.27.1->jupyterlab->jupyter) (0.36.2)\n",
"Requirement already satisfied: rpds-py>=0.7.1 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jsonschema>=4.18.0->jupyterlab-server<3,>=2.27.1->jupyterlab->jupyter) (0.27.1)\n",
"Requirement already satisfied: python-json-logger>=2.0.4 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (4.0.0)\n",
"Requirement already satisfied: pyyaml>=5.3 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (6.0.2)\n",
"Requirement already satisfied: rfc3339-validator in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (0.1.4)\n",
"Requirement already satisfied: rfc3986-validator>=0.1.1 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (0.1.1)\n",
"Requirement already satisfied: fqdn in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (1.5.1)\n",
"Requirement already satisfied: isoduration in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (20.11.0)\n",
"Requirement already satisfied: jsonpointer>1.13 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (3.0.0)\n",
"Requirement already satisfied: rfc3987-syntax>=1.1.0 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (1.1.0)\n",
"Requirement already satisfied: uri-template in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (1.3.0)\n",
"Requirement already satisfied: webcolors>=24.6.0 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (24.11.1)\n",
"Requirement already satisfied: beautifulsoup4 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from nbconvert->jupyter) (4.14.2)\n",
"Requirement already satisfied: bleach!=5.0.0 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from bleach[css]!=5.0.0->nbconvert->jupyter) (6.2.0)\n",
"Requirement already satisfied: defusedxml in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from nbconvert->jupyter) (0.7.1)\n",
"Requirement already satisfied: jupyterlab-pygments in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from nbconvert->jupyter) (0.3.0)\n",
"Requirement already satisfied: mistune<4,>=2.0.3 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from nbconvert->jupyter) (3.1.4)\n",
"Requirement already satisfied: nbclient>=0.5.0 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from nbconvert->jupyter) (0.10.2)\n",
"Requirement already satisfied: pandocfilters>=1.4.1 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from nbconvert->jupyter) (1.5.1)\n",
"Requirement already satisfied: webencodings in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from bleach!=5.0.0->bleach[css]!=5.0.0->nbconvert->jupyter) (0.5.1)\n",
"Requirement already satisfied: tinycss2<1.5,>=1.1.0 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from bleach[css]!=5.0.0->nbconvert->jupyter) (1.4.0)\n",
"Requirement already satisfied: fastjsonschema>=2.15 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from nbformat>=5.3.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (2.21.2)\n",
"Requirement already satisfied: charset_normalizer<4,>=2 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from requests>=2.31->jupyterlab-server<3,>=2.27.1->jupyterlab->jupyter) (3.4.3)\n",
"Requirement already satisfied: urllib3<3,>=1.21.1 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from requests>=2.31->jupyterlab-server<3,>=2.27.1->jupyterlab->jupyter) (2.5.0)\n",
"Requirement already satisfied: lark>=1.2.2 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from rfc3987-syntax>=1.1.0->jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (1.3.0)\n",
"Requirement already satisfied: cffi>=1.0.1 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from argon2-cffi-bindings->argon2-cffi>=21.1->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (2.0.0)\n",
"Requirement already satisfied: pycparser in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from cffi>=1.0.1->argon2-cffi-bindings->argon2-cffi>=21.1->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (2.23)\n",
"Requirement already satisfied: soupsieve>1.2 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from beautifulsoup4->nbconvert->jupyter) (2.8)\n",
"Requirement already satisfied: typing-extensions>=4.0.0 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from beautifulsoup4->nbconvert->jupyter) (4.15.0)\n",
"Requirement already satisfied: arrow>=0.15.0 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from isoduration->jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (1.3.0)\n",
"Requirement already satisfied: types-python-dateutil>=2.8.10 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from arrow>=0.15.0->isoduration->jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (2.9.0.20251008)\n",
"Requirement already satisfied: executing>=1.2.0 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from stack_data->ipython>=7.23.1->ipykernel->jupyter) (2.2.1)\n",
"Requirement already satisfied: asttokens>=2.1.0 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from stack_data->ipython>=7.23.1->ipykernel->jupyter) (3.0.0)\n",
"Requirement already satisfied: pure-eval in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from stack_data->ipython>=7.23.1->ipykernel->jupyter) (0.2.3)\n",
"Note: you may need to restart the kernel to use updated packages.\n",
"Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com\n",
"Requirement already satisfied: ipywidgets in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (8.1.7)\n",
"Requirement already satisfied: comm>=0.1.3 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipywidgets) (0.2.3)\n",
"Requirement already satisfied: ipython>=6.1.0 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipywidgets) (9.6.0)\n",
"Requirement already satisfied: traitlets>=4.3.1 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipywidgets) (5.14.3)\n",
"Requirement already satisfied: widgetsnbextension~=4.0.14 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipywidgets) (4.0.14)\n",
"Requirement already satisfied: jupyterlab_widgets~=3.0.15 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipywidgets) (3.0.15)\n",
"Requirement already satisfied: colorama in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (0.4.6)\n",
"Requirement already satisfied: decorator in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (5.2.1)\n",
"Requirement already satisfied: ipython-pygments-lexers in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (1.1.1)\n",
"Requirement already satisfied: jedi>=0.16 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (0.19.2)\n",
"Requirement already satisfied: matplotlib-inline in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (0.1.7)\n",
"Requirement already satisfied: prompt_toolkit<3.1.0,>=3.0.41 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (3.0.52)\n",
"Requirement already satisfied: pygments>=2.4.0 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (2.19.2)\n",
"Requirement already satisfied: stack_data in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (0.6.3)\n",
"Requirement already satisfied: wcwidth in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from prompt_toolkit<3.1.0,>=3.0.41->ipython>=6.1.0->ipywidgets) (0.2.14)\n",
"Requirement already satisfied: parso<0.9.0,>=0.8.4 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jedi>=0.16->ipython>=6.1.0->ipywidgets) (0.8.5)\n",
"Requirement already satisfied: executing>=1.2.0 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from stack_data->ipython>=6.1.0->ipywidgets) (2.2.1)\n",
"Requirement already satisfied: asttokens>=2.1.0 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from stack_data->ipython>=6.1.0->ipywidgets) (3.0.0)\n",
"Requirement already satisfied: pure-eval in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from stack_data->ipython>=6.1.0->ipywidgets) (0.2.3)\n",
"Note: you may need to restart the kernel to use updated packages.\n",
"Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com\n",
"Requirement already satisfied: ipykernel in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (6.30.1)\n",
"Requirement already satisfied: comm>=0.1.1 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipykernel) (0.2.3)\n",
"Requirement already satisfied: debugpy>=1.6.5 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipykernel) (1.8.17)\n",
"Requirement already satisfied: ipython>=7.23.1 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipykernel) (9.6.0)\n",
"Requirement already satisfied: jupyter-client>=8.0.0 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipykernel) (8.6.3)\n",
"Requirement already satisfied: jupyter-core!=5.0.*,>=4.12 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipykernel) (5.8.1)\n",
"Requirement already satisfied: matplotlib-inline>=0.1 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipykernel) (0.1.7)\n",
"Requirement already satisfied: nest-asyncio>=1.4 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipykernel) (1.6.0)\n",
"Requirement already satisfied: packaging>=22 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipykernel) (25.0)\n",
"Requirement already satisfied: psutil>=5.7 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipykernel) (7.1.0)\n",
"Requirement already satisfied: pyzmq>=25 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipykernel) (27.1.0)\n",
"Requirement already satisfied: tornado>=6.2 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipykernel) (6.5.2)\n",
"Requirement already satisfied: traitlets>=5.4.0 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipykernel) (5.14.3)\n",
"Requirement already satisfied: colorama in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipython>=7.23.1->ipykernel) (0.4.6)\n",
"Requirement already satisfied: decorator in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipython>=7.23.1->ipykernel) (5.2.1)\n",
"Requirement already satisfied: ipython-pygments-lexers in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipython>=7.23.1->ipykernel) (1.1.1)\n",
"Requirement already satisfied: jedi>=0.16 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipython>=7.23.1->ipykernel) (0.19.2)\n",
"Requirement already satisfied: prompt_toolkit<3.1.0,>=3.0.41 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipython>=7.23.1->ipykernel) (3.0.52)\n",
"Requirement already satisfied: pygments>=2.4.0 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipython>=7.23.1->ipykernel) (2.19.2)\n",
"Requirement already satisfied: stack_data in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipython>=7.23.1->ipykernel) (0.6.3)\n",
"Requirement already satisfied: wcwidth in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from prompt_toolkit<3.1.0,>=3.0.41->ipython>=7.23.1->ipykernel) (0.2.14)\n",
"Requirement already satisfied: parso<0.9.0,>=0.8.4 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jedi>=0.16->ipython>=7.23.1->ipykernel) (0.8.5)\n",
"Requirement already satisfied: python-dateutil>=2.8.2 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyter-client>=8.0.0->ipykernel) (2.9.0.post0)\n",
"Requirement already satisfied: platformdirs>=2.5 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyter-core!=5.0.*,>=4.12->ipykernel) (4.4.0)\n",
"Requirement already satisfied: pywin32>=300 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyter-core!=5.0.*,>=4.12->ipykernel) (311)\n",
"Requirement already satisfied: six>=1.5 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from python-dateutil>=2.8.2->jupyter-client>=8.0.0->ipykernel) (1.17.0)\n",
"Requirement already satisfied: executing>=1.2.0 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from stack_data->ipython>=7.23.1->ipykernel) (2.2.1)\n",
"Requirement already satisfied: asttokens>=2.1.0 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from stack_data->ipython>=7.23.1->ipykernel) (3.0.0)\n",
"Requirement already satisfied: pure-eval in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from stack_data->ipython>=7.23.1->ipykernel) (0.2.3)\n",
"Note: you may need to restart the kernel to use updated packages.\n",
"Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com\n",
"Requirement already satisfied: easyocr in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (1.7.2)\n",
"Requirement already satisfied: transformers in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (4.57.0)\n",
"Requirement already satisfied: torch in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (2.8.0+cpu)\n",
"Requirement already satisfied: pdf2image in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (1.17.0)\n",
"Requirement already satisfied: pillow in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (11.3.0)\n",
"Requirement already satisfied: jiwer in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (4.0.0)\n",
"Requirement already satisfied: paddleocr in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (3.2.0)\n",
"Requirement already satisfied: hf_xet in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (1.1.10)\n",
"Requirement already satisfied: paddlepaddle in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (3.2.0)\n",
"Requirement already satisfied: torchvision>=0.5 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from easyocr) (0.23.0+cpu)\n",
"Requirement already satisfied: opencv-python-headless in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from easyocr) (4.12.0.88)\n",
"Requirement already satisfied: scipy in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from easyocr) (1.16.2)\n",
"Requirement already satisfied: numpy in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from easyocr) (2.1.2)\n",
"Requirement already satisfied: scikit-image in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from easyocr) (0.25.2)\n",
"Requirement already satisfied: python-bidi in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from easyocr) (0.6.6)\n",
"Requirement already satisfied: PyYAML in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from easyocr) (6.0.2)\n",
"Requirement already satisfied: Shapely in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from easyocr) (2.1.2)\n",
"Requirement already satisfied: pyclipper in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from easyocr) (1.3.0.post6)\n",
"Requirement already satisfied: ninja in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from easyocr) (1.13.0)\n",
"Requirement already satisfied: filelock in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from transformers) (3.13.1)\n",
"Requirement already satisfied: huggingface-hub<1.0,>=0.34.0 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from transformers) (0.35.3)\n",
"Requirement already satisfied: packaging>=20.0 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from transformers) (25.0)\n",
"Requirement already satisfied: regex!=2019.12.17 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from transformers) (2025.9.18)\n",
"Requirement already satisfied: requests in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from transformers) (2.32.5)\n",
"Requirement already satisfied: tokenizers<=0.23.0,>=0.22.0 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from transformers) (0.22.1)\n",
"Requirement already satisfied: safetensors>=0.4.3 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from transformers) (0.6.2)\n",
"Requirement already satisfied: tqdm>=4.27 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from transformers) (4.67.1)\n",
"Requirement already satisfied: fsspec>=2023.5.0 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from huggingface-hub<1.0,>=0.34.0->transformers) (2024.6.1)\n",
"Requirement already satisfied: typing-extensions>=3.7.4.3 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from huggingface-hub<1.0,>=0.34.0->transformers) (4.15.0)\n",
"Requirement already satisfied: sympy>=1.13.3 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from torch) (1.13.3)\n",
"Requirement already satisfied: networkx in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from torch) (3.3)\n",
"Requirement already satisfied: jinja2 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from torch) (3.1.6)\n",
"Requirement already satisfied: setuptools in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from torch) (80.9.0)\n",
"Requirement already satisfied: click>=8.1.8 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jiwer) (8.3.0)\n",
"Requirement already satisfied: rapidfuzz>=3.9.7 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jiwer) (3.14.1)\n",
"Requirement already satisfied: paddlex<3.3.0,>=3.2.0 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from paddlex[ocr-core]<3.3.0,>=3.2.0->paddleocr) (3.2.1)\n",
"Requirement already satisfied: aistudio_sdk>=0.3.5 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from paddlex<3.3.0,>=3.2.0->paddlex[ocr-core]<3.3.0,>=3.2.0->paddleocr) (0.3.8)\n",
"Requirement already satisfied: chardet in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from paddlex<3.3.0,>=3.2.0->paddlex[ocr-core]<3.3.0,>=3.2.0->paddleocr) (5.2.0)\n",
"Requirement already satisfied: colorlog in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from paddlex<3.3.0,>=3.2.0->paddlex[ocr-core]<3.3.0,>=3.2.0->paddleocr) (6.9.0)\n",
"Requirement already satisfied: modelscope>=1.28.0 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from paddlex<3.3.0,>=3.2.0->paddlex[ocr-core]<3.3.0,>=3.2.0->paddleocr) (1.30.0)\n",
"Requirement already satisfied: pandas>=1.3 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from paddlex<3.3.0,>=3.2.0->paddlex[ocr-core]<3.3.0,>=3.2.0->paddleocr) (2.3.3)\n",
"Requirement already satisfied: prettytable in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from paddlex<3.3.0,>=3.2.0->paddlex[ocr-core]<3.3.0,>=3.2.0->paddleocr) (3.16.0)\n",
"Requirement already satisfied: py-cpuinfo in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from paddlex<3.3.0,>=3.2.0->paddlex[ocr-core]<3.3.0,>=3.2.0->paddleocr) (9.0.0)\n",
"Requirement already satisfied: pydantic>=2 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from paddlex<3.3.0,>=3.2.0->paddlex[ocr-core]<3.3.0,>=3.2.0->paddleocr) (2.12.0)\n",
"Requirement already satisfied: ruamel.yaml in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from paddlex<3.3.0,>=3.2.0->paddlex[ocr-core]<3.3.0,>=3.2.0->paddleocr) (0.18.15)\n",
"Requirement already satisfied: ujson in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from paddlex<3.3.0,>=3.2.0->paddlex[ocr-core]<3.3.0,>=3.2.0->paddleocr) (5.11.0)\n",
"Requirement already satisfied: imagesize in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from paddlex[ocr-core]<3.3.0,>=3.2.0->paddleocr) (1.4.1)\n",
"Requirement already satisfied: opencv-contrib-python==4.10.0.84 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from paddlex[ocr-core]<3.3.0,>=3.2.0->paddleocr) (4.10.0.84)\n",
"Requirement already satisfied: pypdfium2>=4 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from paddlex[ocr-core]<3.3.0,>=3.2.0->paddleocr) (4.30.0)\n",
"Requirement already satisfied: httpx in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from paddlepaddle) (0.28.1)\n",
"Requirement already satisfied: protobuf>=3.20.2 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from paddlepaddle) (6.32.1)\n",
"Requirement already satisfied: opt-einsum==3.3.0 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from paddlepaddle) (3.3.0)\n",
"Requirement already satisfied: psutil in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from aistudio_sdk>=0.3.5->paddlex<3.3.0,>=3.2.0->paddlex[ocr-core]<3.3.0,>=3.2.0->paddleocr) (7.1.0)\n",
"Requirement already satisfied: bce-python-sdk in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from aistudio_sdk>=0.3.5->paddlex<3.3.0,>=3.2.0->paddlex[ocr-core]<3.3.0,>=3.2.0->paddleocr) (0.9.46)\n",
"Requirement already satisfied: colorama in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from click>=8.1.8->jiwer) (0.4.6)\n",
"Requirement already satisfied: urllib3>=1.26 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from modelscope>=1.28.0->paddlex<3.3.0,>=3.2.0->paddlex[ocr-core]<3.3.0,>=3.2.0->paddleocr) (2.5.0)\n",
"Requirement already satisfied: python-dateutil>=2.8.2 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from pandas>=1.3->paddlex<3.3.0,>=3.2.0->paddlex[ocr-core]<3.3.0,>=3.2.0->paddleocr) (2.9.0.post0)\n",
"Requirement already satisfied: pytz>=2020.1 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from pandas>=1.3->paddlex<3.3.0,>=3.2.0->paddlex[ocr-core]<3.3.0,>=3.2.0->paddleocr) (2025.2)\n",
"Requirement already satisfied: tzdata>=2022.7 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from pandas>=1.3->paddlex<3.3.0,>=3.2.0->paddlex[ocr-core]<3.3.0,>=3.2.0->paddleocr) (2025.2)\n",
"Requirement already satisfied: annotated-types>=0.6.0 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from pydantic>=2->paddlex<3.3.0,>=3.2.0->paddlex[ocr-core]<3.3.0,>=3.2.0->paddleocr) (0.7.0)\n",
"Requirement already satisfied: pydantic-core==2.41.1 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from pydantic>=2->paddlex<3.3.0,>=3.2.0->paddlex[ocr-core]<3.3.0,>=3.2.0->paddleocr) (2.41.1)\n",
"Requirement already satisfied: typing-inspection>=0.4.2 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from pydantic>=2->paddlex<3.3.0,>=3.2.0->paddlex[ocr-core]<3.3.0,>=3.2.0->paddleocr) (0.4.2)\n",
"Requirement already satisfied: six>=1.5 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from python-dateutil>=2.8.2->pandas>=1.3->paddlex<3.3.0,>=3.2.0->paddlex[ocr-core]<3.3.0,>=3.2.0->paddleocr) (1.17.0)\n",
"Requirement already satisfied: charset_normalizer<4,>=2 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from requests->transformers) (3.4.3)\n",
"Requirement already satisfied: idna<4,>=2.5 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from requests->transformers) (3.10)\n",
"Requirement already satisfied: certifi>=2017.4.17 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from requests->transformers) (2025.10.5)\n",
"Requirement already satisfied: mpmath<1.4,>=1.1.0 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from sympy>=1.13.3->torch) (1.3.0)\n",
"Requirement already satisfied: pycryptodome>=3.8.0 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from bce-python-sdk->aistudio_sdk>=0.3.5->paddlex<3.3.0,>=3.2.0->paddlex[ocr-core]<3.3.0,>=3.2.0->paddleocr) (3.23.0)\n",
"Requirement already satisfied: future>=0.6.0 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from bce-python-sdk->aistudio_sdk>=0.3.5->paddlex<3.3.0,>=3.2.0->paddlex[ocr-core]<3.3.0,>=3.2.0->paddleocr) (1.0.0)\n",
"Requirement already satisfied: anyio in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from httpx->paddlepaddle) (4.11.0)\n",
"Requirement already satisfied: httpcore==1.* in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from httpx->paddlepaddle) (1.0.9)\n",
"Requirement already satisfied: h11>=0.16 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from httpcore==1.*->httpx->paddlepaddle) (0.16.0)\n",
"Requirement already satisfied: sniffio>=1.1 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from anyio->httpx->paddlepaddle) (1.3.1)\n",
"Requirement already satisfied: MarkupSafe>=2.0 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jinja2->torch) (3.0.3)\n",
"Requirement already satisfied: wcwidth in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from prettytable->paddlex<3.3.0,>=3.2.0->paddlex[ocr-core]<3.3.0,>=3.2.0->paddleocr) (0.2.14)\n",
"Requirement already satisfied: ruamel.yaml.clib>=0.2.7 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ruamel.yaml->paddlex<3.3.0,>=3.2.0->paddlex[ocr-core]<3.3.0,>=3.2.0->paddleocr) (0.2.14)\n",
"Requirement already satisfied: imageio!=2.35.0,>=2.33 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from scikit-image->easyocr) (2.37.0)\n",
"Requirement already satisfied: tifffile>=2022.8.12 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from scikit-image->easyocr) (2025.10.4)\n",
"Requirement already satisfied: lazy-loader>=0.4 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from scikit-image->easyocr) (0.4)\n",
"Note: you may need to restart the kernel to use updated packages.\n",
"Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com\n",
"Requirement already satisfied: PyMuPDF in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (1.26.4)\n",
"Note: you may need to restart the kernel to use updated packages.\n",
"Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com\n",
"Requirement already satisfied: pandas in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (2.3.3)\n",
"Requirement already satisfied: numpy>=1.26.0 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from pandas) (2.1.2)\n",
"Requirement already satisfied: python-dateutil>=2.8.2 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from pandas) (2.9.0.post0)\n",
"Requirement already satisfied: pytz>=2020.1 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from pandas) (2025.2)\n",
"Requirement already satisfied: tzdata>=2022.7 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from pandas) (2025.2)\n",
"Requirement already satisfied: six>=1.5 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from python-dateutil>=2.8.2->pandas) (1.17.0)\n",
"Note: you may need to restart the kernel to use updated packages.\n",
"Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com\n",
"Requirement already satisfied: matplotlib in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (3.10.6)\n",
"Requirement already satisfied: contourpy>=1.0.1 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from matplotlib) (1.3.3)\n",
"Requirement already satisfied: cycler>=0.10 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from matplotlib) (0.12.1)\n",
"Requirement already satisfied: fonttools>=4.22.0 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from matplotlib) (4.60.1)\n",
"Requirement already satisfied: kiwisolver>=1.3.1 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from matplotlib) (1.4.9)\n",
"Requirement already satisfied: numpy>=1.23 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from matplotlib) (2.1.2)\n",
"Requirement already satisfied: packaging>=20.0 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from matplotlib) (25.0)\n",
"Requirement already satisfied: pillow>=8 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from matplotlib) (11.3.0)\n",
"Requirement already satisfied: pyparsing>=2.3.1 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from matplotlib) (3.2.5)\n",
"Requirement already satisfied: python-dateutil>=2.7 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from matplotlib) (2.9.0.post0)\n",
"Requirement already satisfied: six>=1.5 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from python-dateutil>=2.7->matplotlib) (1.17.0)\n",
"Note: you may need to restart the kernel to use updated packages.\n",
"Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com\n",
"Requirement already satisfied: seaborn in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (0.13.2)\n",
"Requirement already satisfied: numpy!=1.24.0,>=1.20 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from seaborn) (2.1.2)\n",
"Requirement already satisfied: pandas>=1.2 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from seaborn) (2.3.3)\n",
"Requirement already satisfied: matplotlib!=3.6.1,>=3.4 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from seaborn) (3.10.6)\n",
"Requirement already satisfied: contourpy>=1.0.1 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (1.3.3)\n",
"Requirement already satisfied: cycler>=0.10 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (0.12.1)\n",
"Requirement already satisfied: fonttools>=4.22.0 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (4.60.1)\n",
"Requirement already satisfied: kiwisolver>=1.3.1 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (1.4.9)\n",
"Requirement already satisfied: packaging>=20.0 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (25.0)\n",
"Requirement already satisfied: pillow>=8 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (11.3.0)\n",
"Requirement already satisfied: pyparsing>=2.3.1 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (3.2.5)\n",
"Requirement already satisfied: python-dateutil>=2.7 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (2.9.0.post0)\n",
"Requirement already satisfied: pytz>=2020.1 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from pandas>=1.2->seaborn) (2025.2)\n",
"Requirement already satisfied: tzdata>=2022.7 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from pandas>=1.2->seaborn) (2025.2)\n",
"Requirement already satisfied: six>=1.5 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from python-dateutil>=2.7->matplotlib!=3.6.1,>=3.4->seaborn) (1.17.0)\n",
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"%pip install --upgrade pip\n",
"%pip install --upgrade jupyter\n",
"%pip install --upgrade ipywidgets\n",
2025-10-11 17:25:29 +02:00
"%pip install --upgrade ipykernel\n",
2025-10-08 12:30:58 +02:00
"\n",
"# Install necessary packages\n",
"%pip install easyocr transformers torch pdf2image pillow jiwer paddleocr hf_xet paddlepaddle\n",
"# pdf reading\n",
"%pip install PyMuPDF\n",
"\n",
"# Data analysis and visualization\n",
"%pip install pandas\n",
"%pip install matplotlib\n",
"%pip install seaborn"
]
},
{
"cell_type": "code",
2025-10-11 17:25:29 +02:00
"execution_count": 11,
2025-10-08 12:30:58 +02:00
"id": "ae33632a",
"metadata": {},
"outputs": [],
"source": [
"# Imports\n",
"import os\n",
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"from pdf2image import convert_from_path\n",
"from PIL import Image, ImageOps\n",
"import easyocr\n",
"from transformers import TrOCRProcessor, VisionEncoderDecoderModel\n",
"import torch\n",
"from jiwer import wer, cer\n",
"from paddleocr import PaddleOCR\n",
"import fitz # PyMuPDF"
]
},
{
"cell_type": "markdown",
"id": "0e00f1b0",
"metadata": {},
"source": [
"## 1 Configuration"
]
},
{
"cell_type": "code",
2025-10-11 17:25:29 +02:00
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"PDF_FOLDER = './instructions' # Folder containing PDF files\n",
"OUTPUT_FOLDER = 'results'\n",
"os.makedirs(OUTPUT_FOLDER, exist_ok=True)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "dcefbebc",
2025-10-08 12:30:58 +02:00
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.\n",
"Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.\n",
2025-10-11 17:25:29 +02:00
"Some weights of VisionEncoderDecoderModel were not initialized from the model checkpoint at microsoft/trocr-large-printed and are newly initialized: ['encoder.pooler.dense.bias', 'encoder.pooler.dense.weight']\n",
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
]
}
],
"source": [
"# 1. EasyOCR (works well already)\n",
"import easyocr\n",
"easyocr_reader = easyocr.Reader(['es', 'en']) # Spanish and English\n",
"\n",
"# 2. TrOCR - Use a better variant for documents\n",
"from transformers import TrOCRProcessor, VisionEncoderDecoderModel\n",
"\n",
"# Try using the large model for better performance\n",
"try:\n",
" trocr_processor = TrOCRProcessor.from_pretrained(\"microsoft/trocr-large-printed\")\n",
" trocr_model = VisionEncoderDecoderModel.from_pretrained(\"microsoft/trocr-large-printed\")\n",
"except:\n",
" # Fallback to base model\n",
" trocr_processor = TrOCRProcessor.from_pretrained(\"microsoft/trocr-base-printed\")\n",
" trocr_model = VisionEncoderDecoderModel.from_pretrained(\"microsoft/trocr-base-printed\")\n",
"\n",
"device = 'cuda' if torch.cuda.is_available() else 'cpu'\n",
"trocr_model = trocr_model.to(device)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "243849b9",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\sji\\AppData\\Local\\Temp\\ipykernel_35244\\1485176348.py:5: DeprecationWarning: The parameter `det_db_thresh` has been deprecated and will be removed in the future. Please use `text_det_thresh` instead.\n",
" paddleocr_model = PaddleOCR(\n",
"C:\\Users\\sji\\AppData\\Local\\Temp\\ipykernel_35244\\1485176348.py:5: DeprecationWarning: The parameter `det_db_box_thresh` has been deprecated and will be removed in the future. Please use `text_det_box_thresh` instead.\n",
" paddleocr_model = PaddleOCR(\n",
"C:\\Users\\sji\\AppData\\Local\\Temp\\ipykernel_35244\\1485176348.py:5: DeprecationWarning: The parameter `rec_batch_num` has been deprecated and will be removed in the future. Please use `text_recognition_batch_size` instead.\n",
" paddleocr_model = PaddleOCR(\n",
2025-10-08 12:30:58 +02:00
"c:\\Users\\sji\\Desktop\\MastersThesis\\.venv\\Lib\\site-packages\\paddle\\utils\\cpp_extension\\extension_utils.py:718: UserWarning: No ccache found. Please be aware that recompiling all source files may be required. You can download and install ccache from: https://github.com/ccache/ccache/blob/master/doc/INSTALL.md\n",
" warnings.warn(warning_message)\n",
"\u001b[32mCreating model: ('PP-LCNet_x1_0_doc_ori', None)\u001b[0m\n",
"\u001b[32mModel files already exist. Using cached files. To redownload, please delete the directory manually: `C:\\Users\\sji\\.paddlex\\official_models\\PP-LCNet_x1_0_doc_ori`.\u001b[0m\n",
"\u001b[32mCreating model: ('UVDoc', None)\u001b[0m\n",
"\u001b[32mModel files already exist. Using cached files. To redownload, please delete the directory manually: `C:\\Users\\sji\\.paddlex\\official_models\\UVDoc`.\u001b[0m\n",
"\u001b[32mCreating model: ('PP-LCNet_x1_0_textline_ori', None)\u001b[0m\n",
"\u001b[32mModel files already exist. Using cached files. To redownload, please delete the directory manually: `C:\\Users\\sji\\.paddlex\\official_models\\PP-LCNet_x1_0_textline_ori`.\u001b[0m\n",
"\u001b[32mCreating model: ('PP-OCRv5_server_det', None)\u001b[0m\n",
"\u001b[32mModel files already exist. Using cached files. To redownload, please delete the directory manually: `C:\\Users\\sji\\.paddlex\\official_models\\PP-OCRv5_server_det`.\u001b[0m\n",
"\u001b[32mCreating model: ('latin_PP-OCRv5_mobile_rec', None)\u001b[0m\n",
"\u001b[32mModel files already exist. Using cached files. To redownload, please delete the directory manually: `C:\\Users\\sji\\.paddlex\\official_models\\latin_PP-OCRv5_mobile_rec`.\u001b[0m\n"
]
}
],
"source": [
2025-10-11 17:25:29 +02:00
"# 3. PaddleOCR - Better configuration\n",
"from paddleocr import PaddleOCR\n",
2025-10-08 12:30:58 +02:00
"\n",
2025-10-11 17:25:29 +02:00
"# Initialize with better settings for Spanish/Latin text\n",
"paddleocr_model = PaddleOCR(\n",
" lang='es', # Use 'latin' for better Spanish support\n",
" det_db_thresh=0.3, # Lower threshold for better text detection\n",
" det_db_box_thresh=0.5,\n",
" rec_batch_num=6,\n",
")\n"
2025-10-08 12:30:58 +02:00
]
},
{
"cell_type": "markdown",
"id": "84c999e2",
"metadata": {},
"source": [
"## 2 Helper Functions"
]
},
{
"cell_type": "code",
2025-10-11 17:25:29 +02:00
"execution_count": null,
2025-10-08 12:30:58 +02:00
"id": "9596c7df",
"metadata": {},
"outputs": [],
"source": [
2025-10-11 17:25:29 +02:00
"from typing import List, Optional\n",
"\n",
"def show_ocr_result(img: Image.Image, text: str, scale: float = 0.20):\n",
" \"\"\"\n",
" Displays a smaller version of the image with OCR text as a footer.\n",
" \"\"\"\n",
" # Compute plot size based on image dimensions (but without resizing the image)\n",
" w, h = img.size\n",
" figsize = (w * scale / 100, h * scale / 100) # convert pixels to inches approx\n",
"\n",
" fig, ax = plt.subplots(figsize=figsize)\n",
" ax.imshow(img)\n",
" ax.axis(\"off\")\n",
"\n",
"\n",
" # Add OCR text below the image (footer)\n",
" plt.figtext(0.5, 0.02, text.strip(), wrap=True, ha='center', va='bottom', fontsize=10)\n",
" plt.tight_layout()\n",
" plt.show()\n",
"\n",
"def pdf_to_images(pdf_path: str, dpi: int = 300, pages: List[int] = None) -> List[Image.Image]:\n",
" \"\"\"Render a PDF into a list of PIL Images using PyMuPDF or pdf2image.\"\"\"\n",
" images = []\n",
" if fitz is not None:\n",
" doc = fitz.open(pdf_path)\n",
" page_indices = pages if pages is not None else list(range(len(doc)))\n",
" for i in page_indices:\n",
" page = doc.load_page(i)\n",
" mat = fitz.Matrix(dpi/72.0, dpi/72.0)\n",
" pix = page.get_pixmap(matrix=mat, alpha=False)\n",
" img = Image.frombytes('RGB', [pix.width, pix.height], pix.samples)\n",
" images.append(img)\n",
" doc.close()\n",
" elif convert_from_path is not None:\n",
" if pages is None:\n",
" images = convert_from_path(pdf_path, dpi=dpi)\n",
" else:\n",
" # pdf2image supports first_page/last_page; render a slice if contiguous\n",
" images = [convert_from_path(pdf_path, dpi=dpi)[i] for i in pages]\n",
" else:\n",
" raise RuntimeError('Install PyMuPDF or pdf2image to convert PDFs.')\n",
" return images\n",
2025-10-08 12:30:58 +02:00
"\n",
"def ocr_easyocr(img):\n",
" result = easyocr_reader.readtext(np.array(img))\n",
" res = ' '.join([r[1] for r in result])\n",
2025-10-11 17:25:29 +02:00
" show_ocr_result(img, res)\n",
2025-10-08 12:30:58 +02:00
" return res\n",
"\n",
"def pdf_extract_text(pdf_path, page_num) -> str:\n",
" \"\"\"\n",
" Extracts text from a specific PDF page in proper reading order.\n",
" \"\"\"\n",
" doc = fitz.open(pdf_path)\n",
" \n",
" if page_num < 1 or page_num > len(doc):\n",
" return \"\"\n",
" \n",
" page = doc[page_num - 1]\n",
" blocks = page.get_text(\"blocks\") # returns list of (x0, y0, x1, y1, \"text\", block_no, block_type)\n",
" \n",
" # Sort blocks top-to-bottom, left-to-right\n",
" blocks_sorted = sorted(blocks, key=lambda b: (b[1], b[0])) # y0, then x0\n",
" \n",
" text = \" \".join([b[4].replace('\\n', ' ').strip() for b in blocks_sorted])\n",
" return text\n",
"\n",
"def evaluate_text(reference, prediction):\n",
" return {'WER': wer(reference, prediction), 'CER': cer(reference, prediction)}"
]
},
{
"cell_type": "markdown",
"id": "e42cae29",
"metadata": {},
"source": [
"## 3 Run AI OCR Benchmark"
]
},
{
"cell_type": "code",
2025-10-11 17:25:29 +02:00
"execution_count": null,
2025-10-08 12:30:58 +02:00
"id": "9b55c154",
"metadata": {},
"outputs": [
{
"data": {
2025-10-11 17:25:29 +02:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAmIAAANaCAYAAAAqCCCSAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjYsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvq6yFwwAAAAlwSFlzAAAPYQAAD2EBqD+naQAAjWlJREFUeJzt3Qd4ZUXdgPHJ9t57Z5fee0eaIAKiIjYsqNh7L5+9964ICiiioAIigii9d+llgV22996zJcn9nneyJ57cvTe5yWZ3kt339zwRN7n3lDlzZv5n2qkqFAqFIEmSpO2u0/bfpSRJkmAgJkmSlIiBmCRJUiIGYpIkSYkYiEmSJCViICZJkpSIgZgkSVIiBmKSJEmJGIhJkiQlYiAmSZKUiIGYJElSIgZikiRJiRiISZIkJWIgJkmSlIiBmCRJUiIGYpIkSYkYiEmSJCViICZJkpSIgZgkSVIiBmKSJEmJGIhJkiQlYiAmSZKUiIGYJElSIgZikiRJiRiISZIkJWIgJkmSlIiBmCRJUiIGYpIkSYkYiEmSJCViICZJkpSIgZgkSVIiBmKSJEmJGIhJkiQlYiAmSZKUiIGYJElSIgZikiRJiRiISZIkJWIgJkmSlIiBmCRJUiIGYpIkSYkYiEmSJCViICZJkpSIgZgkSVIiBmKSJEmJGIhJkiQlYiAmSZKUiIGYJElSIgZikiRJiRiISZIkJWIgJkmSlIiBmCRJUiIGYpIkSYkYiEmSJCViICZJkpSIgZgkSVIiBmKSJEmJGIhJkiQlYiAmSZKUiIGYJElSIgZikiRJiRiISZIkJWIgJkmSlIiBmCRJUiIGYpIkSYkYiEmSJCViICZJkpSIgZgkSVIiBmKSJEmJGIhJkiQlYiAmSZKUiIGYJElSIgZikiRJiRiISZIkJWIgJkmSlIiBmCRJUiIGYpIkSYkYiEmSJCViICZJkpSIgZgkSVIiBmKSJEmJGIhJkiQlYiAmSZKUiIGYJElSIgZikiRJiRiISZIkJWIgJkmSlIiBmCRJUiIGYpIkSYkYiEmSJCViICZJkpSIgZgkSVIiBmKSJEmJGIhJkiQlYiAmSZKUiIGYJElSIgZikiRJiRiISZIkJWIgJkmSlIiBmCRJUiIGYpIkSYkYiEmSJCViICZJkpSIgZgkSVIiBmKSJEmJGIhJkiQlYiAmSZKUiIGYJElSIgZikiRJiRiISZIkJWIgJkmSlIiBmCRJUiIGYpIkSYkYiEmSJCViICZJkpSIgZgkSVIiBmKSJEmJGIhJkiQlYiAmSZKUiIGYJElSIgZikiRJiRiISZIkJWIgJkmSlIiBmCRJUiIGYpIkSYkYiEmSJCViICZJkpSIgZgkSVIiBmKSJEmJGIhJkiQlYiAmSZKUiIGYJElSIgZikiRJiRiISZIkJWIgJkmSlIiBmCRJUiIGYpIkSYkYiEmSJCViICZJkpSIgZgkSVIiBmKSJEmJGIhJkiQlYiAmSZKUiIGYJElSIgZikiRJiRiISZIkJWIgJkmSlIiBmCRJUiIGYpIkSYkYiEmSJCViICZJkpSIgZgkSVIiBmKSJEmJGIhJkiQlYiAmSZKUiIGYJElSIgZikiRJiRiISZIkJWIgJkmSlIiBmCRJUiIGYpIkSYkYiEmSJCViICZJkpSIgZgkSVIiBmKSJEmJGIhJkiQlYiAmSZKUiIGYJElSIgZikiRJiRiISZIkJWIgJkmSlIiBmCRJUiIGYpIkSYkYiEmSJCViICZJkpSIgZgkSVIiBmKSJEmJGIhJkiQlYiAmSZKUiIGYJElSIgZikiRJiRiISZIkJWIgJkmSlIiBmCRJUiIGYpIkSYkYiEmSJCViICZJkpSIgZgkSVIiBmKSJEmJGIhJkiQlYiAmSZKUiIGYJElSIgZikiRJiRiISZIkJWIgJkmSlIiBmCRJUiIGYpIkSYkYiEmSJCViICZJkpSIgZgkSVIiBmKSJEmJGIhJkiQlYiAmSZ
KUiIGYJElSIgZikiRJiRiISZIkJWIgJkmSlIiBmCRJUiIGYpIkSYkYiEmSJCViICZJkpSIgZgkSVIiBmKSJEmJGIhJkiQlYiAmSZKUiIGYJElSIgZikiRJiRiISZIkJWIgJkmSlIiBmCRJUiIGYpIkSYkYiEmSJCViICZJkpSIgZgkSVIiBmKSJEmJGIhJkiQlYiAmSZKUiIGYJElSIl1S7ViS8gqFQvypqqqKP+1Rez5Gjquuri4eV6dO7f8ZOztejrXStMzSH+3xGmwNzqu2tjZ07tx5m55Xa/Pwjpz2qbX/u1XtCjdiTU1NLEB3Rps2bQrr169vKJDUdhYsWBCuueaamMbt1dy5c8O//vWvWGFuT+Q38h33Xqm8x+9WrlwZrrjiipiOHcGKFSvC3/72t7Bu3boWfe/hhx8O999/f+hIyC/V1dVlyw1+P3Xq1HDVVVeFDRs2bNNjoez+97//HWbNmtXi7z7++OPh7rvvbrJuaO4nC+hIk1J/r9tct2SBelPb2VHYIqYW4eb5+9//Hvbbb7+w1157tem2sxsPLXlK3l44PgohCrC3vvWtoWvXrqkPaYdChTx79ux2HeSvWrUqTJs2bbsf48aNG8Nll10W9t1333D00Udv8XeO56677goDBgwIw4cPDx0BAQf3EpVqSyxZsqRdB+vlApiHHnoovOMd7wi9e/cumfdvu+22cNRRR4Xu3btv02Mhr8yYMSOMGTMmjB8/vkXfXbp0aVi+fHnZwJoHKfJqfl/51lnO7XWve13o1atX/GzxtqqqqsLJJ58cdt999/jve+65JzzzzDNb1AX9+/eP2+nRo0fYERiIqcXByOLFi8PatWu3yfZ50uXmfdnLXhbao1GjRsXCpCN0/WjHQXcVldOIESPKttTRIkblxGfVvgwdOjTsueeeJR/eKFMJ1AiKCLTb2wNopQgwTzrppIaHFAIzWt7OOOOM0K9fv/g78iafI1jjoYvAc+TIkY22k+Vx0oV8PXjw4HDooYc2+ky3bt3iz47CQExtKt9cXK5AaeozPCE11dpQyfa31THyXwrTct9rybFtbTq1Zp/batvlPlfcdbA1Fcy2OM9tlQZbu71Sn6ECO+GEE8p+b9iwYeGcc84JPXv2LHuMle6/LbVmfy3JN1ubRyv93NZe33HjxsWfcg444IDYutNcEN0W90FLPtuSfRAYTZw4seHfixYtCl26dInnPWTIkEafzVrNeLCdNGlSk9sdOHBg2HXXXcOOzEBMW43gafr06WGfffYJL730Uuy64abk6Y6nnexm5ganCZ6mZm5SKg0Cm9GjR8fxE08++WSYN29e/Ny9994bW53YJs3QfIfKhi6Jp556KhZaPE2xnyeeeCLssssuYdCgQY26UHnK5CbnRs72T3fICy+8EJ/GKPQmTJgQb3IKDI6T7/G3F198MY7JoZtn7733Dn369InbmD9/fmyezz+5Ejjy5MZ31qxZEz+72267xab/rOWMfT///PPxyZC/P/3002HZsmXxnPbff/+YFvl0YjuTJ08OCxcujE/RnAeFXL6g5vj4DPvmM5wH6VCqtY5t0g3E+XFNnn322ZjWPJ1yLhxH/nwYZ8S15NrSAsj5kFbZtrkOpC/d03PmzInjW0irQw45JH6Gc+N3XGf+zdP+Hnvs0ZDOLRkXRX7imlB4U6BzvH379i27nezac8ykH9eFp+rDDjss7p/tcn5cD7oayV9cY1ot8teAc+S7M2fOjOnOZ4orNP7Nkz9plY3NGjt2bMzX5M389kiT5557LqYpx89nePrPPkMXHefKD/vmvuAz5A2+T77n2mVdj/yONJkyZUpDFx/nwDXhnLLtcnxsk3sp2z75hX9TEZZ7qCB/kH4HHnhgozzF38i/3Ffk8VLfJw9l9xH3Numf3ctNBTHke46PPM11JH34Xv7+yFCWkI9Jd+4r8gX7yX8uy8vc81xrusTIF1yj7F7iM5Qh5G/SimvEcR555JHxM/QAcH3pEi
XduA+51/L3YpYP+C7lA/cV+Z30Je2yMpK0zL7Hdzgm8mF2n3BctHxyz3EefIZuRI6RtMjOl7Sn3OC8mwss2TbHtXr
2025-10-08 12:30:58 +02:00
"text/plain": [
2025-10-11 17:25:29 +02:00
"<Figure size 620.25x877 with 1 Axes>"
2025-10-08 12:30:58 +02:00
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
2025-10-11 17:25:29 +02:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAmIAAANaCAYAAAAqCCCSAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjYsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvq6yFwwAAAAlwSFlzAAAPYQAAD2EBqD+naQAAdL5JREFUeJzt3Qd4nldht/EjW7I85b1n7OxBJglJyIYQEgi77FGgQCmjA75CmaWsFmihtOwNZUPCCmTvvffw3tuWZVvDWt91H+kor5X31bLsI8n377ocx9I7nn3+z1lPWWtra2uQJEnSATfswH+lJEmSYBCTJEnKxCAmSZKUiUFMkiQpE4OYJElSJgYxSZKkTAxikiRJmRjEJEmSMjGISZIkZWIQkyRJysQgJkmSlIlBTJIkKRODmCRJUiYGMUmSpEwMYpIkSZkYxCRJkjIxiEmSJGViEJMkScrEICZJkpSJQUySJCkTg5gkSVImBjFJkqRMDGKSJEmZGMQkSZIyMYhJkiRlYhCTJEnKxCAmSZKUiUFMkiQpE4OYJElSJgYxSZKkTAxikiRJmRjEJEmSMjGISZIkZWIQkyRJysQgJkmSlIlBTJIkKRODmCRJUiYGMUmSpEwMYpIkSZkYxCRJkjIxiEmSJGViEJMkScrEICZJkpSJQUySJCkTg5gkSVImBjFJkqRMDGKSJEmZGMQkSZIyMYhJkiRlYhCTJEnKxCAmSZKUiUFMkiQpE4OYJElSJgYxSZKkTAxikiRJmRjEJEmSMjGISZIkZWIQkyRJysQgJkmSlIlBTJIkKRODmCRJUiYGMUmSpEwMYpIkSZkYxCRJkjIxiEmSJGViEJMkScrEICZJkpSJQUySJCkTg5gkSVImBjFJkqRMDGKSJEmZGMQkSZIyMYhJkiRlYhCTJEnKxCAmSZKUiUFMkiQpE4OYJElSJgYxSZKkTAxikiRJmRjEJEmSMjGISZIkZWIQkyRJysQgJkmSlIlBTJIkKRODmCRJUiYGMUmSpEwMYpIkSZkYxCRJkjIxiEmSJGViEJMkScrEICZJkpSJQUySJCkTg5gkSVImBjFJkqRMDGKSJEmZGMQkSZIyMYhJkiRlYhCTJEnKxCAmSZKUiUFMkiQpE4OYJElSJgYxSZKkTAxikiRJmRjEJEmSMjGISZIkZWIQkyRJysQgJkmSlIlBTJIkKRODmCRJUiYGMUmSpEwMYpIkSZkYxCRJkjIxiEmSJGViEJMkScrEICZJkpSJQUySJCkTg5gkSVImBjFJkqRMDGKSJEmZGMQkSZIyMYhJkiRlYhCTJEnKxCAmSZKUiUFMkiQpE4OYJElSJgYxSZKkTAxikiRJmRjEJEmSMjGISZIkZWIQkyRJysQgJkmSlIlBTJIkKRODmCRJUiYGMUmSpEwMYpIkSZkYxCRJkjIxiEmSJGViEJMkScrEICZJkpSJQUySJCkTg5gkSVImBjFJkqRMDGKSJEmZGMQkSZIyMYhJkiRlYhCTJEnKxCAmSZKUiUFMkiQpE4OYJElSJgYxSZKkTAxikiRJmRjEJEmSMjGISZIkZWIQkyRJysQgJkmSlIlBTJIkKRODmCRJUiYGMUmSpEwMYpIkSZkYxCRJkjIxiEmSJGViEJMkScrEICZJkpSJQUySJCkTg5gkSVImBjFJkqRMDGKSJEmZGMQkSZIyMYhJkiRlYhCTJEnKxCAmSZKUiUFMkiQpE4OYJElSJgYxSZKkTAxikiRJmRjEJEmSMjGISZIkZWIQkyRJysQgJkmSlIlBTJIkKRODmCRJUiYGMUmSpEwMYpIkSZkYxCRJkjIxiEmSJGViEJMkScrEICZJkpSJQUySJCkTg5gkSVImBjFJkqRMDGKSJEmZGMQkSZIyMYhJkiRlYhCTJEnKxCAmSZKUiUFMkiQpE4OYJElSJgYxSZKkTAxikiRJmRjEJEmSMjGISZIkZWIQkyRJysQgJkmSlIlBTJIkKRODmCRJUi
YGMUmSpEwMYpIkSZkYxCRJkjIxiEmSJGViEJMkScrEICZJkpSJQUySJCkTg5gkSVImBjFJkqRMDGKSJEmZGMQkSZIyMYhJkiRlYhCTJEnKxCAmSZKUiUFMkiQpE4OYJElSJgYxSZKkTAxikiRJmRjEJEmSMjGISZIkZWIQkyRJysQgJkmSlEl5ri+WpEKtra3xT1lZWfwzEA3kZWS5Wlpa4nINGzbw77HT8rKsPd2WaftjIO6DfcF6NTc3h+HDh+/X9errMTyUt31uA/9s1YDCidjU1BQvoAejxsbGUF9f33FBUv/ZsGFD+M1vfhO38UC1du3a8Kc//SkWmAcSxxvHHedesWOPn+3YsSP89Kc/jdtxMKiurg6//OUvQ21tba/ed9ddd4XbbrstDCYcL3V1dSWvG/x8yZIl4Ve/+lVoaGjYr8vCtfvPf/5zWLVqVa/fe//994ebbrqpy7Khuz8p0LFNiv2+pb1sSUG9q88ZKqwRU69w8vz2t78Nxx13XDjqqKP69bPTiYfe3CUfKCwfFyEuYG94wxtCRUVF7kUaUiiQV69ePaBDfk1NTVi2bNkBX8Y9e/aEH/7wh+HYY48NZ5xxxjN+z/LceOONYcKECWH69OlhMCBwcC5RqPbGli1bBnRYLxVg7rzzzvCWt7wljBkzpuixf+2114bTTz89VFZW7tdl4VhZsWJFmDNnTpg/f36v3rt169awffv2ksGaGymO1cLvKqydZd1e8YpXhNGjR8fXdv6ssrKycMEFF4TDDz88/vvmm28OjzzyyDPKgvHjx8fPGTlyZBgKDGLqdRjZvHlz2L179375fO50OXnPPvvsMBDNmjUrXkwGQ9OPhg6aqyicZsyYUbKmjhoxCideq4Fl6tSp4cgjjyx688Y1laBGKCJoD7Qb0J4iYJ5//vkdNykEM2reLrnkklBVVRV/xrHJ6whr3HQRPGfOnLnX56RjnO3CcT158uRwyimn7PWaESNGxD9DhUFM/aqwurjUBaWr13CH1FVtQ08+f38tI39zMS31vt4s275up7585/767FKv69x0sC8FzP5Yz/21Dfb184q9hgLs3HPPLfm+adOmhVe+8pVh1KhRJZexp9/fn/ryfb05bvb1GO3p6/Z1/86bNy/+KeX444+PtTvdhej+OA9689refAfBaOHChR3/3rRpUygvL4/rPWXKlL1em2rNuLFdtGhRl587ceLEcOihh4ahzCCmfUZ4Wr58eTjmmGPC0qVLY9MNJyV3d9ztpJOZE5wqeKqaOUkpNAg2s2fPjv0nHnzwwbBu3br4ultuuSXWOvGZVEPzHgobmiQeeuiheNHiborveeCBB8IhhxwSJk2atFcTKneZnOScyOn7aQ558skn490YF70FCxbEk5wLBsvJ+/jdU089Ffvk0Mxz9NFHh7Fjx8bPWL9+fayeL7xzJThy58Z7du3aFV972GGHxar/VHPGdz/xxBPxzpDfP/zww2Hbtm1xnZ71rGfFbVG4nficxx9/PGzcuDHeRbMeXOQKL9QsH6/hu3kN68F2KFZbx2fSDMT6sU8effTRuK25O2VdWI7C9aGfEfuSfUsNIOvDtkqfzX5g+9I8vWbNmti/hW118sknx9ewbvyM/cy/uds/4ogjOrZzb/pFcTyxT7h4c0FneceNG1fyc9K+Z5nZfuwX7qqf/exnx+/nc1k/9gdNjRxf7GNqLQr3AevIe1euXBm3O6/pXKDxb+782Vapb9bcuXPjcc2xWfh5bJPHHnssblOWn9dw959eQxMd68ofvpvzgtdwbPB+jnv2XWp65Gdsk8WLF3c08bEO7BPWKX0uy8dnci6lz+d44d8UhKVuKjg+2H4nnHDCXscUv+P45bziGC/2fo6hdB5xbrP907ncVYjhuGf5OKbZj2wf3ld4fiRcSziO2e6cVxwXfE/h69KxzDnPvqZJjOOCfZTOJV7DNYTjm23FPmI5n/Oc58TX0ALA/qVJlO3Geci5VngupuOA93
J94LzieGf7su3SNZJtmd7He1gmjsN0nrBc1HxyzrEevIZmRJaRbZHWl23PdYP17i5Y8tks186dO+Pxw3IV0/n44zh
2025-10-08 12:30:58 +02:00
"text/plain": [
2025-10-11 17:25:29 +02:00
"<Figure size 620.25x877 with 1 Axes>"
2025-10-08 12:30:58 +02:00
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
2025-10-11 17:25:29 +02:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAmIAAANaCAYAAAAqCCCSAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjYsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvq6yFwwAAAAlwSFlzAAAPYQAAD2EBqD+naQAAjVBJREFUeJzt3QV4ZcXdgPHJurs7LO7uTgsUWkopLS0V+pWWuru7u5cWqNBSoRQtFHd3XWTd3XezkuR+zzvZk57cvTe5yWZ3kt339zwp3eTeI3PmzPzP2KkqFAqFIEmSpG2u07bfpSRJkmAgJkmSlIiBmCRJUiIGYpIkSYkYiEmSJCViICZJkpSIgZgkSVIiBmKSJEmJGIhJkiQlYiAmSZKUiIGYJElSIgZikiRJiRiISZIkJWIgJkmSlIiBmCRJUiIGYpIkSYkYiEmSJCViICZJkpSIgZgkSVIiBmKSJEmJGIhJkiQlYiAmSZKUiIGYJElSIgZikiRJiRiISZIkJWIgJkmSlIiBmCRJUiIGYpIkSYkYiEmSJCViICZJkpSIgZgkSVIiBmKSJEmJGIhJkiQlYiAmSZKUiIGYJElSIgZikiRJiRiISZIkJWIgJkmSlIiBmCRJUiIGYpIkSYkYiEmSJCViICZJkpSIgZgkSVIiBmKSJEmJGIhJkiQlYiAmSZKUiIGYJElSIgZikiRJiRiISZIkJWIgJkmSlIiBmCRJUiIGYpIkSYkYiEmSJCViICZJkpSIgZgkSVIiBmKSJEmJGIhJkiQlYiAmSZKUiIGYJElSIgZikiRJiRiISZIkJWIgJkmSlIiBmCRJUiIGYpIkSYkYiEmSJCViICZJkpSIgZgkSVIiBmKSJEmJGIhJkiQlYiAmSZKUiIGYJElSIgZikiRJiRiISZIkJWIgJkmSlIiBmCRJUiIGYpIkSYkYiEmSJCViICZJkpSIgZgkSVIiBmKSJEmJGIhJkiQlYiAmSZKUiIGYJElSIgZikiRJiRiISZIkJWIgJkmSlIiBmCRJUiIGYpIkSYkYiEmSJCViICZJkpSIgZgkSVIiBmKSJEmJGIhJkiQlYiAmSZKUiIGYJElSIgZikiRJiRiISZIkJWIgJkmSlIiBmCRJUiIGYpIkSYkYiEmSJCViICZJkpSIgZgkSVIiBmKSJEmJGIhJkiQlYiAmSZKUiIGYJElSIgZikiRJiRiISZIkJWIgJkmSlIiBmCRJUiIGYpIkSYkYiEmSJCViICZJkpSIgZgkSVIiBmKSJEmJGIhJkiQlYiAmSZKUiIGYJElSIgZikiRJiRiISZIkJWIgJkmSlIiBmCRJUiIGYpIkSYkYiEmSJCViICZJkpSIgZgkSVIiBmKSJEmJGIhJkiQlYiAmSZKUiIGYJElSIgZikiRJiRiISZIkJWIgJkmSlIiBmCRJUiIGYpIkSYkYiEmSJCViICZJkpSIgZgkSVIiBmKSJEmJGIhJkiQlYiAmSZKUiIGYJElSIgZikiRJiRiISZIkJWIgJkmSlIiBmCRJUiIGYpIkSYkYiEmSJCViICZJkpSIgZgkSVIiBmKSJEmJGIhJkiQlYiAmSZKUiIGYJElSIgZikiRJiRiISZIkJWIgJkmSlIiBmCRJUiIGYpIkSYkYiEmSJCViICZJkpSIgZgkSVIiBmKSJEmJGIhJkiQlYiAmSZKUiIGYJElSIgZikiRJiRiISZIkJWIgJkmSlIiBmCRJUiIGYpIkSYkYiEmSJCViICZJkpSIgZgkSVIiBmKSJEmJGIhJkiQlYiAmSZKUiIGYJElSIgZikiRJiRiISZIkJWIgJkmSlIiBmCRJUiIGYpIkSYkYiEmSJCViICZJkpSIgZgkSVIiBmKSJEmJGIhJkiQlYiAmSZKUiIGYJElSIgZikiRJiRiISZIkJWIgJkmSlIiBmCRJUiIGYpIkSYkYiEmSJCViICZJkpSIgZgkSVIiBmKSJEmJGIhJkiQlYiAmSZKUiI
GYJElSIgZikiRJiRiISZIkJWIgJkmSlIiBmCRJUiIGYpIkSYkYiEmSJCViICZJkpSIgZgkSVIiBmKSJEmJGIhJkiQlYiAmSZKUiIGYJElSIgZikiRJiRiISZIkJWIgJkmSlIiBmCRJUiIGYpIkSYkYiEmSJCViICZJkpSIgZgkSVIiBmKSJEmJGIhJkiQlYiAmSZKUiIGYJElSIl1S7ViS8gqFQvypqqqKP+1Rez5Gjquuri4eV6dO7f8ZOztejrXStMzSH+3xGmwJzqu2tjZ07tx5q55Xa/Pw9pz2qbX/u1XtCjdiTU1NLEB3RBs3bgzr1q1rKJDUdubPnx+uvPLKmMbt1Zw5c8J//vOfWGFuS+Q38h33Xqm8x+9WrFgRLr/88piOHcHy5cvDP//5z7B27doWfe/hhx8O999/f+hIyC/V1dVlyw1+P3ny5HDFFVeE9evXb9Vjoey+8cYbw8yZM1v83SeeeCLcfffdTdYNzf1kAR1pUurvdZvqlixQb2o72wtbxNQi3Dz//ve/w7777hv23HPPNt12duOhJU/J2wrHRyFEAfbWt741dO3aNfUhbVeokGfNmtWug/yVK1eGqVOnbvNj3LBhQ/jTn/4U9tlnn3DUUUdt9neO56677goDBgwIw4cPDx0BAQf3EpVqSyxevLhdB+vlApiHHnoovOMd7wi9e/cumfdvu+22cOSRR4bu3btv1WMhr0yfPj2MGTMmjB8/vkXfXbJkSVi2bFnZwJoHKfJqfl/51lnO7fWvf33o1atX/GzxtqqqqsLJJ58cdtttt/jve+65Jzz77LOb1QX9+/eP2+nRo0fYHhiIqcXByKJFi8KaNWu2yvZ50uXmPe6440J7NGrUqFiYdISuH20/6K6ichoxYkTZljpaxKic+Kzal6FDh4Y99tij5MMbZSqBGkERgXZ7ewCtFAHmSSed1PCQQmBGy9sZZ5wR+vXrF39H3uRzBGs8dBF4jhw5stF2sjxOupCvBw8eHA455JBGn+nWrVv82V4YiKlN5ZuLyxUoTX2GJ6SmWhsq2f7WOkb+S2Fa7nstObYtTafW7HNrbbvc54q7Drakgtka57m10mBLt1fqM1RgJ5xwQtnvDRs2LJxzzjmhZ8+eZY+x0v23pdbsryX5ZkvzaKWf29LrO27cuPhTzv777x9bd5oLotviPmjJZ1uyDwKjnXfeueHfCxcuDF26dInnPWTIkEafzVrNeLCdOHFik9sdOHBg2GWXXcL2zEBMW4zgadq0aWHvvfcOU6ZMiV033JQ83fG0k93M3OA0wdPUzE1KpUFgM3r06Dh+4qmnngpz586Nn7v33ntjqxPbpBma71DZ0CXx9NNPx0KLpyn28+STT4addtopDBo0qFEXKk+Z3OTcyNn+6Q558cUX49MYhd6ECRPiTU6BwXHyPf720ksvxTE5dPPstddeoU+fPnEb8+bNi83z+SdXAkee3PjO6tWr42d33XXX2PSftZyx7xdeeCE+GfL3Z555JixdujSe03777RfTIp9ObGfSpElhwYIF8Sma86CQyxfUHB+fYd98hvMgHUq11rFNuoE4P67Jc889F9Oap1POhePInw/jjLiWXFtaADkf0irbNteB9KV7evbs2XF8C2l18MEHx89wbvyO68y/edrffffdG9K5JeOiyE9cEwpvCnSOt2/fvmW3k117jpn047rwVH3ooYfG/bNdzo/rQVcj+YtrTKtF/hpwjnx3xowZMd35THGFxr958ietsrFZY8eOjfmavJnfHmny/PPPxzTl+PkMT//ZZ+ii41z5Yd/cF3yGvMH3yfdcu6zrkd+RJi+//HJDFx/nwDXhnLLtcnxsk3sp2z75hX9TEZZ7qCB/kH4HHHBAozzF38i/3Ffk8VLfJw9l9xH3Numf3ctNBTHke46PPM11JH34Xv7+yFCWkI9Jd+4r8gX7yX8uy8vc81xrusTIF1yj7F7iM5Qh5G/SimvEcR5xxBHxM/QAcH3pEiXduA
+51/L3YpYP+C7lA/cV+Z30Je2yMpK0zL7Hdzgm8mF2n3BctHxyz3EefIZuRI6RtMjOl7Sn3OC8mwss2TbHtWrVqph
2025-10-08 12:30:58 +02:00
"text/plain": [
2025-10-11 17:25:29 +02:00
"<Figure size 620.25x877 with 1 Axes>"
2025-10-08 12:30:58 +02:00
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"\n",
2025-10-11 17:25:29 +02:00
"def ocr_trocr(img):\n",
" \"\"\"\n",
" Fixed TrOCR function for better text recognition\n",
" \"\"\"\n",
" # Convert to RGB if necessary\n",
" if img.mode != 'RGB':\n",
" img = img.convert('RGB')\n",
2025-10-08 12:30:58 +02:00
" \n",
2025-10-11 17:25:29 +02:00
" # Process the image - TrOCR expects RGB input\n",
" pixel_values = trocr_processor(images=img, return_tensors=\"pt\").pixel_values.to(device)\n",
2025-10-08 12:30:58 +02:00
" \n",
2025-10-11 17:25:29 +02:00
" # Generate text with optimized parameters\n",
" with torch.no_grad():\n",
" generated_ids = trocr_model.generate(\n",
" pixel_values,\n",
" max_new_tokens=200, # Allow sufficient tokens\n",
" num_beams=4, # Beam search for better quality\n",
" early_stopping=True,\n",
" no_repeat_ngram_size=2, # Prevent repetitions\n",
" length_penalty=1.0,\n",
" do_sample=False # Deterministic output\n",
" )\n",
2025-10-08 12:30:58 +02:00
" \n",
2025-10-11 17:25:29 +02:00
" # Decode the generated text\n",
" generated_text = trocr_processor.batch_decode(generated_ids, skip_special_tokens=True)[0]\n",
" \n",
" # Show result\n",
" show_ocr_result(img, generated_text)\n",
" \n",
" return generated_text\n",
"\n",
2025-10-08 12:30:58 +02:00
"\n",
"def ocr_paddle(img):\n",
2025-10-11 17:25:29 +02:00
" \"\"\"\n",
" Fixed PaddleOCR function using the correct API\n",
" \"\"\"\n",
" # Convert PIL image to numpy array\n",
" img_array = np.array(img)\n",
" result = paddleocr_model.predict(img_array)\n",
" # Extract text from result\n",
" text_list = []\n",
" breakpoint()\n",
" items = []\n",
" for item in result:\n",
" boxes = item.json[\"res\"][\"rec_boxes\"]\n",
" texts = item.json[\"res\"][\"rec_texts\"] \n",
" for box, text in zip(boxes, texts):\n",
" x1, y1, _, _ = box\n",
" items.append((x1, y1, text))\n",
" \n",
" # line_tolerance=40\n",
" # Sort top-to-bottom (with tolerance), then left-to-right\n",
" # items.sort(key=lambda t: (round(t[1] / line_tolerance), t[0]))\n",
2025-10-08 12:30:58 +02:00
"\n",
2025-10-11 17:25:29 +02:00
" # Extract ordered text\n",
" ordered_texts = [t[2] for t in items]\n",
" res = \" \".join(ordered_texts)\n",
" \n",
" show_ocr_result(img, res)\n",
" \n",
2025-10-08 12:30:58 +02:00
" return res\n",
"\n",
"\n",
"results = []\n",
"\n",
"for pdf_file in os.listdir(PDF_FOLDER):\n",
" if not pdf_file.lower().endswith('.pdf'):\n",
" continue\n",
" pdf_path = os.path.join(PDF_FOLDER, pdf_file)\n",
" images = pdf_to_images(pdf_path)\n",
" \n",
" for i, img in enumerate(images):\n",
" if i != 0:\n",
" break\n",
" page_num = i+1\n",
" ref = pdf_extract_text(pdf_path, page_num=page_num)\n",
" \n",
" # EasyOCR\n",
" easy_text = ocr_easyocr(img)\n",
" results.append({'PDF': pdf_file, 'Page': page_num, 'Model': 'EasyOCR', 'Prediction': easy_text, **evaluate_text(ref, easy_text)})\n",
" \n",
" # TrOCR\n",
" trocr_text = ocr_trocr(img)\n",
" results.append({'PDF': pdf_file, 'Page': page_num, 'Model': 'TrOCR', 'Prediction': trocr_text, **evaluate_text(ref, trocr_text)})\n",
" \n",
" # PaddleOCR\n",
" paddle_text = ocr_paddle(img)\n",
" results.append({'PDF': pdf_file, 'Page': page_num, 'Model': 'PaddleOCR', 'Prediction': paddle_text, **evaluate_text(ref, paddle_text)})"
]
},
{
"cell_type": "markdown",
"id": "0db6dc74",
"metadata": {},
"source": [
"## 4 Save and Analyze Results"
]
},
{
"cell_type": "code",
2025-10-11 17:25:29 +02:00
"execution_count": 31,
2025-10-08 12:30:58 +02:00
"id": "da3155e3",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Benchmark results saved!\n",
" WER CER\n",
"Model \n",
"EasyOCR 0.000000 0.000000\n",
2025-10-11 17:25:29 +02:00
"PaddleOCR 0.153846 0.138614\n",
"TrOCR 1.000000 0.990099\n"
2025-10-08 12:30:58 +02:00
]
},
{
"data": {
2025-10-11 17:25:29 +02:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAArMAAAIVCAYAAADConfoAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjYsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvq6yFwwAAAAlwSFlzAAAPYQAAD2EBqD+naQAARVJJREFUeJzt3QuczGX///HPWtaiWOUUYR1DOUXkVJTaJB2VKOdEkdMvLMqhhG6Rum10K0p3hUpHtW5EB0RIN+XQyulWTskui1125//4XP1nmtmdXbtrd79zzb6ej8dkv9/5zuw1szPNe67v57quEJfL5RIAAADAQoWcbgAAAACQU4RZAAAAWIswCwAAAGsRZgEAAGAtwiwAAACsRZgFAACAtQizAAAAsBZhFgAAANYizAIAAMBahFkAyEVvvPGGhISEyMaNG8UGvXr1kksuueSi7uPxxx+XW265JdfahItz/fXXy8iRI51uBpBvCLOAxV555RUTnJo3b57hMXr9oEGDsnR/586dk5dfflmuu+46ufTSS03I0Z91n17nT0pKisyfP1/atm0rl112mRQtWlQiIyOld+/ePoHOHfLcl8KFC0ulSpVMmDp48GCW2jdhwgSf+yhUqJBcccUVcscdd8h3332XpftA7tqzZ4+89tprMmbMGLN95MgR87cZMmRIumN1n143fvz4dNf16NFDihQpIqdPnzbb+rrw/lt7X8LDwz23W716tc91oaGhUq5cOencubNs3749W49l6dKl5vVeokQJ87q677775Oeff872c3L48GF58sknpU6dOlK8eHFzf02aNJFJkybJiRMnPMfpeyajx6i3zel7Z9SoURITEyOHDh3KdtsBGxV2ugEAcu7tt982wXHDhg0SFxcnNWvWzPF9JSYmSseOHeWrr74y4VA/KDUsxsbGmhCyZMkS82GvH8xuZ86ckXvvvdccc8MNN5hAo4F27969snjxYnnzzTdl//79cuWVV3pu88wzz0i1atXk7NmzJoDqB/W3334r27Zt8wkpmZk9e7YJ2qmpqXLgwAGZO3eu+f36PDRq1CjHzwGy76WXXjJ/z3bt2pltDZK1atUyf9O01qxZY4KY/uvvusaNG5vw56ZfjDQop6WBNa3BgwebIKpfuv773//KnDlzTNDV11WFChUu+Di+//57ueuuu+Tqq6+Wf/zjH5KQkCCfffaZ2V+vXr0sPRfu+7n99tvl1KlT8vDDD5sQq/SL3dSpU+Xrr7+W//znP57j9b0xZcqUdPdTqlSpdPuy+t7Rx1GyZEnzZVdvAwQ9FwAr/frrry59Cy9ZssRVtmxZ14QJE/wep8cMHDjwgvf36KOPmmP/+c9/prtu1qxZ5roBAwb47Nf71f0vvvhiutucP3/eNW3aNNeBAwfM9vz5882x33//vc9xo0aNMvsXLVp0wTaOHz/eHHv06FGf/du2bTP7x4wZ43JaRo8z0Jw6dcr827NnT1eJEiVydB/JycmuMmXKuJ566imf/b1793aFhoa6Tp486fP7Chcu7OrWrZvrkksuMa8Pt99++808Z8OGDfPsy2q7Vq1aZW773nvv+eyfPXu22f/8889n6bGMHDnSFRIS4jp06JDP/rNnz7qy6s8//3RVqlTJVb58edf27dvTXa/3/eyzz3q2b7zxRtfVV199wfvNyXtn0KBBrqpVq7pSU1Oz3H7AVpQZABb3ypYuXdr0puopVd3Oqf/973/y+uuvy0033eS3JGHgwIGm5017yfRY921effVVUys5dOhQv71neqrVu1fWnzZt2ph/d+/eneP2u3vetNfPW1JSkjmlrT3W2stXuXJlU0uo+/2VYnz00UdyzTXXmGO1h057nNPS07p9+/aVihUrmuO0p+yxxx6T5OTkdL97+PDhUrZsWdObfc8998jRo0d9jtFede0F1x7Epk2bSrFixaR+/fpmW2lvuG5rr5v28P3www8+t9ceSO1Br169ujlGn4
c+ffrIH3/84bc8Q0+Zd+vWzbxuWrduneHzuWXLFtNuPQ2uPYwZ0V7BY8eOSfv27X32631r+Yl36cf69evl/Pnz5jWh96m/w83dU5tZm7Iru68rPQvhj/6Ns0rfD/r6mDFjhk+ZgFv58uXlqaeekvx4jPq+3Ldvn8/zDAQrwixgKQ2veoo/LCxMunbtKr/88os5xZkTX3zxhQkfWreYEb1Ow4g74OltdLt79+5yMbQkQWnAyqrjx4+bEKX1mRrw+vXrZ8LcAw884DlGSxDuvPNOeeGFF6RTp07yz3/+U+6++2558cUXpUuXLn6DmQ5kevDBB81pZj2VqzWT3sHwt99+k2bNmsnChQvNfWgtsT5+Lc1w13q6PfHEE/Ljjz+aMK1h99NPP/X7RUHLQzRgahv1dPOff/5pfta/77Bhw8yp6okTJ5rAoo9PH5fb8uXL5ddffzX1yfr4tO3aNj3N/VenvK/777/ftHPy5MnmOfNHX0P6pUZP+evfOLPBYWvXrjUhWY/15g6l3qUGGlhr165tjtUvON6lBpmFWf07p71oCUBuv67076hfwPQ59/fcZcUnn3xivpDol8us0vedv8eoZT8X8xjd5Q3+SjqAoON01zCA7Nu4caM5vbh8+XKzracSr7zySteQIUNyVGYwdOhQc9wPP/yQ4TGbN282xwwfPtxs6ynhC93G36nSFStWmDIBLT94//33TYlE0aJFPeUIWSkzSHuJiIhwxcbG+hz71ltvuQoVKuT65ptvfPbPmTPH3GbNmjWefbodFhbmiouL8+z78ccf05Vd9OjRw9ynvxIC9+lc9+Ns3769zylefb701PuJEyc8+/Q0sB67du1az75ly5aZfcWKFXPt27fPs//VV181+/W0utvp06fTtePdd981x3399dfpnreuXbumO977dP63337rKlmypKtjx45ZOr3+8MMPuy6//HK/15UrV8518803e7ajoqJM+YF64IEHXPfff7/nuqZNm7pq1aqVrl3+/tZ60ftKW2Ywb94887rSkgV9LdSsWdOUDWzYsMGVFR999JGrePHi5m/kfo1nV+nSpV0NGzbM8vFaZpDRY+zfv/9Fv3f0Nf3YY4/l6LEANmEAGGAh7bXTU5buQTfaO6Y9hf/+979l+vTpfgfIZObkyZPmX53BICPu69y9Yu5/M7uNP2lPSeupdm33hcoRvH3wwQdmgIvmUD2tqwPCtBdVB9a0bNnSHPPee+9J3bp1zele7ely015HtWrVKs+x7nbVqFHDs92gQQPzO7TnU2mPqJYhaK+plgSkpX8Db48++qjPPj0lrL3CeupX79tNBxe1aNHCs+2emULbWaVKlXT7tT16+l9pL6Cb9iTr6Xudlklt3rzZcxrabcCAARk+p/p86GO79dZbTe+u9vhfiPZaZ9Tz2apVK9NzrD2P+jxoyYH2kruve/75583P2lOsp8L99fBrb7v2aKdVpkyZdPu0vMKblkm89dZbZlDYhejgLO311vIAfdz6t9PXtZZnuEVFRZm2fvPNNxnej74nsvt+0Ne/DmBMy9/7IbvvHf3beL/2gWBFmAUso+FAw4YGWZ0WyTvsaJBduXKlCSTZ4f4AdofarAReDXoXuo0/OmWQnm6Oj4+XefPmmdHd2alLVDpzgXeg0dO6OoJeT+1v2rTJ7NOyC52aSUONP1qi4M07OHqHAT3tr7TeVcOK1tRmRdr7c4c+9/1ldJx7FLvW9/rb7317LbfQEgR9PaR9PPr8pqX1vf5oENbaaz01rbNQpK09zkxGp+S1ZODDDz80QVWn3NL2aIhV+iVCSzb0NLm+hrVcxV+JgX4pSxvgMjJu3DgT3jXQ6+/V5ySjOti0tI5VXz9aG+6eWuvpp582z7mWHaiffvrJlHFkRt8T2X0/aD11Vh9jdt87+rdJ+yULCEaEWcAyX375pfz+++/mw1ov/nptsxtmtQfTPaAoo6mt9DrlnqbIPcBl69at2ZoOS2tO3T2bWsOqIU
ZrRnfu3Jnjyfv1dhrmP/74Y1NrqAFBe1J18JT2tvmTNixm1Jud0/rJrN5fRsdl5fbam6h1qyNGjDB/A/d0Zbfddpt
2025-10-08 12:30:58 +02:00
"text/plain": [
"<Figure size 800x500 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"df_results = pd.DataFrame(results)\n",
"df_results.to_csv(os.path.join(OUTPUT_FOLDER, 'ai_ocr_benchmark_results.csv'), index=False)\n",
"print('Benchmark results saved!')\n",
"\n",
"# Summary by model\n",
"summary = df_results.groupby('Model')[['WER', 'CER']].mean()\n",
"print(summary)\n",
"\n",
"# Plot\n",
"summary.plot(kind='bar', figsize=(8,5), title='AI OCR Benchmark (WER & CER)')\n",
"plt.ylabel('Error Rate')\n",
"plt.show()"
]
}
],
"metadata": {
"kernelspec": {
2025-10-11 17:25:29 +02:00
"display_name": ".venv (3.13.5)",
"language": "python",
2025-10-08 12:30:58 +02:00
"name": "python3"
},
"language_info": {
2025-10-11 17:25:29 +02:00
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
2025-10-08 12:30:58 +02:00
"name": "python",
2025-10-11 17:25:29 +02:00
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.5"
2025-10-08 12:30:58 +02:00
}
},
"nbformat": 4,
"nbformat_minor": 5
}