Hyper param search results

This commit is contained in:
2025-12-07 20:07:59 +01:00
parent 83522273b9
commit 09ec70de27
7 changed files with 1897 additions and 1868 deletions

2
.gitignore vendored
View File

@@ -1,5 +1,5 @@
~$*.docx
results/
__pycache__/*
__pycache__/
dataset
results

View File

@@ -1,496 +0,0 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 108,
"id": "93809ffc",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: pip in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (25.3)\n",
"Note: you may need to restart the kernel to use updated packages.\n",
"Requirement already satisfied: jupyter in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (1.1.1)\n",
"Requirement already satisfied: notebook in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jupyter) (7.5.0)\n",
"Requirement already satisfied: jupyter-console in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jupyter) (6.6.3)\n",
"Requirement already satisfied: nbconvert in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jupyter) (7.16.6)\n",
"Requirement already satisfied: ipykernel in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from jupyter) (7.1.0)\n",
"Requirement already satisfied: ipywidgets in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jupyter) (8.1.8)\n",
"Requirement already satisfied: jupyterlab in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jupyter) (4.5.0)\n",
"Requirement already satisfied: comm>=0.1.1 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipykernel->jupyter) (0.2.3)\n",
"Requirement already satisfied: debugpy>=1.6.5 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipykernel->jupyter) (1.8.17)\n",
"Requirement already satisfied: ipython>=7.23.1 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipykernel->jupyter) (9.8.0)\n",
"Requirement already satisfied: jupyter-client>=8.0.0 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipykernel->jupyter) (8.6.3)\n",
"Requirement already satisfied: jupyter-core!=5.0.*,>=4.12 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipykernel->jupyter) (5.9.1)\n",
"Requirement already satisfied: matplotlib-inline>=0.1 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipykernel->jupyter) (0.2.1)\n",
"Requirement already satisfied: nest-asyncio>=1.4 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipykernel->jupyter) (1.6.0)\n",
"Requirement already satisfied: packaging>=22 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipykernel->jupyter) (25.0)\n",
"Requirement already satisfied: psutil>=5.7 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipykernel->jupyter) (7.1.3)\n",
"Requirement already satisfied: pyzmq>=25 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipykernel->jupyter) (27.1.0)\n",
"Requirement already satisfied: tornado>=6.2 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipykernel->jupyter) (6.5.2)\n",
"Requirement already satisfied: traitlets>=5.4.0 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipykernel->jupyter) (5.14.3)\n",
"Requirement already satisfied: colorama>=0.4.4 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipython>=7.23.1->ipykernel->jupyter) (0.4.6)\n",
"Requirement already satisfied: decorator>=4.3.2 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipython>=7.23.1->ipykernel->jupyter) (5.2.1)\n",
"Requirement already satisfied: ipython-pygments-lexers>=1.0.0 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipython>=7.23.1->ipykernel->jupyter) (1.1.1)\n",
"Requirement already satisfied: jedi>=0.18.1 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipython>=7.23.1->ipykernel->jupyter) (0.19.2)\n",
"Requirement already satisfied: prompt_toolkit<3.1.0,>=3.0.41 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipython>=7.23.1->ipykernel->jupyter) (3.0.52)\n",
"Requirement already satisfied: pygments>=2.11.0 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipython>=7.23.1->ipykernel->jupyter) (2.19.2)\n",
"Requirement already satisfied: stack_data>=0.6.0 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipython>=7.23.1->ipykernel->jupyter) (0.6.3)\n",
"Requirement already satisfied: typing_extensions>=4.6 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipython>=7.23.1->ipykernel->jupyter) (4.15.0)\n",
"Requirement already satisfied: wcwidth in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from prompt_toolkit<3.1.0,>=3.0.41->ipython>=7.23.1->ipykernel->jupyter) (0.2.14)\n",
"Requirement already satisfied: parso<0.9.0,>=0.8.4 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from jedi>=0.18.1->ipython>=7.23.1->ipykernel->jupyter) (0.8.5)\n",
"Requirement already satisfied: python-dateutil>=2.8.2 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from jupyter-client>=8.0.0->ipykernel->jupyter) (2.9.0.post0)\n",
"Requirement already satisfied: platformdirs>=2.5 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from jupyter-core!=5.0.*,>=4.12->ipykernel->jupyter) (4.5.1)\n",
"Requirement already satisfied: six>=1.5 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from python-dateutil>=2.8.2->jupyter-client>=8.0.0->ipykernel->jupyter) (1.17.0)\n",
"Requirement already satisfied: executing>=1.2.0 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from stack_data>=0.6.0->ipython>=7.23.1->ipykernel->jupyter) (2.2.1)\n",
"Requirement already satisfied: asttokens>=2.1.0 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from stack_data>=0.6.0->ipython>=7.23.1->ipykernel->jupyter) (3.0.1)\n",
"Requirement already satisfied: pure-eval in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from stack_data>=0.6.0->ipython>=7.23.1->ipykernel->jupyter) (0.2.3)\n",
"Requirement already satisfied: widgetsnbextension~=4.0.14 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from ipywidgets->jupyter) (4.0.15)\n",
"Requirement already satisfied: jupyterlab_widgets~=3.0.15 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from ipywidgets->jupyter) (3.0.16)\n",
"Requirement already satisfied: async-lru>=1.0.0 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jupyterlab->jupyter) (2.0.5)\n",
"Requirement already satisfied: httpx<1,>=0.25.0 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jupyterlab->jupyter) (0.28.1)\n",
"Requirement already satisfied: jinja2>=3.0.3 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jupyterlab->jupyter) (3.1.6)\n",
"Requirement already satisfied: jupyter-lsp>=2.0.0 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jupyterlab->jupyter) (2.3.0)\n",
"Requirement already satisfied: jupyter-server<3,>=2.4.0 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jupyterlab->jupyter) (2.17.0)\n",
"Requirement already satisfied: jupyterlab-server<3,>=2.28.0 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jupyterlab->jupyter) (2.28.0)\n",
"Requirement already satisfied: notebook-shim>=0.2 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jupyterlab->jupyter) (0.2.4)\n",
"Requirement already satisfied: setuptools>=41.1.0 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jupyterlab->jupyter) (65.5.0)\n",
"Requirement already satisfied: anyio in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from httpx<1,>=0.25.0->jupyterlab->jupyter) (4.12.0)\n",
"Requirement already satisfied: certifi in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from httpx<1,>=0.25.0->jupyterlab->jupyter) (2025.11.12)\n",
"Requirement already satisfied: httpcore==1.* in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from httpx<1,>=0.25.0->jupyterlab->jupyter) (1.0.9)\n",
"Requirement already satisfied: idna in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from httpx<1,>=0.25.0->jupyterlab->jupyter) (3.11)\n",
"Requirement already satisfied: h11>=0.16 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from httpcore==1.*->httpx<1,>=0.25.0->jupyterlab->jupyter) (0.16.0)\n",
"Requirement already satisfied: argon2-cffi>=21.1 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (25.1.0)\n",
"Requirement already satisfied: jupyter-events>=0.11.0 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (0.12.0)\n",
"Requirement already satisfied: jupyter-server-terminals>=0.4.4 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (0.5.3)\n",
"Requirement already satisfied: nbformat>=5.3.0 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (5.10.4)\n",
"Requirement already satisfied: overrides>=5.0 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (7.7.0)\n",
"Requirement already satisfied: prometheus-client>=0.9 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (0.23.1)\n",
"Requirement already satisfied: pywinpty>=2.0.1 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (3.0.2)\n",
"Requirement already satisfied: send2trash>=1.8.2 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (1.8.3)\n",
"Requirement already satisfied: terminado>=0.8.3 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (0.18.1)\n",
"Requirement already satisfied: websocket-client>=1.7 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (1.9.0)\n",
"Requirement already satisfied: babel>=2.10 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jupyterlab-server<3,>=2.28.0->jupyterlab->jupyter) (2.17.0)\n",
"Requirement already satisfied: json5>=0.9.0 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jupyterlab-server<3,>=2.28.0->jupyterlab->jupyter) (0.12.1)\n",
"Requirement already satisfied: jsonschema>=4.18.0 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jupyterlab-server<3,>=2.28.0->jupyterlab->jupyter) (4.25.1)\n",
"Requirement already satisfied: requests>=2.31 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jupyterlab-server<3,>=2.28.0->jupyterlab->jupyter) (2.32.5)\n",
"Requirement already satisfied: argon2-cffi-bindings in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from argon2-cffi>=21.1->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (25.1.0)\n",
"Requirement already satisfied: MarkupSafe>=2.0 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jinja2>=3.0.3->jupyterlab->jupyter) (3.0.3)\n",
"Requirement already satisfied: attrs>=22.2.0 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jsonschema>=4.18.0->jupyterlab-server<3,>=2.28.0->jupyterlab->jupyter) (25.4.0)\n",
"Requirement already satisfied: jsonschema-specifications>=2023.03.6 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jsonschema>=4.18.0->jupyterlab-server<3,>=2.28.0->jupyterlab->jupyter) (2025.9.1)\n",
"Requirement already satisfied: referencing>=0.28.4 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jsonschema>=4.18.0->jupyterlab-server<3,>=2.28.0->jupyterlab->jupyter) (0.37.0)\n",
"Requirement already satisfied: rpds-py>=0.7.1 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jsonschema>=4.18.0->jupyterlab-server<3,>=2.28.0->jupyterlab->jupyter) (0.30.0)\n",
"Requirement already satisfied: python-json-logger>=2.0.4 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (4.0.0)\n",
"Requirement already satisfied: pyyaml>=5.3 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (6.0.2)\n",
"Requirement already satisfied: rfc3339-validator in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (0.1.4)\n",
"Requirement already satisfied: rfc3986-validator>=0.1.1 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (0.1.1)\n",
"Requirement already satisfied: fqdn in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (1.5.1)\n",
"Requirement already satisfied: isoduration in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (20.11.0)\n",
"Requirement already satisfied: jsonpointer>1.13 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (3.0.0)\n",
"Requirement already satisfied: rfc3987-syntax>=1.1.0 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (1.1.0)\n",
"Requirement already satisfied: uri-template in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (1.3.0)\n",
"Requirement already satisfied: webcolors>=24.6.0 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (25.10.0)\n",
"Requirement already satisfied: beautifulsoup4 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from nbconvert->jupyter) (4.14.3)\n",
"Requirement already satisfied: bleach!=5.0.0 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from bleach[css]!=5.0.0->nbconvert->jupyter) (6.3.0)\n",
"Requirement already satisfied: defusedxml in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from nbconvert->jupyter) (0.7.1)\n",
"Requirement already satisfied: jupyterlab-pygments in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from nbconvert->jupyter) (0.3.0)\n",
"Requirement already satisfied: mistune<4,>=2.0.3 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from nbconvert->jupyter) (3.1.4)\n",
"Requirement already satisfied: nbclient>=0.5.0 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from nbconvert->jupyter) (0.10.2)\n",
"Requirement already satisfied: pandocfilters>=1.4.1 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from nbconvert->jupyter) (1.5.1)\n",
"Requirement already satisfied: webencodings in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from bleach!=5.0.0->bleach[css]!=5.0.0->nbconvert->jupyter) (0.5.1)\n",
"Requirement already satisfied: tinycss2<1.5,>=1.1.0 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from bleach[css]!=5.0.0->nbconvert->jupyter) (1.4.0)\n",
"Requirement already satisfied: fastjsonschema>=2.15 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from nbformat>=5.3.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (2.21.2)\n",
"Requirement already satisfied: charset_normalizer<4,>=2 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from requests>=2.31->jupyterlab-server<3,>=2.28.0->jupyterlab->jupyter) (3.4.4)\n",
"Requirement already satisfied: urllib3<3,>=1.21.1 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from requests>=2.31->jupyterlab-server<3,>=2.28.0->jupyterlab->jupyter) (2.6.0)\n",
"Requirement already satisfied: lark>=1.2.2 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from rfc3987-syntax>=1.1.0->jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (1.3.1)\n",
"Requirement already satisfied: cffi>=1.0.1 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from argon2-cffi-bindings->argon2-cffi>=21.1->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (2.0.0)\n",
"Requirement already satisfied: pycparser in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from cffi>=1.0.1->argon2-cffi-bindings->argon2-cffi>=21.1->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (2.23)\n",
"Requirement already satisfied: soupsieve>=1.6.1 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from beautifulsoup4->nbconvert->jupyter) (2.8)\n",
"Requirement already satisfied: arrow>=0.15.0 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from isoduration->jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (1.4.0)\n",
"Requirement already satisfied: tzdata in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from arrow>=0.15.0->isoduration->jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (2025.2)\n",
"Note: you may need to restart the kernel to use updated packages.\n",
"Requirement already satisfied: ipywidgets in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (8.1.8)\n",
"Requirement already satisfied: comm>=0.1.3 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipywidgets) (0.2.3)\n",
"Requirement already satisfied: ipython>=6.1.0 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipywidgets) (9.8.0)\n",
"Requirement already satisfied: traitlets>=4.3.1 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipywidgets) (5.14.3)\n",
"Requirement already satisfied: widgetsnbextension~=4.0.14 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from ipywidgets) (4.0.15)\n",
"Requirement already satisfied: jupyterlab_widgets~=3.0.15 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from ipywidgets) (3.0.16)\n",
"Requirement already satisfied: colorama>=0.4.4 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipython>=6.1.0->ipywidgets) (0.4.6)\n",
"Requirement already satisfied: decorator>=4.3.2 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipython>=6.1.0->ipywidgets) (5.2.1)\n",
"Requirement already satisfied: ipython-pygments-lexers>=1.0.0 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipython>=6.1.0->ipywidgets) (1.1.1)\n",
"Requirement already satisfied: jedi>=0.18.1 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipython>=6.1.0->ipywidgets) (0.19.2)\n",
"Requirement already satisfied: matplotlib-inline>=0.1.5 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipython>=6.1.0->ipywidgets) (0.2.1)\n",
"Requirement already satisfied: prompt_toolkit<3.1.0,>=3.0.41 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipython>=6.1.0->ipywidgets) (3.0.52)\n",
"Requirement already satisfied: pygments>=2.11.0 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipython>=6.1.0->ipywidgets) (2.19.2)\n",
"Requirement already satisfied: stack_data>=0.6.0 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipython>=6.1.0->ipywidgets) (0.6.3)\n",
"Requirement already satisfied: typing_extensions>=4.6 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipython>=6.1.0->ipywidgets) (4.15.0)\n",
"Requirement already satisfied: wcwidth in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from prompt_toolkit<3.1.0,>=3.0.41->ipython>=6.1.0->ipywidgets) (0.2.14)\n",
"Requirement already satisfied: parso<0.9.0,>=0.8.4 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from jedi>=0.18.1->ipython>=6.1.0->ipywidgets) (0.8.5)\n",
"Requirement already satisfied: executing>=1.2.0 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from stack_data>=0.6.0->ipython>=6.1.0->ipywidgets) (2.2.1)\n",
"Requirement already satisfied: asttokens>=2.1.0 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from stack_data>=0.6.0->ipython>=6.1.0->ipywidgets) (3.0.1)\n",
"Requirement already satisfied: pure-eval in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from stack_data>=0.6.0->ipython>=6.1.0->ipywidgets) (0.2.3)\n",
"Note: you may need to restart the kernel to use updated packages.\n",
"Requirement already satisfied: ipykernel in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (7.1.0)\n",
"Requirement already satisfied: comm>=0.1.1 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipykernel) (0.2.3)\n",
"Requirement already satisfied: debugpy>=1.6.5 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipykernel) (1.8.17)\n",
"Requirement already satisfied: ipython>=7.23.1 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipykernel) (9.8.0)\n",
"Requirement already satisfied: jupyter-client>=8.0.0 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipykernel) (8.6.3)\n",
"Requirement already satisfied: jupyter-core!=5.0.*,>=4.12 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipykernel) (5.9.1)\n",
"Requirement already satisfied: matplotlib-inline>=0.1 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipykernel) (0.2.1)\n",
"Requirement already satisfied: nest-asyncio>=1.4 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipykernel) (1.6.0)\n",
"Requirement already satisfied: packaging>=22 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipykernel) (25.0)\n",
"Requirement already satisfied: psutil>=5.7 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipykernel) (7.1.3)\n",
"Requirement already satisfied: pyzmq>=25 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipykernel) (27.1.0)\n",
"Requirement already satisfied: tornado>=6.2 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipykernel) (6.5.2)\n",
"Requirement already satisfied: traitlets>=5.4.0 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipykernel) (5.14.3)\n",
"Requirement already satisfied: colorama>=0.4.4 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipython>=7.23.1->ipykernel) (0.4.6)\n",
"Requirement already satisfied: decorator>=4.3.2 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipython>=7.23.1->ipykernel) (5.2.1)\n",
"Requirement already satisfied: ipython-pygments-lexers>=1.0.0 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipython>=7.23.1->ipykernel) (1.1.1)\n",
"Requirement already satisfied: jedi>=0.18.1 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipython>=7.23.1->ipykernel) (0.19.2)\n",
"Requirement already satisfied: prompt_toolkit<3.1.0,>=3.0.41 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipython>=7.23.1->ipykernel) (3.0.52)\n",
"Requirement already satisfied: pygments>=2.11.0 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipython>=7.23.1->ipykernel) (2.19.2)\n",
"Requirement already satisfied: stack_data>=0.6.0 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipython>=7.23.1->ipykernel) (0.6.3)\n",
"Requirement already satisfied: typing_extensions>=4.6 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipython>=7.23.1->ipykernel) (4.15.0)\n",
"Requirement already satisfied: wcwidth in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from prompt_toolkit<3.1.0,>=3.0.41->ipython>=7.23.1->ipykernel) (0.2.14)\n",
"Requirement already satisfied: parso<0.9.0,>=0.8.4 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from jedi>=0.18.1->ipython>=7.23.1->ipykernel) (0.8.5)\n",
"Requirement already satisfied: python-dateutil>=2.8.2 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from jupyter-client>=8.0.0->ipykernel) (2.9.0.post0)\n",
"Requirement already satisfied: platformdirs>=2.5 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from jupyter-core!=5.0.*,>=4.12->ipykernel) (4.5.1)\n",
"Requirement already satisfied: six>=1.5 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from python-dateutil>=2.8.2->jupyter-client>=8.0.0->ipykernel) (1.17.0)\n",
"Requirement already satisfied: executing>=1.2.0 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from stack_data>=0.6.0->ipython>=7.23.1->ipykernel) (2.2.1)\n",
"Requirement already satisfied: asttokens>=2.1.0 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from stack_data>=0.6.0->ipython>=7.23.1->ipykernel) (3.0.1)\n",
"Requirement already satisfied: pure-eval in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from stack_data>=0.6.0->ipython>=7.23.1->ipykernel) (0.2.3)\n",
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"%pip install --upgrade pip\n",
"%pip install --upgrade jupyter\n",
"%pip install --upgrade ipywidgets\n",
"%pip install --upgrade ipykernel"
]
},
{
"cell_type": "code",
"execution_count": 109,
"id": "48724594",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: pdf2image in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (1.17.0)\n",
"Requirement already satisfied: pillow in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (12.0.0)\n",
"Note: you may need to restart the kernel to use updated packages.\n",
"Requirement already satisfied: PyMuPDF in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (1.26.6)\n",
"Note: you may need to restart the kernel to use updated packages.\n",
"Requirement already satisfied: pandas in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (2.3.3)\n",
"Requirement already satisfied: numpy>=1.23.2 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from pandas) (2.3.5)\n",
"Requirement already satisfied: python-dateutil>=2.8.2 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from pandas) (2.9.0.post0)\n",
"Requirement already satisfied: pytz>=2020.1 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from pandas) (2025.2)\n",
"Requirement already satisfied: tzdata>=2022.7 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from pandas) (2025.2)\n",
"Requirement already satisfied: six>=1.5 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from python-dateutil>=2.8.2->pandas) (1.17.0)\n",
"Note: you may need to restart the kernel to use updated packages.\n",
"Requirement already satisfied: matplotlib in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (3.10.7)\n",
"Requirement already satisfied: contourpy>=1.0.1 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from matplotlib) (1.3.3)\n",
"Requirement already satisfied: cycler>=0.10 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from matplotlib) (0.12.1)\n",
"Requirement already satisfied: fonttools>=4.22.0 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from matplotlib) (4.61.0)\n",
"Requirement already satisfied: kiwisolver>=1.3.1 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from matplotlib) (1.4.9)\n",
"Requirement already satisfied: numpy>=1.23 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from matplotlib) (2.3.5)\n",
"Requirement already satisfied: packaging>=20.0 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from matplotlib) (25.0)\n",
"Requirement already satisfied: pillow>=8 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from matplotlib) (12.0.0)\n",
"Requirement already satisfied: pyparsing>=3 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from matplotlib) (3.2.5)\n",
"Requirement already satisfied: python-dateutil>=2.7 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from matplotlib) (2.9.0.post0)\n",
"Requirement already satisfied: six>=1.5 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from python-dateutil>=2.7->matplotlib) (1.17.0)\n",
"Note: you may need to restart the kernel to use updated packages.\n",
"Requirement already satisfied: seaborn in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (0.13.2)\n",
"Requirement already satisfied: numpy!=1.24.0,>=1.20 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from seaborn) (2.3.5)\n",
"Requirement already satisfied: pandas>=1.2 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from seaborn) (2.3.3)\n",
"Requirement already satisfied: matplotlib!=3.6.1,>=3.4 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from seaborn) (3.10.7)\n",
"Requirement already satisfied: contourpy>=1.0.1 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (1.3.3)\n",
"Requirement already satisfied: cycler>=0.10 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (0.12.1)\n",
"Requirement already satisfied: fonttools>=4.22.0 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (4.61.0)\n",
"Requirement already satisfied: kiwisolver>=1.3.1 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (1.4.9)\n",
"Requirement already satisfied: packaging>=20.0 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (25.0)\n",
"Requirement already satisfied: pillow>=8 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (12.0.0)\n",
"Requirement already satisfied: pyparsing>=3 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (3.2.5)\n",
"Requirement already satisfied: python-dateutil>=2.7 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (2.9.0.post0)\n",
"Requirement already satisfied: pytz>=2020.1 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from pandas>=1.2->seaborn) (2025.2)\n",
"Requirement already satisfied: tzdata>=2022.7 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from pandas>=1.2->seaborn) (2025.2)\n",
"Requirement already satisfied: six>=1.5 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from python-dateutil>=2.7->matplotlib!=3.6.1,>=3.4->seaborn) (1.17.0)\n",
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"# Install necessary packages\n",
"%pip install pdf2image pillow \n",
"# pdf reading\n",
"%pip install PyMuPDF\n",
"\n",
"# Data analysis and visualization\n",
"%pip install pandas\n",
"%pip install matplotlib\n",
"%pip install seaborn"
]
},
{
"cell_type": "code",
"execution_count": 110,
"id": "e1f793b6",
"metadata": {},
"outputs": [],
"source": [
"import os, json\n",
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"from pdf2image import convert_from_path\n",
"from PIL import Image, ImageOps\n",
"import fitz # PyMuPDF\n",
"import re\n",
"from datetime import datetime\n",
"from typing import List\n",
"import shutil"
]
},
{
"cell_type": "code",
"execution_count": 111,
"id": "1652a78e",
"metadata": {},
"outputs": [],
"source": [
"def pdf_to_images(pdf_path: str, output_dir: str, dpi: int = 300):\n",
" \"\"\"\n",
" Render a PDF into a list of PIL Images using PyMuPDF or pdf2image.\n",
" 'pages' is 1-based (e.g., range(1, 10) -> pages 19).\n",
" \"\"\"\n",
" if fitz is not None:\n",
" doc = fitz.open(pdf_path)\n",
" total_pages = len(doc)\n",
"\n",
" # Adjust page indices (PyMuPDF uses 0-based indexing)\n",
" page_indices = list(range(total_pages))\n",
"\n",
" for i in page_indices:\n",
" page = doc.load_page(i)\n",
" mat = fitz.Matrix(dpi / 72.0, dpi / 72.0)\n",
" pix = page.get_pixmap(matrix=mat, alpha=False)\n",
" img = Image.frombytes(\"RGB\", [pix.width, pix.height], pix.samples)\n",
" # Build filename\n",
" out_path = os.path.join(\n",
" output_dir,\n",
" f\"page_{i + 1:04d}.png\"\n",
" )\n",
"\n",
" img.save(out_path, \"PNG\")\n",
" doc.close()\n",
" else:\n",
" raise RuntimeError(\"Install PyMuPDF or pdf2image to convert PDFs.\")"
]
},
{
"cell_type": "code",
"execution_count": 112,
"id": "f523dd58",
"metadata": {},
"outputs": [],
"source": [
"import fitz\n",
"import re\n",
"import os\n",
"\n",
"def _pdf_extract_text_structured(page, margin_threshold=50):\n",
" \"\"\"\n",
" Extract text using PyMuPDF's dict mode which preserves\n",
" the actual line structure from the PDF.\n",
" \"\"\"\n",
" data = page.get_text(\"dict\")\n",
" \n",
" # Collect all lines with their Y position\n",
" all_lines = []\n",
" margin_text_parts = [] # Collect vertical/margin text\n",
" margin_y_positions = []\n",
" \n",
" for block in data.get(\"blocks\", []):\n",
" if block.get(\"type\") != 0: # Skip non-text blocks\n",
" continue\n",
" \n",
" block_bbox = block.get(\"bbox\", (0, 0, 0, 0))\n",
" block_width = block_bbox[2] - block_bbox[0]\n",
" block_height = block_bbox[3] - block_bbox[1]\n",
" \n",
" # Detect vertical/margin text\n",
" is_margin_text = (block_bbox[0] < margin_threshold or \n",
" block_height > block_width * 2)\n",
" \n",
" for line in block.get(\"lines\", []):\n",
" direction = line.get(\"dir\", (1, 0))\n",
" bbox = line.get(\"bbox\", (0, 0, 0, 0))\n",
" y_center = (bbox[1] + bbox[3]) / 2\n",
" x_start = bbox[0]\n",
" \n",
" # Collect text from all spans\n",
" line_text = \"\"\n",
" for span in line.get(\"spans\", []):\n",
" text = span.get(\"text\", \"\")\n",
" line_text += text\n",
" \n",
" line_text = line_text.strip()\n",
" line_text = re.sub(r\"[•▪◦●❖▶■\\uf000-\\uf0ff]\", \"\", line_text)\n",
" \n",
" if not line_text:\n",
" continue\n",
" \n",
" # Check if this is margin/vertical text\n",
" if is_margin_text or abs(direction[0]) < 0.9:\n",
" margin_text_parts.append((y_center, line_text))\n",
" margin_y_positions.append(y_center)\n",
" else:\n",
" all_lines.append((y_center, x_start, line_text))\n",
" \n",
" # Reconstruct margin text as single line at its vertical center\n",
" if margin_text_parts:\n",
" # Sort by Y position (top to bottom) and join\n",
" margin_text_parts.sort(key=lambda x: x[0])\n",
" full_margin_text = \" \".join(part[1] for part in margin_text_parts)\n",
" # Calculate vertical center of the watermark\n",
" avg_y = sum(margin_y_positions) / len(margin_y_positions)\n",
" # Add as a single line\n",
" all_lines.append((avg_y, -1, full_margin_text)) # x=-1 to sort first\n",
" \n",
" if not all_lines:\n",
" return \"\"\n",
" \n",
" # Sort by Y first, then by X\n",
" all_lines.sort(key=lambda x: (x[0], x[1]))\n",
" \n",
" # Group lines at same vertical position\n",
" merged_rows = []\n",
" current_row = [all_lines[0]]\n",
" current_y = all_lines[0][0]\n",
" \n",
" for y_center, x_start, text in all_lines[1:]:\n",
" if abs(y_center - current_y) <= 2:\n",
" current_row.append((y_center, x_start, text))\n",
" else:\n",
" current_row.sort(key=lambda x: x[1])\n",
" row_text = \" \".join(item[2] for item in current_row)\n",
" merged_rows.append((current_y, row_text))\n",
" current_row = [(y_center, x_start, text)]\n",
" current_y = y_center\n",
" \n",
" if current_row:\n",
" current_row.sort(key=lambda x: x[1])\n",
" row_text = \" \".join(item[2] for item in current_row)\n",
" merged_rows.append((current_y, row_text))\n",
" \n",
" # Sort rows by Y and extract text\n",
" merged_rows.sort(key=lambda x: x[0])\n",
" lines = [row[1] for row in merged_rows]\n",
" \n",
" # Join and clean up\n",
" text = \"\\n\".join(lines)\n",
" text = re.sub(r\" +\", \" \", text).strip()\n",
" text = re.sub(r\"\\n{3,}\", \"\\n\\n\", text).strip()\n",
" \n",
" return text\n",
"\n",
"def pdf_extract_text(pdf_path, output_dir, margin_threshold=50):\n",
" os.makedirs(output_dir, exist_ok=True)\n",
" doc = fitz.open(pdf_path)\n",
" \n",
" for i, page in enumerate(doc):\n",
" text = _pdf_extract_text_structured(page, margin_threshold)\n",
" if not text.strip():\n",
" continue\n",
" out_path = os.path.join(output_dir, f\"page_{i + 1:04d}.txt\")\n",
" with open(out_path, \"w\", encoding=\"utf-8\") as f:\n",
" f.write(text)"
]
},
{
"cell_type": "code",
"execution_count": 113,
"id": "9f64a8c0",
"metadata": {},
"outputs": [],
"source": [
"PDF_FOLDER = './instructions' # Folder containing PDF files\n",
"OUTPUT_FOLDER = './dataset'\n",
"\n",
"os.makedirs(OUTPUT_FOLDER, exist_ok=True)"
]
},
{
"cell_type": "code",
"execution_count": 114,
"id": "41e4651d",
"metadata": {},
"outputs": [],
"source": [
"i = 0\n",
"\n",
"pdf_files = sorted([\n",
" fname for fname in os.listdir(PDF_FOLDER)\n",
" if fname.lower().endswith(\".pdf\")\n",
"])\n",
"\n",
"\n",
"for fname in pdf_files:\n",
" # build output directories\n",
" out_img_path = os.path.join(OUTPUT_FOLDER, str(i), \"img\")\n",
" out_txt_path = os.path.join(OUTPUT_FOLDER, str(i), \"txt\")\n",
"\n",
" os.makedirs(out_img_path, exist_ok=True)\n",
" os.makedirs(out_txt_path, exist_ok=True)\n",
"\n",
" # source and destination PDF paths\n",
" src_pdf = os.path.join(PDF_FOLDER, fname)\n",
" pdf_path = os.path.join(OUTPUT_FOLDER, str(i), fname)\n",
"\n",
" # copy PDF into numbered folder\n",
" shutil.copy(src_pdf, pdf_path)\n",
"\n",
" # convert PDF → images\n",
" pdf_to_images(\n",
" pdf_path=pdf_path,\n",
" output_dir=out_img_path,\n",
" dpi=300\n",
" )\n",
" pdf_extract_text(\n",
" pdf_path=pdf_path,\n",
" output_dir=out_txt_path,\n",
" margin_threshold=40\n",
" )\n",
"\n",
" i += 1"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

File diff suppressed because one or more lines are too long

View File

@@ -112,13 +112,23 @@ def assemble_from_paddle_result(paddleocr_predict, min_score=0.0, line_tol_facto
def main():
parser = argparse.ArgumentParser()
parser.add_argument("--pdf-folder", required=True)
# dataset root folder
parser.add_argument("--pdf-folder", required=True)
# Whether to use document image orientation classification.
parser.add_argument("--use-doc-orientation-classify", type=lambda s: s.lower()=="true", default=False)
# Whether to use text image unwarping.
parser.add_argument("--use-doc-unwarping", type=lambda s: s.lower()=="true", default=False)
# Whether to use text line orientation classification.
parser.add_argument("--textline-orientation", type=lambda s: s.lower()=="true", default=True)
parser.add_argument("--text-det-box-thresh", type=float, default=0.6)
# Detection pixel threshold for the text detection model. Pixels with scores greater than this threshold in the output probability map are considered text pixels.
parser.add_argument("--text-det-thresh", type=float, default=0.0)
# Detection box threshold for the text detection model. A detection result is considered a text region if the average score of all pixels within the border of the result is greater than this threshold.
parser.add_argument("--text-det-box-thresh", type=float, default=0.0)
# Text detection expansion coefficient, which expands the text region using this method. The larger the value, the larger the expansion area.
parser.add_argument("--text-det-unclip-ratio", type=float, default=1.5)
# Text recognition threshold. Text results with scores greater than this threshold are retained.
parser.add_argument("--text-rec-score-thresh", type=float, default=0.0)
parser.add_argument("--line-tolerance", type=float, default=0.6)
parser.add_argument("--min-box-score", type=float, default=0.0)
# text language code
parser.add_argument("--lang", default="es")
args = parser.parse_args()
@@ -140,12 +150,15 @@ def main():
tp0 = time.time()
out = ocr.predict(
arr,
use_doc_orientation_classify=args.use_doc_orientation_classify,
use_doc_unwarping=args.use_doc_unwarping,
use_textline_orientation=args.textline_orientation, #str2bool Whether to use text line orientation classification.
text_det_thresh=args.text_det_thresh,
text_det_box_thresh=args.text_det_box_thresh,
text_det_unclip_ratio=args.text_det_unclip_ratio,
text_rec_score_thresh=args.text_rec_score_thresh,
use_textline_orientation=args.textline_orientation
text_rec_score_thresh=args.text_rec_score_thresh
)
pred = assemble_from_paddle_result(out, args.min_box_score, args.line_tolerance)
pred = assemble_from_paddle_result(out)
time_per_page_list.append(float(time.time() - tp0))
m = evaluate_text(ref, pred)
cer_list.append(m["CER"])

504
src/prepare_dataset.ipynb Normal file
View File

@@ -0,0 +1,504 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "93809ffc",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: pip in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (25.3)\n",
"Note: you may need to restart the kernel to use updated packages.\n",
"Requirement already satisfied: jupyter in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (1.1.1)\n",
"Requirement already satisfied: notebook in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyter) (7.5.0)\n",
"Requirement already satisfied: jupyter-console in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyter) (6.6.3)\n",
"Requirement already satisfied: nbconvert in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyter) (7.16.6)\n",
"Requirement already satisfied: ipykernel in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyter) (7.1.0)\n",
"Requirement already satisfied: ipywidgets in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyter) (8.1.8)\n",
"Requirement already satisfied: jupyterlab in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyter) (4.5.0)\n",
"Requirement already satisfied: comm>=0.1.1 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipykernel->jupyter) (0.2.3)\n",
"Requirement already satisfied: debugpy>=1.6.5 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipykernel->jupyter) (1.8.17)\n",
"Requirement already satisfied: ipython>=7.23.1 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipykernel->jupyter) (9.8.0)\n",
"Requirement already satisfied: jupyter-client>=8.0.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipykernel->jupyter) (8.6.3)\n",
"Requirement already satisfied: jupyter-core!=5.0.*,>=4.12 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipykernel->jupyter) (5.9.1)\n",
"Requirement already satisfied: matplotlib-inline>=0.1 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipykernel->jupyter) (0.2.1)\n",
"Requirement already satisfied: nest-asyncio>=1.4 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipykernel->jupyter) (1.6.0)\n",
"Requirement already satisfied: packaging>=22 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipykernel->jupyter) (25.0)\n",
"Requirement already satisfied: psutil>=5.7 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipykernel->jupyter) (7.1.3)\n",
"Requirement already satisfied: pyzmq>=25 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipykernel->jupyter) (27.1.0)\n",
"Requirement already satisfied: tornado>=6.2 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipykernel->jupyter) (6.5.2)\n",
"Requirement already satisfied: traitlets>=5.4.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipykernel->jupyter) (5.14.3)\n",
"Requirement already satisfied: colorama>=0.4.4 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipython>=7.23.1->ipykernel->jupyter) (0.4.6)\n",
"Requirement already satisfied: decorator>=4.3.2 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipython>=7.23.1->ipykernel->jupyter) (5.2.1)\n",
"Requirement already satisfied: ipython-pygments-lexers>=1.0.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipython>=7.23.1->ipykernel->jupyter) (1.1.1)\n",
"Requirement already satisfied: jedi>=0.18.1 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipython>=7.23.1->ipykernel->jupyter) (0.19.2)\n",
"Requirement already satisfied: prompt_toolkit<3.1.0,>=3.0.41 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipython>=7.23.1->ipykernel->jupyter) (3.0.52)\n",
"Requirement already satisfied: pygments>=2.11.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipython>=7.23.1->ipykernel->jupyter) (2.19.2)\n",
"Requirement already satisfied: stack_data>=0.6.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipython>=7.23.1->ipykernel->jupyter) (0.6.3)\n",
"Requirement already satisfied: typing_extensions>=4.6 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipython>=7.23.1->ipykernel->jupyter) (4.15.0)\n",
"Requirement already satisfied: wcwidth in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from prompt_toolkit<3.1.0,>=3.0.41->ipython>=7.23.1->ipykernel->jupyter) (0.2.14)\n",
"Requirement already satisfied: parso<0.9.0,>=0.8.4 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jedi>=0.18.1->ipython>=7.23.1->ipykernel->jupyter) (0.8.5)\n",
"Requirement already satisfied: python-dateutil>=2.8.2 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyter-client>=8.0.0->ipykernel->jupyter) (2.9.0.post0)\n",
"Requirement already satisfied: platformdirs>=2.5 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyter-core!=5.0.*,>=4.12->ipykernel->jupyter) (4.5.1)\n",
"Requirement already satisfied: six>=1.5 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from python-dateutil>=2.8.2->jupyter-client>=8.0.0->ipykernel->jupyter) (1.17.0)\n",
"Requirement already satisfied: executing>=1.2.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from stack_data>=0.6.0->ipython>=7.23.1->ipykernel->jupyter) (2.2.1)\n",
"Requirement already satisfied: asttokens>=2.1.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from stack_data>=0.6.0->ipython>=7.23.1->ipykernel->jupyter) (3.0.1)\n",
"Requirement already satisfied: pure-eval in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from stack_data>=0.6.0->ipython>=7.23.1->ipykernel->jupyter) (0.2.3)\n",
"Requirement already satisfied: widgetsnbextension~=4.0.14 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipywidgets->jupyter) (4.0.15)\n",
"Requirement already satisfied: jupyterlab_widgets~=3.0.15 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipywidgets->jupyter) (3.0.16)\n",
"Requirement already satisfied: async-lru>=1.0.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyterlab->jupyter) (2.0.5)\n",
"Requirement already satisfied: httpx<1,>=0.25.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyterlab->jupyter) (0.28.1)\n",
"Requirement already satisfied: jinja2>=3.0.3 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyterlab->jupyter) (3.1.6)\n",
"Requirement already satisfied: jupyter-lsp>=2.0.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyterlab->jupyter) (2.3.0)\n",
"Requirement already satisfied: jupyter-server<3,>=2.4.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyterlab->jupyter) (2.17.0)\n",
"Requirement already satisfied: jupyterlab-server<3,>=2.28.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyterlab->jupyter) (2.28.0)\n",
"Requirement already satisfied: notebook-shim>=0.2 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyterlab->jupyter) (0.2.4)\n",
"Requirement already satisfied: setuptools>=41.1.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyterlab->jupyter) (65.5.0)\n",
"Requirement already satisfied: anyio in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from httpx<1,>=0.25.0->jupyterlab->jupyter) (4.12.0)\n",
"Requirement already satisfied: certifi in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from httpx<1,>=0.25.0->jupyterlab->jupyter) (2025.11.12)\n",
"Requirement already satisfied: httpcore==1.* in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from httpx<1,>=0.25.0->jupyterlab->jupyter) (1.0.9)\n",
"Requirement already satisfied: idna in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from httpx<1,>=0.25.0->jupyterlab->jupyter) (3.11)\n",
"Requirement already satisfied: h11>=0.16 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from httpcore==1.*->httpx<1,>=0.25.0->jupyterlab->jupyter) (0.16.0)\n",
"Requirement already satisfied: argon2-cffi>=21.1 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (25.1.0)\n",
"Requirement already satisfied: jupyter-events>=0.11.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (0.12.0)\n",
"Requirement already satisfied: jupyter-server-terminals>=0.4.4 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (0.5.3)\n",
"Requirement already satisfied: nbformat>=5.3.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (5.10.4)\n",
"Requirement already satisfied: overrides>=5.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (7.7.0)\n",
"Requirement already satisfied: prometheus-client>=0.9 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (0.23.1)\n",
"Requirement already satisfied: pywinpty>=2.0.1 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (3.0.2)\n",
"Requirement already satisfied: send2trash>=1.8.2 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (1.8.3)\n",
"Requirement already satisfied: terminado>=0.8.3 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (0.18.1)\n",
"Requirement already satisfied: websocket-client>=1.7 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (1.9.0)\n",
"Requirement already satisfied: babel>=2.10 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyterlab-server<3,>=2.28.0->jupyterlab->jupyter) (2.17.0)\n",
"Requirement already satisfied: json5>=0.9.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyterlab-server<3,>=2.28.0->jupyterlab->jupyter) (0.12.1)\n",
"Requirement already satisfied: jsonschema>=4.18.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyterlab-server<3,>=2.28.0->jupyterlab->jupyter) (4.25.1)\n",
"Requirement already satisfied: requests>=2.31 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyterlab-server<3,>=2.28.0->jupyterlab->jupyter) (2.32.5)\n",
"Requirement already satisfied: argon2-cffi-bindings in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from argon2-cffi>=21.1->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (25.1.0)\n",
"Requirement already satisfied: MarkupSafe>=2.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jinja2>=3.0.3->jupyterlab->jupyter) (3.0.3)\n",
"Requirement already satisfied: attrs>=22.2.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jsonschema>=4.18.0->jupyterlab-server<3,>=2.28.0->jupyterlab->jupyter) (25.4.0)\n",
"Requirement already satisfied: jsonschema-specifications>=2023.03.6 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jsonschema>=4.18.0->jupyterlab-server<3,>=2.28.0->jupyterlab->jupyter) (2025.9.1)\n",
"Requirement already satisfied: referencing>=0.28.4 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jsonschema>=4.18.0->jupyterlab-server<3,>=2.28.0->jupyterlab->jupyter) (0.37.0)\n",
"Requirement already satisfied: rpds-py>=0.7.1 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jsonschema>=4.18.0->jupyterlab-server<3,>=2.28.0->jupyterlab->jupyter) (0.30.0)\n",
"Requirement already satisfied: python-json-logger>=2.0.4 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (4.0.0)\n",
"Requirement already satisfied: pyyaml>=5.3 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (6.0.2)\n",
"Requirement already satisfied: rfc3339-validator in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (0.1.4)\n",
"Requirement already satisfied: rfc3986-validator>=0.1.1 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (0.1.1)\n",
"Requirement already satisfied: fqdn in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (1.5.1)\n",
"Requirement already satisfied: isoduration in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (20.11.0)\n",
"Requirement already satisfied: jsonpointer>1.13 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (3.0.0)\n",
"Requirement already satisfied: rfc3987-syntax>=1.1.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (1.1.0)\n",
"Requirement already satisfied: uri-template in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (1.3.0)\n",
"Requirement already satisfied: webcolors>=24.6.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (25.10.0)\n",
"Requirement already satisfied: beautifulsoup4 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from nbconvert->jupyter) (4.14.3)\n",
"Requirement already satisfied: bleach!=5.0.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from bleach[css]!=5.0.0->nbconvert->jupyter) (6.3.0)\n",
"Requirement already satisfied: defusedxml in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from nbconvert->jupyter) (0.7.1)\n",
"Requirement already satisfied: jupyterlab-pygments in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from nbconvert->jupyter) (0.3.0)\n",
"Requirement already satisfied: mistune<4,>=2.0.3 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from nbconvert->jupyter) (3.1.4)\n",
"Requirement already satisfied: nbclient>=0.5.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from nbconvert->jupyter) (0.10.2)\n",
"Requirement already satisfied: pandocfilters>=1.4.1 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from nbconvert->jupyter) (1.5.1)\n",
"Requirement already satisfied: webencodings in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from bleach!=5.0.0->bleach[css]!=5.0.0->nbconvert->jupyter) (0.5.1)\n",
"Requirement already satisfied: tinycss2<1.5,>=1.1.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from bleach[css]!=5.0.0->nbconvert->jupyter) (1.4.0)\n",
"Requirement already satisfied: fastjsonschema>=2.15 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from nbformat>=5.3.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (2.21.2)\n",
"Requirement already satisfied: charset_normalizer<4,>=2 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from requests>=2.31->jupyterlab-server<3,>=2.28.0->jupyterlab->jupyter) (3.4.4)\n",
"Requirement already satisfied: urllib3<3,>=1.21.1 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from requests>=2.31->jupyterlab-server<3,>=2.28.0->jupyterlab->jupyter) (2.6.0)\n",
"Requirement already satisfied: lark>=1.2.2 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from rfc3987-syntax>=1.1.0->jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (1.3.1)\n",
"Requirement already satisfied: cffi>=1.0.1 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from argon2-cffi-bindings->argon2-cffi>=21.1->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (2.0.0)\n",
"Requirement already satisfied: pycparser in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from cffi>=1.0.1->argon2-cffi-bindings->argon2-cffi>=21.1->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (2.23)\n",
"Requirement already satisfied: soupsieve>=1.6.1 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from beautifulsoup4->nbconvert->jupyter) (2.8)\n",
"Requirement already satisfied: arrow>=0.15.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from isoduration->jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (1.4.0)\n",
"Requirement already satisfied: tzdata in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from arrow>=0.15.0->isoduration->jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (2025.2)\n",
"Note: you may need to restart the kernel to use updated packages.\n",
"Requirement already satisfied: ipywidgets in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (8.1.8)\n",
"Requirement already satisfied: comm>=0.1.3 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipywidgets) (0.2.3)\n",
"Requirement already satisfied: ipython>=6.1.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipywidgets) (9.8.0)\n",
"Requirement already satisfied: traitlets>=4.3.1 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipywidgets) (5.14.3)\n",
"Requirement already satisfied: widgetsnbextension~=4.0.14 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipywidgets) (4.0.15)\n",
"Requirement already satisfied: jupyterlab_widgets~=3.0.15 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipywidgets) (3.0.16)\n",
"Requirement already satisfied: colorama>=0.4.4 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (0.4.6)\n",
"Requirement already satisfied: decorator>=4.3.2 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (5.2.1)\n",
"Requirement already satisfied: ipython-pygments-lexers>=1.0.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (1.1.1)\n",
"Requirement already satisfied: jedi>=0.18.1 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (0.19.2)\n",
"Requirement already satisfied: matplotlib-inline>=0.1.5 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (0.2.1)\n",
"Requirement already satisfied: prompt_toolkit<3.1.0,>=3.0.41 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (3.0.52)\n",
"Requirement already satisfied: pygments>=2.11.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (2.19.2)\n",
"Requirement already satisfied: stack_data>=0.6.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (0.6.3)\n",
"Requirement already satisfied: typing_extensions>=4.6 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (4.15.0)\n",
"Requirement already satisfied: wcwidth in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from prompt_toolkit<3.1.0,>=3.0.41->ipython>=6.1.0->ipywidgets) (0.2.14)\n",
"Requirement already satisfied: parso<0.9.0,>=0.8.4 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jedi>=0.18.1->ipython>=6.1.0->ipywidgets) (0.8.5)\n",
"Requirement already satisfied: executing>=1.2.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from stack_data>=0.6.0->ipython>=6.1.0->ipywidgets) (2.2.1)\n",
"Requirement already satisfied: asttokens>=2.1.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from stack_data>=0.6.0->ipython>=6.1.0->ipywidgets) (3.0.1)\n",
"Requirement already satisfied: pure-eval in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from stack_data>=0.6.0->ipython>=6.1.0->ipywidgets) (0.2.3)\n",
"Note: you may need to restart the kernel to use updated packages.\n",
"Requirement already satisfied: ipykernel in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (7.1.0)\n",
"Requirement already satisfied: comm>=0.1.1 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipykernel) (0.2.3)\n",
"Requirement already satisfied: debugpy>=1.6.5 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipykernel) (1.8.17)\n",
"Requirement already satisfied: ipython>=7.23.1 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipykernel) (9.8.0)\n",
"Requirement already satisfied: jupyter-client>=8.0.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipykernel) (8.6.3)\n",
"Requirement already satisfied: jupyter-core!=5.0.*,>=4.12 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipykernel) (5.9.1)\n",
"Requirement already satisfied: matplotlib-inline>=0.1 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipykernel) (0.2.1)\n",
"Requirement already satisfied: nest-asyncio>=1.4 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipykernel) (1.6.0)\n",
"Requirement already satisfied: packaging>=22 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipykernel) (25.0)\n",
"Requirement already satisfied: psutil>=5.7 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipykernel) (7.1.3)\n",
"Requirement already satisfied: pyzmq>=25 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipykernel) (27.1.0)\n",
"Requirement already satisfied: tornado>=6.2 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipykernel) (6.5.2)\n",
"Requirement already satisfied: traitlets>=5.4.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipykernel) (5.14.3)\n",
"Requirement already satisfied: colorama>=0.4.4 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipython>=7.23.1->ipykernel) (0.4.6)\n",
"Requirement already satisfied: decorator>=4.3.2 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipython>=7.23.1->ipykernel) (5.2.1)\n",
"Requirement already satisfied: ipython-pygments-lexers>=1.0.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipython>=7.23.1->ipykernel) (1.1.1)\n",
"Requirement already satisfied: jedi>=0.18.1 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipython>=7.23.1->ipykernel) (0.19.2)\n",
"Requirement already satisfied: prompt_toolkit<3.1.0,>=3.0.41 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipython>=7.23.1->ipykernel) (3.0.52)\n",
"Requirement already satisfied: pygments>=2.11.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipython>=7.23.1->ipykernel) (2.19.2)\n",
"Requirement already satisfied: stack_data>=0.6.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipython>=7.23.1->ipykernel) (0.6.3)\n",
"Requirement already satisfied: typing_extensions>=4.6 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipython>=7.23.1->ipykernel) (4.15.0)\n",
"Requirement already satisfied: wcwidth in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from prompt_toolkit<3.1.0,>=3.0.41->ipython>=7.23.1->ipykernel) (0.2.14)\n",
"Requirement already satisfied: parso<0.9.0,>=0.8.4 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jedi>=0.18.1->ipython>=7.23.1->ipykernel) (0.8.5)\n",
"Requirement already satisfied: python-dateutil>=2.8.2 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyter-client>=8.0.0->ipykernel) (2.9.0.post0)\n",
"Requirement already satisfied: platformdirs>=2.5 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyter-core!=5.0.*,>=4.12->ipykernel) (4.5.1)\n",
"Requirement already satisfied: six>=1.5 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from python-dateutil>=2.8.2->jupyter-client>=8.0.0->ipykernel) (1.17.0)\n",
"Requirement already satisfied: executing>=1.2.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from stack_data>=0.6.0->ipython>=7.23.1->ipykernel) (2.2.1)\n",
"Requirement already satisfied: asttokens>=2.1.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from stack_data>=0.6.0->ipython>=7.23.1->ipykernel) (3.0.1)\n",
"Requirement already satisfied: pure-eval in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from stack_data>=0.6.0->ipython>=7.23.1->ipykernel) (0.2.3)\n",
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"%pip install --upgrade pip\n",
"%pip install --upgrade jupyter\n",
"%pip install --upgrade ipywidgets\n",
"%pip install --upgrade ipykernel"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "48724594",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Collecting pdf2image\n",
" Using cached pdf2image-1.17.0-py3-none-any.whl.metadata (6.2 kB)\n",
"Requirement already satisfied: pillow in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (12.0.0)\n",
"Using cached pdf2image-1.17.0-py3-none-any.whl (11 kB)\n",
"Installing collected packages: pdf2image\n",
"Successfully installed pdf2image-1.17.0\n",
"Note: you may need to restart the kernel to use updated packages.\n",
"Collecting PyMuPDF\n",
" Using cached pymupdf-1.26.6-cp310-abi3-win_amd64.whl.metadata (3.4 kB)\n",
"Using cached pymupdf-1.26.6-cp310-abi3-win_amd64.whl (18.4 MB)\n",
"Installing collected packages: PyMuPDF\n",
"Successfully installed PyMuPDF-1.26.6\n",
"Note: you may need to restart the kernel to use updated packages.\n",
"Requirement already satisfied: pandas in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (2.3.3)\n",
"Requirement already satisfied: numpy>=1.23.2 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from pandas) (2.3.5)\n",
"Requirement already satisfied: python-dateutil>=2.8.2 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from pandas) (2.9.0.post0)\n",
"Requirement already satisfied: pytz>=2020.1 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from pandas) (2025.2)\n",
"Requirement already satisfied: tzdata>=2022.7 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from pandas) (2025.2)\n",
"Requirement already satisfied: six>=1.5 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from python-dateutil>=2.8.2->pandas) (1.17.0)\n",
"Note: you may need to restart the kernel to use updated packages.\n",
"Requirement already satisfied: matplotlib in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (3.10.7)\n",
"Requirement already satisfied: contourpy>=1.0.1 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from matplotlib) (1.3.3)\n",
"Requirement already satisfied: cycler>=0.10 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from matplotlib) (0.12.1)\n",
"Requirement already satisfied: fonttools>=4.22.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from matplotlib) (4.61.0)\n",
"Requirement already satisfied: kiwisolver>=1.3.1 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from matplotlib) (1.4.9)\n",
"Requirement already satisfied: numpy>=1.23 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from matplotlib) (2.3.5)\n",
"Requirement already satisfied: packaging>=20.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from matplotlib) (25.0)\n",
"Requirement already satisfied: pillow>=8 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from matplotlib) (12.0.0)\n",
"Requirement already satisfied: pyparsing>=3 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from matplotlib) (3.2.5)\n",
"Requirement already satisfied: python-dateutil>=2.7 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from matplotlib) (2.9.0.post0)\n",
"Requirement already satisfied: six>=1.5 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from python-dateutil>=2.7->matplotlib) (1.17.0)\n",
"Note: you may need to restart the kernel to use updated packages.\n",
"Requirement already satisfied: seaborn in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (0.13.2)\n",
"Requirement already satisfied: numpy!=1.24.0,>=1.20 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from seaborn) (2.3.5)\n",
"Requirement already satisfied: pandas>=1.2 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from seaborn) (2.3.3)\n",
"Requirement already satisfied: matplotlib!=3.6.1,>=3.4 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from seaborn) (3.10.7)\n",
"Requirement already satisfied: contourpy>=1.0.1 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (1.3.3)\n",
"Requirement already satisfied: cycler>=0.10 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (0.12.1)\n",
"Requirement already satisfied: fonttools>=4.22.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (4.61.0)\n",
"Requirement already satisfied: kiwisolver>=1.3.1 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (1.4.9)\n",
"Requirement already satisfied: packaging>=20.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (25.0)\n",
"Requirement already satisfied: pillow>=8 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (12.0.0)\n",
"Requirement already satisfied: pyparsing>=3 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (3.2.5)\n",
"Requirement already satisfied: python-dateutil>=2.7 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (2.9.0.post0)\n",
"Requirement already satisfied: pytz>=2020.1 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from pandas>=1.2->seaborn) (2025.2)\n",
"Requirement already satisfied: tzdata>=2022.7 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from pandas>=1.2->seaborn) (2025.2)\n",
"Requirement already satisfied: six>=1.5 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from python-dateutil>=2.7->matplotlib!=3.6.1,>=3.4->seaborn) (1.17.0)\n",
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"# Install necessary packages\n",
"%pip install pdf2image pillow \n",
"# pdf reading\n",
"%pip install PyMuPDF\n",
"\n",
"# Data analysis and visualization\n",
"%pip install pandas\n",
"%pip install matplotlib\n",
"%pip install seaborn"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "e1f793b6",
"metadata": {},
"outputs": [],
"source": [
"import os, json\n",
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"from pdf2image import convert_from_path\n",
"from PIL import Image, ImageOps\n",
"import fitz # PyMuPDF\n",
"import re\n",
"from datetime import datetime\n",
"from typing import List\n",
"import shutil"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "1652a78e",
"metadata": {},
"outputs": [],
"source": [
"def pdf_to_images(pdf_path: str, output_dir: str, dpi: int = 300) -> None:\n",
"    \"\"\"\n",
"    Render every page of a PDF to a PNG file using PyMuPDF.\n",
"\n",
"    Pages are written to ``output_dir`` as ``page_0001.png``, ``page_0002.png``, ...\n",
"    (1-based page numbers, zero-padded to four digits). Nothing is returned.\n",
"\n",
"    Args:\n",
"        pdf_path: Path of the PDF to render.\n",
"        output_dir: Directory that receives the PNG files; created if missing.\n",
"        dpi: Target resolution; each page is scaled by ``dpi / 72`` on both axes.\n",
"\n",
"    Raises:\n",
"        RuntimeError: If PyMuPDF (``fitz``) is not available.\n",
"    \"\"\"\n",
"    if fitz is None:\n",
"        raise RuntimeError(\"Install PyMuPDF or pdf2image to convert PDFs.\")\n",
"\n",
"    # The zoom matrix is identical for every page, so build it once.\n",
"    zoom = dpi / 72.0\n",
"    mat = fitz.Matrix(zoom, zoom)\n",
"\n",
"    doc = fitz.open(pdf_path)\n",
"    try:\n",
"        os.makedirs(output_dir, exist_ok=True)\n",
"\n",
"        # PyMuPDF page indices are 0-based; file names are 1-based.\n",
"        for i in range(len(doc)):\n",
"            page = doc.load_page(i)\n",
"            pix = page.get_pixmap(matrix=mat, alpha=False)\n",
"            img = Image.frombytes(\"RGB\", [pix.width, pix.height], pix.samples)\n",
"            out_path = os.path.join(output_dir, f\"page_{i + 1:04d}.png\")\n",
"            img.save(out_path, \"PNG\")\n",
"    finally:\n",
"        # Release the document even if rendering or saving fails.\n",
"        doc.close()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "f523dd58",
"metadata": {},
"outputs": [],
"source": [
"import fitz\n",
"import re\n",
"import os\n",
"\n",
"def _pdf_extract_text_structured(page, margin_threshold=50):\n",
" \"\"\"\n",
" Extract text using PyMuPDF's dict mode which preserves\n",
" the actual line structure from the PDF.\n",
" \"\"\"\n",
" data = page.get_text(\"dict\")\n",
" \n",
" # Collect all lines with their Y position\n",
" all_lines = []\n",
" margin_text_parts = [] # Collect vertical/margin text\n",
" margin_y_positions = []\n",
" \n",
" for block in data.get(\"blocks\", []):\n",
" if block.get(\"type\") != 0: # Skip non-text blocks\n",
" continue\n",
" \n",
" block_bbox = block.get(\"bbox\", (0, 0, 0, 0))\n",
" block_width = block_bbox[2] - block_bbox[0]\n",
" block_height = block_bbox[3] - block_bbox[1]\n",
" \n",
" # Detect vertical/margin text\n",
" is_margin_text = (block_bbox[0] < margin_threshold or \n",
" block_height > block_width * 2)\n",
" \n",
" for line in block.get(\"lines\", []):\n",
" direction = line.get(\"dir\", (1, 0))\n",
" bbox = line.get(\"bbox\", (0, 0, 0, 0))\n",
" y_center = (bbox[1] + bbox[3]) / 2\n",
" x_start = bbox[0]\n",
" \n",
" # Collect text from all spans\n",
" line_text = \"\"\n",
" for span in line.get(\"spans\", []):\n",
" text = span.get(\"text\", \"\")\n",
" line_text += text\n",
" \n",
" line_text = line_text.strip()\n",
" line_text = re.sub(r\"[•▪◦●❖▶■\\uf000-\\uf0ff]\", \"\", line_text)\n",
" \n",
" if not line_text:\n",
" continue\n",
" \n",
" # Check if this is margin/vertical text\n",
" if is_margin_text or abs(direction[0]) < 0.9:\n",
" margin_text_parts.append((y_center, line_text))\n",
" margin_y_positions.append(y_center)\n",
" else:\n",
" all_lines.append((y_center, x_start, line_text))\n",
" \n",
" # Reconstruct margin text as single line at its vertical center\n",
" if margin_text_parts:\n",
" # Sort by Y position (top to bottom) and join\n",
" margin_text_parts.sort(key=lambda x: x[0])\n",
" full_margin_text = \" \".join(part[1] for part in margin_text_parts)\n",
" # Calculate vertical center of the watermark\n",
" avg_y = sum(margin_y_positions) / len(margin_y_positions)\n",
" # Add as a single line\n",
" all_lines.append((avg_y, -1, full_margin_text)) # x=-1 to sort first\n",
" \n",
" if not all_lines:\n",
" return \"\"\n",
" \n",
" # Sort by Y first, then by X\n",
" all_lines.sort(key=lambda x: (x[0], x[1]))\n",
" \n",
" # Group lines at same vertical position\n",
" merged_rows = []\n",
" current_row = [all_lines[0]]\n",
" current_y = all_lines[0][0]\n",
" \n",
" for y_center, x_start, text in all_lines[1:]:\n",
" if abs(y_center - current_y) <= 2:\n",
" current_row.append((y_center, x_start, text))\n",
" else:\n",
" current_row.sort(key=lambda x: x[1])\n",
" row_text = \" \".join(item[2] for item in current_row)\n",
" merged_rows.append((current_y, row_text))\n",
" current_row = [(y_center, x_start, text)]\n",
" current_y = y_center\n",
" \n",
" if current_row:\n",
" current_row.sort(key=lambda x: x[1])\n",
" row_text = \" \".join(item[2] for item in current_row)\n",
" merged_rows.append((current_y, row_text))\n",
" \n",
" # Sort rows by Y and extract text\n",
" merged_rows.sort(key=lambda x: x[0])\n",
" lines = [row[1] for row in merged_rows]\n",
" \n",
" # Join and clean up\n",
" text = \"\\n\".join(lines)\n",
" text = re.sub(r\" +\", \" \", text).strip()\n",
" text = re.sub(r\"\\n{3,}\", \"\\n\\n\", text).strip()\n",
" \n",
" return text\n",
"\n",
"def pdf_extract_text(pdf_path, output_dir, margin_threshold=50):\n",
"    \"\"\"\n",
"    Write the text of each PDF page to ``output_dir`` as ``page_NNNN.txt``.\n",
"\n",
"    Page numbers in file names are 1-based and zero-padded to four digits.\n",
"    Pages whose extracted text is empty produce no file.\n",
"\n",
"    Args:\n",
"        pdf_path: Path of the PDF to read.\n",
"        output_dir: Directory that receives the text files; created if missing.\n",
"        margin_threshold: Passed through to ``_pdf_extract_text_structured``.\n",
"    \"\"\"\n",
"    os.makedirs(output_dir, exist_ok=True)\n",
"    doc = fitz.open(pdf_path)\n",
"    try:\n",
"        for i, page in enumerate(doc):\n",
"            text = _pdf_extract_text_structured(page, margin_threshold)\n",
"            if not text.strip():\n",
"                continue\n",
"            out_path = os.path.join(output_dir, f\"page_{i + 1:04d}.txt\")\n",
"            with open(out_path, \"w\", encoding=\"utf-8\") as f:\n",
"                f.write(text)\n",
"    finally:\n",
"        # Release the document even if extraction or writing fails.\n",
"        doc.close()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "9f64a8c0",
"metadata": {},
"outputs": [],
"source": [
"PDF_FOLDER = './../instructions' # Folder containing PDF files\n",
"OUTPUT_FOLDER = './dataset'\n",
"\n",
"os.makedirs(OUTPUT_FOLDER, exist_ok=True)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "41e4651d",
"metadata": {},
"outputs": [],
"source": [
"# Sorted so that, for an unchanged set of files, each PDF gets the same folder number on every run.\n",
"pdf_files = sorted(\n",
"    fname for fname in os.listdir(PDF_FOLDER)\n",
"    if fname.lower().endswith(\".pdf\")\n",
")\n",
"\n",
"# Layout per document: <OUTPUT_FOLDER>/<index>/{img,txt} plus a copy of the PDF.\n",
"for doc_index, fname in enumerate(pdf_files):\n",
"    doc_dir = os.path.join(OUTPUT_FOLDER, str(doc_index))\n",
"    out_img_path = os.path.join(doc_dir, \"img\")\n",
"    out_txt_path = os.path.join(doc_dir, \"txt\")\n",
"\n",
"    os.makedirs(out_img_path, exist_ok=True)\n",
"    os.makedirs(out_txt_path, exist_ok=True)\n",
"\n",
"    # Copy the PDF into its numbered folder; both conversions read the copy.\n",
"    pdf_path = os.path.join(doc_dir, fname)\n",
"    shutil.copy(os.path.join(PDF_FOLDER, fname), pdf_path)\n",
"\n",
"    # convert PDF → images (one PNG per page)\n",
"    pdf_to_images(\n",
"        pdf_path=pdf_path,\n",
"        output_dir=out_img_path,\n",
"        dpi=300\n",
"    )\n",
"    # extract text (one .txt per non-empty page)\n",
"    pdf_extract_text(\n",
"        pdf_path=pdf_path,\n",
"        output_dir=out_txt_path,\n",
"        margin_threshold=40\n",
"    )"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv (3.11.9)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,65 @@
CER,WER,TIME,PAGES,TIME_PER_PAGE,timestamp,checkpoint_dir_name,done,training_iteration,trial_id,date,time_this_iter_s,time_total_s,pid,hostname,node_ip,time_since_restore,iterations_since_restore,config/use_doc_orientation_classify,config/use_doc_unwarping,config/textline_orientation,config/text_det_thresh,config/text_det_box_thresh,config/text_det_unclip_ratio,config/text_rec_score_thresh,logdir
0.013515850203159258,0.1050034776034098,353.85077571868896,5,70.66230463981628,1765120215,,False,1,d5238c33,2025-12-07_16-10-15,374.27777338027954,374.27777338027954,19452,LAPTOP-2OQK6GT5,127.0.0.1,374.27777338027954,1,True,False,True,0.08878208965533294,0.623029468177504,0.0,0.22994386685874743,d5238c33
0.03905195479212187,0.13208645252197226,354.61478638648987,5,70.82208666801452,1765120220,,False,1,ea8a2f7a,2025-12-07_16-10-20,374.2999520301819,374.2999520301819,7472,LAPTOP-2OQK6GT5,127.0.0.1,374.2999520301819,1,False,False,False,0.39320080607112917,0.6712014538998344,0.0,0.16880221913810864,ea8a2f7a
0.06606238373546518,0.16619192810354325,359.09717535972595,5,71.72569246292115,1765120601,,False,1,ebb12e5b,2025-12-07_16-16-41,379.5437698364258,379.5437698364258,21480,LAPTOP-2OQK6GT5,127.0.0.1,379.5437698364258,1,True,True,True,0.4328784710891528,0.23572507118228522,0.0,0.18443532434104057,ebb12e5b
0.41810946199338,0.5037103242611287,336.6613118648529,5,67.22685413360595,1765120583,,False,1,b3775034,2025-12-07_16-16-23,356.52618169784546,356.52618169784546,23084,LAPTOP-2OQK6GT5,127.0.0.1,356.52618169784546,1,True,True,False,0.06412882230680782,0.3377439247010605,0.0,0.5764053439963283,b3775034
0.1972515944870667,0.2953531713611584,350.1465151309967,5,69.93639450073242,1765120959,,False,1,bf10d370,2025-12-07_16-22-39,370.90337228775024,370.90337228775024,26140,LAPTOP-2OQK6GT5,127.0.0.1,370.90337228775024,1,True,True,True,0.6719551054359146,0.6902317374774642,0.0,0.3964896632708511,bf10d370
0.3864103728596727,0.45583610828383464,320.96620512008667,5,64.09520988464355,1765120947,,False,1,111e5a9e,2025-12-07_16-22-27,341.0712642669678,341.0712642669678,20664,LAPTOP-2OQK6GT5,127.0.0.1,341.0712642669678,1,True,False,False,0.04481600265034593,0.4832664381621284,0.0,0.5464155154391461,111e5a9e
0.5160689446919982,0.5945298276300801,326.65670347213745,5,65.2350733757019,1765121300,,False,1,415d7ba1,2025-12-07_16-28-20,347.29887080192566,347.29887080192566,23848,LAPTOP-2OQK6GT5,127.0.0.1,347.29887080192566,1,True,True,True,0.01699705273201909,0.5233849789194689,0.0,0.20833106578160068,415d7ba1
0.5025130639131208,0.5677161936883898,326.9156484603882,5,65.28343558311462,1765121310,,False,1,a58d8109,2025-12-07_16-28-30,346.09022212028503,346.09022212028503,25248,LAPTOP-2OQK6GT5,127.0.0.1,346.09022212028503,1,False,True,True,0.04024319071476844,0.6705892008057031,0.0,0.1885847677314521,a58d8109
0.07092029393242118,0.17390976502682037,368.5711796283722,5,73.62503981590271,1765121692,,False,1,33bdf2a9,2025-12-07_16-34-52,388.150607585907,388.150607585907,24024,LAPTOP-2OQK6GT5,127.0.0.1,388.150607585907,1,False,True,False,0.4347371576992484,0.490009080993297,0.0,0.1519055407457635,33bdf2a9
0.1168252568583151,0.22212978798067146,364.6228621006012,5,72.82479510307311,1765121699,,False,1,d9df79f3,2025-12-07_16-34-59,384.67676973342896,384.67676973342896,5368,LAPTOP-2OQK6GT5,127.0.0.1,384.67676973342896,1,True,True,False,0.17806350429159667,0.6261942434824851,0.0,0.38547742746319813,d9df79f3
0.06459478599489028,0.16493742503085831,366.6067085266113,5,73.22199411392212,1765122086,,False,1,80ea65f2,2025-12-07_16-41-26,387.6792531013489,387.6792531013489,14064,LAPTOP-2OQK6GT5,127.0.0.1,387.6792531013489,1,True,True,False,0.6011116675422127,0.25138233186284487,0.0,0.31312371671514233,80ea65f2
0.01340057642794312,0.10741926673961485,359.5969452857971,5,71.80434017181396,1765122084,,False,1,2e978bfa,2025-12-07_16-41-24,380.28105759620667,380.28105759620667,11060,LAPTOP-2OQK6GT5,127.0.0.1,380.28105759620667,1,False,False,True,0.23485911670668447,0.07773192307960775,0.0,0.023694797982285992,2e978bfa
0.01340057642794312,0.10741926673961485,347.92934703826904,5,69.49003491401672,1765122459,,False,1,8518cc40,2025-12-07_16-47-39,368.54625153541565,368.54625153541565,21016,LAPTOP-2OQK6GT5,127.0.0.1,368.54625153541565,1,False,False,True,0.2225556801158737,0.00024186765038358704,0.0,0.0028910785387807336,8518cc40
0.01340057642794312,0.10741926673961485,347.14498376846313,5,69.324178647995,1765122461,,False,1,2c691aaa,2025-12-07_16-47-41,366.3459825515747,366.3459825515747,21540,LAPTOP-2OQK6GT5,127.0.0.1,366.3459825515747,1,False,False,True,0.22472742766369874,0.030333356491349384,0.0,0.05099688981312009,2c691aaa
0.013040374955575204,0.10485434443992256,347.22006940841675,5,69.34554209709168,1765122832,,False,1,31e60691,2025-12-07_16-53-52,368.0382122993469,368.0382122993469,17532,LAPTOP-2OQK6GT5,127.0.0.1,368.0382122993469,1,False,False,True,0.25914070057597594,0.0019604082489898533,0.0,0.0035094431353713818,31e60691
0.012582941415352794,0.10327954129031627,349.2319846153259,5,69.74626359939575,1765122837,,False,1,d4d288c6,2025-12-07_16-53-57,368.903502702713,368.903502702713,22216,LAPTOP-2OQK6GT5,127.0.0.1,368.903502702713,1,False,False,True,0.2734075225731028,0.0033989235904911125,0.0,0.015420451500634869,d4d288c6
0.012582941415352794,0.10327954129031627,346.6979134082794,5,69.24065437316895,1765123205,,False,1,7645b77c,2025-12-07_17-00-05,367.4564206600189,367.4564206600189,2272,LAPTOP-2OQK6GT5,127.0.0.1,367.4564206600189,1,False,False,True,0.279241869770728,0.1138413707810162,0.0,0.07531508117874008,7645b77c
0.012407575745987933,0.10201566081383735,346.5196530818939,5,69.19977960586547,1765123208,,False,1,3256ae36,2025-12-07_17-00-08,366.00227642059326,366.00227642059326,6604,LAPTOP-2OQK6GT5,127.0.0.1,366.00227642059326,1,False,False,True,0.30993017979826853,0.1292131176570399,0.0,0.11201957956206357,3256ae36
0.012407575745987933,0.10201566081383735,344.0291979312897,5,68.71350336074829,1765123575,,False,1,b0dda58b,2025-12-07_17-06-15,364.82790350914,364.82790350914,9732,LAPTOP-2OQK6GT5,127.0.0.1,364.82790350914,1,False,False,True,0.3149521989502957,0.11783753596277924,0.0,0.6825729339913746,b0dda58b
0.012429753445092291,0.10205118268939237,346.11818265914917,5,69.12530856132507,1765123581,,False,1,e9d40333,2025-12-07_17-06-21,365.62638425827026,365.62638425827026,23416,LAPTOP-2OQK6GT5,127.0.0.1,365.62638425827026,1,False,False,True,0.5302520310849914,0.1569390945373281,0.0,0.10019443545563994,e9d40333
0.011990675508758594,0.10047637953978608,346.5398359298706,5,69.2183114528656,1765123948,,False,1,aa89fe7a,2025-12-07_17-12-28,366.7530257701874,366.7530257701874,16200,LAPTOP-2OQK6GT5,127.0.0.1,366.7530257701874,1,False,False,True,0.5039700850900125,0.16208277029791282,0.0,0.6765386284546205,aa89fe7a
0.011968497809654236,0.10044085766423105,345.97880601882935,5,69.09321279525757,1765123951,,False,1,92c48d07,2025-12-07_17-12-31,365.0942301750183,365.0942301750183,15432,LAPTOP-2OQK6GT5,127.0.0.1,365.0942301750183,1,False,False,True,0.33321916406589397,0.1864428656555301,0.0,0.6775297319325386,92c48d07
0.011968497809654236,0.10044085766423105,344.1725525856018,5,68.74226913452148,1765124318,,False,1,187790d7,2025-12-07_17-18-38,364.47401189804077,364.47401189804077,24676,LAPTOP-2OQK6GT5,127.0.0.1,364.47401189804077,1,False,False,True,0.3372505528404193,0.2352515935896671,0.0,0.6987321324340134,187790d7
0.011760127958326316,0.09964993325879434,345.9427492618561,5,69.08389501571655,1765124322,,False,1,442a2439,2025-12-07_17-18-42,364.755074262619,364.755074262619,7892,LAPTOP-2OQK6GT5,127.0.0.1,364.755074262619,1,False,False,True,0.5098036701758629,0.2122757290966333,0.0,0.6992468303721803,442a2439
0.011968497809654236,0.10044085766423105,345.40264558792114,5,68.98561010360717,1765124689,,False,1,70862adc,2025-12-07_17-24-49,365.9752175807953,365.9752175807953,15412,LAPTOP-2OQK6GT5,127.0.0.1,365.9752175807953,1,False,False,True,0.3963969237347287,0.2163058925653838,0.0,0.6859176720785957,70862adc
0.012407575745987933,0.10201566081383735,345.8808228969574,5,69.07736506462098,1765124693,,False,1,e6821f34,2025-12-07_17-24-53,365.25493717193604,365.25493717193604,26088,LAPTOP-2OQK6GT5,127.0.0.1,365.25493717193604,1,False,False,True,0.3668982772069688,0.2407751620351906,0.0,0.5737620270733486,e6821f34
0.012199205894660016,0.10122473640840064,347.05629682540894,5,69.31870231628417,1765125062,,False,1,8b680875,2025-12-07_17-31-02,367.2029130458832,367.2029130458832,1720,LAPTOP-2OQK6GT5,127.0.0.1,367.2029130458832,1,False,False,True,0.5312495877753942,0.3193426688929859,0.0,0.591252589724218,8b680875
0.012429753445092291,0.10205118268939237,349.60691928863525,5,69.8253363609314,1765125068,,False,1,fc54867b,2025-12-07_17-31-08,368.73608803749084,368.73608803749084,4888,LAPTOP-2OQK6GT5,127.0.0.1,368.73608803749084,1,False,False,True,0.5034080657304706,0.3042864908472832,0.0,0.5024906014323391,fc54867b
0.013385453418768206,0.10927323740570172,343.8553657531738,5,68.67559289932251,1765125432,,False,1,c32d0d5e,2025-12-07_17-37-12,364.42339730262756,364.42339730262756,25808,LAPTOP-2OQK6GT5,127.0.0.1,364.42339730262756,1,False,False,True,0.15300672154002157,0.39848899797721926,0.0,0.5167681121564286,c32d0d5e
0.013537204772521452,0.10852488053708713,344.60119009017944,5,68.81447420120239,1765125436,,False,1,4762fbbb,2025-12-07_17-37-16,363.3258783817291,363.3258783817291,20760,LAPTOP-2OQK6GT5,127.0.0.1,363.3258783817291,1,False,False,True,0.13342603167575784,0.4010104919178914,0.0,0.618812411626611,4762fbbb
0.011763789518968464,0.09968897796498292,344.03784108161926,5,68.71829047203065,1765125803,,False,1,522ac97c,2025-12-07_17-43-23,364.7200028896332,364.7200028896332,2372,LAPTOP-2OQK6GT5,127.0.0.1,364.7200028896332,1,False,False,True,0.4489762005319642,0.402754966715804,0.0,0.6426372526242771,522ac97c
0.011650346524073398,0.09890157639017978,343.51321721076965,5,68.60030875205993,1765125805,,False,1,5784f433,2025-12-07_17-43-25,362.93026328086853,362.93026328086853,22900,LAPTOP-2OQK6GT5,127.0.0.1,362.93026328086853,1,False,False,True,0.46204975067512033,0.192768833446102,0.0,0.6328281433384326,5784f433
0.011650346524073398,0.09890157639017978,343.80972242355347,5,68.66908102035522,1765126172,,False,1,83af0528,2025-12-07_17-49-32,364.5850279331207,364.5850279331207,9832,LAPTOP-2OQK6GT5,127.0.0.1,364.5850279331207,1,False,False,True,0.4663139585990712,0.1845869678485352,0.0,0.6299207399141384,83af0528
0.011650346524073398,0.09890157639017978,344.11421155929565,5,68.72400512695313,1765126177,,False,1,12cbaa22,2025-12-07_17-49-37,364.24684858322144,364.24684858322144,5968,LAPTOP-2OQK6GT5,127.0.0.1,364.24684858322144,1,False,False,True,0.47277853181431145,0.40562176755388546,0.0,0.6314990057451438,12cbaa22
0.011763789518968464,0.09968897796498292,348.5801889896393,5,69.61860737800598,1765126547,,False,1,a3a87765,2025-12-07_17-55-47,369.27432322502136,369.27432322502136,24372,LAPTOP-2OQK6GT5,127.0.0.1,369.27432322502136,1,False,False,True,0.45010042945259804,0.2855696990924951,0.0,0.6351522397620386,a3a87765
0.0441989903761154,0.13204740781578367,347.0340585708618,5,69.31097078323364,1765126548,,False,1,cf2bad0c,2025-12-07_17-55-48,366.1882207393646,366.1882207393646,3272,LAPTOP-2OQK6GT5,127.0.0.1,366.1882207393646,1,False,False,False,0.5890116605741096,0.283660909026841,0.0,0.4602911956047037,cf2bad0c
0.0441989903761154,0.13204740781578367,343.53946828842163,5,68.61563892364502,1765126916,,False,1,9a9b91e7,2025-12-07_18-01-56,364.0171241760254,364.0171241760254,2272,LAPTOP-2OQK6GT5,127.0.0.1,364.0171241760254,1,False,False,False,0.6089594786916612,0.3646091181984181,0.0,0.46522499154449626,9a9b91e7
0.012199205894660016,0.10122473640840064,345.76200914382935,5,69.05782113075256,1765126922,,False,1,e326d901,2025-12-07_18-02-02,365.42848086357117,365.42848086357117,24932,LAPTOP-2OQK6GT5,127.0.0.1,365.42848086357117,1,False,False,True,0.5932289185132622,0.37353729921136775,0.0,0.46368845919414936,e326d901
0.011990281344944778,0.09910429396546264,344.40758872032166,5,68.7896653175354,1765127287,,False,1,ccb3f19a,2025-12-07_18-08-07,365.1469933986664,365.1469933986664,1104,LAPTOP-2OQK6GT5,127.0.0.1,365.1469933986664,1,True,False,True,0.6866411603181266,0.4537774266698106,0.0,0.3059281770286948,ccb3f19a
0.012186205997500013,0.1012282592390342,343.9386422634125,5,68.69270787239074,1765127290,,False,1,8c12c55f,2025-12-07_18-08-10,363.29733777046204,363.29733777046204,19700,LAPTOP-2OQK6GT5,127.0.0.1,363.29733777046204,1,True,False,True,0.6710404650258701,0.44441637238072235,0.0,0.2641320116724262,8c12c55f
0.0662709141213666,0.16851508812176408,359.4665718078613,5,71.7971097946167,1765127672,,False,1,5a62d5b6,2025-12-07_18-14-32,380.3328058719635,380.3328058719635,26528,LAPTOP-2OQK6GT5,127.0.0.1,380.3328058719635,1,True,True,True,0.40414134317929745,0.2010474655405967,0.0,0.59925716647257,5a62d5b6
0.07070075496425433,0.17390976502682037,356.3221182823181,5,71.16437225341797,1765127673,,False,1,bb4495b7,2025-12-07_18-14-33,375.9771683216095,375.9771683216095,21772,LAPTOP-2OQK6GT5,127.0.0.1,375.9771683216095,1,False,True,False,0.39073713326110354,0.5764393142467112,0.0,0.5413963334094041,bb4495b7
0.01153507274885726,0.09890157639017978,344.71807885169983,5,68.8583309173584,1765128044,,False,1,9d90711d,2025-12-07_18-20-44,365.7700536251068,365.7700536251068,17592,LAPTOP-2OQK6GT5,127.0.0.1,365.7700536251068,1,False,False,True,0.46895437796002276,0.5411583003121286,0.0,0.6350154738477746,9d90711d
0.01153507274885726,0.09890157639017978,343.69704604148865,5,68.64236354827881,1765128046,,False,1,daaec3f8,2025-12-07_18-20-46,363.0186264514923,363.0186264514923,21292,LAPTOP-2OQK6GT5,127.0.0.1,363.0186264514923,1,False,False,True,0.4743507729816579,0.5213407674549528,0.0,0.6445669851749475,daaec3f8
0.01153507274885726,0.09890157639017978,343.6039113998413,5,68.62933912277222,1765128413,,False,1,51fb5915,2025-12-07_18-26-53,364.0196588039398,364.0196588039398,21772,LAPTOP-2OQK6GT5,127.0.0.1,364.0196588039398,1,False,False,True,0.48541186574386475,0.5810500215434935,0.0,0.6463595394763801,51fb5915
0.01164485418311018,0.09964993325879434,344.2613036632538,5,68.75940155982971,1765128417,,False,1,18966a33,2025-12-07_18-26-57,363.3374502658844,363.3374502658844,16900,LAPTOP-2OQK6GT5,127.0.0.1,363.3374502658844,1,False,False,True,0.5501591363807381,0.5132901504443755,0.0,0.6489815927562321,18966a33
0.012314479669876154,0.10205118268939237,345.49542331695557,5,69.01211080551147,1765128785,,False,1,b67080f9,2025-12-07_18-33-05,366.01860308647156,366.01860308647156,20948,LAPTOP-2OQK6GT5,127.0.0.1,366.01860308647156,1,False,False,True,0.5534122098827526,0.5760738874546728,0.0,0.5609719434431071,b67080f9
0.07209115365923097,0.17918874278969218,351.96662616729736,5,70.29538555145264,1765128795,,False,1,2533f368,2025-12-07_18-33-15,371.205295085907,371.205295085907,11208,LAPTOP-2OQK6GT5,127.0.0.1,371.205295085907,1,False,True,True,0.5572268058153711,0.5246075332847907,0.0,0.558307419246103,2533f368
0.06479949428557605,0.16493742503085831,357.1695992946625,5,71.33717932701111,1765129169,,False,1,451d018d,2025-12-07_18-39-29,378.8273491859436,378.8273491859436,3616,LAPTOP-2OQK6GT5,127.0.0.1,378.8273491859436,1,False,True,False,0.6340187369543626,0.5494644274379972,0.0,0.6521052525663952,451d018d
0.04429208645222718,0.13283833222122038,349.41683983802795,5,69.77591800689697,1765129169,,False,1,2256e752,2025-12-07_18-39-29,369.8801362514496,369.8801362514496,25468,LAPTOP-2OQK6GT5,127.0.0.1,369.8801362514496,1,True,False,False,0.6478037819045206,0.6228629446714814,0.0,0.6546094515631737,2256e752
0.012292301970771797,0.10201566081383735,346.071848154068,5,69.12432713508606,1765129542,,False,1,0a892729,2025-12-07_18-45-42,367.237042427063,367.237042427063,26212,LAPTOP-2OQK6GT5,127.0.0.1,367.237042427063,1,False,False,True,0.42173310551322135,0.542928875009614,0.0,0.601586841052583,0a892729
0.012292301970771797,0.10201566081383735,346.42522287368774,5,69.19188222885131,1765129545,,False,1,495075f5,2025-12-07_18-45-45,365.53574872016907,365.53574872016907,23604,LAPTOP-2OQK6GT5,127.0.0.1,365.53574872016907,1,False,False,True,0.4186754897467695,0.6318747444402091,0.0,0.5956181518703515,495075f5
0.011974150685190959,0.10047637953978608,346.9409854412079,5,69.29810705184937,1765129915,,False,1,54c45552,2025-12-07_18-51-55,367.9469211101532,367.9469211101532,25352,LAPTOP-2OQK6GT5,127.0.0.1,367.9469211101532,1,False,False,True,0.46382270850905233,0.6196868829200468,0.0,0.6126115785559785,54c45552
0.011974150685190959,0.10047637953978608,346.4141414165497,5,69.18586716651916,1765129917,,False,1,6b2e9b93,2025-12-07_18-51-57,365.9887709617615,365.9887709617615,25400,LAPTOP-2OQK6GT5,127.0.0.1,365.9887709617615,1,False,False,True,0.4751854264500806,0.48925010555288895,0.0,0.515482483148412,6b2e9b93
0.01153507274885726,0.09890157639017978,346.25940680503845,5,69.15517511367798,1765130288,,False,1,e9a6b81f,2025-12-07_18-58-08,367.33222007751465,367.33222007751465,4036,LAPTOP-2OQK6GT5,127.0.0.1,367.33222007751465,1,False,False,True,0.4879296810791008,0.4925520261481197,0.0,0.6483489622744677,e9a6b81f
0.01153507274885726,0.09890157639017978,345.8425042629242,5,69.06782102584839,1765130290,,False,1,076c5450,2025-12-07_18-58-10,365.1877450942993,365.1877450942993,4832,LAPTOP-2OQK6GT5,127.0.0.1,365.1877450942993,1,False,False,True,0.48842171509426413,0.5881329256041945,0.0,0.6569193185887352,076c5450
0.011875401733542455,0.10047637953978608,350.2443346977234,5,69.94839100837707,1765130664,,False,1,4a42a3ea,2025-12-07_19-04-24,370.9968421459198,370.9968421459198,14912,LAPTOP-2OQK6GT5,127.0.0.1,370.9968421459198,1,False,False,True,0.5590357657789103,0.5940413385819063,0.0,0.6573225721220606,4a42a3ea
0.012080110024228227,0.10047637953978608,351.5000901222229,5,70.19009194374084,1765130669,,False,1,041795f1,2025-12-07_19-04-29,370.946097612381,370.946097612381,22372,LAPTOP-2OQK6GT5,127.0.0.1,370.946097612381,1,False,False,True,0.5650092236486315,0.6617440972899422,0.0,0.6629504776006702,041795f1
0.012314479669876154,0.10205118268939237,343.53907656669617,5,68.6134319782257,1765131035,,False,1,8abb3f37,2025-12-07_19-10-35,364.67463064193726,364.67463064193726,22012,LAPTOP-2OQK6GT5,127.0.0.1,364.67463064193726,1,False,False,True,0.48982107744168,0.4636820835063238,0.0,0.39458266779240964,8abb3f37
0.012314479669876154,0.10205118268939237,345.5919795036316,5,69.02381987571717,1765131040,,False,1,f2cb682e,2025-12-07_19-10-40,364.90754437446594,364.90754437446594,5752,LAPTOP-2OQK6GT5,127.0.0.1,364.90754437446594,1,True,False,True,0.4917954659583112,0.45224829356708557,0.0,0.42597097228928366,f2cb682e
0.012314479669876154,0.10205118268939237,349.50936698913574,5,69.80772981643676,1765131411,,False,1,463fe5e7,2025-12-07_19-16-51,370.56375885009766,370.56375885009766,16524,LAPTOP-2OQK6GT5,127.0.0.1,370.56375885009766,1,True,False,True,0.5373435635563055,0.5202382560972127,0.0,0.5340573143597149,463fe5e7
0.012083932119443879,0.10122473640840064,350.1439118385315,5,69.92809920310974,1765131415,,False,1,88bbe87d,2025-12-07_19-16-55,369.54999685287476,369.54999685287476,15084,LAPTOP-2OQK6GT5,127.0.0.1,369.54999685287476,1,False,False,True,0.5274586910866753,0.5110782288617315,0.0,0.5368958272648865,88bbe87d
0.011875401733542455,0.10047637953978608,355.52406072616577,5,71.00808920860291,1765131794,,False,1,33ea1cc6,2025-12-07_19-23-14,376.746440410614,376.746440410614,17380,LAPTOP-2OQK6GT5,127.0.0.1,376.746440410614,1,False,False,True,0.5229924883346121,0.5158065672775711,0.0,0.6679657240993034,33ea1cc6
0.011853224034438097,0.10044085766423105,355.67893862724304,5,71.0243070602417,1765131797,,False,1,1243723e,2025-12-07_19-23-17,375.44413685798645,375.44413685798645,11232,LAPTOP-2OQK6GT5,127.0.0.1,375.44413685798645,1,False,False,True,0.3726772055073363,0.5573152713604742,0.0,0.6766134238094554,1243723e
1 CER WER TIME PAGES TIME_PER_PAGE timestamp checkpoint_dir_name done training_iteration trial_id date time_this_iter_s time_total_s pid hostname node_ip time_since_restore iterations_since_restore config/use_doc_orientation_classify config/use_doc_unwarping config/textline_orientation config/text_det_thresh config/text_det_box_thresh config/text_det_unclip_ratio config/text_rec_score_thresh logdir
2 0.013515850203159258 0.1050034776034098 353.85077571868896 5 70.66230463981628 1765120215 False 1 d5238c33 2025-12-07_16-10-15 374.27777338027954 374.27777338027954 19452 LAPTOP-2OQK6GT5 127.0.0.1 374.27777338027954 1 True False True 0.08878208965533294 0.623029468177504 0.0 0.22994386685874743 d5238c33
3 0.03905195479212187 0.13208645252197226 354.61478638648987 5 70.82208666801452 1765120220 False 1 ea8a2f7a 2025-12-07_16-10-20 374.2999520301819 374.2999520301819 7472 LAPTOP-2OQK6GT5 127.0.0.1 374.2999520301819 1 False False False 0.39320080607112917 0.6712014538998344 0.0 0.16880221913810864 ea8a2f7a
4 0.06606238373546518 0.16619192810354325 359.09717535972595 5 71.72569246292115 1765120601 False 1 ebb12e5b 2025-12-07_16-16-41 379.5437698364258 379.5437698364258 21480 LAPTOP-2OQK6GT5 127.0.0.1 379.5437698364258 1 True True True 0.4328784710891528 0.23572507118228522 0.0 0.18443532434104057 ebb12e5b
5 0.41810946199338 0.5037103242611287 336.6613118648529 5 67.22685413360595 1765120583 False 1 b3775034 2025-12-07_16-16-23 356.52618169784546 356.52618169784546 23084 LAPTOP-2OQK6GT5 127.0.0.1 356.52618169784546 1 True True False 0.06412882230680782 0.3377439247010605 0.0 0.5764053439963283 b3775034
6 0.1972515944870667 0.2953531713611584 350.1465151309967 5 69.93639450073242 1765120959 False 1 bf10d370 2025-12-07_16-22-39 370.90337228775024 370.90337228775024 26140 LAPTOP-2OQK6GT5 127.0.0.1 370.90337228775024 1 True True True 0.6719551054359146 0.6902317374774642 0.0 0.3964896632708511 bf10d370
7 0.3864103728596727 0.45583610828383464 320.96620512008667 5 64.09520988464355 1765120947 False 1 111e5a9e 2025-12-07_16-22-27 341.0712642669678 341.0712642669678 20664 LAPTOP-2OQK6GT5 127.0.0.1 341.0712642669678 1 True False False 0.04481600265034593 0.4832664381621284 0.0 0.5464155154391461 111e5a9e
8 0.5160689446919982 0.5945298276300801 326.65670347213745 5 65.2350733757019 1765121300 False 1 415d7ba1 2025-12-07_16-28-20 347.29887080192566 347.29887080192566 23848 LAPTOP-2OQK6GT5 127.0.0.1 347.29887080192566 1 True True True 0.01699705273201909 0.5233849789194689 0.0 0.20833106578160068 415d7ba1
9 0.5025130639131208 0.5677161936883898 326.9156484603882 5 65.28343558311462 1765121310 False 1 a58d8109 2025-12-07_16-28-30 346.09022212028503 346.09022212028503 25248 LAPTOP-2OQK6GT5 127.0.0.1 346.09022212028503 1 False True True 0.04024319071476844 0.6705892008057031 0.0 0.1885847677314521 a58d8109
10 0.07092029393242118 0.17390976502682037 368.5711796283722 5 73.62503981590271 1765121692 False 1 33bdf2a9 2025-12-07_16-34-52 388.150607585907 388.150607585907 24024 LAPTOP-2OQK6GT5 127.0.0.1 388.150607585907 1 False True False 0.4347371576992484 0.490009080993297 0.0 0.1519055407457635 33bdf2a9
11 0.1168252568583151 0.22212978798067146 364.6228621006012 5 72.82479510307311 1765121699 False 1 d9df79f3 2025-12-07_16-34-59 384.67676973342896 384.67676973342896 5368 LAPTOP-2OQK6GT5 127.0.0.1 384.67676973342896 1 True True False 0.17806350429159667 0.6261942434824851 0.0 0.38547742746319813 d9df79f3
12 0.06459478599489028 0.16493742503085831 366.6067085266113 5 73.22199411392212 1765122086 False 1 80ea65f2 2025-12-07_16-41-26 387.6792531013489 387.6792531013489 14064 LAPTOP-2OQK6GT5 127.0.0.1 387.6792531013489 1 True True False 0.6011116675422127 0.25138233186284487 0.0 0.31312371671514233 80ea65f2
13 0.01340057642794312 0.10741926673961485 359.5969452857971 5 71.80434017181396 1765122084 False 1 2e978bfa 2025-12-07_16-41-24 380.28105759620667 380.28105759620667 11060 LAPTOP-2OQK6GT5 127.0.0.1 380.28105759620667 1 False False True 0.23485911670668447 0.07773192307960775 0.0 0.023694797982285992 2e978bfa
14 0.01340057642794312 0.10741926673961485 347.92934703826904 5 69.49003491401672 1765122459 False 1 8518cc40 2025-12-07_16-47-39 368.54625153541565 368.54625153541565 21016 LAPTOP-2OQK6GT5 127.0.0.1 368.54625153541565 1 False False True 0.2225556801158737 0.00024186765038358704 0.0 0.0028910785387807336 8518cc40
15 0.01340057642794312 0.10741926673961485 347.14498376846313 5 69.324178647995 1765122461 False 1 2c691aaa 2025-12-07_16-47-41 366.3459825515747 366.3459825515747 21540 LAPTOP-2OQK6GT5 127.0.0.1 366.3459825515747 1 False False True 0.22472742766369874 0.030333356491349384 0.0 0.05099688981312009 2c691aaa
16 0.013040374955575204 0.10485434443992256 347.22006940841675 5 69.34554209709168 1765122832 False 1 31e60691 2025-12-07_16-53-52 368.0382122993469 368.0382122993469 17532 LAPTOP-2OQK6GT5 127.0.0.1 368.0382122993469 1 False False True 0.25914070057597594 0.0019604082489898533 0.0 0.0035094431353713818 31e60691
17 0.012582941415352794 0.10327954129031627 349.2319846153259 5 69.74626359939575 1765122837 False 1 d4d288c6 2025-12-07_16-53-57 368.903502702713 368.903502702713 22216 LAPTOP-2OQK6GT5 127.0.0.1 368.903502702713 1 False False True 0.2734075225731028 0.0033989235904911125 0.0 0.015420451500634869 d4d288c6
18 0.012582941415352794 0.10327954129031627 346.6979134082794 5 69.24065437316895 1765123205 False 1 7645b77c 2025-12-07_17-00-05 367.4564206600189 367.4564206600189 2272 LAPTOP-2OQK6GT5 127.0.0.1 367.4564206600189 1 False False True 0.279241869770728 0.1138413707810162 0.0 0.07531508117874008 7645b77c
19 0.012407575745987933 0.10201566081383735 346.5196530818939 5 69.19977960586547 1765123208 False 1 3256ae36 2025-12-07_17-00-08 366.00227642059326 366.00227642059326 6604 LAPTOP-2OQK6GT5 127.0.0.1 366.00227642059326 1 False False True 0.30993017979826853 0.1292131176570399 0.0 0.11201957956206357 3256ae36
20 0.012407575745987933 0.10201566081383735 344.0291979312897 5 68.71350336074829 1765123575 False 1 b0dda58b 2025-12-07_17-06-15 364.82790350914 364.82790350914 9732 LAPTOP-2OQK6GT5 127.0.0.1 364.82790350914 1 False False True 0.3149521989502957 0.11783753596277924 0.0 0.6825729339913746 b0dda58b
21 0.012429753445092291 0.10205118268939237 346.11818265914917 5 69.12530856132507 1765123581 False 1 e9d40333 2025-12-07_17-06-21 365.62638425827026 365.62638425827026 23416 LAPTOP-2OQK6GT5 127.0.0.1 365.62638425827026 1 False False True 0.5302520310849914 0.1569390945373281 0.0 0.10019443545563994 e9d40333
22 0.011990675508758594 0.10047637953978608 346.5398359298706 5 69.2183114528656 1765123948 False 1 aa89fe7a 2025-12-07_17-12-28 366.7530257701874 366.7530257701874 16200 LAPTOP-2OQK6GT5 127.0.0.1 366.7530257701874 1 False False True 0.5039700850900125 0.16208277029791282 0.0 0.6765386284546205 aa89fe7a
23 0.011968497809654236 0.10044085766423105 345.97880601882935 5 69.09321279525757 1765123951 False 1 92c48d07 2025-12-07_17-12-31 365.0942301750183 365.0942301750183 15432 LAPTOP-2OQK6GT5 127.0.0.1 365.0942301750183 1 False False True 0.33321916406589397 0.1864428656555301 0.0 0.6775297319325386 92c48d07
24 0.011968497809654236 0.10044085766423105 344.1725525856018 5 68.74226913452148 1765124318 False 1 187790d7 2025-12-07_17-18-38 364.47401189804077 364.47401189804077 24676 LAPTOP-2OQK6GT5 127.0.0.1 364.47401189804077 1 False False True 0.3372505528404193 0.2352515935896671 0.0 0.6987321324340134 187790d7
25 0.011760127958326316 0.09964993325879434 345.9427492618561 5 69.08389501571655 1765124322 False 1 442a2439 2025-12-07_17-18-42 364.755074262619 364.755074262619 7892 LAPTOP-2OQK6GT5 127.0.0.1 364.755074262619 1 False False True 0.5098036701758629 0.2122757290966333 0.0 0.6992468303721803 442a2439
26 0.011968497809654236 0.10044085766423105 345.40264558792114 5 68.98561010360717 1765124689 False 1 70862adc 2025-12-07_17-24-49 365.9752175807953 365.9752175807953 15412 LAPTOP-2OQK6GT5 127.0.0.1 365.9752175807953 1 False False True 0.3963969237347287 0.2163058925653838 0.0 0.6859176720785957 70862adc
27 0.012407575745987933 0.10201566081383735 345.8808228969574 5 69.07736506462098 1765124693 False 1 e6821f34 2025-12-07_17-24-53 365.25493717193604 365.25493717193604 26088 LAPTOP-2OQK6GT5 127.0.0.1 365.25493717193604 1 False False True 0.3668982772069688 0.2407751620351906 0.0 0.5737620270733486 e6821f34
28 0.012199205894660016 0.10122473640840064 347.05629682540894 5 69.31870231628417 1765125062 False 1 8b680875 2025-12-07_17-31-02 367.2029130458832 367.2029130458832 1720 LAPTOP-2OQK6GT5 127.0.0.1 367.2029130458832 1 False False True 0.5312495877753942 0.3193426688929859 0.0 0.591252589724218 8b680875
29 0.012429753445092291 0.10205118268939237 349.60691928863525 5 69.8253363609314 1765125068 False 1 fc54867b 2025-12-07_17-31-08 368.73608803749084 368.73608803749084 4888 LAPTOP-2OQK6GT5 127.0.0.1 368.73608803749084 1 False False True 0.5034080657304706 0.3042864908472832 0.0 0.5024906014323391 fc54867b
30 0.013385453418768206 0.10927323740570172 343.8553657531738 5 68.67559289932251 1765125432 False 1 c32d0d5e 2025-12-07_17-37-12 364.42339730262756 364.42339730262756 25808 LAPTOP-2OQK6GT5 127.0.0.1 364.42339730262756 1 False False True 0.15300672154002157 0.39848899797721926 0.0 0.5167681121564286 c32d0d5e
31 0.013537204772521452 0.10852488053708713 344.60119009017944 5 68.81447420120239 1765125436 False 1 4762fbbb 2025-12-07_17-37-16 363.3258783817291 363.3258783817291 20760 LAPTOP-2OQK6GT5 127.0.0.1 363.3258783817291 1 False False True 0.13342603167575784 0.4010104919178914 0.0 0.618812411626611 4762fbbb
32 0.011763789518968464 0.09968897796498292 344.03784108161926 5 68.71829047203065 1765125803 False 1 522ac97c 2025-12-07_17-43-23 364.7200028896332 364.7200028896332 2372 LAPTOP-2OQK6GT5 127.0.0.1 364.7200028896332 1 False False True 0.4489762005319642 0.402754966715804 0.0 0.6426372526242771 522ac97c
33 0.011650346524073398 0.09890157639017978 343.51321721076965 5 68.60030875205993 1765125805 False 1 5784f433 2025-12-07_17-43-25 362.93026328086853 362.93026328086853 22900 LAPTOP-2OQK6GT5 127.0.0.1 362.93026328086853 1 False False True 0.46204975067512033 0.192768833446102 0.0 0.6328281433384326 5784f433
34 0.011650346524073398 0.09890157639017978 343.80972242355347 5 68.66908102035522 1765126172 False 1 83af0528 2025-12-07_17-49-32 364.5850279331207 364.5850279331207 9832 LAPTOP-2OQK6GT5 127.0.0.1 364.5850279331207 1 False False True 0.4663139585990712 0.1845869678485352 0.0 0.6299207399141384 83af0528
35 0.011650346524073398 0.09890157639017978 344.11421155929565 5 68.72400512695313 1765126177 False 1 12cbaa22 2025-12-07_17-49-37 364.24684858322144 364.24684858322144 5968 LAPTOP-2OQK6GT5 127.0.0.1 364.24684858322144 1 False False True 0.47277853181431145 0.40562176755388546 0.0 0.6314990057451438 12cbaa22
36 0.011763789518968464 0.09968897796498292 348.5801889896393 5 69.61860737800598 1765126547 False 1 a3a87765 2025-12-07_17-55-47 369.27432322502136 369.27432322502136 24372 LAPTOP-2OQK6GT5 127.0.0.1 369.27432322502136 1 False False True 0.45010042945259804 0.2855696990924951 0.0 0.6351522397620386 a3a87765
37 0.0441989903761154 0.13204740781578367 347.0340585708618 5 69.31097078323364 1765126548 False 1 cf2bad0c 2025-12-07_17-55-48 366.1882207393646 366.1882207393646 3272 LAPTOP-2OQK6GT5 127.0.0.1 366.1882207393646 1 False False False 0.5890116605741096 0.283660909026841 0.0 0.4602911956047037 cf2bad0c
38 0.0441989903761154 0.13204740781578367 343.53946828842163 5 68.61563892364502 1765126916 False 1 9a9b91e7 2025-12-07_18-01-56 364.0171241760254 364.0171241760254 2272 LAPTOP-2OQK6GT5 127.0.0.1 364.0171241760254 1 False False False 0.6089594786916612 0.3646091181984181 0.0 0.46522499154449626 9a9b91e7
39 0.012199205894660016 0.10122473640840064 345.76200914382935 5 69.05782113075256 1765126922 False 1 e326d901 2025-12-07_18-02-02 365.42848086357117 365.42848086357117 24932 LAPTOP-2OQK6GT5 127.0.0.1 365.42848086357117 1 False False True 0.5932289185132622 0.37353729921136775 0.0 0.46368845919414936 e326d901
40 0.011990281344944778 0.09910429396546264 344.40758872032166 5 68.7896653175354 1765127287 False 1 ccb3f19a 2025-12-07_18-08-07 365.1469933986664 365.1469933986664 1104 LAPTOP-2OQK6GT5 127.0.0.1 365.1469933986664 1 True False True 0.6866411603181266 0.4537774266698106 0.0 0.3059281770286948 ccb3f19a
41 0.012186205997500013 0.1012282592390342 343.9386422634125 5 68.69270787239074 1765127290 False 1 8c12c55f 2025-12-07_18-08-10 363.29733777046204 363.29733777046204 19700 LAPTOP-2OQK6GT5 127.0.0.1 363.29733777046204 1 True False True 0.6710404650258701 0.44441637238072235 0.0 0.2641320116724262 8c12c55f
42 0.0662709141213666 0.16851508812176408 359.4665718078613 5 71.7971097946167 1765127672 False 1 5a62d5b6 2025-12-07_18-14-32 380.3328058719635 380.3328058719635 26528 LAPTOP-2OQK6GT5 127.0.0.1 380.3328058719635 1 True True True 0.40414134317929745 0.2010474655405967 0.0 0.59925716647257 5a62d5b6
43 0.07070075496425433 0.17390976502682037 356.3221182823181 5 71.16437225341797 1765127673 False 1 bb4495b7 2025-12-07_18-14-33 375.9771683216095 375.9771683216095 21772 LAPTOP-2OQK6GT5 127.0.0.1 375.9771683216095 1 False True False 0.39073713326110354 0.5764393142467112 0.0 0.5413963334094041 bb4495b7
44 0.01153507274885726 0.09890157639017978 344.71807885169983 5 68.8583309173584 1765128044 False 1 9d90711d 2025-12-07_18-20-44 365.7700536251068 365.7700536251068 17592 LAPTOP-2OQK6GT5 127.0.0.1 365.7700536251068 1 False False True 0.46895437796002276 0.5411583003121286 0.0 0.6350154738477746 9d90711d
45 0.01153507274885726 0.09890157639017978 343.69704604148865 5 68.64236354827881 1765128046 False 1 daaec3f8 2025-12-07_18-20-46 363.0186264514923 363.0186264514923 21292 LAPTOP-2OQK6GT5 127.0.0.1 363.0186264514923 1 False False True 0.4743507729816579 0.5213407674549528 0.0 0.6445669851749475 daaec3f8
46 0.01153507274885726 0.09890157639017978 343.6039113998413 5 68.62933912277222 1765128413 False 1 51fb5915 2025-12-07_18-26-53 364.0196588039398 364.0196588039398 21772 LAPTOP-2OQK6GT5 127.0.0.1 364.0196588039398 1 False False True 0.48541186574386475 0.5810500215434935 0.0 0.6463595394763801 51fb5915
47 0.01164485418311018 0.09964993325879434 344.2613036632538 5 68.75940155982971 1765128417 False 1 18966a33 2025-12-07_18-26-57 363.3374502658844 363.3374502658844 16900 LAPTOP-2OQK6GT5 127.0.0.1 363.3374502658844 1 False False True 0.5501591363807381 0.5132901504443755 0.0 0.6489815927562321 18966a33
48 0.012314479669876154 0.10205118268939237 345.49542331695557 5 69.01211080551147 1765128785 False 1 b67080f9 2025-12-07_18-33-05 366.01860308647156 366.01860308647156 20948 LAPTOP-2OQK6GT5 127.0.0.1 366.01860308647156 1 False False True 0.5534122098827526 0.5760738874546728 0.0 0.5609719434431071 b67080f9
49 0.07209115365923097 0.17918874278969218 351.96662616729736 5 70.29538555145264 1765128795 False 1 2533f368 2025-12-07_18-33-15 371.205295085907 371.205295085907 11208 LAPTOP-2OQK6GT5 127.0.0.1 371.205295085907 1 False True True 0.5572268058153711 0.5246075332847907 0.0 0.558307419246103 2533f368
50 0.06479949428557605 0.16493742503085831 357.1695992946625 5 71.33717932701111 1765129169 False 1 451d018d 2025-12-07_18-39-29 378.8273491859436 378.8273491859436 3616 LAPTOP-2OQK6GT5 127.0.0.1 378.8273491859436 1 False True False 0.6340187369543626 0.5494644274379972 0.0 0.6521052525663952 451d018d
51 0.04429208645222718 0.13283833222122038 349.41683983802795 5 69.77591800689697 1765129169 False 1 2256e752 2025-12-07_18-39-29 369.8801362514496 369.8801362514496 25468 LAPTOP-2OQK6GT5 127.0.0.1 369.8801362514496 1 True False False 0.6478037819045206 0.6228629446714814 0.0 0.6546094515631737 2256e752
52 0.012292301970771797 0.10201566081383735 346.071848154068 5 69.12432713508606 1765129542 False 1 0a892729 2025-12-07_18-45-42 367.237042427063 367.237042427063 26212 LAPTOP-2OQK6GT5 127.0.0.1 367.237042427063 1 False False True 0.42173310551322135 0.542928875009614 0.0 0.601586841052583 0a892729
53 0.012292301970771797 0.10201566081383735 346.42522287368774 5 69.19188222885131 1765129545 False 1 495075f5 2025-12-07_18-45-45 365.53574872016907 365.53574872016907 23604 LAPTOP-2OQK6GT5 127.0.0.1 365.53574872016907 1 False False True 0.4186754897467695 0.6318747444402091 0.0 0.5956181518703515 495075f5
54 0.011974150685190959 0.10047637953978608 346.9409854412079 5 69.29810705184937 1765129915 False 1 54c45552 2025-12-07_18-51-55 367.9469211101532 367.9469211101532 25352 LAPTOP-2OQK6GT5 127.0.0.1 367.9469211101532 1 False False True 0.46382270850905233 0.6196868829200468 0.0 0.6126115785559785 54c45552
55 0.011974150685190959 0.10047637953978608 346.4141414165497 5 69.18586716651916 1765129917 False 1 6b2e9b93 2025-12-07_18-51-57 365.9887709617615 365.9887709617615 25400 LAPTOP-2OQK6GT5 127.0.0.1 365.9887709617615 1 False False True 0.4751854264500806 0.48925010555288895 0.0 0.515482483148412 6b2e9b93
56 0.01153507274885726 0.09890157639017978 346.25940680503845 5 69.15517511367798 1765130288 False 1 e9a6b81f 2025-12-07_18-58-08 367.33222007751465 367.33222007751465 4036 LAPTOP-2OQK6GT5 127.0.0.1 367.33222007751465 1 False False True 0.4879296810791008 0.4925520261481197 0.0 0.6483489622744677 e9a6b81f
57 0.01153507274885726 0.09890157639017978 345.8425042629242 5 69.06782102584839 1765130290 False 1 076c5450 2025-12-07_18-58-10 365.1877450942993 365.1877450942993 4832 LAPTOP-2OQK6GT5 127.0.0.1 365.1877450942993 1 False False True 0.48842171509426413 0.5881329256041945 0.0 0.6569193185887352 076c5450
58 0.011875401733542455 0.10047637953978608 350.2443346977234 5 69.94839100837707 1765130664 False 1 4a42a3ea 2025-12-07_19-04-24 370.9968421459198 370.9968421459198 14912 LAPTOP-2OQK6GT5 127.0.0.1 370.9968421459198 1 False False True 0.5590357657789103 0.5940413385819063 0.0 0.6573225721220606 4a42a3ea
59 0.012080110024228227 0.10047637953978608 351.5000901222229 5 70.19009194374084 1765130669 False 1 041795f1 2025-12-07_19-04-29 370.946097612381 370.946097612381 22372 LAPTOP-2OQK6GT5 127.0.0.1 370.946097612381 1 False False True 0.5650092236486315 0.6617440972899422 0.0 0.6629504776006702 041795f1
60 0.012314479669876154 0.10205118268939237 343.53907656669617 5 68.6134319782257 1765131035 False 1 8abb3f37 2025-12-07_19-10-35 364.67463064193726 364.67463064193726 22012 LAPTOP-2OQK6GT5 127.0.0.1 364.67463064193726 1 False False True 0.48982107744168 0.4636820835063238 0.0 0.39458266779240964 8abb3f37
61 0.012314479669876154 0.10205118268939237 345.5919795036316 5 69.02381987571717 1765131040 False 1 f2cb682e 2025-12-07_19-10-40 364.90754437446594 364.90754437446594 5752 LAPTOP-2OQK6GT5 127.0.0.1 364.90754437446594 1 True False True 0.4917954659583112 0.45224829356708557 0.0 0.42597097228928366 f2cb682e
62 0.012314479669876154 0.10205118268939237 349.50936698913574 5 69.80772981643676 1765131411 False 1 463fe5e7 2025-12-07_19-16-51 370.56375885009766 370.56375885009766 16524 LAPTOP-2OQK6GT5 127.0.0.1 370.56375885009766 1 True False True 0.5373435635563055 0.5202382560972127 0.0 0.5340573143597149 463fe5e7
63 0.012083932119443879 0.10122473640840064 350.1439118385315 5 69.92809920310974 1765131415 False 1 88bbe87d 2025-12-07_19-16-55 369.54999685287476 369.54999685287476 15084 LAPTOP-2OQK6GT5 127.0.0.1 369.54999685287476 1 False False True 0.5274586910866753 0.5110782288617315 0.0 0.5368958272648865 88bbe87d
64 0.011875401733542455 0.10047637953978608 355.52406072616577 5 71.00808920860291 1765131794 False 1 33ea1cc6 2025-12-07_19-23-14 376.746440410614 376.746440410614 17380 LAPTOP-2OQK6GT5 127.0.0.1 376.746440410614 1 False False True 0.5229924883346121 0.5158065672775711 0.0 0.6679657240993034 33ea1cc6
65 0.011853224034438097 0.10044085766423105 355.67893862724304 5 71.0243070602417 1765131797 False 1 1243723e 2025-12-07_19-23-17 375.44413685798645 375.44413685798645 11232 LAPTOP-2OQK6GT5 127.0.0.1 375.44413685798645 1 False False True 0.3726772055073363 0.5573152713604742 0.0 0.6766134238094554 1243723e