Hyper param serach results
This commit is contained in:
2
.gitignore
vendored
2
.gitignore
vendored
@@ -1,5 +1,5 @@
|
|||||||
~$*.docx
|
~$*.docx
|
||||||
results/
|
results/
|
||||||
__pycache__/*
|
__pycache__/
|
||||||
dataset
|
dataset
|
||||||
results
|
results
|
||||||
@@ -1,496 +0,0 @@
|
|||||||
{
|
|
||||||
"cells": [
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 108,
|
|
||||||
"id": "93809ffc",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"Requirement already satisfied: pip in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (25.3)\n",
|
|
||||||
"Note: you may need to restart the kernel to use updated packages.\n",
|
|
||||||
"Requirement already satisfied: jupyter in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (1.1.1)\n",
|
|
||||||
"Requirement already satisfied: notebook in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jupyter) (7.5.0)\n",
|
|
||||||
"Requirement already satisfied: jupyter-console in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jupyter) (6.6.3)\n",
|
|
||||||
"Requirement already satisfied: nbconvert in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jupyter) (7.16.6)\n",
|
|
||||||
"Requirement already satisfied: ipykernel in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from jupyter) (7.1.0)\n",
|
|
||||||
"Requirement already satisfied: ipywidgets in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jupyter) (8.1.8)\n",
|
|
||||||
"Requirement already satisfied: jupyterlab in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jupyter) (4.5.0)\n",
|
|
||||||
"Requirement already satisfied: comm>=0.1.1 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipykernel->jupyter) (0.2.3)\n",
|
|
||||||
"Requirement already satisfied: debugpy>=1.6.5 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipykernel->jupyter) (1.8.17)\n",
|
|
||||||
"Requirement already satisfied: ipython>=7.23.1 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipykernel->jupyter) (9.8.0)\n",
|
|
||||||
"Requirement already satisfied: jupyter-client>=8.0.0 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipykernel->jupyter) (8.6.3)\n",
|
|
||||||
"Requirement already satisfied: jupyter-core!=5.0.*,>=4.12 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipykernel->jupyter) (5.9.1)\n",
|
|
||||||
"Requirement already satisfied: matplotlib-inline>=0.1 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipykernel->jupyter) (0.2.1)\n",
|
|
||||||
"Requirement already satisfied: nest-asyncio>=1.4 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipykernel->jupyter) (1.6.0)\n",
|
|
||||||
"Requirement already satisfied: packaging>=22 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipykernel->jupyter) (25.0)\n",
|
|
||||||
"Requirement already satisfied: psutil>=5.7 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipykernel->jupyter) (7.1.3)\n",
|
|
||||||
"Requirement already satisfied: pyzmq>=25 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipykernel->jupyter) (27.1.0)\n",
|
|
||||||
"Requirement already satisfied: tornado>=6.2 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipykernel->jupyter) (6.5.2)\n",
|
|
||||||
"Requirement already satisfied: traitlets>=5.4.0 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipykernel->jupyter) (5.14.3)\n",
|
|
||||||
"Requirement already satisfied: colorama>=0.4.4 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipython>=7.23.1->ipykernel->jupyter) (0.4.6)\n",
|
|
||||||
"Requirement already satisfied: decorator>=4.3.2 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipython>=7.23.1->ipykernel->jupyter) (5.2.1)\n",
|
|
||||||
"Requirement already satisfied: ipython-pygments-lexers>=1.0.0 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipython>=7.23.1->ipykernel->jupyter) (1.1.1)\n",
|
|
||||||
"Requirement already satisfied: jedi>=0.18.1 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipython>=7.23.1->ipykernel->jupyter) (0.19.2)\n",
|
|
||||||
"Requirement already satisfied: prompt_toolkit<3.1.0,>=3.0.41 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipython>=7.23.1->ipykernel->jupyter) (3.0.52)\n",
|
|
||||||
"Requirement already satisfied: pygments>=2.11.0 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipython>=7.23.1->ipykernel->jupyter) (2.19.2)\n",
|
|
||||||
"Requirement already satisfied: stack_data>=0.6.0 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipython>=7.23.1->ipykernel->jupyter) (0.6.3)\n",
|
|
||||||
"Requirement already satisfied: typing_extensions>=4.6 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipython>=7.23.1->ipykernel->jupyter) (4.15.0)\n",
|
|
||||||
"Requirement already satisfied: wcwidth in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from prompt_toolkit<3.1.0,>=3.0.41->ipython>=7.23.1->ipykernel->jupyter) (0.2.14)\n",
|
|
||||||
"Requirement already satisfied: parso<0.9.0,>=0.8.4 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from jedi>=0.18.1->ipython>=7.23.1->ipykernel->jupyter) (0.8.5)\n",
|
|
||||||
"Requirement already satisfied: python-dateutil>=2.8.2 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from jupyter-client>=8.0.0->ipykernel->jupyter) (2.9.0.post0)\n",
|
|
||||||
"Requirement already satisfied: platformdirs>=2.5 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from jupyter-core!=5.0.*,>=4.12->ipykernel->jupyter) (4.5.1)\n",
|
|
||||||
"Requirement already satisfied: six>=1.5 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from python-dateutil>=2.8.2->jupyter-client>=8.0.0->ipykernel->jupyter) (1.17.0)\n",
|
|
||||||
"Requirement already satisfied: executing>=1.2.0 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from stack_data>=0.6.0->ipython>=7.23.1->ipykernel->jupyter) (2.2.1)\n",
|
|
||||||
"Requirement already satisfied: asttokens>=2.1.0 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from stack_data>=0.6.0->ipython>=7.23.1->ipykernel->jupyter) (3.0.1)\n",
|
|
||||||
"Requirement already satisfied: pure-eval in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from stack_data>=0.6.0->ipython>=7.23.1->ipykernel->jupyter) (0.2.3)\n",
|
|
||||||
"Requirement already satisfied: widgetsnbextension~=4.0.14 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from ipywidgets->jupyter) (4.0.15)\n",
|
|
||||||
"Requirement already satisfied: jupyterlab_widgets~=3.0.15 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from ipywidgets->jupyter) (3.0.16)\n",
|
|
||||||
"Requirement already satisfied: async-lru>=1.0.0 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jupyterlab->jupyter) (2.0.5)\n",
|
|
||||||
"Requirement already satisfied: httpx<1,>=0.25.0 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jupyterlab->jupyter) (0.28.1)\n",
|
|
||||||
"Requirement already satisfied: jinja2>=3.0.3 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jupyterlab->jupyter) (3.1.6)\n",
|
|
||||||
"Requirement already satisfied: jupyter-lsp>=2.0.0 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jupyterlab->jupyter) (2.3.0)\n",
|
|
||||||
"Requirement already satisfied: jupyter-server<3,>=2.4.0 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jupyterlab->jupyter) (2.17.0)\n",
|
|
||||||
"Requirement already satisfied: jupyterlab-server<3,>=2.28.0 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jupyterlab->jupyter) (2.28.0)\n",
|
|
||||||
"Requirement already satisfied: notebook-shim>=0.2 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jupyterlab->jupyter) (0.2.4)\n",
|
|
||||||
"Requirement already satisfied: setuptools>=41.1.0 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jupyterlab->jupyter) (65.5.0)\n",
|
|
||||||
"Requirement already satisfied: anyio in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from httpx<1,>=0.25.0->jupyterlab->jupyter) (4.12.0)\n",
|
|
||||||
"Requirement already satisfied: certifi in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from httpx<1,>=0.25.0->jupyterlab->jupyter) (2025.11.12)\n",
|
|
||||||
"Requirement already satisfied: httpcore==1.* in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from httpx<1,>=0.25.0->jupyterlab->jupyter) (1.0.9)\n",
|
|
||||||
"Requirement already satisfied: idna in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from httpx<1,>=0.25.0->jupyterlab->jupyter) (3.11)\n",
|
|
||||||
"Requirement already satisfied: h11>=0.16 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from httpcore==1.*->httpx<1,>=0.25.0->jupyterlab->jupyter) (0.16.0)\n",
|
|
||||||
"Requirement already satisfied: argon2-cffi>=21.1 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (25.1.0)\n",
|
|
||||||
"Requirement already satisfied: jupyter-events>=0.11.0 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (0.12.0)\n",
|
|
||||||
"Requirement already satisfied: jupyter-server-terminals>=0.4.4 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (0.5.3)\n",
|
|
||||||
"Requirement already satisfied: nbformat>=5.3.0 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (5.10.4)\n",
|
|
||||||
"Requirement already satisfied: overrides>=5.0 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (7.7.0)\n",
|
|
||||||
"Requirement already satisfied: prometheus-client>=0.9 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (0.23.1)\n",
|
|
||||||
"Requirement already satisfied: pywinpty>=2.0.1 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (3.0.2)\n",
|
|
||||||
"Requirement already satisfied: send2trash>=1.8.2 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (1.8.3)\n",
|
|
||||||
"Requirement already satisfied: terminado>=0.8.3 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (0.18.1)\n",
|
|
||||||
"Requirement already satisfied: websocket-client>=1.7 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (1.9.0)\n",
|
|
||||||
"Requirement already satisfied: babel>=2.10 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jupyterlab-server<3,>=2.28.0->jupyterlab->jupyter) (2.17.0)\n",
|
|
||||||
"Requirement already satisfied: json5>=0.9.0 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jupyterlab-server<3,>=2.28.0->jupyterlab->jupyter) (0.12.1)\n",
|
|
||||||
"Requirement already satisfied: jsonschema>=4.18.0 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jupyterlab-server<3,>=2.28.0->jupyterlab->jupyter) (4.25.1)\n",
|
|
||||||
"Requirement already satisfied: requests>=2.31 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jupyterlab-server<3,>=2.28.0->jupyterlab->jupyter) (2.32.5)\n",
|
|
||||||
"Requirement already satisfied: argon2-cffi-bindings in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from argon2-cffi>=21.1->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (25.1.0)\n",
|
|
||||||
"Requirement already satisfied: MarkupSafe>=2.0 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jinja2>=3.0.3->jupyterlab->jupyter) (3.0.3)\n",
|
|
||||||
"Requirement already satisfied: attrs>=22.2.0 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jsonschema>=4.18.0->jupyterlab-server<3,>=2.28.0->jupyterlab->jupyter) (25.4.0)\n",
|
|
||||||
"Requirement already satisfied: jsonschema-specifications>=2023.03.6 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jsonschema>=4.18.0->jupyterlab-server<3,>=2.28.0->jupyterlab->jupyter) (2025.9.1)\n",
|
|
||||||
"Requirement already satisfied: referencing>=0.28.4 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jsonschema>=4.18.0->jupyterlab-server<3,>=2.28.0->jupyterlab->jupyter) (0.37.0)\n",
|
|
||||||
"Requirement already satisfied: rpds-py>=0.7.1 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jsonschema>=4.18.0->jupyterlab-server<3,>=2.28.0->jupyterlab->jupyter) (0.30.0)\n",
|
|
||||||
"Requirement already satisfied: python-json-logger>=2.0.4 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (4.0.0)\n",
|
|
||||||
"Requirement already satisfied: pyyaml>=5.3 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (6.0.2)\n",
|
|
||||||
"Requirement already satisfied: rfc3339-validator in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (0.1.4)\n",
|
|
||||||
"Requirement already satisfied: rfc3986-validator>=0.1.1 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (0.1.1)\n",
|
|
||||||
"Requirement already satisfied: fqdn in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (1.5.1)\n",
|
|
||||||
"Requirement already satisfied: isoduration in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (20.11.0)\n",
|
|
||||||
"Requirement already satisfied: jsonpointer>1.13 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (3.0.0)\n",
|
|
||||||
"Requirement already satisfied: rfc3987-syntax>=1.1.0 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (1.1.0)\n",
|
|
||||||
"Requirement already satisfied: uri-template in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (1.3.0)\n",
|
|
||||||
"Requirement already satisfied: webcolors>=24.6.0 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (25.10.0)\n",
|
|
||||||
"Requirement already satisfied: beautifulsoup4 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from nbconvert->jupyter) (4.14.3)\n",
|
|
||||||
"Requirement already satisfied: bleach!=5.0.0 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from bleach[css]!=5.0.0->nbconvert->jupyter) (6.3.0)\n",
|
|
||||||
"Requirement already satisfied: defusedxml in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from nbconvert->jupyter) (0.7.1)\n",
|
|
||||||
"Requirement already satisfied: jupyterlab-pygments in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from nbconvert->jupyter) (0.3.0)\n",
|
|
||||||
"Requirement already satisfied: mistune<4,>=2.0.3 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from nbconvert->jupyter) (3.1.4)\n",
|
|
||||||
"Requirement already satisfied: nbclient>=0.5.0 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from nbconvert->jupyter) (0.10.2)\n",
|
|
||||||
"Requirement already satisfied: pandocfilters>=1.4.1 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from nbconvert->jupyter) (1.5.1)\n",
|
|
||||||
"Requirement already satisfied: webencodings in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from bleach!=5.0.0->bleach[css]!=5.0.0->nbconvert->jupyter) (0.5.1)\n",
|
|
||||||
"Requirement already satisfied: tinycss2<1.5,>=1.1.0 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from bleach[css]!=5.0.0->nbconvert->jupyter) (1.4.0)\n",
|
|
||||||
"Requirement already satisfied: fastjsonschema>=2.15 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from nbformat>=5.3.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (2.21.2)\n",
|
|
||||||
"Requirement already satisfied: charset_normalizer<4,>=2 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from requests>=2.31->jupyterlab-server<3,>=2.28.0->jupyterlab->jupyter) (3.4.4)\n",
|
|
||||||
"Requirement already satisfied: urllib3<3,>=1.21.1 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from requests>=2.31->jupyterlab-server<3,>=2.28.0->jupyterlab->jupyter) (2.6.0)\n",
|
|
||||||
"Requirement already satisfied: lark>=1.2.2 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from rfc3987-syntax>=1.1.0->jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (1.3.1)\n",
|
|
||||||
"Requirement already satisfied: cffi>=1.0.1 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from argon2-cffi-bindings->argon2-cffi>=21.1->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (2.0.0)\n",
|
|
||||||
"Requirement already satisfied: pycparser in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from cffi>=1.0.1->argon2-cffi-bindings->argon2-cffi>=21.1->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (2.23)\n",
|
|
||||||
"Requirement already satisfied: soupsieve>=1.6.1 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from beautifulsoup4->nbconvert->jupyter) (2.8)\n",
|
|
||||||
"Requirement already satisfied: arrow>=0.15.0 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from isoduration->jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (1.4.0)\n",
|
|
||||||
"Requirement already satisfied: tzdata in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from arrow>=0.15.0->isoduration->jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (2025.2)\n",
|
|
||||||
"Note: you may need to restart the kernel to use updated packages.\n",
|
|
||||||
"Requirement already satisfied: ipywidgets in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (8.1.8)\n",
|
|
||||||
"Requirement already satisfied: comm>=0.1.3 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipywidgets) (0.2.3)\n",
|
|
||||||
"Requirement already satisfied: ipython>=6.1.0 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipywidgets) (9.8.0)\n",
|
|
||||||
"Requirement already satisfied: traitlets>=4.3.1 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipywidgets) (5.14.3)\n",
|
|
||||||
"Requirement already satisfied: widgetsnbextension~=4.0.14 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from ipywidgets) (4.0.15)\n",
|
|
||||||
"Requirement already satisfied: jupyterlab_widgets~=3.0.15 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from ipywidgets) (3.0.16)\n",
|
|
||||||
"Requirement already satisfied: colorama>=0.4.4 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipython>=6.1.0->ipywidgets) (0.4.6)\n",
|
|
||||||
"Requirement already satisfied: decorator>=4.3.2 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipython>=6.1.0->ipywidgets) (5.2.1)\n",
|
|
||||||
"Requirement already satisfied: ipython-pygments-lexers>=1.0.0 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipython>=6.1.0->ipywidgets) (1.1.1)\n",
|
|
||||||
"Requirement already satisfied: jedi>=0.18.1 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipython>=6.1.0->ipywidgets) (0.19.2)\n",
|
|
||||||
"Requirement already satisfied: matplotlib-inline>=0.1.5 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipython>=6.1.0->ipywidgets) (0.2.1)\n",
|
|
||||||
"Requirement already satisfied: prompt_toolkit<3.1.0,>=3.0.41 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipython>=6.1.0->ipywidgets) (3.0.52)\n",
|
|
||||||
"Requirement already satisfied: pygments>=2.11.0 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipython>=6.1.0->ipywidgets) (2.19.2)\n",
|
|
||||||
"Requirement already satisfied: stack_data>=0.6.0 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipython>=6.1.0->ipywidgets) (0.6.3)\n",
|
|
||||||
"Requirement already satisfied: typing_extensions>=4.6 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipython>=6.1.0->ipywidgets) (4.15.0)\n",
|
|
||||||
"Requirement already satisfied: wcwidth in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from prompt_toolkit<3.1.0,>=3.0.41->ipython>=6.1.0->ipywidgets) (0.2.14)\n",
|
|
||||||
"Requirement already satisfied: parso<0.9.0,>=0.8.4 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from jedi>=0.18.1->ipython>=6.1.0->ipywidgets) (0.8.5)\n",
|
|
||||||
"Requirement already satisfied: executing>=1.2.0 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from stack_data>=0.6.0->ipython>=6.1.0->ipywidgets) (2.2.1)\n",
|
|
||||||
"Requirement already satisfied: asttokens>=2.1.0 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from stack_data>=0.6.0->ipython>=6.1.0->ipywidgets) (3.0.1)\n",
|
|
||||||
"Requirement already satisfied: pure-eval in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from stack_data>=0.6.0->ipython>=6.1.0->ipywidgets) (0.2.3)\n",
|
|
||||||
"Note: you may need to restart the kernel to use updated packages.\n",
|
|
||||||
"Requirement already satisfied: ipykernel in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (7.1.0)\n",
|
|
||||||
"Requirement already satisfied: comm>=0.1.1 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipykernel) (0.2.3)\n",
|
|
||||||
"Requirement already satisfied: debugpy>=1.6.5 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipykernel) (1.8.17)\n",
|
|
||||||
"Requirement already satisfied: ipython>=7.23.1 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipykernel) (9.8.0)\n",
|
|
||||||
"Requirement already satisfied: jupyter-client>=8.0.0 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipykernel) (8.6.3)\n",
|
|
||||||
"Requirement already satisfied: jupyter-core!=5.0.*,>=4.12 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipykernel) (5.9.1)\n",
|
|
||||||
"Requirement already satisfied: matplotlib-inline>=0.1 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipykernel) (0.2.1)\n",
|
|
||||||
"Requirement already satisfied: nest-asyncio>=1.4 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipykernel) (1.6.0)\n",
|
|
||||||
"Requirement already satisfied: packaging>=22 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipykernel) (25.0)\n",
|
|
||||||
"Requirement already satisfied: psutil>=5.7 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipykernel) (7.1.3)\n",
|
|
||||||
"Requirement already satisfied: pyzmq>=25 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipykernel) (27.1.0)\n",
|
|
||||||
"Requirement already satisfied: tornado>=6.2 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipykernel) (6.5.2)\n",
|
|
||||||
"Requirement already satisfied: traitlets>=5.4.0 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipykernel) (5.14.3)\n",
|
|
||||||
"Requirement already satisfied: colorama>=0.4.4 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipython>=7.23.1->ipykernel) (0.4.6)\n",
|
|
||||||
"Requirement already satisfied: decorator>=4.3.2 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipython>=7.23.1->ipykernel) (5.2.1)\n",
|
|
||||||
"Requirement already satisfied: ipython-pygments-lexers>=1.0.0 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipython>=7.23.1->ipykernel) (1.1.1)\n",
|
|
||||||
"Requirement already satisfied: jedi>=0.18.1 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipython>=7.23.1->ipykernel) (0.19.2)\n",
|
|
||||||
"Requirement already satisfied: prompt_toolkit<3.1.0,>=3.0.41 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipython>=7.23.1->ipykernel) (3.0.52)\n",
|
|
||||||
"Requirement already satisfied: pygments>=2.11.0 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipython>=7.23.1->ipykernel) (2.19.2)\n",
|
|
||||||
"Requirement already satisfied: stack_data>=0.6.0 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipython>=7.23.1->ipykernel) (0.6.3)\n",
|
|
||||||
"Requirement already satisfied: typing_extensions>=4.6 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from ipython>=7.23.1->ipykernel) (4.15.0)\n",
|
|
||||||
"Requirement already satisfied: wcwidth in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from prompt_toolkit<3.1.0,>=3.0.41->ipython>=7.23.1->ipykernel) (0.2.14)\n",
|
|
||||||
"Requirement already satisfied: parso<0.9.0,>=0.8.4 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from jedi>=0.18.1->ipython>=7.23.1->ipykernel) (0.8.5)\n",
|
|
||||||
"Requirement already satisfied: python-dateutil>=2.8.2 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from jupyter-client>=8.0.0->ipykernel) (2.9.0.post0)\n",
|
|
||||||
"Requirement already satisfied: platformdirs>=2.5 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from jupyter-core!=5.0.*,>=4.12->ipykernel) (4.5.1)\n",
|
|
||||||
"Requirement already satisfied: six>=1.5 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from python-dateutil>=2.8.2->jupyter-client>=8.0.0->ipykernel) (1.17.0)\n",
|
|
||||||
"Requirement already satisfied: executing>=1.2.0 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from stack_data>=0.6.0->ipython>=7.23.1->ipykernel) (2.2.1)\n",
|
|
||||||
"Requirement already satisfied: asttokens>=2.1.0 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from stack_data>=0.6.0->ipython>=7.23.1->ipykernel) (3.0.1)\n",
|
|
||||||
"Requirement already satisfied: pure-eval in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from stack_data>=0.6.0->ipython>=7.23.1->ipykernel) (0.2.3)\n",
|
|
||||||
"Note: you may need to restart the kernel to use updated packages.\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"%pip install --upgrade pip\n",
|
|
||||||
"%pip install --upgrade jupyter\n",
|
|
||||||
"%pip install --upgrade ipywidgets\n",
|
|
||||||
"%pip install --upgrade ipykernel"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 109,
|
|
||||||
"id": "48724594",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"Requirement already satisfied: pdf2image in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (1.17.0)\n",
|
|
||||||
"Requirement already satisfied: pillow in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (12.0.0)\n",
|
|
||||||
"Note: you may need to restart the kernel to use updated packages.\n",
|
|
||||||
"Requirement already satisfied: PyMuPDF in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (1.26.6)\n",
|
|
||||||
"Note: you may need to restart the kernel to use updated packages.\n",
|
|
||||||
"Requirement already satisfied: pandas in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (2.3.3)\n",
|
|
||||||
"Requirement already satisfied: numpy>=1.23.2 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from pandas) (2.3.5)\n",
|
|
||||||
"Requirement already satisfied: python-dateutil>=2.8.2 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from pandas) (2.9.0.post0)\n",
|
|
||||||
"Requirement already satisfied: pytz>=2020.1 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from pandas) (2025.2)\n",
|
|
||||||
"Requirement already satisfied: tzdata>=2022.7 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from pandas) (2025.2)\n",
|
|
||||||
"Requirement already satisfied: six>=1.5 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from python-dateutil>=2.8.2->pandas) (1.17.0)\n",
|
|
||||||
"Note: you may need to restart the kernel to use updated packages.\n",
|
|
||||||
"Requirement already satisfied: matplotlib in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (3.10.7)\n",
|
|
||||||
"Requirement already satisfied: contourpy>=1.0.1 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from matplotlib) (1.3.3)\n",
|
|
||||||
"Requirement already satisfied: cycler>=0.10 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from matplotlib) (0.12.1)\n",
|
|
||||||
"Requirement already satisfied: fonttools>=4.22.0 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from matplotlib) (4.61.0)\n",
|
|
||||||
"Requirement already satisfied: kiwisolver>=1.3.1 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from matplotlib) (1.4.9)\n",
|
|
||||||
"Requirement already satisfied: numpy>=1.23 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from matplotlib) (2.3.5)\n",
|
|
||||||
"Requirement already satisfied: packaging>=20.0 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from matplotlib) (25.0)\n",
|
|
||||||
"Requirement already satisfied: pillow>=8 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from matplotlib) (12.0.0)\n",
|
|
||||||
"Requirement already satisfied: pyparsing>=3 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from matplotlib) (3.2.5)\n",
|
|
||||||
"Requirement already satisfied: python-dateutil>=2.7 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from matplotlib) (2.9.0.post0)\n",
|
|
||||||
"Requirement already satisfied: six>=1.5 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from python-dateutil>=2.7->matplotlib) (1.17.0)\n",
|
|
||||||
"Note: you may need to restart the kernel to use updated packages.\n",
|
|
||||||
"Requirement already satisfied: seaborn in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (0.13.2)\n",
|
|
||||||
"Requirement already satisfied: numpy!=1.24.0,>=1.20 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from seaborn) (2.3.5)\n",
|
|
||||||
"Requirement already satisfied: pandas>=1.2 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from seaborn) (2.3.3)\n",
|
|
||||||
"Requirement already satisfied: matplotlib!=3.6.1,>=3.4 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from seaborn) (3.10.7)\n",
|
|
||||||
"Requirement already satisfied: contourpy>=1.0.1 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (1.3.3)\n",
|
|
||||||
"Requirement already satisfied: cycler>=0.10 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (0.12.1)\n",
|
|
||||||
"Requirement already satisfied: fonttools>=4.22.0 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (4.61.0)\n",
|
|
||||||
"Requirement already satisfied: kiwisolver>=1.3.1 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (1.4.9)\n",
|
|
||||||
"Requirement already satisfied: packaging>=20.0 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (25.0)\n",
|
|
||||||
"Requirement already satisfied: pillow>=8 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (12.0.0)\n",
|
|
||||||
"Requirement already satisfied: pyparsing>=3 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (3.2.5)\n",
|
|
||||||
"Requirement already satisfied: python-dateutil>=2.7 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (2.9.0.post0)\n",
|
|
||||||
"Requirement already satisfied: pytz>=2020.1 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from pandas>=1.2->seaborn) (2025.2)\n",
|
|
||||||
"Requirement already satisfied: tzdata>=2022.7 in c:\\users\\sergio\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from pandas>=1.2->seaborn) (2025.2)\n",
|
|
||||||
"Requirement already satisfied: six>=1.5 in c:\\users\\sergio\\appdata\\roaming\\python\\python311\\site-packages (from python-dateutil>=2.7->matplotlib!=3.6.1,>=3.4->seaborn) (1.17.0)\n",
|
|
||||||
"Note: you may need to restart the kernel to use updated packages.\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"# Install necessary packages\n",
|
|
||||||
"%pip install pdf2image pillow \n",
|
|
||||||
"# pdf reading\n",
|
|
||||||
"%pip install PyMuPDF\n",
|
|
||||||
"\n",
|
|
||||||
"# Data analysis and visualization\n",
|
|
||||||
"%pip install pandas\n",
|
|
||||||
"%pip install matplotlib\n",
|
|
||||||
"%pip install seaborn"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 110,
|
|
||||||
"id": "e1f793b6",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"import os, json\n",
|
|
||||||
"import numpy as np\n",
|
|
||||||
"import pandas as pd\n",
|
|
||||||
"import matplotlib.pyplot as plt\n",
|
|
||||||
"from pdf2image import convert_from_path\n",
|
|
||||||
"from PIL import Image, ImageOps\n",
|
|
||||||
"import fitz # PyMuPDF\n",
|
|
||||||
"import re\n",
|
|
||||||
"from datetime import datetime\n",
|
|
||||||
"from typing import List\n",
|
|
||||||
"import shutil"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 111,
|
|
||||||
"id": "1652a78e",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"def pdf_to_images(pdf_path: str, output_dir: str, dpi: int = 300):\n",
|
|
||||||
" \"\"\"\n",
|
|
||||||
" Render a PDF into a list of PIL Images using PyMuPDF or pdf2image.\n",
|
|
||||||
" 'pages' is 1-based (e.g., range(1, 10) -> pages 1–9).\n",
|
|
||||||
" \"\"\"\n",
|
|
||||||
" if fitz is not None:\n",
|
|
||||||
" doc = fitz.open(pdf_path)\n",
|
|
||||||
" total_pages = len(doc)\n",
|
|
||||||
"\n",
|
|
||||||
" # Adjust page indices (PyMuPDF uses 0-based indexing)\n",
|
|
||||||
" page_indices = list(range(total_pages))\n",
|
|
||||||
"\n",
|
|
||||||
" for i in page_indices:\n",
|
|
||||||
" page = doc.load_page(i)\n",
|
|
||||||
" mat = fitz.Matrix(dpi / 72.0, dpi / 72.0)\n",
|
|
||||||
" pix = page.get_pixmap(matrix=mat, alpha=False)\n",
|
|
||||||
" img = Image.frombytes(\"RGB\", [pix.width, pix.height], pix.samples)\n",
|
|
||||||
" # Build filename\n",
|
|
||||||
" out_path = os.path.join(\n",
|
|
||||||
" output_dir,\n",
|
|
||||||
" f\"page_{i + 1:04d}.png\"\n",
|
|
||||||
" )\n",
|
|
||||||
"\n",
|
|
||||||
" img.save(out_path, \"PNG\")\n",
|
|
||||||
" doc.close()\n",
|
|
||||||
" else:\n",
|
|
||||||
" raise RuntimeError(\"Install PyMuPDF or pdf2image to convert PDFs.\")"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 112,
|
|
||||||
"id": "f523dd58",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"import fitz\n",
|
|
||||||
"import re\n",
|
|
||||||
"import os\n",
|
|
||||||
"\n",
|
|
||||||
"def _pdf_extract_text_structured(page, margin_threshold=50):\n",
|
|
||||||
" \"\"\"\n",
|
|
||||||
" Extract text using PyMuPDF's dict mode which preserves\n",
|
|
||||||
" the actual line structure from the PDF.\n",
|
|
||||||
" \"\"\"\n",
|
|
||||||
" data = page.get_text(\"dict\")\n",
|
|
||||||
" \n",
|
|
||||||
" # Collect all lines with their Y position\n",
|
|
||||||
" all_lines = []\n",
|
|
||||||
" margin_text_parts = [] # Collect vertical/margin text\n",
|
|
||||||
" margin_y_positions = []\n",
|
|
||||||
" \n",
|
|
||||||
" for block in data.get(\"blocks\", []):\n",
|
|
||||||
" if block.get(\"type\") != 0: # Skip non-text blocks\n",
|
|
||||||
" continue\n",
|
|
||||||
" \n",
|
|
||||||
" block_bbox = block.get(\"bbox\", (0, 0, 0, 0))\n",
|
|
||||||
" block_width = block_bbox[2] - block_bbox[0]\n",
|
|
||||||
" block_height = block_bbox[3] - block_bbox[1]\n",
|
|
||||||
" \n",
|
|
||||||
" # Detect vertical/margin text\n",
|
|
||||||
" is_margin_text = (block_bbox[0] < margin_threshold or \n",
|
|
||||||
" block_height > block_width * 2)\n",
|
|
||||||
" \n",
|
|
||||||
" for line in block.get(\"lines\", []):\n",
|
|
||||||
" direction = line.get(\"dir\", (1, 0))\n",
|
|
||||||
" bbox = line.get(\"bbox\", (0, 0, 0, 0))\n",
|
|
||||||
" y_center = (bbox[1] + bbox[3]) / 2\n",
|
|
||||||
" x_start = bbox[0]\n",
|
|
||||||
" \n",
|
|
||||||
" # Collect text from all spans\n",
|
|
||||||
" line_text = \"\"\n",
|
|
||||||
" for span in line.get(\"spans\", []):\n",
|
|
||||||
" text = span.get(\"text\", \"\")\n",
|
|
||||||
" line_text += text\n",
|
|
||||||
" \n",
|
|
||||||
" line_text = line_text.strip()\n",
|
|
||||||
" line_text = re.sub(r\"[•▪◦●❖▶■\\uf000-\\uf0ff]\", \"\", line_text)\n",
|
|
||||||
" \n",
|
|
||||||
" if not line_text:\n",
|
|
||||||
" continue\n",
|
|
||||||
" \n",
|
|
||||||
" # Check if this is margin/vertical text\n",
|
|
||||||
" if is_margin_text or abs(direction[0]) < 0.9:\n",
|
|
||||||
" margin_text_parts.append((y_center, line_text))\n",
|
|
||||||
" margin_y_positions.append(y_center)\n",
|
|
||||||
" else:\n",
|
|
||||||
" all_lines.append((y_center, x_start, line_text))\n",
|
|
||||||
" \n",
|
|
||||||
" # Reconstruct margin text as single line at its vertical center\n",
|
|
||||||
" if margin_text_parts:\n",
|
|
||||||
" # Sort by Y position (top to bottom) and join\n",
|
|
||||||
" margin_text_parts.sort(key=lambda x: x[0])\n",
|
|
||||||
" full_margin_text = \" \".join(part[1] for part in margin_text_parts)\n",
|
|
||||||
" # Calculate vertical center of the watermark\n",
|
|
||||||
" avg_y = sum(margin_y_positions) / len(margin_y_positions)\n",
|
|
||||||
" # Add as a single line\n",
|
|
||||||
" all_lines.append((avg_y, -1, full_margin_text)) # x=-1 to sort first\n",
|
|
||||||
" \n",
|
|
||||||
" if not all_lines:\n",
|
|
||||||
" return \"\"\n",
|
|
||||||
" \n",
|
|
||||||
" # Sort by Y first, then by X\n",
|
|
||||||
" all_lines.sort(key=lambda x: (x[0], x[1]))\n",
|
|
||||||
" \n",
|
|
||||||
" # Group lines at same vertical position\n",
|
|
||||||
" merged_rows = []\n",
|
|
||||||
" current_row = [all_lines[0]]\n",
|
|
||||||
" current_y = all_lines[0][0]\n",
|
|
||||||
" \n",
|
|
||||||
" for y_center, x_start, text in all_lines[1:]:\n",
|
|
||||||
" if abs(y_center - current_y) <= 2:\n",
|
|
||||||
" current_row.append((y_center, x_start, text))\n",
|
|
||||||
" else:\n",
|
|
||||||
" current_row.sort(key=lambda x: x[1])\n",
|
|
||||||
" row_text = \" \".join(item[2] for item in current_row)\n",
|
|
||||||
" merged_rows.append((current_y, row_text))\n",
|
|
||||||
" current_row = [(y_center, x_start, text)]\n",
|
|
||||||
" current_y = y_center\n",
|
|
||||||
" \n",
|
|
||||||
" if current_row:\n",
|
|
||||||
" current_row.sort(key=lambda x: x[1])\n",
|
|
||||||
" row_text = \" \".join(item[2] for item in current_row)\n",
|
|
||||||
" merged_rows.append((current_y, row_text))\n",
|
|
||||||
" \n",
|
|
||||||
" # Sort rows by Y and extract text\n",
|
|
||||||
" merged_rows.sort(key=lambda x: x[0])\n",
|
|
||||||
" lines = [row[1] for row in merged_rows]\n",
|
|
||||||
" \n",
|
|
||||||
" # Join and clean up\n",
|
|
||||||
" text = \"\\n\".join(lines)\n",
|
|
||||||
" text = re.sub(r\" +\", \" \", text).strip()\n",
|
|
||||||
" text = re.sub(r\"\\n{3,}\", \"\\n\\n\", text).strip()\n",
|
|
||||||
" \n",
|
|
||||||
" return text\n",
|
|
||||||
"\n",
|
|
||||||
"def pdf_extract_text(pdf_path, output_dir, margin_threshold=50):\n",
|
|
||||||
" os.makedirs(output_dir, exist_ok=True)\n",
|
|
||||||
" doc = fitz.open(pdf_path)\n",
|
|
||||||
" \n",
|
|
||||||
" for i, page in enumerate(doc):\n",
|
|
||||||
" text = _pdf_extract_text_structured(page, margin_threshold)\n",
|
|
||||||
" if not text.strip():\n",
|
|
||||||
" continue\n",
|
|
||||||
" out_path = os.path.join(output_dir, f\"page_{i + 1:04d}.txt\")\n",
|
|
||||||
" with open(out_path, \"w\", encoding=\"utf-8\") as f:\n",
|
|
||||||
" f.write(text)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 113,
|
|
||||||
"id": "9f64a8c0",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"PDF_FOLDER = './instructions' # Folder containing PDF files\n",
|
|
||||||
"OUTPUT_FOLDER = './dataset'\n",
|
|
||||||
"\n",
|
|
||||||
"os.makedirs(OUTPUT_FOLDER, exist_ok=True)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 114,
|
|
||||||
"id": "41e4651d",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"i = 0\n",
|
|
||||||
"\n",
|
|
||||||
"pdf_files = sorted([\n",
|
|
||||||
" fname for fname in os.listdir(PDF_FOLDER)\n",
|
|
||||||
" if fname.lower().endswith(\".pdf\")\n",
|
|
||||||
"])\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"for fname in pdf_files:\n",
|
|
||||||
" # build output directories\n",
|
|
||||||
" out_img_path = os.path.join(OUTPUT_FOLDER, str(i), \"img\")\n",
|
|
||||||
" out_txt_path = os.path.join(OUTPUT_FOLDER, str(i), \"txt\")\n",
|
|
||||||
"\n",
|
|
||||||
" os.makedirs(out_img_path, exist_ok=True)\n",
|
|
||||||
" os.makedirs(out_txt_path, exist_ok=True)\n",
|
|
||||||
"\n",
|
|
||||||
" # source and destination PDF paths\n",
|
|
||||||
" src_pdf = os.path.join(PDF_FOLDER, fname)\n",
|
|
||||||
" pdf_path = os.path.join(OUTPUT_FOLDER, str(i), fname)\n",
|
|
||||||
"\n",
|
|
||||||
" # copy PDF into numbered folder\n",
|
|
||||||
" shutil.copy(src_pdf, pdf_path)\n",
|
|
||||||
"\n",
|
|
||||||
" # convert PDF → images\n",
|
|
||||||
" pdf_to_images(\n",
|
|
||||||
" pdf_path=pdf_path,\n",
|
|
||||||
" output_dir=out_img_path,\n",
|
|
||||||
" dpi=300\n",
|
|
||||||
" )\n",
|
|
||||||
" pdf_extract_text(\n",
|
|
||||||
" pdf_path=pdf_path,\n",
|
|
||||||
" output_dir=out_txt_path,\n",
|
|
||||||
" margin_threshold=40\n",
|
|
||||||
" )\n",
|
|
||||||
"\n",
|
|
||||||
" i += 1"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "Python 3",
|
|
||||||
"language": "python",
|
|
||||||
"name": "python3"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.11.9"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"nbformat": 4,
|
|
||||||
"nbformat_minor": 5
|
|
||||||
}
|
|
||||||
File diff suppressed because one or more lines are too long
@@ -112,13 +112,23 @@ def assemble_from_paddle_result(paddleocr_predict, min_score=0.0, line_tol_facto
|
|||||||
|
|
||||||
def main():
|
def main():
|
||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
|
# dataset root folder
|
||||||
parser.add_argument("--pdf-folder", required=True)
|
parser.add_argument("--pdf-folder", required=True)
|
||||||
|
#Whether to use document image orientation classification.
|
||||||
|
parser.add_argument("--use-doc-orientation-classify", type=lambda s: s.lower()=="true", default=False)
|
||||||
|
# Whether to use text image unwarping.
|
||||||
|
parser.add_argument("--use-doc-unwarping", type=lambda s: s.lower()=="true", default=False)
|
||||||
|
# Whether to use text line orientation classification.
|
||||||
parser.add_argument("--textline-orientation", type=lambda s: s.lower()=="true", default=True)
|
parser.add_argument("--textline-orientation", type=lambda s: s.lower()=="true", default=True)
|
||||||
parser.add_argument("--text-det-box-thresh", type=float, default=0.6)
|
# Detection pixel threshold for the text detection model. Pixels with scores greater than this threshold in the output probability map are considered text pixels.
|
||||||
|
parser.add_argument("--text-det-thresh", type=float, default=0.0)
|
||||||
|
# Detection box threshold for the text detection model. A detection result is considered a text region if the average score of all pixels within the border of the result is greater than this threshold.
|
||||||
|
parser.add_argument("--text-det-box-thresh", type=float, default=0.0)
|
||||||
|
# Text detection expansion coefficient, which expands the text region using this method. The larger the value, the larger the expansion area.
|
||||||
parser.add_argument("--text-det-unclip-ratio", type=float, default=1.5)
|
parser.add_argument("--text-det-unclip-ratio", type=float, default=1.5)
|
||||||
|
# Text recognition threshold. Text results with scores greater than this threshold are retained.
|
||||||
parser.add_argument("--text-rec-score-thresh", type=float, default=0.0)
|
parser.add_argument("--text-rec-score-thresh", type=float, default=0.0)
|
||||||
parser.add_argument("--line-tolerance", type=float, default=0.6)
|
# text location
|
||||||
parser.add_argument("--min-box-score", type=float, default=0.0)
|
|
||||||
parser.add_argument("--lang", default="es")
|
parser.add_argument("--lang", default="es")
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
@@ -140,12 +150,15 @@ def main():
|
|||||||
tp0 = time.time()
|
tp0 = time.time()
|
||||||
out = ocr.predict(
|
out = ocr.predict(
|
||||||
arr,
|
arr,
|
||||||
|
use_doc_orientation_classify=args.use_doc_orientation_classify,
|
||||||
|
use_doc_unwarping=args.use_doc_unwarping,
|
||||||
|
use_textline_orientation=args.textline_orientation, #str2bool Whether to use text line orientation classification.
|
||||||
|
text_det_thresh=args.text_det_thresh,
|
||||||
text_det_box_thresh=args.text_det_box_thresh,
|
text_det_box_thresh=args.text_det_box_thresh,
|
||||||
text_det_unclip_ratio=args.text_det_unclip_ratio,
|
text_det_unclip_ratio=args.text_det_unclip_ratio,
|
||||||
text_rec_score_thresh=args.text_rec_score_thresh,
|
text_rec_score_thresh=args.text_rec_score_thresh
|
||||||
use_textline_orientation=args.textline_orientation
|
|
||||||
)
|
)
|
||||||
pred = assemble_from_paddle_result(out, args.min_box_score, args.line_tolerance)
|
pred = assemble_from_paddle_result(out)
|
||||||
time_per_page_list.append(float(time.time() - tp0))
|
time_per_page_list.append(float(time.time() - tp0))
|
||||||
m = evaluate_text(ref, pred)
|
m = evaluate_text(ref, pred)
|
||||||
cer_list.append(m["CER"])
|
cer_list.append(m["CER"])
|
||||||
504
src/prepare_dataset.ipynb
Normal file
504
src/prepare_dataset.ipynb
Normal file
@@ -0,0 +1,504 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 1,
|
||||||
|
"id": "93809ffc",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"Requirement already satisfied: pip in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (25.3)\n",
|
||||||
|
"Note: you may need to restart the kernel to use updated packages.\n",
|
||||||
|
"Requirement already satisfied: jupyter in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (1.1.1)\n",
|
||||||
|
"Requirement already satisfied: notebook in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyter) (7.5.0)\n",
|
||||||
|
"Requirement already satisfied: jupyter-console in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyter) (6.6.3)\n",
|
||||||
|
"Requirement already satisfied: nbconvert in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyter) (7.16.6)\n",
|
||||||
|
"Requirement already satisfied: ipykernel in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyter) (7.1.0)\n",
|
||||||
|
"Requirement already satisfied: ipywidgets in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyter) (8.1.8)\n",
|
||||||
|
"Requirement already satisfied: jupyterlab in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyter) (4.5.0)\n",
|
||||||
|
"Requirement already satisfied: comm>=0.1.1 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipykernel->jupyter) (0.2.3)\n",
|
||||||
|
"Requirement already satisfied: debugpy>=1.6.5 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipykernel->jupyter) (1.8.17)\n",
|
||||||
|
"Requirement already satisfied: ipython>=7.23.1 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipykernel->jupyter) (9.8.0)\n",
|
||||||
|
"Requirement already satisfied: jupyter-client>=8.0.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipykernel->jupyter) (8.6.3)\n",
|
||||||
|
"Requirement already satisfied: jupyter-core!=5.0.*,>=4.12 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipykernel->jupyter) (5.9.1)\n",
|
||||||
|
"Requirement already satisfied: matplotlib-inline>=0.1 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipykernel->jupyter) (0.2.1)\n",
|
||||||
|
"Requirement already satisfied: nest-asyncio>=1.4 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipykernel->jupyter) (1.6.0)\n",
|
||||||
|
"Requirement already satisfied: packaging>=22 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipykernel->jupyter) (25.0)\n",
|
||||||
|
"Requirement already satisfied: psutil>=5.7 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipykernel->jupyter) (7.1.3)\n",
|
||||||
|
"Requirement already satisfied: pyzmq>=25 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipykernel->jupyter) (27.1.0)\n",
|
||||||
|
"Requirement already satisfied: tornado>=6.2 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipykernel->jupyter) (6.5.2)\n",
|
||||||
|
"Requirement already satisfied: traitlets>=5.4.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipykernel->jupyter) (5.14.3)\n",
|
||||||
|
"Requirement already satisfied: colorama>=0.4.4 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipython>=7.23.1->ipykernel->jupyter) (0.4.6)\n",
|
||||||
|
"Requirement already satisfied: decorator>=4.3.2 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipython>=7.23.1->ipykernel->jupyter) (5.2.1)\n",
|
||||||
|
"Requirement already satisfied: ipython-pygments-lexers>=1.0.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipython>=7.23.1->ipykernel->jupyter) (1.1.1)\n",
|
||||||
|
"Requirement already satisfied: jedi>=0.18.1 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipython>=7.23.1->ipykernel->jupyter) (0.19.2)\n",
|
||||||
|
"Requirement already satisfied: prompt_toolkit<3.1.0,>=3.0.41 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipython>=7.23.1->ipykernel->jupyter) (3.0.52)\n",
|
||||||
|
"Requirement already satisfied: pygments>=2.11.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipython>=7.23.1->ipykernel->jupyter) (2.19.2)\n",
|
||||||
|
"Requirement already satisfied: stack_data>=0.6.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipython>=7.23.1->ipykernel->jupyter) (0.6.3)\n",
|
||||||
|
"Requirement already satisfied: typing_extensions>=4.6 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipython>=7.23.1->ipykernel->jupyter) (4.15.0)\n",
|
||||||
|
"Requirement already satisfied: wcwidth in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from prompt_toolkit<3.1.0,>=3.0.41->ipython>=7.23.1->ipykernel->jupyter) (0.2.14)\n",
|
||||||
|
"Requirement already satisfied: parso<0.9.0,>=0.8.4 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jedi>=0.18.1->ipython>=7.23.1->ipykernel->jupyter) (0.8.5)\n",
|
||||||
|
"Requirement already satisfied: python-dateutil>=2.8.2 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyter-client>=8.0.0->ipykernel->jupyter) (2.9.0.post0)\n",
|
||||||
|
"Requirement already satisfied: platformdirs>=2.5 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyter-core!=5.0.*,>=4.12->ipykernel->jupyter) (4.5.1)\n",
|
||||||
|
"Requirement already satisfied: six>=1.5 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from python-dateutil>=2.8.2->jupyter-client>=8.0.0->ipykernel->jupyter) (1.17.0)\n",
|
||||||
|
"Requirement already satisfied: executing>=1.2.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from stack_data>=0.6.0->ipython>=7.23.1->ipykernel->jupyter) (2.2.1)\n",
|
||||||
|
"Requirement already satisfied: asttokens>=2.1.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from stack_data>=0.6.0->ipython>=7.23.1->ipykernel->jupyter) (3.0.1)\n",
|
||||||
|
"Requirement already satisfied: pure-eval in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from stack_data>=0.6.0->ipython>=7.23.1->ipykernel->jupyter) (0.2.3)\n",
|
||||||
|
"Requirement already satisfied: widgetsnbextension~=4.0.14 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipywidgets->jupyter) (4.0.15)\n",
|
||||||
|
"Requirement already satisfied: jupyterlab_widgets~=3.0.15 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipywidgets->jupyter) (3.0.16)\n",
|
||||||
|
"Requirement already satisfied: async-lru>=1.0.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyterlab->jupyter) (2.0.5)\n",
|
||||||
|
"Requirement already satisfied: httpx<1,>=0.25.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyterlab->jupyter) (0.28.1)\n",
|
||||||
|
"Requirement already satisfied: jinja2>=3.0.3 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyterlab->jupyter) (3.1.6)\n",
|
||||||
|
"Requirement already satisfied: jupyter-lsp>=2.0.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyterlab->jupyter) (2.3.0)\n",
|
||||||
|
"Requirement already satisfied: jupyter-server<3,>=2.4.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyterlab->jupyter) (2.17.0)\n",
|
||||||
|
"Requirement already satisfied: jupyterlab-server<3,>=2.28.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyterlab->jupyter) (2.28.0)\n",
|
||||||
|
"Requirement already satisfied: notebook-shim>=0.2 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyterlab->jupyter) (0.2.4)\n",
|
||||||
|
"Requirement already satisfied: setuptools>=41.1.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyterlab->jupyter) (65.5.0)\n",
|
||||||
|
"Requirement already satisfied: anyio in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from httpx<1,>=0.25.0->jupyterlab->jupyter) (4.12.0)\n",
|
||||||
|
"Requirement already satisfied: certifi in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from httpx<1,>=0.25.0->jupyterlab->jupyter) (2025.11.12)\n",
|
||||||
|
"Requirement already satisfied: httpcore==1.* in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from httpx<1,>=0.25.0->jupyterlab->jupyter) (1.0.9)\n",
|
||||||
|
"Requirement already satisfied: idna in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from httpx<1,>=0.25.0->jupyterlab->jupyter) (3.11)\n",
|
||||||
|
"Requirement already satisfied: h11>=0.16 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from httpcore==1.*->httpx<1,>=0.25.0->jupyterlab->jupyter) (0.16.0)\n",
|
||||||
|
"Requirement already satisfied: argon2-cffi>=21.1 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (25.1.0)\n",
|
||||||
|
"Requirement already satisfied: jupyter-events>=0.11.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (0.12.0)\n",
|
||||||
|
"Requirement already satisfied: jupyter-server-terminals>=0.4.4 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (0.5.3)\n",
|
||||||
|
"Requirement already satisfied: nbformat>=5.3.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (5.10.4)\n",
|
||||||
|
"Requirement already satisfied: overrides>=5.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (7.7.0)\n",
|
||||||
|
"Requirement already satisfied: prometheus-client>=0.9 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (0.23.1)\n",
|
||||||
|
"Requirement already satisfied: pywinpty>=2.0.1 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (3.0.2)\n",
|
||||||
|
"Requirement already satisfied: send2trash>=1.8.2 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (1.8.3)\n",
|
||||||
|
"Requirement already satisfied: terminado>=0.8.3 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (0.18.1)\n",
|
||||||
|
"Requirement already satisfied: websocket-client>=1.7 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (1.9.0)\n",
|
||||||
|
"Requirement already satisfied: babel>=2.10 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyterlab-server<3,>=2.28.0->jupyterlab->jupyter) (2.17.0)\n",
|
||||||
|
"Requirement already satisfied: json5>=0.9.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyterlab-server<3,>=2.28.0->jupyterlab->jupyter) (0.12.1)\n",
|
||||||
|
"Requirement already satisfied: jsonschema>=4.18.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyterlab-server<3,>=2.28.0->jupyterlab->jupyter) (4.25.1)\n",
|
||||||
|
"Requirement already satisfied: requests>=2.31 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyterlab-server<3,>=2.28.0->jupyterlab->jupyter) (2.32.5)\n",
|
||||||
|
"Requirement already satisfied: argon2-cffi-bindings in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from argon2-cffi>=21.1->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (25.1.0)\n",
|
||||||
|
"Requirement already satisfied: MarkupSafe>=2.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jinja2>=3.0.3->jupyterlab->jupyter) (3.0.3)\n",
|
||||||
|
"Requirement already satisfied: attrs>=22.2.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jsonschema>=4.18.0->jupyterlab-server<3,>=2.28.0->jupyterlab->jupyter) (25.4.0)\n",
|
||||||
|
"Requirement already satisfied: jsonschema-specifications>=2023.03.6 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jsonschema>=4.18.0->jupyterlab-server<3,>=2.28.0->jupyterlab->jupyter) (2025.9.1)\n",
|
||||||
|
"Requirement already satisfied: referencing>=0.28.4 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jsonschema>=4.18.0->jupyterlab-server<3,>=2.28.0->jupyterlab->jupyter) (0.37.0)\n",
|
||||||
|
"Requirement already satisfied: rpds-py>=0.7.1 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jsonschema>=4.18.0->jupyterlab-server<3,>=2.28.0->jupyterlab->jupyter) (0.30.0)\n",
|
||||||
|
"Requirement already satisfied: python-json-logger>=2.0.4 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (4.0.0)\n",
|
||||||
|
"Requirement already satisfied: pyyaml>=5.3 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (6.0.2)\n",
|
||||||
|
"Requirement already satisfied: rfc3339-validator in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (0.1.4)\n",
|
||||||
|
"Requirement already satisfied: rfc3986-validator>=0.1.1 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (0.1.1)\n",
|
||||||
|
"Requirement already satisfied: fqdn in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (1.5.1)\n",
|
||||||
|
"Requirement already satisfied: isoduration in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (20.11.0)\n",
|
||||||
|
"Requirement already satisfied: jsonpointer>1.13 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (3.0.0)\n",
|
||||||
|
"Requirement already satisfied: rfc3987-syntax>=1.1.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (1.1.0)\n",
|
||||||
|
"Requirement already satisfied: uri-template in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (1.3.0)\n",
|
||||||
|
"Requirement already satisfied: webcolors>=24.6.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (25.10.0)\n",
|
||||||
|
"Requirement already satisfied: beautifulsoup4 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from nbconvert->jupyter) (4.14.3)\n",
|
||||||
|
"Requirement already satisfied: bleach!=5.0.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from bleach[css]!=5.0.0->nbconvert->jupyter) (6.3.0)\n",
|
||||||
|
"Requirement already satisfied: defusedxml in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from nbconvert->jupyter) (0.7.1)\n",
|
||||||
|
"Requirement already satisfied: jupyterlab-pygments in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from nbconvert->jupyter) (0.3.0)\n",
|
||||||
|
"Requirement already satisfied: mistune<4,>=2.0.3 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from nbconvert->jupyter) (3.1.4)\n",
|
||||||
|
"Requirement already satisfied: nbclient>=0.5.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from nbconvert->jupyter) (0.10.2)\n",
|
||||||
|
"Requirement already satisfied: pandocfilters>=1.4.1 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from nbconvert->jupyter) (1.5.1)\n",
|
||||||
|
"Requirement already satisfied: webencodings in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from bleach!=5.0.0->bleach[css]!=5.0.0->nbconvert->jupyter) (0.5.1)\n",
|
||||||
|
"Requirement already satisfied: tinycss2<1.5,>=1.1.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from bleach[css]!=5.0.0->nbconvert->jupyter) (1.4.0)\n",
|
||||||
|
"Requirement already satisfied: fastjsonschema>=2.15 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from nbformat>=5.3.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (2.21.2)\n",
|
||||||
|
"Requirement already satisfied: charset_normalizer<4,>=2 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from requests>=2.31->jupyterlab-server<3,>=2.28.0->jupyterlab->jupyter) (3.4.4)\n",
|
||||||
|
"Requirement already satisfied: urllib3<3,>=1.21.1 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from requests>=2.31->jupyterlab-server<3,>=2.28.0->jupyterlab->jupyter) (2.6.0)\n",
|
||||||
|
"Requirement already satisfied: lark>=1.2.2 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from rfc3987-syntax>=1.1.0->jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (1.3.1)\n",
|
||||||
|
"Requirement already satisfied: cffi>=1.0.1 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from argon2-cffi-bindings->argon2-cffi>=21.1->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (2.0.0)\n",
|
||||||
|
"Requirement already satisfied: pycparser in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from cffi>=1.0.1->argon2-cffi-bindings->argon2-cffi>=21.1->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (2.23)\n",
|
||||||
|
"Requirement already satisfied: soupsieve>=1.6.1 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from beautifulsoup4->nbconvert->jupyter) (2.8)\n",
|
||||||
|
"Requirement already satisfied: arrow>=0.15.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from isoduration->jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (1.4.0)\n",
|
||||||
|
"Requirement already satisfied: tzdata in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from arrow>=0.15.0->isoduration->jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (2025.2)\n",
|
||||||
|
"Note: you may need to restart the kernel to use updated packages.\n",
|
||||||
|
"Requirement already satisfied: ipywidgets in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (8.1.8)\n",
|
||||||
|
"Requirement already satisfied: comm>=0.1.3 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipywidgets) (0.2.3)\n",
|
||||||
|
"Requirement already satisfied: ipython>=6.1.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipywidgets) (9.8.0)\n",
|
||||||
|
"Requirement already satisfied: traitlets>=4.3.1 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipywidgets) (5.14.3)\n",
|
||||||
|
"Requirement already satisfied: widgetsnbextension~=4.0.14 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipywidgets) (4.0.15)\n",
|
||||||
|
"Requirement already satisfied: jupyterlab_widgets~=3.0.15 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipywidgets) (3.0.16)\n",
|
||||||
|
"Requirement already satisfied: colorama>=0.4.4 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (0.4.6)\n",
|
||||||
|
"Requirement already satisfied: decorator>=4.3.2 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (5.2.1)\n",
|
||||||
|
"Requirement already satisfied: ipython-pygments-lexers>=1.0.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (1.1.1)\n",
|
||||||
|
"Requirement already satisfied: jedi>=0.18.1 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (0.19.2)\n",
|
||||||
|
"Requirement already satisfied: matplotlib-inline>=0.1.5 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (0.2.1)\n",
|
||||||
|
"Requirement already satisfied: prompt_toolkit<3.1.0,>=3.0.41 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (3.0.52)\n",
|
||||||
|
"Requirement already satisfied: pygments>=2.11.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (2.19.2)\n",
|
||||||
|
"Requirement already satisfied: stack_data>=0.6.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (0.6.3)\n",
|
||||||
|
"Requirement already satisfied: typing_extensions>=4.6 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (4.15.0)\n",
|
||||||
|
"Requirement already satisfied: wcwidth in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from prompt_toolkit<3.1.0,>=3.0.41->ipython>=6.1.0->ipywidgets) (0.2.14)\n",
|
||||||
|
"Requirement already satisfied: parso<0.9.0,>=0.8.4 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jedi>=0.18.1->ipython>=6.1.0->ipywidgets) (0.8.5)\n",
|
||||||
|
"Requirement already satisfied: executing>=1.2.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from stack_data>=0.6.0->ipython>=6.1.0->ipywidgets) (2.2.1)\n",
|
||||||
|
"Requirement already satisfied: asttokens>=2.1.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from stack_data>=0.6.0->ipython>=6.1.0->ipywidgets) (3.0.1)\n",
|
||||||
|
"Requirement already satisfied: pure-eval in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from stack_data>=0.6.0->ipython>=6.1.0->ipywidgets) (0.2.3)\n",
|
||||||
|
"Note: you may need to restart the kernel to use updated packages.\n",
|
||||||
|
"Requirement already satisfied: ipykernel in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (7.1.0)\n",
|
||||||
|
"Requirement already satisfied: comm>=0.1.1 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipykernel) (0.2.3)\n",
|
||||||
|
"Requirement already satisfied: debugpy>=1.6.5 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipykernel) (1.8.17)\n",
|
||||||
|
"Requirement already satisfied: ipython>=7.23.1 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipykernel) (9.8.0)\n",
|
||||||
|
"Requirement already satisfied: jupyter-client>=8.0.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipykernel) (8.6.3)\n",
|
||||||
|
"Requirement already satisfied: jupyter-core!=5.0.*,>=4.12 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipykernel) (5.9.1)\n",
|
||||||
|
"Requirement already satisfied: matplotlib-inline>=0.1 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipykernel) (0.2.1)\n",
|
||||||
|
"Requirement already satisfied: nest-asyncio>=1.4 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipykernel) (1.6.0)\n",
|
||||||
|
"Requirement already satisfied: packaging>=22 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipykernel) (25.0)\n",
|
||||||
|
"Requirement already satisfied: psutil>=5.7 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipykernel) (7.1.3)\n",
|
||||||
|
"Requirement already satisfied: pyzmq>=25 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipykernel) (27.1.0)\n",
|
||||||
|
"Requirement already satisfied: tornado>=6.2 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipykernel) (6.5.2)\n",
|
||||||
|
"Requirement already satisfied: traitlets>=5.4.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipykernel) (5.14.3)\n",
|
||||||
|
"Requirement already satisfied: colorama>=0.4.4 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipython>=7.23.1->ipykernel) (0.4.6)\n",
|
||||||
|
"Requirement already satisfied: decorator>=4.3.2 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipython>=7.23.1->ipykernel) (5.2.1)\n",
|
||||||
|
"Requirement already satisfied: ipython-pygments-lexers>=1.0.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipython>=7.23.1->ipykernel) (1.1.1)\n",
|
||||||
|
"Requirement already satisfied: jedi>=0.18.1 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipython>=7.23.1->ipykernel) (0.19.2)\n",
|
||||||
|
"Requirement already satisfied: prompt_toolkit<3.1.0,>=3.0.41 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipython>=7.23.1->ipykernel) (3.0.52)\n",
|
||||||
|
"Requirement already satisfied: pygments>=2.11.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipython>=7.23.1->ipykernel) (2.19.2)\n",
|
||||||
|
"Requirement already satisfied: stack_data>=0.6.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipython>=7.23.1->ipykernel) (0.6.3)\n",
|
||||||
|
"Requirement already satisfied: typing_extensions>=4.6 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ipython>=7.23.1->ipykernel) (4.15.0)\n",
|
||||||
|
"Requirement already satisfied: wcwidth in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from prompt_toolkit<3.1.0,>=3.0.41->ipython>=7.23.1->ipykernel) (0.2.14)\n",
|
||||||
|
"Requirement already satisfied: parso<0.9.0,>=0.8.4 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jedi>=0.18.1->ipython>=7.23.1->ipykernel) (0.8.5)\n",
|
||||||
|
"Requirement already satisfied: python-dateutil>=2.8.2 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyter-client>=8.0.0->ipykernel) (2.9.0.post0)\n",
|
||||||
|
"Requirement already satisfied: platformdirs>=2.5 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jupyter-core!=5.0.*,>=4.12->ipykernel) (4.5.1)\n",
|
||||||
|
"Requirement already satisfied: six>=1.5 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from python-dateutil>=2.8.2->jupyter-client>=8.0.0->ipykernel) (1.17.0)\n",
|
||||||
|
"Requirement already satisfied: executing>=1.2.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from stack_data>=0.6.0->ipython>=7.23.1->ipykernel) (2.2.1)\n",
|
||||||
|
"Requirement already satisfied: asttokens>=2.1.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from stack_data>=0.6.0->ipython>=7.23.1->ipykernel) (3.0.1)\n",
|
||||||
|
"Requirement already satisfied: pure-eval in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from stack_data>=0.6.0->ipython>=7.23.1->ipykernel) (0.2.3)\n",
|
||||||
|
"Note: you may need to restart the kernel to use updated packages.\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"%pip install --upgrade pip\n",
|
||||||
|
"%pip install --upgrade jupyter\n",
|
||||||
|
"%pip install --upgrade ipywidgets\n",
|
||||||
|
"%pip install --upgrade ipykernel"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 2,
|
||||||
|
"id": "48724594",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"Collecting pdf2image\n",
|
||||||
|
" Using cached pdf2image-1.17.0-py3-none-any.whl.metadata (6.2 kB)\n",
|
||||||
|
"Requirement already satisfied: pillow in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (12.0.0)\n",
|
||||||
|
"Using cached pdf2image-1.17.0-py3-none-any.whl (11 kB)\n",
|
||||||
|
"Installing collected packages: pdf2image\n",
|
||||||
|
"Successfully installed pdf2image-1.17.0\n",
|
||||||
|
"Note: you may need to restart the kernel to use updated packages.\n",
|
||||||
|
"Collecting PyMuPDF\n",
|
||||||
|
" Using cached pymupdf-1.26.6-cp310-abi3-win_amd64.whl.metadata (3.4 kB)\n",
|
||||||
|
"Using cached pymupdf-1.26.6-cp310-abi3-win_amd64.whl (18.4 MB)\n",
|
||||||
|
"Installing collected packages: PyMuPDF\n",
|
||||||
|
"Successfully installed PyMuPDF-1.26.6\n",
|
||||||
|
"Note: you may need to restart the kernel to use updated packages.\n",
|
||||||
|
"Requirement already satisfied: pandas in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (2.3.3)\n",
|
||||||
|
"Requirement already satisfied: numpy>=1.23.2 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from pandas) (2.3.5)\n",
|
||||||
|
"Requirement already satisfied: python-dateutil>=2.8.2 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from pandas) (2.9.0.post0)\n",
|
||||||
|
"Requirement already satisfied: pytz>=2020.1 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from pandas) (2025.2)\n",
|
||||||
|
"Requirement already satisfied: tzdata>=2022.7 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from pandas) (2025.2)\n",
|
||||||
|
"Requirement already satisfied: six>=1.5 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from python-dateutil>=2.8.2->pandas) (1.17.0)\n",
|
||||||
|
"Note: you may need to restart the kernel to use updated packages.\n",
|
||||||
|
"Requirement already satisfied: matplotlib in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (3.10.7)\n",
|
||||||
|
"Requirement already satisfied: contourpy>=1.0.1 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from matplotlib) (1.3.3)\n",
|
||||||
|
"Requirement already satisfied: cycler>=0.10 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from matplotlib) (0.12.1)\n",
|
||||||
|
"Requirement already satisfied: fonttools>=4.22.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from matplotlib) (4.61.0)\n",
|
||||||
|
"Requirement already satisfied: kiwisolver>=1.3.1 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from matplotlib) (1.4.9)\n",
|
||||||
|
"Requirement already satisfied: numpy>=1.23 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from matplotlib) (2.3.5)\n",
|
||||||
|
"Requirement already satisfied: packaging>=20.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from matplotlib) (25.0)\n",
|
||||||
|
"Requirement already satisfied: pillow>=8 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from matplotlib) (12.0.0)\n",
|
||||||
|
"Requirement already satisfied: pyparsing>=3 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from matplotlib) (3.2.5)\n",
|
||||||
|
"Requirement already satisfied: python-dateutil>=2.7 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from matplotlib) (2.9.0.post0)\n",
|
||||||
|
"Requirement already satisfied: six>=1.5 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from python-dateutil>=2.7->matplotlib) (1.17.0)\n",
|
||||||
|
"Note: you may need to restart the kernel to use updated packages.\n",
|
||||||
|
"Requirement already satisfied: seaborn in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (0.13.2)\n",
|
||||||
|
"Requirement already satisfied: numpy!=1.24.0,>=1.20 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from seaborn) (2.3.5)\n",
|
||||||
|
"Requirement already satisfied: pandas>=1.2 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from seaborn) (2.3.3)\n",
|
||||||
|
"Requirement already satisfied: matplotlib!=3.6.1,>=3.4 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from seaborn) (3.10.7)\n",
|
||||||
|
"Requirement already satisfied: contourpy>=1.0.1 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (1.3.3)\n",
|
||||||
|
"Requirement already satisfied: cycler>=0.10 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (0.12.1)\n",
|
||||||
|
"Requirement already satisfied: fonttools>=4.22.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (4.61.0)\n",
|
||||||
|
"Requirement already satisfied: kiwisolver>=1.3.1 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (1.4.9)\n",
|
||||||
|
"Requirement already satisfied: packaging>=20.0 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (25.0)\n",
|
||||||
|
"Requirement already satisfied: pillow>=8 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (12.0.0)\n",
|
||||||
|
"Requirement already satisfied: pyparsing>=3 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (3.2.5)\n",
|
||||||
|
"Requirement already satisfied: python-dateutil>=2.7 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (2.9.0.post0)\n",
|
||||||
|
"Requirement already satisfied: pytz>=2020.1 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from pandas>=1.2->seaborn) (2025.2)\n",
|
||||||
|
"Requirement already satisfied: tzdata>=2022.7 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from pandas>=1.2->seaborn) (2025.2)\n",
|
||||||
|
"Requirement already satisfied: six>=1.5 in c:\\users\\sergio\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from python-dateutil>=2.7->matplotlib!=3.6.1,>=3.4->seaborn) (1.17.0)\n",
|
||||||
|
"Note: you may need to restart the kernel to use updated packages.\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"# Install necessary packages\n",
|
||||||
|
"%pip install pdf2image pillow \n",
|
||||||
|
"# pdf reading\n",
|
||||||
|
"%pip install PyMuPDF\n",
|
||||||
|
"\n",
|
||||||
|
"# Data analysis and visualization\n",
|
||||||
|
"%pip install pandas\n",
|
||||||
|
"%pip install matplotlib\n",
|
||||||
|
"%pip install seaborn"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 3,
|
||||||
|
"id": "e1f793b6",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import os, json\n",
|
||||||
|
"import numpy as np\n",
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"import matplotlib.pyplot as plt\n",
|
||||||
|
"from pdf2image import convert_from_path\n",
|
||||||
|
"from PIL import Image, ImageOps\n",
|
||||||
|
"import fitz # PyMuPDF\n",
|
||||||
|
"import re\n",
|
||||||
|
"from datetime import datetime\n",
|
||||||
|
"from typing import List\n",
|
||||||
|
"import shutil"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 4,
|
||||||
|
"id": "1652a78e",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"def pdf_to_images(pdf_path: str, output_dir: str, dpi: int = 300):\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" Render a PDF into a list of PIL Images using PyMuPDF or pdf2image.\n",
|
||||||
|
" 'pages' is 1-based (e.g., range(1, 10) -> pages 1–9).\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" if fitz is not None:\n",
|
||||||
|
" doc = fitz.open(pdf_path)\n",
|
||||||
|
" total_pages = len(doc)\n",
|
||||||
|
"\n",
|
||||||
|
" # Adjust page indices (PyMuPDF uses 0-based indexing)\n",
|
||||||
|
" page_indices = list(range(total_pages))\n",
|
||||||
|
"\n",
|
||||||
|
" for i in page_indices:\n",
|
||||||
|
" page = doc.load_page(i)\n",
|
||||||
|
" mat = fitz.Matrix(dpi / 72.0, dpi / 72.0)\n",
|
||||||
|
" pix = page.get_pixmap(matrix=mat, alpha=False)\n",
|
||||||
|
" img = Image.frombytes(\"RGB\", [pix.width, pix.height], pix.samples)\n",
|
||||||
|
" # Build filename\n",
|
||||||
|
" out_path = os.path.join(\n",
|
||||||
|
" output_dir,\n",
|
||||||
|
" f\"page_{i + 1:04d}.png\"\n",
|
||||||
|
" )\n",
|
||||||
|
"\n",
|
||||||
|
" img.save(out_path, \"PNG\")\n",
|
||||||
|
" doc.close()\n",
|
||||||
|
" else:\n",
|
||||||
|
" raise RuntimeError(\"Install PyMuPDF or pdf2image to convert PDFs.\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 5,
|
||||||
|
"id": "f523dd58",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import fitz\n",
|
||||||
|
"import re\n",
|
||||||
|
"import os\n",
|
||||||
|
"\n",
|
||||||
|
"def _pdf_extract_text_structured(page, margin_threshold=50):\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" Extract text using PyMuPDF's dict mode which preserves\n",
|
||||||
|
" the actual line structure from the PDF.\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" data = page.get_text(\"dict\")\n",
|
||||||
|
" \n",
|
||||||
|
" # Collect all lines with their Y position\n",
|
||||||
|
" all_lines = []\n",
|
||||||
|
" margin_text_parts = [] # Collect vertical/margin text\n",
|
||||||
|
" margin_y_positions = []\n",
|
||||||
|
" \n",
|
||||||
|
" for block in data.get(\"blocks\", []):\n",
|
||||||
|
" if block.get(\"type\") != 0: # Skip non-text blocks\n",
|
||||||
|
" continue\n",
|
||||||
|
" \n",
|
||||||
|
" block_bbox = block.get(\"bbox\", (0, 0, 0, 0))\n",
|
||||||
|
" block_width = block_bbox[2] - block_bbox[0]\n",
|
||||||
|
" block_height = block_bbox[3] - block_bbox[1]\n",
|
||||||
|
" \n",
|
||||||
|
" # Detect vertical/margin text\n",
|
||||||
|
" is_margin_text = (block_bbox[0] < margin_threshold or \n",
|
||||||
|
" block_height > block_width * 2)\n",
|
||||||
|
" \n",
|
||||||
|
" for line in block.get(\"lines\", []):\n",
|
||||||
|
" direction = line.get(\"dir\", (1, 0))\n",
|
||||||
|
" bbox = line.get(\"bbox\", (0, 0, 0, 0))\n",
|
||||||
|
" y_center = (bbox[1] + bbox[3]) / 2\n",
|
||||||
|
" x_start = bbox[0]\n",
|
||||||
|
" \n",
|
||||||
|
" # Collect text from all spans\n",
|
||||||
|
" line_text = \"\"\n",
|
||||||
|
" for span in line.get(\"spans\", []):\n",
|
||||||
|
" text = span.get(\"text\", \"\")\n",
|
||||||
|
" line_text += text\n",
|
||||||
|
" \n",
|
||||||
|
" line_text = line_text.strip()\n",
|
||||||
|
" line_text = re.sub(r\"[•▪◦●❖▶■\\uf000-\\uf0ff]\", \"\", line_text)\n",
|
||||||
|
" \n",
|
||||||
|
" if not line_text:\n",
|
||||||
|
" continue\n",
|
||||||
|
" \n",
|
||||||
|
" # Check if this is margin/vertical text\n",
|
||||||
|
" if is_margin_text or abs(direction[0]) < 0.9:\n",
|
||||||
|
" margin_text_parts.append((y_center, line_text))\n",
|
||||||
|
" margin_y_positions.append(y_center)\n",
|
||||||
|
" else:\n",
|
||||||
|
" all_lines.append((y_center, x_start, line_text))\n",
|
||||||
|
" \n",
|
||||||
|
" # Reconstruct margin text as single line at its vertical center\n",
|
||||||
|
" if margin_text_parts:\n",
|
||||||
|
" # Sort by Y position (top to bottom) and join\n",
|
||||||
|
" margin_text_parts.sort(key=lambda x: x[0])\n",
|
||||||
|
" full_margin_text = \" \".join(part[1] for part in margin_text_parts)\n",
|
||||||
|
" # Calculate vertical center of the watermark\n",
|
||||||
|
" avg_y = sum(margin_y_positions) / len(margin_y_positions)\n",
|
||||||
|
" # Add as a single line\n",
|
||||||
|
" all_lines.append((avg_y, -1, full_margin_text)) # x=-1 to sort first\n",
|
||||||
|
" \n",
|
||||||
|
" if not all_lines:\n",
|
||||||
|
" return \"\"\n",
|
||||||
|
" \n",
|
||||||
|
" # Sort by Y first, then by X\n",
|
||||||
|
" all_lines.sort(key=lambda x: (x[0], x[1]))\n",
|
||||||
|
" \n",
|
||||||
|
" # Group lines at same vertical position\n",
|
||||||
|
" merged_rows = []\n",
|
||||||
|
" current_row = [all_lines[0]]\n",
|
||||||
|
" current_y = all_lines[0][0]\n",
|
||||||
|
" \n",
|
||||||
|
" for y_center, x_start, text in all_lines[1:]:\n",
|
||||||
|
" if abs(y_center - current_y) <= 2:\n",
|
||||||
|
" current_row.append((y_center, x_start, text))\n",
|
||||||
|
" else:\n",
|
||||||
|
" current_row.sort(key=lambda x: x[1])\n",
|
||||||
|
" row_text = \" \".join(item[2] for item in current_row)\n",
|
||||||
|
" merged_rows.append((current_y, row_text))\n",
|
||||||
|
" current_row = [(y_center, x_start, text)]\n",
|
||||||
|
" current_y = y_center\n",
|
||||||
|
" \n",
|
||||||
|
" if current_row:\n",
|
||||||
|
" current_row.sort(key=lambda x: x[1])\n",
|
||||||
|
" row_text = \" \".join(item[2] for item in current_row)\n",
|
||||||
|
" merged_rows.append((current_y, row_text))\n",
|
||||||
|
" \n",
|
||||||
|
" # Sort rows by Y and extract text\n",
|
||||||
|
" merged_rows.sort(key=lambda x: x[0])\n",
|
||||||
|
" lines = [row[1] for row in merged_rows]\n",
|
||||||
|
" \n",
|
||||||
|
" # Join and clean up\n",
|
||||||
|
" text = \"\\n\".join(lines)\n",
|
||||||
|
" text = re.sub(r\" +\", \" \", text).strip()\n",
|
||||||
|
" text = re.sub(r\"\\n{3,}\", \"\\n\\n\", text).strip()\n",
|
||||||
|
" \n",
|
||||||
|
" return text\n",
|
||||||
|
"\n",
|
||||||
|
"def pdf_extract_text(pdf_path, output_dir, margin_threshold=50):\n",
|
||||||
|
" os.makedirs(output_dir, exist_ok=True)\n",
|
||||||
|
" doc = fitz.open(pdf_path)\n",
|
||||||
|
" \n",
|
||||||
|
" for i, page in enumerate(doc):\n",
|
||||||
|
" text = _pdf_extract_text_structured(page, margin_threshold)\n",
|
||||||
|
" if not text.strip():\n",
|
||||||
|
" continue\n",
|
||||||
|
" out_path = os.path.join(output_dir, f\"page_{i + 1:04d}.txt\")\n",
|
||||||
|
" with open(out_path, \"w\", encoding=\"utf-8\") as f:\n",
|
||||||
|
" f.write(text)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 8,
|
||||||
|
"id": "9f64a8c0",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"PDF_FOLDER = './../instructions' # Folder containing PDF files\n",
|
||||||
|
"OUTPUT_FOLDER = './dataset'\n",
|
||||||
|
"\n",
|
||||||
|
"os.makedirs(OUTPUT_FOLDER, exist_ok=True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 9,
|
||||||
|
"id": "41e4651d",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"i = 0\n",
|
||||||
|
"\n",
|
||||||
|
"pdf_files = sorted([\n",
|
||||||
|
" fname for fname in os.listdir(PDF_FOLDER)\n",
|
||||||
|
" if fname.lower().endswith(\".pdf\")\n",
|
||||||
|
"])\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"for fname in pdf_files:\n",
|
||||||
|
" # build output directories\n",
|
||||||
|
" out_img_path = os.path.join(OUTPUT_FOLDER, str(i), \"img\")\n",
|
||||||
|
" out_txt_path = os.path.join(OUTPUT_FOLDER, str(i), \"txt\")\n",
|
||||||
|
"\n",
|
||||||
|
" os.makedirs(out_img_path, exist_ok=True)\n",
|
||||||
|
" os.makedirs(out_txt_path, exist_ok=True)\n",
|
||||||
|
"\n",
|
||||||
|
" # source and destination PDF paths\n",
|
||||||
|
" src_pdf = os.path.join(PDF_FOLDER, fname)\n",
|
||||||
|
" pdf_path = os.path.join(OUTPUT_FOLDER, str(i), fname)\n",
|
||||||
|
"\n",
|
||||||
|
" # copy PDF into numbered folder\n",
|
||||||
|
" shutil.copy(src_pdf, pdf_path)\n",
|
||||||
|
"\n",
|
||||||
|
" # convert PDF → images\n",
|
||||||
|
" pdf_to_images(\n",
|
||||||
|
" pdf_path=pdf_path,\n",
|
||||||
|
" output_dir=out_img_path,\n",
|
||||||
|
" dpi=300\n",
|
||||||
|
" )\n",
|
||||||
|
" pdf_extract_text(\n",
|
||||||
|
" pdf_path=pdf_path,\n",
|
||||||
|
" output_dir=out_txt_path,\n",
|
||||||
|
" margin_threshold=40\n",
|
||||||
|
" )\n",
|
||||||
|
"\n",
|
||||||
|
" i += 1"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": ".venv (3.11.9)",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.11.9"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 5
|
||||||
|
}
|
||||||
65
src/raytune_paddle_subproc_results_20251207_192320.csv
Normal file
65
src/raytune_paddle_subproc_results_20251207_192320.csv
Normal file
@@ -0,0 +1,65 @@
|
|||||||
|
CER,WER,TIME,PAGES,TIME_PER_PAGE,timestamp,checkpoint_dir_name,done,training_iteration,trial_id,date,time_this_iter_s,time_total_s,pid,hostname,node_ip,time_since_restore,iterations_since_restore,config/use_doc_orientation_classify,config/use_doc_unwarping,config/textline_orientation,config/text_det_thresh,config/text_det_box_thresh,config/text_det_unclip_ratio,config/text_rec_score_thresh,logdir
|
||||||
|
0.013515850203159258,0.1050034776034098,353.85077571868896,5,70.66230463981628,1765120215,,False,1,d5238c33,2025-12-07_16-10-15,374.27777338027954,374.27777338027954,19452,LAPTOP-2OQK6GT5,127.0.0.1,374.27777338027954,1,True,False,True,0.08878208965533294,0.623029468177504,0.0,0.22994386685874743,d5238c33
|
||||||
|
0.03905195479212187,0.13208645252197226,354.61478638648987,5,70.82208666801452,1765120220,,False,1,ea8a2f7a,2025-12-07_16-10-20,374.2999520301819,374.2999520301819,7472,LAPTOP-2OQK6GT5,127.0.0.1,374.2999520301819,1,False,False,False,0.39320080607112917,0.6712014538998344,0.0,0.16880221913810864,ea8a2f7a
|
||||||
|
0.06606238373546518,0.16619192810354325,359.09717535972595,5,71.72569246292115,1765120601,,False,1,ebb12e5b,2025-12-07_16-16-41,379.5437698364258,379.5437698364258,21480,LAPTOP-2OQK6GT5,127.0.0.1,379.5437698364258,1,True,True,True,0.4328784710891528,0.23572507118228522,0.0,0.18443532434104057,ebb12e5b
|
||||||
|
0.41810946199338,0.5037103242611287,336.6613118648529,5,67.22685413360595,1765120583,,False,1,b3775034,2025-12-07_16-16-23,356.52618169784546,356.52618169784546,23084,LAPTOP-2OQK6GT5,127.0.0.1,356.52618169784546,1,True,True,False,0.06412882230680782,0.3377439247010605,0.0,0.5764053439963283,b3775034
|
||||||
|
0.1972515944870667,0.2953531713611584,350.1465151309967,5,69.93639450073242,1765120959,,False,1,bf10d370,2025-12-07_16-22-39,370.90337228775024,370.90337228775024,26140,LAPTOP-2OQK6GT5,127.0.0.1,370.90337228775024,1,True,True,True,0.6719551054359146,0.6902317374774642,0.0,0.3964896632708511,bf10d370
|
||||||
|
0.3864103728596727,0.45583610828383464,320.96620512008667,5,64.09520988464355,1765120947,,False,1,111e5a9e,2025-12-07_16-22-27,341.0712642669678,341.0712642669678,20664,LAPTOP-2OQK6GT5,127.0.0.1,341.0712642669678,1,True,False,False,0.04481600265034593,0.4832664381621284,0.0,0.5464155154391461,111e5a9e
|
||||||
|
0.5160689446919982,0.5945298276300801,326.65670347213745,5,65.2350733757019,1765121300,,False,1,415d7ba1,2025-12-07_16-28-20,347.29887080192566,347.29887080192566,23848,LAPTOP-2OQK6GT5,127.0.0.1,347.29887080192566,1,True,True,True,0.01699705273201909,0.5233849789194689,0.0,0.20833106578160068,415d7ba1
|
||||||
|
0.5025130639131208,0.5677161936883898,326.9156484603882,5,65.28343558311462,1765121310,,False,1,a58d8109,2025-12-07_16-28-30,346.09022212028503,346.09022212028503,25248,LAPTOP-2OQK6GT5,127.0.0.1,346.09022212028503,1,False,True,True,0.04024319071476844,0.6705892008057031,0.0,0.1885847677314521,a58d8109
|
||||||
|
0.07092029393242118,0.17390976502682037,368.5711796283722,5,73.62503981590271,1765121692,,False,1,33bdf2a9,2025-12-07_16-34-52,388.150607585907,388.150607585907,24024,LAPTOP-2OQK6GT5,127.0.0.1,388.150607585907,1,False,True,False,0.4347371576992484,0.490009080993297,0.0,0.1519055407457635,33bdf2a9
|
||||||
|
0.1168252568583151,0.22212978798067146,364.6228621006012,5,72.82479510307311,1765121699,,False,1,d9df79f3,2025-12-07_16-34-59,384.67676973342896,384.67676973342896,5368,LAPTOP-2OQK6GT5,127.0.0.1,384.67676973342896,1,True,True,False,0.17806350429159667,0.6261942434824851,0.0,0.38547742746319813,d9df79f3
|
||||||
|
0.06459478599489028,0.16493742503085831,366.6067085266113,5,73.22199411392212,1765122086,,False,1,80ea65f2,2025-12-07_16-41-26,387.6792531013489,387.6792531013489,14064,LAPTOP-2OQK6GT5,127.0.0.1,387.6792531013489,1,True,True,False,0.6011116675422127,0.25138233186284487,0.0,0.31312371671514233,80ea65f2
|
||||||
|
0.01340057642794312,0.10741926673961485,359.5969452857971,5,71.80434017181396,1765122084,,False,1,2e978bfa,2025-12-07_16-41-24,380.28105759620667,380.28105759620667,11060,LAPTOP-2OQK6GT5,127.0.0.1,380.28105759620667,1,False,False,True,0.23485911670668447,0.07773192307960775,0.0,0.023694797982285992,2e978bfa
|
||||||
|
0.01340057642794312,0.10741926673961485,347.92934703826904,5,69.49003491401672,1765122459,,False,1,8518cc40,2025-12-07_16-47-39,368.54625153541565,368.54625153541565,21016,LAPTOP-2OQK6GT5,127.0.0.1,368.54625153541565,1,False,False,True,0.2225556801158737,0.00024186765038358704,0.0,0.0028910785387807336,8518cc40
|
||||||
|
0.01340057642794312,0.10741926673961485,347.14498376846313,5,69.324178647995,1765122461,,False,1,2c691aaa,2025-12-07_16-47-41,366.3459825515747,366.3459825515747,21540,LAPTOP-2OQK6GT5,127.0.0.1,366.3459825515747,1,False,False,True,0.22472742766369874,0.030333356491349384,0.0,0.05099688981312009,2c691aaa
|
||||||
|
0.013040374955575204,0.10485434443992256,347.22006940841675,5,69.34554209709168,1765122832,,False,1,31e60691,2025-12-07_16-53-52,368.0382122993469,368.0382122993469,17532,LAPTOP-2OQK6GT5,127.0.0.1,368.0382122993469,1,False,False,True,0.25914070057597594,0.0019604082489898533,0.0,0.0035094431353713818,31e60691
|
||||||
|
0.012582941415352794,0.10327954129031627,349.2319846153259,5,69.74626359939575,1765122837,,False,1,d4d288c6,2025-12-07_16-53-57,368.903502702713,368.903502702713,22216,LAPTOP-2OQK6GT5,127.0.0.1,368.903502702713,1,False,False,True,0.2734075225731028,0.0033989235904911125,0.0,0.015420451500634869,d4d288c6
|
||||||
|
0.012582941415352794,0.10327954129031627,346.6979134082794,5,69.24065437316895,1765123205,,False,1,7645b77c,2025-12-07_17-00-05,367.4564206600189,367.4564206600189,2272,LAPTOP-2OQK6GT5,127.0.0.1,367.4564206600189,1,False,False,True,0.279241869770728,0.1138413707810162,0.0,0.07531508117874008,7645b77c
|
||||||
|
0.012407575745987933,0.10201566081383735,346.5196530818939,5,69.19977960586547,1765123208,,False,1,3256ae36,2025-12-07_17-00-08,366.00227642059326,366.00227642059326,6604,LAPTOP-2OQK6GT5,127.0.0.1,366.00227642059326,1,False,False,True,0.30993017979826853,0.1292131176570399,0.0,0.11201957956206357,3256ae36
|
||||||
|
0.012407575745987933,0.10201566081383735,344.0291979312897,5,68.71350336074829,1765123575,,False,1,b0dda58b,2025-12-07_17-06-15,364.82790350914,364.82790350914,9732,LAPTOP-2OQK6GT5,127.0.0.1,364.82790350914,1,False,False,True,0.3149521989502957,0.11783753596277924,0.0,0.6825729339913746,b0dda58b
|
||||||
|
0.012429753445092291,0.10205118268939237,346.11818265914917,5,69.12530856132507,1765123581,,False,1,e9d40333,2025-12-07_17-06-21,365.62638425827026,365.62638425827026,23416,LAPTOP-2OQK6GT5,127.0.0.1,365.62638425827026,1,False,False,True,0.5302520310849914,0.1569390945373281,0.0,0.10019443545563994,e9d40333
|
||||||
|
0.011990675508758594,0.10047637953978608,346.5398359298706,5,69.2183114528656,1765123948,,False,1,aa89fe7a,2025-12-07_17-12-28,366.7530257701874,366.7530257701874,16200,LAPTOP-2OQK6GT5,127.0.0.1,366.7530257701874,1,False,False,True,0.5039700850900125,0.16208277029791282,0.0,0.6765386284546205,aa89fe7a
|
||||||
|
0.011968497809654236,0.10044085766423105,345.97880601882935,5,69.09321279525757,1765123951,,False,1,92c48d07,2025-12-07_17-12-31,365.0942301750183,365.0942301750183,15432,LAPTOP-2OQK6GT5,127.0.0.1,365.0942301750183,1,False,False,True,0.33321916406589397,0.1864428656555301,0.0,0.6775297319325386,92c48d07
|
||||||
|
0.011968497809654236,0.10044085766423105,344.1725525856018,5,68.74226913452148,1765124318,,False,1,187790d7,2025-12-07_17-18-38,364.47401189804077,364.47401189804077,24676,LAPTOP-2OQK6GT5,127.0.0.1,364.47401189804077,1,False,False,True,0.3372505528404193,0.2352515935896671,0.0,0.6987321324340134,187790d7
|
||||||
|
0.011760127958326316,0.09964993325879434,345.9427492618561,5,69.08389501571655,1765124322,,False,1,442a2439,2025-12-07_17-18-42,364.755074262619,364.755074262619,7892,LAPTOP-2OQK6GT5,127.0.0.1,364.755074262619,1,False,False,True,0.5098036701758629,0.2122757290966333,0.0,0.6992468303721803,442a2439
|
||||||
|
0.011968497809654236,0.10044085766423105,345.40264558792114,5,68.98561010360717,1765124689,,False,1,70862adc,2025-12-07_17-24-49,365.9752175807953,365.9752175807953,15412,LAPTOP-2OQK6GT5,127.0.0.1,365.9752175807953,1,False,False,True,0.3963969237347287,0.2163058925653838,0.0,0.6859176720785957,70862adc
|
||||||
|
0.012407575745987933,0.10201566081383735,345.8808228969574,5,69.07736506462098,1765124693,,False,1,e6821f34,2025-12-07_17-24-53,365.25493717193604,365.25493717193604,26088,LAPTOP-2OQK6GT5,127.0.0.1,365.25493717193604,1,False,False,True,0.3668982772069688,0.2407751620351906,0.0,0.5737620270733486,e6821f34
|
||||||
|
0.012199205894660016,0.10122473640840064,347.05629682540894,5,69.31870231628417,1765125062,,False,1,8b680875,2025-12-07_17-31-02,367.2029130458832,367.2029130458832,1720,LAPTOP-2OQK6GT5,127.0.0.1,367.2029130458832,1,False,False,True,0.5312495877753942,0.3193426688929859,0.0,0.591252589724218,8b680875
|
||||||
|
0.012429753445092291,0.10205118268939237,349.60691928863525,5,69.8253363609314,1765125068,,False,1,fc54867b,2025-12-07_17-31-08,368.73608803749084,368.73608803749084,4888,LAPTOP-2OQK6GT5,127.0.0.1,368.73608803749084,1,False,False,True,0.5034080657304706,0.3042864908472832,0.0,0.5024906014323391,fc54867b
|
||||||
|
0.013385453418768206,0.10927323740570172,343.8553657531738,5,68.67559289932251,1765125432,,False,1,c32d0d5e,2025-12-07_17-37-12,364.42339730262756,364.42339730262756,25808,LAPTOP-2OQK6GT5,127.0.0.1,364.42339730262756,1,False,False,True,0.15300672154002157,0.39848899797721926,0.0,0.5167681121564286,c32d0d5e
|
||||||
|
0.013537204772521452,0.10852488053708713,344.60119009017944,5,68.81447420120239,1765125436,,False,1,4762fbbb,2025-12-07_17-37-16,363.3258783817291,363.3258783817291,20760,LAPTOP-2OQK6GT5,127.0.0.1,363.3258783817291,1,False,False,True,0.13342603167575784,0.4010104919178914,0.0,0.618812411626611,4762fbbb
|
||||||
|
0.011763789518968464,0.09968897796498292,344.03784108161926,5,68.71829047203065,1765125803,,False,1,522ac97c,2025-12-07_17-43-23,364.7200028896332,364.7200028896332,2372,LAPTOP-2OQK6GT5,127.0.0.1,364.7200028896332,1,False,False,True,0.4489762005319642,0.402754966715804,0.0,0.6426372526242771,522ac97c
|
||||||
|
0.011650346524073398,0.09890157639017978,343.51321721076965,5,68.60030875205993,1765125805,,False,1,5784f433,2025-12-07_17-43-25,362.93026328086853,362.93026328086853,22900,LAPTOP-2OQK6GT5,127.0.0.1,362.93026328086853,1,False,False,True,0.46204975067512033,0.192768833446102,0.0,0.6328281433384326,5784f433
|
||||||
|
0.011650346524073398,0.09890157639017978,343.80972242355347,5,68.66908102035522,1765126172,,False,1,83af0528,2025-12-07_17-49-32,364.5850279331207,364.5850279331207,9832,LAPTOP-2OQK6GT5,127.0.0.1,364.5850279331207,1,False,False,True,0.4663139585990712,0.1845869678485352,0.0,0.6299207399141384,83af0528
|
||||||
|
0.011650346524073398,0.09890157639017978,344.11421155929565,5,68.72400512695313,1765126177,,False,1,12cbaa22,2025-12-07_17-49-37,364.24684858322144,364.24684858322144,5968,LAPTOP-2OQK6GT5,127.0.0.1,364.24684858322144,1,False,False,True,0.47277853181431145,0.40562176755388546,0.0,0.6314990057451438,12cbaa22
|
||||||
|
0.011763789518968464,0.09968897796498292,348.5801889896393,5,69.61860737800598,1765126547,,False,1,a3a87765,2025-12-07_17-55-47,369.27432322502136,369.27432322502136,24372,LAPTOP-2OQK6GT5,127.0.0.1,369.27432322502136,1,False,False,True,0.45010042945259804,0.2855696990924951,0.0,0.6351522397620386,a3a87765
|
||||||
|
0.0441989903761154,0.13204740781578367,347.0340585708618,5,69.31097078323364,1765126548,,False,1,cf2bad0c,2025-12-07_17-55-48,366.1882207393646,366.1882207393646,3272,LAPTOP-2OQK6GT5,127.0.0.1,366.1882207393646,1,False,False,False,0.5890116605741096,0.283660909026841,0.0,0.4602911956047037,cf2bad0c
|
||||||
|
0.0441989903761154,0.13204740781578367,343.53946828842163,5,68.61563892364502,1765126916,,False,1,9a9b91e7,2025-12-07_18-01-56,364.0171241760254,364.0171241760254,2272,LAPTOP-2OQK6GT5,127.0.0.1,364.0171241760254,1,False,False,False,0.6089594786916612,0.3646091181984181,0.0,0.46522499154449626,9a9b91e7
|
||||||
|
0.012199205894660016,0.10122473640840064,345.76200914382935,5,69.05782113075256,1765126922,,False,1,e326d901,2025-12-07_18-02-02,365.42848086357117,365.42848086357117,24932,LAPTOP-2OQK6GT5,127.0.0.1,365.42848086357117,1,False,False,True,0.5932289185132622,0.37353729921136775,0.0,0.46368845919414936,e326d901
|
||||||
|
0.011990281344944778,0.09910429396546264,344.40758872032166,5,68.7896653175354,1765127287,,False,1,ccb3f19a,2025-12-07_18-08-07,365.1469933986664,365.1469933986664,1104,LAPTOP-2OQK6GT5,127.0.0.1,365.1469933986664,1,True,False,True,0.6866411603181266,0.4537774266698106,0.0,0.3059281770286948,ccb3f19a
|
||||||
|
0.012186205997500013,0.1012282592390342,343.9386422634125,5,68.69270787239074,1765127290,,False,1,8c12c55f,2025-12-07_18-08-10,363.29733777046204,363.29733777046204,19700,LAPTOP-2OQK6GT5,127.0.0.1,363.29733777046204,1,True,False,True,0.6710404650258701,0.44441637238072235,0.0,0.2641320116724262,8c12c55f
|
||||||
|
0.0662709141213666,0.16851508812176408,359.4665718078613,5,71.7971097946167,1765127672,,False,1,5a62d5b6,2025-12-07_18-14-32,380.3328058719635,380.3328058719635,26528,LAPTOP-2OQK6GT5,127.0.0.1,380.3328058719635,1,True,True,True,0.40414134317929745,0.2010474655405967,0.0,0.59925716647257,5a62d5b6
|
||||||
|
0.07070075496425433,0.17390976502682037,356.3221182823181,5,71.16437225341797,1765127673,,False,1,bb4495b7,2025-12-07_18-14-33,375.9771683216095,375.9771683216095,21772,LAPTOP-2OQK6GT5,127.0.0.1,375.9771683216095,1,False,True,False,0.39073713326110354,0.5764393142467112,0.0,0.5413963334094041,bb4495b7
|
||||||
|
0.01153507274885726,0.09890157639017978,344.71807885169983,5,68.8583309173584,1765128044,,False,1,9d90711d,2025-12-07_18-20-44,365.7700536251068,365.7700536251068,17592,LAPTOP-2OQK6GT5,127.0.0.1,365.7700536251068,1,False,False,True,0.46895437796002276,0.5411583003121286,0.0,0.6350154738477746,9d90711d
|
||||||
|
0.01153507274885726,0.09890157639017978,343.69704604148865,5,68.64236354827881,1765128046,,False,1,daaec3f8,2025-12-07_18-20-46,363.0186264514923,363.0186264514923,21292,LAPTOP-2OQK6GT5,127.0.0.1,363.0186264514923,1,False,False,True,0.4743507729816579,0.5213407674549528,0.0,0.6445669851749475,daaec3f8
|
||||||
|
0.01153507274885726,0.09890157639017978,343.6039113998413,5,68.62933912277222,1765128413,,False,1,51fb5915,2025-12-07_18-26-53,364.0196588039398,364.0196588039398,21772,LAPTOP-2OQK6GT5,127.0.0.1,364.0196588039398,1,False,False,True,0.48541186574386475,0.5810500215434935,0.0,0.6463595394763801,51fb5915
|
||||||
|
0.01164485418311018,0.09964993325879434,344.2613036632538,5,68.75940155982971,1765128417,,False,1,18966a33,2025-12-07_18-26-57,363.3374502658844,363.3374502658844,16900,LAPTOP-2OQK6GT5,127.0.0.1,363.3374502658844,1,False,False,True,0.5501591363807381,0.5132901504443755,0.0,0.6489815927562321,18966a33
|
||||||
|
0.012314479669876154,0.10205118268939237,345.49542331695557,5,69.01211080551147,1765128785,,False,1,b67080f9,2025-12-07_18-33-05,366.01860308647156,366.01860308647156,20948,LAPTOP-2OQK6GT5,127.0.0.1,366.01860308647156,1,False,False,True,0.5534122098827526,0.5760738874546728,0.0,0.5609719434431071,b67080f9
|
||||||
|
0.07209115365923097,0.17918874278969218,351.96662616729736,5,70.29538555145264,1765128795,,False,1,2533f368,2025-12-07_18-33-15,371.205295085907,371.205295085907,11208,LAPTOP-2OQK6GT5,127.0.0.1,371.205295085907,1,False,True,True,0.5572268058153711,0.5246075332847907,0.0,0.558307419246103,2533f368
|
||||||
|
0.06479949428557605,0.16493742503085831,357.1695992946625,5,71.33717932701111,1765129169,,False,1,451d018d,2025-12-07_18-39-29,378.8273491859436,378.8273491859436,3616,LAPTOP-2OQK6GT5,127.0.0.1,378.8273491859436,1,False,True,False,0.6340187369543626,0.5494644274379972,0.0,0.6521052525663952,451d018d
|
||||||
|
0.04429208645222718,0.13283833222122038,349.41683983802795,5,69.77591800689697,1765129169,,False,1,2256e752,2025-12-07_18-39-29,369.8801362514496,369.8801362514496,25468,LAPTOP-2OQK6GT5,127.0.0.1,369.8801362514496,1,True,False,False,0.6478037819045206,0.6228629446714814,0.0,0.6546094515631737,2256e752
|
||||||
|
0.012292301970771797,0.10201566081383735,346.071848154068,5,69.12432713508606,1765129542,,False,1,0a892729,2025-12-07_18-45-42,367.237042427063,367.237042427063,26212,LAPTOP-2OQK6GT5,127.0.0.1,367.237042427063,1,False,False,True,0.42173310551322135,0.542928875009614,0.0,0.601586841052583,0a892729
|
||||||
|
0.012292301970771797,0.10201566081383735,346.42522287368774,5,69.19188222885131,1765129545,,False,1,495075f5,2025-12-07_18-45-45,365.53574872016907,365.53574872016907,23604,LAPTOP-2OQK6GT5,127.0.0.1,365.53574872016907,1,False,False,True,0.4186754897467695,0.6318747444402091,0.0,0.5956181518703515,495075f5
|
||||||
|
0.011974150685190959,0.10047637953978608,346.9409854412079,5,69.29810705184937,1765129915,,False,1,54c45552,2025-12-07_18-51-55,367.9469211101532,367.9469211101532,25352,LAPTOP-2OQK6GT5,127.0.0.1,367.9469211101532,1,False,False,True,0.46382270850905233,0.6196868829200468,0.0,0.6126115785559785,54c45552
|
||||||
|
0.011974150685190959,0.10047637953978608,346.4141414165497,5,69.18586716651916,1765129917,,False,1,6b2e9b93,2025-12-07_18-51-57,365.9887709617615,365.9887709617615,25400,LAPTOP-2OQK6GT5,127.0.0.1,365.9887709617615,1,False,False,True,0.4751854264500806,0.48925010555288895,0.0,0.515482483148412,6b2e9b93
|
||||||
|
0.01153507274885726,0.09890157639017978,346.25940680503845,5,69.15517511367798,1765130288,,False,1,e9a6b81f,2025-12-07_18-58-08,367.33222007751465,367.33222007751465,4036,LAPTOP-2OQK6GT5,127.0.0.1,367.33222007751465,1,False,False,True,0.4879296810791008,0.4925520261481197,0.0,0.6483489622744677,e9a6b81f
|
||||||
|
0.01153507274885726,0.09890157639017978,345.8425042629242,5,69.06782102584839,1765130290,,False,1,076c5450,2025-12-07_18-58-10,365.1877450942993,365.1877450942993,4832,LAPTOP-2OQK6GT5,127.0.0.1,365.1877450942993,1,False,False,True,0.48842171509426413,0.5881329256041945,0.0,0.6569193185887352,076c5450
|
||||||
|
0.011875401733542455,0.10047637953978608,350.2443346977234,5,69.94839100837707,1765130664,,False,1,4a42a3ea,2025-12-07_19-04-24,370.9968421459198,370.9968421459198,14912,LAPTOP-2OQK6GT5,127.0.0.1,370.9968421459198,1,False,False,True,0.5590357657789103,0.5940413385819063,0.0,0.6573225721220606,4a42a3ea
|
||||||
|
0.012080110024228227,0.10047637953978608,351.5000901222229,5,70.19009194374084,1765130669,,False,1,041795f1,2025-12-07_19-04-29,370.946097612381,370.946097612381,22372,LAPTOP-2OQK6GT5,127.0.0.1,370.946097612381,1,False,False,True,0.5650092236486315,0.6617440972899422,0.0,0.6629504776006702,041795f1
|
||||||
|
0.012314479669876154,0.10205118268939237,343.53907656669617,5,68.6134319782257,1765131035,,False,1,8abb3f37,2025-12-07_19-10-35,364.67463064193726,364.67463064193726,22012,LAPTOP-2OQK6GT5,127.0.0.1,364.67463064193726,1,False,False,True,0.48982107744168,0.4636820835063238,0.0,0.39458266779240964,8abb3f37
|
||||||
|
0.012314479669876154,0.10205118268939237,345.5919795036316,5,69.02381987571717,1765131040,,False,1,f2cb682e,2025-12-07_19-10-40,364.90754437446594,364.90754437446594,5752,LAPTOP-2OQK6GT5,127.0.0.1,364.90754437446594,1,True,False,True,0.4917954659583112,0.45224829356708557,0.0,0.42597097228928366,f2cb682e
|
||||||
|
0.012314479669876154,0.10205118268939237,349.50936698913574,5,69.80772981643676,1765131411,,False,1,463fe5e7,2025-12-07_19-16-51,370.56375885009766,370.56375885009766,16524,LAPTOP-2OQK6GT5,127.0.0.1,370.56375885009766,1,True,False,True,0.5373435635563055,0.5202382560972127,0.0,0.5340573143597149,463fe5e7
|
||||||
|
0.012083932119443879,0.10122473640840064,350.1439118385315,5,69.92809920310974,1765131415,,False,1,88bbe87d,2025-12-07_19-16-55,369.54999685287476,369.54999685287476,15084,LAPTOP-2OQK6GT5,127.0.0.1,369.54999685287476,1,False,False,True,0.5274586910866753,0.5110782288617315,0.0,0.5368958272648865,88bbe87d
|
||||||
|
0.011875401733542455,0.10047637953978608,355.52406072616577,5,71.00808920860291,1765131794,,False,1,33ea1cc6,2025-12-07_19-23-14,376.746440410614,376.746440410614,17380,LAPTOP-2OQK6GT5,127.0.0.1,376.746440410614,1,False,False,True,0.5229924883346121,0.5158065672775711,0.0,0.6679657240993034,33ea1cc6
|
||||||
|
0.011853224034438097,0.10044085766423105,355.67893862724304,5,71.0243070602417,1765131797,,False,1,1243723e,2025-12-07_19-23-17,375.44413685798645,375.44413685798645,11232,LAPTOP-2OQK6GT5,127.0.0.1,375.44413685798645,1,False,False,True,0.3726772055073363,0.5573152713604742,0.0,0.6766134238094554,1243723e
|
||||||
|
Reference in New Issue
Block a user