Making file parser flexible to deprecate pdf parser (#3073)

Co-authored-by: Suchintan <suchintan@users.noreply.github.com>
This commit is contained in:
PHSB
2025-08-06 11:15:04 -06:00
committed by GitHub
parent 31aa7d6973
commit 468f5c6051
15 changed files with 555 additions and 49 deletions

51
poetry.lock generated
View File

@@ -1,4 +1,4 @@
# This file is automatically @generated by Poetry 2.1.1 and should not be changed by hand.
# This file is automatically @generated by Poetry 2.1.3 and should not be changed by hand.
[[package]]
name = "about-time"
@@ -1587,6 +1587,18 @@ files = [
[package.extras]
mypy = ["mypy"]
[[package]]
name = "et-xmlfile"
version = "2.0.0"
description = "An implementation of lxml.xmlfile for the standard library"
optional = false
python-versions = ">=3.8"
groups = ["main"]
files = [
{file = "et_xmlfile-2.0.0-py3-none-any.whl", hash = "sha256:7a91720bc756843502c3b7504c77b8fe44217c85c537d85037f0f536151b2caa"},
{file = "et_xmlfile-2.0.0.tar.gz", hash = "sha256:dab3f4764309081ce75662649be815c4c9081e88f0837825f90fd28317d4da54"},
]
[[package]]
name = "exceptiongroup"
version = "1.3.0"
@@ -2374,7 +2386,7 @@ description = "Lightweight in-process concurrent programming"
optional = false
python-versions = ">=3.9"
groups = ["main"]
markers = "python_version == \"3.12\" or python_version == \"3.13\""
markers = "python_version >= \"3.12\""
files = [
{file = "greenlet-3.2.3-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:1afd685acd5597349ee6d7a88a8bec83ce13c106ac78c196ee9dde7c04fe87be"},
{file = "greenlet-3.2.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:761917cac215c61e9dc7324b2606107b3b292a8349bdebb31503ab4de3f559ac"},
@@ -3183,7 +3195,7 @@ description = "Low-level, pure Python DBus protocol wrapper."
optional = false
python-versions = ">=3.7"
groups = ["dev"]
markers = "platform_machine != \"ppc64le\" and platform_machine != \"s390x\" and sys_platform == \"linux\""
markers = "sys_platform == \"linux\" and platform_machine != \"ppc64le\" and platform_machine != \"s390x\""
files = [
{file = "jeepney-0.9.0-py3-none-any.whl", hash = "sha256:97e5714520c16fc0a45695e5365a2e11b81ea79bba796e26f9f1d178cb182683"},
{file = "jeepney-0.9.0.tar.gz", hash = "sha256:cf0e9e845622b81e4a28df94c40345400256ec608d0e55bb8a3feaa9163f5732"},
@@ -4829,7 +4841,7 @@ description = "ONNX Runtime is a runtime accelerator for Machine Learning models
optional = false
python-versions = ">=3.10"
groups = ["main"]
markers = "python_version == \"3.12\" or python_version == \"3.13\""
markers = "python_version >= \"3.12\""
files = [
{file = "onnxruntime-1.22.0-cp310-cp310-macosx_13_0_universal2.whl", hash = "sha256:85d8826cc8054e4d6bf07f779dc742a363c39094015bdad6a08b3c18cfe0ba8c"},
{file = "onnxruntime-1.22.0-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:468c9502a12f6f49ec335c2febd22fdceecc1e4cc96dfc27e419ba237dff5aff"},
@@ -4937,6 +4949,21 @@ jsonschema-path = ">=0.3.1,<0.4.0"
lazy-object-proxy = ">=1.7.1,<2.0.0"
openapi-schema-validator = ">=0.6.0,<0.7.0"
[[package]]
name = "openpyxl"
version = "3.1.5"
description = "A Python library to read/write Excel 2010 xlsx/xlsm files"
optional = false
python-versions = ">=3.8"
groups = ["main"]
files = [
{file = "openpyxl-3.1.5-py2.py3-none-any.whl", hash = "sha256:5282c12b107bffeef825f4617dc029afaf41d0ea60823bbb665ef3079dc79de2"},
{file = "openpyxl-3.1.5.tar.gz", hash = "sha256:cf0e3cf56142039133628b5acffe8ef0c12bc902d2aadd3e0fe5878dc08d1050"},
]
[package.dependencies]
et-xmlfile = "*"
[[package]]
name = "opentelemetry-api"
version = "1.34.1"
@@ -5930,7 +5957,7 @@ description = "A high-level API to automate web browsers"
optional = false
python-versions = ">=3.9"
groups = ["main"]
markers = "python_version == \"3.12\" or python_version == \"3.13\""
markers = "python_version >= \"3.12\""
files = [
{file = "playwright-1.53.0-py3-none-macosx_10_13_x86_64.whl", hash = "sha256:48a1a15ce810f0ffe512b6050de9871ea193b41dd3cc1bbed87b8431012419ba"},
{file = "playwright-1.53.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:a701f9498a5b87e3f929ec01cea3109fbde75821b19c7ba4bba54f6127b94f76"},
@@ -6227,7 +6254,7 @@ description = "PostgreSQL database adapter for Python"
optional = false
python-versions = ">=3.7"
groups = ["main"]
markers = "python_version == \"3.12\" or python_version == \"3.11\""
markers = "python_version < \"3.13\""
files = [
{file = "psycopg-3.1.18-py3-none-any.whl", hash = "sha256:4d5a0a5a8590906daa58ebd5f3cfc34091377354a1acced269dd10faf55da60e"},
{file = "psycopg-3.1.18.tar.gz", hash = "sha256:31144d3fb4c17d78094d9e579826f047d4af1da6a10427d91dfcfb6ecdf6f12b"},
@@ -6280,7 +6307,7 @@ description = "PostgreSQL database adapter for Python -- C optimisation distribu
optional = false
python-versions = ">=3.7"
groups = ["main"]
markers = "(python_version == \"3.12\" or python_version == \"3.11\") and implementation_name != \"pypy\""
markers = "python_version < \"3.13\" and implementation_name != \"pypy\""
files = [
{file = "psycopg_binary-3.1.18-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5c323103dfa663b88204cf5f028e83c77d7a715f9b6f51d2bbc8184b99ddd90a"},
{file = "psycopg_binary-3.1.18-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:887f8d856c91510148be942c7acd702ccf761a05f59f8abc123c22ab77b5a16c"},
@@ -6731,7 +6758,7 @@ description = "A rough port of Node.js's EventEmitter to Python with a few trick
optional = false
python-versions = ">=3.8"
groups = ["main"]
markers = "python_version == \"3.12\" or python_version == \"3.13\""
markers = "python_version >= \"3.12\""
files = [
{file = "pyee-13.0.0-py3-none-any.whl", hash = "sha256:48195a3cddb3b1515ce0695ed76036b5ccc2ef3a9f963ff9f77aec0139845498"},
{file = "pyee-13.0.0.tar.gz", hash = "sha256:b391e3c5a434d1f5118a25615001dbc8f669cf410ab67d04c4d4e07c55481c37"},
@@ -7044,7 +7071,7 @@ description = "A (partial) reimplementation of pywin32 using ctypes/cffi"
optional = false
python-versions = ">=3.6"
groups = ["dev"]
markers = "platform_machine != \"ppc64le\" and platform_machine != \"s390x\" and sys_platform == \"win32\""
markers = "sys_platform == \"win32\" and platform_machine != \"ppc64le\" and platform_machine != \"s390x\""
files = [
{file = "pywin32-ctypes-0.2.3.tar.gz", hash = "sha256:d162dc04946d704503b2edc4d55f3dba5c1d539ead017afa00142c38b9885755"},
{file = "pywin32_ctypes-0.2.3-py3-none-any.whl", hash = "sha256:8a1513379d709975552d202d942d9837758905c8d01eb82b8bcc30918929e7b8"},
@@ -7784,7 +7811,7 @@ description = "Python bindings to FreeDesktop.org Secret Service API"
optional = false
python-versions = ">=3.6"
groups = ["dev"]
markers = "platform_machine != \"ppc64le\" and platform_machine != \"s390x\" and sys_platform == \"linux\""
markers = "sys_platform == \"linux\" and platform_machine != \"ppc64le\" and platform_machine != \"s390x\""
files = [
{file = "SecretStorage-3.3.3-py3-none-any.whl", hash = "sha256:f356e6628222568e3af06f2eba8df495efa13b3b63081dafd4f7d9a7b7bc9f99"},
{file = "SecretStorage-3.3.3.tar.gz", hash = "sha256:2403533ef369eca6d2ba81718576c5e0f564d5cca1b58f73a8b23e7d4eeebd77"},
@@ -9128,7 +9155,7 @@ description = "Fast implementation of asyncio event loop on top of libuv"
optional = false
python-versions = ">=3.8.0"
groups = ["main"]
markers = "sys_platform != \"win32\" and sys_platform != \"cygwin\" and platform_python_implementation != \"PyPy\""
markers = "platform_python_implementation != \"PyPy\" and sys_platform != \"win32\" and sys_platform != \"cygwin\""
files = [
{file = "uvloop-0.21.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:ec7e6b09a6fdded42403182ab6b832b71f4edaf7f37a9a0e371a01db5f0cb45f"},
{file = "uvloop-0.21.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:196274f2adb9689a289ad7d65700d37df0c0930fd8e4e743fa4834e850d7719d"},
@@ -9763,4 +9790,4 @@ type = ["pytest-mypy"]
[metadata]
lock-version = "2.1"
python-versions = ">=3.11,<3.14"
content-hash = "667e626dd8d08bae4f9b852616a9c2c5df9bdd4a3a37f7ef87030d7bfdf51f3b"
content-hash = "441c7080f7fbccb87de476e56dd86d1e56e4c0b8eaa8378d13c90d94a0a42123"