diff --git a/libs/extractor-api-lib/poetry.lock b/libs/extractor-api-lib/poetry.lock index f6a42940..bfdf5294 100644 --- a/libs/extractor-api-lib/poetry.lock +++ b/libs/extractor-api-lib/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.1.3 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.2.1 and should not be changed by hand. [[package]] name = "accelerate" @@ -7601,14 +7601,14 @@ files = [ [[package]] name = "unstructured" -version = "0.18.15" +version = "0.18.18" description = "A library that prepares raw documents for downstream ML tasks." optional = false python-versions = ">=3.10.0" groups = ["main"] files = [ - {file = "unstructured-0.18.15-py3-none-any.whl", hash = "sha256:f05b1defcbe8190319d30da8adddbb888f74bf8ec7f65886867d7dca41d67ad0"}, - {file = "unstructured-0.18.15.tar.gz", hash = "sha256:81d8481280a4ac5cefe74bdb6db3687e8f240d5643706f86728eac39549112b5"}, + {file = "unstructured-0.18.18-py3-none-any.whl", hash = "sha256:d5189bdd5e2a1c5ed3cc289cfb4fb483c6f2dd544b42744bdc5b81d3388ea527"}, + {file = "unstructured-0.18.18.tar.gz", hash = "sha256:cfe6c84a36d374e5767930e13cfc10622357b3b68a5b7c735fdb1eeca08c6b57"}, ] [package.dependencies] @@ -7637,19 +7637,19 @@ unstructured-client = "*" wrapt = "*" [package.extras] -all-docs = ["effdet", "google-cloud-vision", "markdown", "msoffcrypto-tool", "networkx", "onnx (>=1.17.0)", "onnxruntime (>=1.19.0)", "openpyxl", "pandas", "pdf2image", "pdfminer.six", "pi-heif", "pikepdf", "pypandoc", "pypdf", "python-docx (>=1.1.2)", "python-pptx (>=1.0.1)", "unstructured-inference (>=1.0.5)", "unstructured.pytesseract (>=0.3.12)", "xlrd"] +all-docs = ["effdet", "google-cloud-vision", "markdown", "msoffcrypto-tool", "networkx", "onnx (>=1.17.0)", "onnxruntime (>=1.19.0)", "openpyxl", "pandas", "pdf2image", "pdfminer.six", "pi_heif", "pikepdf", "pypandoc", "pypdf", "python-docx (>=1.1.2)", "python-pptx (>=1.0.1)", "unstructured-inference (>=1.0.5)", "unstructured.pytesseract (>=0.3.12)", "xlrd"] csv = ["pandas"] doc = ["python-docx (>=1.1.2)"] docx = ["python-docx (>=1.1.2)"] epub = ["pypandoc"] huggingface = ["langdetect", "sacremoses", "sentencepiece", "torch", "transformers"] -image = ["effdet", "google-cloud-vision", "onnx (>=1.17.0)", "onnxruntime (>=1.19.0)", "pdf2image", "pdfminer.six", "pi-heif", "pikepdf", "pypdf", "unstructured-inference (>=1.0.5)", "unstructured.pytesseract (>=0.3.12)"] -local-inference = ["effdet", "google-cloud-vision", "markdown", "msoffcrypto-tool", "networkx", "onnx (>=1.17.0)", "onnxruntime (>=1.19.0)", "openpyxl", "pandas", "pdf2image", "pdfminer.six", "pi-heif", "pikepdf", "pypandoc", "pypdf", "python-docx (>=1.1.2)", "python-pptx (>=1.0.1)", "unstructured-inference (>=1.0.5)", "unstructured.pytesseract (>=0.3.12)", "xlrd"] +image = ["effdet", "google-cloud-vision", "onnx (>=1.17.0)", "onnxruntime (>=1.19.0)", "pdf2image", "pdfminer.six", "pi_heif", "pikepdf", "pypdf", "unstructured-inference (>=1.0.5)", "unstructured.pytesseract (>=0.3.12)"] +local-inference = ["effdet", "google-cloud-vision", "markdown", "msoffcrypto-tool", "networkx", "onnx (>=1.17.0)", "onnxruntime (>=1.19.0)", "openpyxl", "pandas", "pdf2image", "pdfminer.six", "pi_heif", "pikepdf", "pypandoc", "pypdf", "python-docx (>=1.1.2)", "python-pptx (>=1.0.1)", "unstructured-inference (>=1.0.5)", "unstructured.pytesseract (>=0.3.12)", "xlrd"] md = ["markdown"] odt = ["pypandoc", "python-docx (>=1.1.2)"] org = ["pypandoc"] paddleocr = ["paddlepaddle (>=3.0.0b1)", "unstructured.paddleocr (==2.10.0)"] -pdf = ["effdet", "google-cloud-vision", "onnx (>=1.17.0)", "onnxruntime (>=1.19.0)", "pdf2image", "pdfminer.six", "pi-heif", "pikepdf", "pypdf", "unstructured-inference (>=1.0.5)", "unstructured.pytesseract (>=0.3.12)"] +pdf = ["effdet", "google-cloud-vision", "onnx (>=1.17.0)", "onnxruntime (>=1.19.0)", "pdf2image", "pdfminer.six", "pi_heif", "pikepdf", "pypdf", "unstructured-inference (>=1.0.5)", "unstructured.pytesseract (>=0.3.12)"] ppt = ["python-pptx (>=1.0.1)"] pptx = ["python-pptx (>=1.0.1)"] rst = ["pypandoc"] @@ -8340,4 +8340,4 @@ cffi = ["cffi (>=1.17,<2.0) ; platform_python_implementation != \"PyPy\" and pyt [metadata] lock-version = "2.1" python-versions = ">=3.13,<3.14" -content-hash = "0f99411e253e91ab5ecdb2761edfebe729c5cf699c4c8a229b313bf7de14b4a1" +content-hash = "579161ba9795e58cd1452c9386f33aacde055601ae3c820d036d15ae873a78cf" diff --git a/libs/extractor-api-lib/pyproject.toml b/libs/extractor-api-lib/pyproject.toml index 904a73d9..c44cc362 100644 --- a/libs/extractor-api-lib/pyproject.toml +++ b/libs/extractor-api-lib/pyproject.toml @@ -109,7 +109,7 @@ partial = "^1.0" pyyaml = "^6.0.2" numpy = "^2.2.5" docx2txt = "^0.9" -unstructured = {extras = ["docx", "pptx"], version = "0.18.15"} +unstructured = {extras = ["docx", "pptx"], version = "0.18.18"} html5lib = "^1.1" langchain-community = "^0.4.1" atlassian-python-api = "^4.0.3"