diff --git a/CHANGELOG.md b/CHANGELOG.md index 4bdace7e..b1375aad 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +## 1.0.7 + +* Fix a hardcoded file extension causing confusion in the logs + ## 1.0.6 * Add slicing through indexing for vectorized elements diff --git a/unstructured_inference/__version__.py b/unstructured_inference/__version__.py index 029198d7..1afb74e9 100644 --- a/unstructured_inference/__version__.py +++ b/unstructured_inference/__version__.py @@ -1 +1 @@ -__version__ = "1.0.6" # pragma: no cover +__version__ = "1.0.7" # pragma: no cover diff --git a/unstructured_inference/inference/layout.py b/unstructured_inference/inference/layout.py index 57e17a08..7681db89 100644 --- a/unstructured_inference/inference/layout.py +++ b/unstructured_inference/inference/layout.py @@ -337,12 +337,15 @@ def process_data_with_model( password: Optional[str] = None, **kwargs: Any, ) -> DocumentLayout: - """Process PDF as file-like object `data` into a `DocumentLayout`. + """Process PDF or image as file-like object `data` into a `DocumentLayout`. Uses the model identified by `model_name`. """ + # Note: We use a temp dir, not a temp file, + # because the latter fails on Windows + # https://github.com/Unstructured-IO/unstructured-inference/pull/376 with tempfile.TemporaryDirectory() as tmp_dir_path: - file_path = os.path.join(tmp_dir_path, "document.pdf") + file_path = os.path.join(tmp_dir_path, "document") with open(file_path, "wb") as f: f.write(data.read()) f.flush() @@ -365,8 +368,8 @@ def process_file_with_model( password: Optional[str] = None, **kwargs: Any, ) -> DocumentLayout: - """Processes pdf file with name filename into a DocumentLayout by using a model identified by - model_name.""" + """Processes PDF or image file with name `filename` into a `DocumentLayout` by using + a model identified by `model_name`.""" model = get_model(model_name, **kwargs) if isinstance(model, UnstructuredObjectDetectionModel):