diff --git a/data_pipeline_api/__init__.py b/data_pipeline_api/__init__.py
index 46217a6e..d85fe5a1 100644
--- a/data_pipeline_api/__init__.py
+++ b/data_pipeline_api/__init__.py
@@ -3,6 +3,8 @@
     "link_read",
     "link_write",
     "finalise",
+    "search",
+    "search_data_products",
     "raise_issue_by_data_product",
     "raise_issue_by_index",
     "raise_issue_with_config",
@@ -15,7 +17,7 @@
 from .fdp_utils import get_handle_index_from_path
 from .link import link_read, link_write
-from .pipeline import finalise, initialise
+from .pipeline import finalise, initialise, search, search_data_products
 from .raise_issue import (
     raise_issue_by_data_product,
     raise_issue_by_existing_data_product,
diff --git a/data_pipeline_api/pipeline.py b/data_pipeline_api/pipeline.py
index 4a10b35c..a6d79bb0 100644
--- a/data_pipeline_api/pipeline.py
+++ b/data_pipeline_api/pipeline.py
@@ -1,12 +1,85 @@
 import datetime
+import json
 import logging
 import os
 
+import requests
 import yaml
 
 from data_pipeline_api import fdp_utils
 
 
+def search_data_products(
+    handle: dict, wildcard: str, token: str = None
+) -> str:
+    """
+    Wrapper around search() to look up data products by name.
+
+    Parameters
+    ----------
+    handle : dict
+        pipeline handle
+    wildcard : str
+        search parameter, matched as a plain substring of the name
+    token : str, optional
+        registry token, by default None
+
+    Returns
+    -------
+    str
+        JSON string containing the list of data products that meet the
+        search criteria
+    """
+    return search(handle, wildcard, "data_product", "name", token=token)
+
+
+def search(
+    handle: dict,
+    wildcard: str,
+    endpoint: str,
+    key: str,
+    token: str = None,
+) -> str:
+    """
+    Queries the given registry endpoint, using the registry URL and API
+    version stored in the handle, and returns the entries whose key
+    contains the wildcard (a plain substring match).
+
+    Args:
+        | handle: pipeline handle
+        | wildcard: string to search for
+        | endpoint: api endpoint where the search is performed
+        | key: key of the results dictionary where the wildcard is searched
+        | token: registry token
+
+    Returns:
+        | str: a JSON string containing the list of dictionaries where
+          the wildcard was found, or JSON null if the registry could not
+          be queried
+    """
+
+    try:
+        api_version = handle["yaml"]["run_metadata"]["api_version"]
+        registry_url = handle["yaml"]["run_metadata"][
+            "local_data_registry_url"
+        ]
+    except KeyError:
+        return json.dumps(None, indent=4, sort_keys=True)
+
+    headers = fdp_utils.get_headers(token=token, api_version=api_version)
+    if not registry_url:
+        return json.dumps(None, indent=4, sort_keys=True)
+    if registry_url[-1] != "/":
+        registry_url += "/"
+    registry_url += endpoint + "/?"
+
+    response = requests.get(registry_url, headers=headers)
+    if response.status_code != 200:
+        return json.dumps(None, indent=4, sort_keys=True)
+    data = response.json()
+
+    res = [obj for obj in data["results"] if wildcard in obj[key]]  # type: ignore
+
+    return json.dumps(res, indent=4, sort_keys=True)
+
+
 def initialise(token: str, config: str, script: str) -> dict:
     """Reads in token, config file and script, creates necessary registry
     items and creates new code run.
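The intended use of the new helpers is easiest to see in a short sketch. Everything below is hypothetical scaffolding rather than part of this changeset: the token and script values are placeholders, and a local registry is assumed to be running at the URL recorded in the config.

    import json

    import data_pipeline_api as pipeline

    token = "0123456789abcdef"        # placeholder registry token
    script = "path/to/submission.sh"  # placeholder submission script

    # A handle produced by initialise() carries the registry URL and API
    # version that search() reads from handle["yaml"]["run_metadata"].
    handle = pipeline.initialise(token, "tests/ext/find_csv.yaml", script)

    # search_data_products(handle, "find") is shorthand for
    # search(handle, "find", "data_product", "name"): it queries the
    # data_product endpoint and keeps results whose "name" contains "find".
    res = json.loads(pipeline.search_data_products(handle, "find"))

    # On any failure (handle without run_metadata, empty registry URL,
    # non-200 response) the helpers return JSON null, i.e. None here.
    if res is not None:
        for entry in res:
            print(entry["name"])
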
diff --git a/pyproject.toml b/pyproject.toml
index 316d5562..95439309 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,15 +1,14 @@
 [tool.poetry]
+authors = [
+    "Ryan J Field ",
+    "Dennis Reddyhoff ",
+    "Robert D Turner ",
+    "Bruno Viola ",
+    "Kristian Zarebski ",
+]
+description = "Python api to interact with the Fair Data Pipeline"
 name = "data_pipeline_api"
 version = "0.7.5"
-description = "Python api to interact with the Fair Data Pipeline"
-authors = [
-    "Ryan J Field ",
-    "Dennis Reddyhoff ",
-    "Robert D Turner ",
-    "Bruno Viola ",
-    "Kristian Zarebski "
-    ]
-
 homepage = "https://www.fairdatapipeline.org/"
 
@@ -18,74 +17,69 @@
 repository = "https://github.com/FAIRDataPipeline/pyDataPipeline"
 
 license = "GNU General Public License v3.0"
 
 packages = [
-    {include = "data_pipeline_api"}
+    {include = "data_pipeline_api"},
 ]
 
 readme = "README.md"
 
 keywords = [
-    "FAIR Data Pipeline",
-    "FAIR",
-    "Data Management",
-    "Provenance"
+    "FAIR Data Pipeline",
+    "FAIR",
+    "Data Management",
+    "Provenance",
 ]
 
 [tool.poetry.dependencies]
+PyYAML = "^6.0"
+h5py = "^3.6.0"
+matplotlib = "^3.5.1"
 python = "^3.7.1,<3.11"
 requests = "^2.27.1"
-matplotlib = "^3.5.1"
 scipy = "^1.7.3"
-h5py = "^3.6.0"
-PyYAML = "^6.0"
 
 [tool.poetry.dev-dependencies]
-pytest = "^7.0.0"
+Sphinx = "^4.3.2"
 black = "^22.1"
-mypy = "^0.931"
 flake8 = "^4.0.1"
+isort = "^5.10.1"
+mypy = "^0.931"
 poetry = "^1.1.6"
-pytest-mock = "^3.7.0"
-pytest-dependency = "^0.5.1"
-pytest-cov = "^3.0.0"
 pre-commit = "^2.17.0"
-isort = "^5.10.1"
-Sphinx = "^4.3.2"
+pytest = "^7.0.0"
+pytest-cov = "^3.0.0"
+pytest-dependency = "^0.5.1"
+pytest-mock = "^3.7.0"
 sphinx-rtd-theme = "^1.0.0"
 
 [build-system]
-requires = ["poetry-core>=1.0.0"]
 build-backend = "poetry.core.masonry.api"
-
+requires = ["poetry-core>=1.0.0"]
 
 [tool.poetry.urls]
 "Issue Tracker" = "https://github.com/FAIRDataPipeline/pyDataPipeline/issues"
-
-
 [tool.isort]
-profile = 'black'
-multi_line_output = 3
-include_trailing_comma = true
-force_grid_wrap = 0
-use_parentheses = true
 ensure_newline_before_comments = true
+force_grid_wrap = 0
+include_trailing_comma = true
 line_length = 79
-
-
+multi_line_output = 3
+profile = 'black'
+use_parentheses = true
 
 [tool.black]
 line-length = 79
 
 [tool.pytest.ini_options]
-addopts = '-s -v --cov=fairdatapipeline --cov-report=html --cov-report=term'
+addopts = '-s -v --cov=data_pipeline_api --cov-report=html --cov-report=term'
 markers = [
-    "pipeline: tests for 'pipeline' module ",
-    "issue: tests for raising issues ",
-    "utilities: tests for 'utilities' functions ",
-    "apiversion: tests for api versioning ",
+    "pipeline: tests for 'pipeline' module ",
+    "issue: tests for raising issues ",
+    "utilities: tests for 'utilities' functions ",
+    "apiversion: tests for api versioning ",
 ]
 
 [tool.mypy]
-ignore_missing_imports = true
-disallow_untyped_defs = true
 disallow_untyped_calls = true
+disallow_untyped_defs = true
+ignore_missing_imports = true
diff --git a/tests/ext/find_csv.yaml b/tests/ext/find_csv.yaml
new file mode 100644
index 00000000..110ef979
--- /dev/null
+++ b/tests/ext/find_csv.yaml
@@ -0,0 +1,20 @@
+run_metadata:
+  description: Write csv file
+  local_data_registry_url: http://127.0.0.1:8000/api/
+  remote_data_registry_url: https://data.scrc.uk/api/
+  default_input_namespace: testing
+  default_output_namespace: testing
+  write_data_store: tmp
+  local_repo: ./
+  script: |-
+    python3 py.test
+  public: true
+  latest_commit: 221bfe8b52bbfb3b2dbdc23037b7dd94b49aaa70
+  remote_repo: https://github.com/FAIRDataPipeline/pyDataPipeline
+
+write:
+- data_product: find/csv
+  description: test csv to test find data products
+  file_type: csv
+  use:
+    version: 0.0.1
diff --git a/tests/test_raise_issue.py b/tests/test_raise_issue.py
index 32b0030b..6a36072b 100644
--- a/tests/test_raise_issue.py
+++ b/tests/test_raise_issue.py
@@ -1,3 +1,5 @@
+import copy
+import json
 import os
 import shutil
 
@@ -6,6 +8,8 @@
 import data_pipeline_api as pipeline
 import data_pipeline_api.fdp_utils as fdp_utils
 
+TEST_CSV = "test.csv"
+
 
 @pytest.fixture
 def test_dir() -> str:
@@ -29,6 +33,11 @@
     return os.path.join(test_dir, "write_csv.yaml")
 
 
+@pytest.fixture
+def fconfig(test_dir: str) -> str:
+    return os.path.join(test_dir, "find_csv.yaml")
+
+
 @pytest.mark.pipeline
 def test_initialise(token: str, config: str, script: str) -> None:
     handle = pipeline.initialise(token, config, script)
@@ -109,7 +118,7 @@ def test_link_read(
     token: str, config: str, script: str, test_dir: str
 ) -> None:
     handle = pipeline.initialise(token, config, script)
-    tmp_csv = os.path.join(test_dir, "test.csv")
+    tmp_csv = os.path.join(test_dir, TEST_CSV)
     link_write = pipeline.link_write(handle, "test/csv")
     shutil.copy(tmp_csv, link_write)
     pipeline.finalise(token, handle)
@@ -122,12 +131,76 @@
     assert type(link_read_1) == str and type(link_read_2) == str
 
 
+@pytest.mark.pipeline
+def test_find_data_product(
+    token: str,
+    fconfig: str,
+    script: str,
+    test_dir: str,
+    wildcard: str = "find",
+    key: str = "name",
+) -> None:
+
+    handle = pipeline.initialise(token, fconfig, script)
+    tmp_csv = os.path.join(test_dir, TEST_CSV)
+    link_write = pipeline.link_write(handle, "find/csv")
+    shutil.copy(tmp_csv, link_write)
+    pipeline.finalise(token, handle)
+    config = os.path.join(test_dir, "read_csv.yaml")
+    handle = pipeline.initialise(token, config, script)
+
+    results = pipeline.search_data_products(handle, wildcard)
+    res = json.loads(results)
+    assert len(res) == 1
+    result = fdp_utils.get_first_entry(res)
+    assert wildcard in result[key]
+
+
+@pytest.mark.pipeline
+def test_find_data_product_empty_handle(
+    wildcard: str = "find",
+) -> None:
+    handle: dict = {}
+
+    results = pipeline.search_data_products(handle, wildcard)
+    res = json.loads(results)
+    assert res is None
+
+
+@pytest.mark.pipeline
+def test_find_data_product_wrong_registry(
+    token: str,
+    fconfig: str,
+    script: str,
+    test_dir: str,
+    wildcard: str = "find",
+    key: str = "name",
+) -> None:
+
+    handle = pipeline.initialise(token, fconfig, script)
+    tmp_csv = os.path.join(test_dir, TEST_CSV)
+    link_write = pipeline.link_write(handle, "find/csv")
+    shutil.copy(tmp_csv, link_write)
+    pipeline.finalise(token, handle)
+    config = os.path.join(test_dir, "read_csv.yaml")
+    handle = pipeline.initialise(token, config, script)
+    wrong_handle = copy.deepcopy(handle)
+    wrong_handle["yaml"]["run_metadata"]["local_data_registry_url"] = ""
+    results = pipeline.search_data_products(wrong_handle, wildcard)
+    res = json.loads(results)
+    assert res is None
+    handle["yaml"]["run_metadata"]["local_data_registry_url"] += "-error"
+    results = pipeline.search_data_products(handle, wildcard)
+    res = json.loads(results)
+    assert res is None
+
+
 @pytest.mark.pipeline
 def test_link_read_data_product_exists(
     token: str, config: str, script: str, test_dir: str
 ) -> None:
     handle = pipeline.initialise(token, config, script)
-    tmp_csv = os.path.join(test_dir, "test.csv")
+    tmp_csv = os.path.join(test_dir, TEST_CSV)
     link_write = pipeline.link_write(handle, "test/csv")
     shutil.copy(tmp_csv, link_write)
     pipeline.finalise(token, handle)
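One behavioural note the new tests rely on: despite the name, the wildcard argument performs a plain substring test (wildcard in obj[key]), not glob or regex matching. A minimal, purely illustrative sketch of the same filter against a hypothetical registry payload:

    # Hypothetical shape of a registry response for GET .../data_product/?
    data = {
        "results": [
            {"name": "find/csv", "version": "0.0.1"},
            {"name": "test/csv", "version": "0.0.1"},
        ]
    }

    # The same comprehension used in search(): substring containment,
    # so "find" matches "find/csv", while a glob such as "find/*" would
    # match nothing.
    res = [obj for obj in data["results"] if "find" in obj["name"]]
    assert [obj["name"] for obj in res] == ["find/csv"]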