4 changes: 3 additions & 1 deletion data_pipeline_api/__init__.py
@@ -3,6 +3,8 @@
"link_read",
"link_write",
"finalise",
"search",
"search_data_products",
"raise_issue_by_data_product",
"raise_issue_by_index",
"raise_issue_with_config",
@@ -15,7 +17,7 @@

from .fdp_utils import get_handle_index_from_path
from .link import link_read, link_write
from .pipeline import finalise, initialise
from .pipeline import finalise, initialise, search, search_data_products
from .raise_issue import (
    raise_issue_by_data_product,
    raise_issue_by_existing_data_product,
73 changes: 73 additions & 0 deletions data_pipeline_api/pipeline.py
@@ -1,12 +1,85 @@
import datetime
import json
import logging
import os
from typing import List

import requests
import yaml

from data_pipeline_api import fdp_utils


def search_data_products(
    handle: dict, wildcard: str, token: str = None
) -> str:
    """
    search_data_products
    Wrapper around search() to look up data products by name using a wildcard.

    Parameters
    ----------
    handle : dict
        pipeline handle
    wildcard : str
        search parameter
    token : str, optional
        registry token, by default None

    Returns
    -------
    str
        JSON-encoded list of data products that meet the search criteria
    """
    return search(handle, wildcard, "data_product", "name", token=token)


def search(
    handle: dict,
    wildcard: str,
    endpoint: str,
    key: str,
    token: str = None,
) -> str:
    """
    Queries the local registry URL held in the handle and returns the items
    found at the given endpoint whose `key` field contains the wildcard.

    Args:
        | handle: pipeline handle
        | token: registry token
        | wildcard: string to search for
        | key: key of the results dictionary in which the wildcard is searched
        | endpoint: api endpoint where the search is performed

    Returns:
        | json: a JSON-encoded list of dictionaries where the wildcard was found
    """

    try:
        api_version = handle["yaml"]["run_metadata"]["api_version"]
        registry_url = handle["yaml"]["run_metadata"][
            "local_data_registry_url"
        ]
    except KeyError:
        return json.dumps(None, indent=4, sort_keys=True)

    headers = fdp_utils.get_headers(token=token, api_version=api_version)
    if not registry_url:
        return json.dumps(None, indent=4, sort_keys=True)
    if registry_url[-1] != "/":
        registry_url += "/"
    registry_url += endpoint + "/?"

    response = requests.get(registry_url, headers=headers)
    if response.status_code != 200:
        return json.dumps(None, indent=4, sort_keys=True)
    data = response.json()

    res = [obj for obj in data["results"] if wildcard in obj[key]]  # type: ignore

    return json.dumps(res, indent=4, sort_keys=True)


def initialise(token: str, config: str, script: str) -> dict:
    """Reads in token, config file and script, creates necessary registry items
    and creates new code run.
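A minimal usage sketch for the new helpers — assuming a local registry is running and that the token, config, and script values below (which are purely illustrative) are valid:

```python
import json

import data_pipeline_api as pipeline

# Illustrative inputs; real values come from your registry setup / test fixtures.
token = "<registry token>"
config = "tests/ext/find_csv.yaml"
script = "path/to/script.sh"

handle = pipeline.initialise(token, config, script)

# Data products whose name contains "find" (JSON-encoded list, or null on failure).
results = pipeline.search_data_products(handle, "find", token=token)
matches = json.loads(results)

# The generic search() queries other registry endpoints the same way,
# e.g. namespaces whose name contains "testing" (endpoint name illustrative).
namespaces = json.loads(
    pipeline.search(handle, "testing", "namespace", "name", token=token)
)
```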
78 changes: 36 additions & 42 deletions pyproject.toml
@@ -1,15 +1,14 @@
[tool.poetry]
authors = [
"Ryan J Field <[email protected]>",
"Dennis Reddyhoff <[email protected]>",
"Robert D Turner <[email protected]>",
"Bruno Viola <[email protected]>",
"Kristian Zarebski <[email protected]>",
]
description = "Python api to interact with the Fair Data Pipeline"
name = "data_pipeline_api"
version = "0.7.5"
description = "Python api to interact with the Fair Data Pipeline"
authors = [
"Ryan J Field <[email protected]>",
"Dennis Reddyhoff <[email protected]>",
"Robert D Turner <[email protected]>",
"Bruno Viola <[email protected]>",
"Kristian Zarebski <[email protected]>"
]


homepage = "https://www.fairdatapipeline.org/"

@@ -18,74 +17,69 @@ repository = "https://github.com/FAIRDataPipeline/pyDataPipeline"
license = "GNU General Public License v3.0"

packages = [
{include = "data_pipeline_api"}
{include = "data_pipeline_api"},
]

readme = "README.md"

keywords = [
"FAIR Data Pipeline",
"FAIR",
"Data Management",
"Provenance"
"FAIR Data Pipeline",
"FAIR",
"Data Management",
"Provenance",
]

[tool.poetry.dependencies]
PyYAML = "^6.0"
h5py = "^3.6.0"
matplotlib = "^3.5.1"
python = "^3.7.1,<3.11"
requests = "^2.27.1"
matplotlib = "^3.5.1"
scipy = "^1.7.3"
h5py = "^3.6.0"
PyYAML = "^6.0"

[tool.poetry.dev-dependencies]
pytest = "^7.0.0"
Sphinx = "^4.3.2"
black = "^22.1"
mypy = "^0.931"
flake8 = "^4.0.1"
isort = "^5.10.1"
mypy = "^0.931"
poetry = "^1.1.6"
pytest-mock = "^3.7.0"
pytest-dependency = "^0.5.1"
pytest-cov = "^3.0.0"
pre-commit = "^2.17.0"
isort = "^5.10.1"
Sphinx = "^4.3.2"
pytest = "^7.0.0"
pytest-cov = "^3.0.0"
pytest-dependency = "^0.5.1"
pytest-mock = "^3.7.0"
sphinx-rtd-theme = "^1.0.0"

[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"

requires = ["poetry-core>=1.0.0"]

[tool.poetry.urls]
"Issue Tracker" = "https://github.com/FAIRDataPipeline/pyDataPipeline/issues"



[tool.isort]
profile = 'black'
multi_line_output = 3
include_trailing_comma = true
force_grid_wrap = 0
use_parentheses = true
ensure_newline_before_comments = true
force_grid_wrap = 0
include_trailing_comma = true
line_length = 79


multi_line_output = 3
profile = 'black'
use_parentheses = true

[tool.black]
line-length = 79

[tool.pytest.ini_options]
addopts = '-s -v --cov=fairdatapipeline --cov-report=html --cov-report=term'
addopts = '-s -v --cov=data_pipeline_api --cov-report=html --cov-report=term'
markers = [
"pipeline: tests for 'pipeline' module ",
"issue: tests for raising issues ",
"utilities: tests for 'utilities' functions ",
"apiversion: tests for api versioning ",
"pipeline: tests for 'pipeline' module ",
"issue: tests for raising issues ",
"utilities: tests for 'utilities' functions ",
"apiversion: tests for api versioning ",
]

[tool.mypy]
ignore_missing_imports = true
disallow_untyped_defs = true
disallow_untyped_calls = true
disallow_untyped_defs = true
ignore_missing_imports = true
20 changes: 20 additions & 0 deletions tests/ext/find_csv.yaml
@@ -0,0 +1,20 @@
run_metadata:
  description: Write csv file
  local_data_registry_url: http://127.0.0.1:8000/api/
  remote_data_registry_url: https://data.scrc.uk/api/
  default_input_namespace: testing
  default_output_namespace: testing
  write_data_store: tmp
  local_repo: ./
  script: |-
    python3 py.test
  public: true
  latest_commit: 221bfe8b52bbfb3b2dbdc23037b7dd94b49aaa70
  remote_repo: https://github.com/FAIRDataPipeline/pyDataPipeline

write:
- data_product: find/csv
  description: test csv to test find data products
  file_type: csv
  use:
    version: 0.0.1
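A brief sketch of how a config like this is consumed (mirroring the new test below; `token` and `script` stand in for the suite's fixtures and the paths are illustrative):

```python
import shutil

import data_pipeline_api as pipeline

handle = pipeline.initialise(token, "tests/ext/find_csv.yaml", script)
link_write = pipeline.link_write(handle, "find/csv")  # path the write block points at
shutil.copy("tests/ext/test.csv", link_write)         # stage the csv to be registered
pipeline.finalise(token, handle)                      # records find/csv v0.0.1 in the registry
```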
77 changes: 75 additions & 2 deletions tests/test_raise_issue.py
@@ -1,3 +1,5 @@
import copy
import json
import os
import shutil

@@ -6,6 +8,8 @@
import data_pipeline_api as pipeline
import data_pipeline_api.fdp_utils as fdp_utils

TEST_CSV = "test.csv"


@pytest.fixture
def test_dir() -> str:
@@ -29,6 +33,11 @@ def config(test_dir: str) -> str:
return os.path.join(test_dir, "write_csv.yaml")


@pytest.fixture
def fconfig(test_dir: str) -> str:
return os.path.join(test_dir, "find_csv.yaml")


@pytest.mark.pipeline
def test_initialise(token: str, config: str, script: str) -> None:
    handle = pipeline.initialise(token, config, script)
@@ -109,7 +118,7 @@ def test_link_read(
    token: str, config: str, script: str, test_dir: str
) -> None:
    handle = pipeline.initialise(token, config, script)
    tmp_csv = os.path.join(test_dir, "test.csv")
    tmp_csv = os.path.join(test_dir, TEST_CSV)
    link_write = pipeline.link_write(handle, "test/csv")
    shutil.copy(tmp_csv, link_write)
    pipeline.finalise(token, handle)
@@ -122,12 +131,76 @@
    assert type(link_read_1) == str and type(link_read_2) == str


@pytest.mark.pipeline
def test_find_data_product(
    token: str,
    fconfig: str,
    script: str,
    test_dir: str,
    wildcard: str = "find",
    key: str = "name",
) -> None:

    handle = pipeline.initialise(token, fconfig, script)
    tmp_csv = os.path.join(test_dir, TEST_CSV)
    link_write = pipeline.link_write(handle, "find/csv")
    shutil.copy(tmp_csv, link_write)
    pipeline.finalise(token, handle)
    config = os.path.join(test_dir, "read_csv.yaml")
    handle = pipeline.initialise(token, config, script)

    results = pipeline.search_data_products(handle, wildcard)
    res = json.loads(results)
    assert len(res) == 1
    result = fdp_utils.get_first_entry(res)
    assert wildcard in result[key]


@pytest.mark.pipeline
def test_find_data_product_empty_handle(
    wildcard: str = "find",
) -> None:
    handle: dict = {}

    results = pipeline.search_data_products(handle, wildcard)
    res = json.loads(results)
    assert res is None


@pytest.mark.pipeline
def test_find_data_product_wrong_registry(
    token: str,
    fconfig: str,
    script: str,
    test_dir: str,
    wildcard: str = "find",
    key: str = "name",
) -> None:

    handle = pipeline.initialise(token, fconfig, script)
    tmp_csv = os.path.join(test_dir, TEST_CSV)
    link_write = pipeline.link_write(handle, "find/csv")
    shutil.copy(tmp_csv, link_write)
    pipeline.finalise(token, handle)
    config = os.path.join(test_dir, "read_csv.yaml")
    handle = pipeline.initialise(token, config, script)
    wrong_handle = copy.deepcopy(handle)
    wrong_handle["yaml"]["run_metadata"]["local_data_registry_url"] = ""
    results = pipeline.search_data_products(wrong_handle, wildcard)
    res = json.loads(results)
    assert res is None
    handle["yaml"]["run_metadata"]["local_data_registry_url"] += "-error"
    results = pipeline.search_data_products(handle, wildcard)
    res = json.loads(results)
    assert res is None
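Both failure paths exercised above — an empty `local_data_registry_url` and a URL pointing at a non-existent registry path — make `search()` return `json.dumps(None, ...)`, so `json.loads(results)` evaluates to `None` rather than raising.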


@pytest.mark.pipeline
def test_link_read_data_product_exists(
    token: str, config: str, script: str, test_dir: str
) -> None:
    handle = pipeline.initialise(token, config, script)
    tmp_csv = os.path.join(test_dir, "test.csv")
    tmp_csv = os.path.join(test_dir, TEST_CSV)
    link_write = pipeline.link_write(handle, "test/csv")
    shutil.copy(tmp_csv, link_write)
    pipeline.finalise(token, handle)