4 changes: 3 additions & 1 deletion data_pipeline_api/__init__.py
@@ -3,6 +3,8 @@
"link_read",
"link_write",
"finalise",
"search",
"search_data_products",
"raise_issue_by_data_product",
"raise_issue_by_index",
"raise_issue_with_config",
@@ -15,7 +17,7 @@

from .fdp_utils import get_handle_index_from_path
from .link import link_read, link_write
from .pipeline import finalise, initialise
from .pipeline import finalise, initialise, search, search_data_products
from .raise_issue import (
    raise_issue_by_data_product,
    raise_issue_by_existing_data_product,
73 changes: 73 additions & 0 deletions data_pipeline_api/pipeline.py
@@ -1,12 +1,85 @@
import datetime
import json
import logging
import os
from typing import List

import requests
import yaml

from data_pipeline_api import fdp_utils


def search_data_products(
    handle: dict, wildcard: str, token: str = None
) -> str:
    """
    search_data_products
    Wrapper around search() to look up data products by name using a wildcard.

    Parameters
    ----------
    handle : dict
        pipeline handle
    wildcard : str
        search parameter
    token : str, optional
        registry token, by default None

    Returns
    -------
    str
        JSON-encoded list of data products that meet the search criteria
    """
    return search(handle, wildcard, "data_product", "name", token=token)


def search(
    handle: dict,
    wildcard: str,
    endpoint: str,
    key: str,
    token: str = None,
) -> str:
    """
    Queries the local registry URL held in the handle and returns the items
    found at the given endpoint whose `key` field contains the wildcard.

    Args:
        | handle: pipeline handle
        | token: registry token
        | wildcard: string to search for
        | key: key of the results dictionary in which the wildcard is searched
        | endpoint: api endpoint where the search is performed

    Returns:
        | json: a JSON-encoded list of dictionaries where the wildcard was found
    """

    try:
        api_version = handle["yaml"]["run_metadata"]["api_version"]
        registry_url = handle["yaml"]["run_metadata"][
            "local_data_registry_url"
        ]
    except KeyError:
        return json.dumps(None, indent=4, sort_keys=True)

    headers = fdp_utils.get_headers(token=token, api_version=api_version)
    if not registry_url:
        return json.dumps(None, indent=4, sort_keys=True)
    if registry_url[-1] != "/":
        registry_url += "/"
    registry_url += endpoint + "/?"

    response = requests.get(registry_url, headers=headers)
    if response.status_code != 200:
        return json.dumps(None, indent=4, sort_keys=True)
    data = response.json()

    res = [obj for obj in data["results"] if wildcard in obj[key]]  # type: ignore

    return json.dumps(res, indent=4, sort_keys=True)


def initialise(token: str, config: str, script: str) -> dict:
    """Reads in token, config file and script, creates necessary registry items
    and creates new code run.
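A minimal usage sketch for the new helpers — assuming a local registry is running and that the token, config, and script values below (which are purely illustrative) are valid:

```python
import json

import data_pipeline_api as pipeline

# Illustrative inputs; real values come from your registry setup / test fixtures.
token = "<registry token>"
config = "tests/ext/find_csv.yaml"
script = "path/to/script.sh"

handle = pipeline.initialise(token, config, script)

# Data products whose name contains "find" (JSON-encoded list, or null on failure).
results = pipeline.search_data_products(handle, "find", token=token)
matches = json.loads(results)

# The generic search() queries other registry endpoints the same way,
# e.g. namespaces whose name contains "testing" (endpoint name illustrative).
namespaces = json.loads(
    pipeline.search(handle, "testing", "namespace", "name", token=token)
)
```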
78 changes: 36 additions & 42 deletions pyproject.toml
@@ -1,15 +1,14 @@
[tool.poetry]
authors = [
"Ryan J Field <[email protected]>",
"Dennis Reddyhoff <[email protected]>",
"Robert D Turner <[email protected]>",
"Bruno Viola <[email protected]>",
"Kristian Zarebski <[email protected]>",
]
description = "Python api to interact with the Fair Data Pipeline"
name = "data_pipeline_api"
version = "0.7.5"
description = "Python api to interact with the Fair Data Pipeline"
authors = [
"Ryan J Field <[email protected]>",
"Dennis Reddyhoff <[email protected]>",
"Robert D Turner <[email protected]>",
"Bruno Viola <[email protected]>",
"Kristian Zarebski <[email protected]>"
]


homepage = "https://www.fairdatapipeline.org/"

@@ -18,74 +17,69 @@ repository = "https://github.com/FAIRDataPipeline/pyDataPipeline"
license = "GNU General Public License v3.0"

packages = [
{include = "data_pipeline_api"}
{include = "data_pipeline_api"},
]

readme = "README.md"

keywords = [
"FAIR Data Pipeline",
"FAIR",
"Data Management",
"Provenance"
"FAIR Data Pipeline",
"FAIR",
"Data Management",
"Provenance",
]

[tool.poetry.dependencies]
PyYAML = "^6.0"
h5py = "^3.6.0"
matplotlib = "^3.5.1"
python = "^3.7.1,<3.11"
requests = "^2.27.1"
matplotlib = "^3.5.1"
scipy = "^1.7.3"
h5py = "^3.6.0"
PyYAML = "^6.0"

[tool.poetry.dev-dependencies]
pytest = "^7.0.0"
Sphinx = "^4.3.2"
black = "^22.1"
mypy = "^0.931"
flake8 = "^4.0.1"
isort = "^5.10.1"
mypy = "^0.931"
poetry = "^1.1.6"
pytest-mock = "^3.7.0"
pytest-dependency = "^0.5.1"
pytest-cov = "^3.0.0"
pre-commit = "^2.17.0"
isort = "^5.10.1"
Sphinx = "^4.3.2"
pytest = "^7.0.0"
pytest-cov = "^3.0.0"
pytest-dependency = "^0.5.1"
pytest-mock = "^3.7.0"
sphinx-rtd-theme = "^1.0.0"

[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"

requires = ["poetry-core>=1.0.0"]

[tool.poetry.urls]
"Issue Tracker" = "https://github.com/FAIRDataPipeline/pyDataPipeline/issues"



[tool.isort]
profile = 'black'
multi_line_output = 3
include_trailing_comma = true
force_grid_wrap = 0
use_parentheses = true
ensure_newline_before_comments = true
force_grid_wrap = 0
include_trailing_comma = true
line_length = 79


multi_line_output = 3
profile = 'black'
use_parentheses = true

[tool.black]
line-length = 79

[tool.pytest.ini_options]
addopts = '-s -v --cov=fairdatapipeline --cov-report=html --cov-report=term'
addopts = '-s -v --cov=data_pipeline_api --cov-report=html --cov-report=term'
markers = [
"pipeline: tests for 'pipeline' module ",
"issue: tests for raising issues ",
"utilities: tests for 'utilities' functions ",
"apiversion: tests for api versioning ",
"pipeline: tests for 'pipeline' module ",
"issue: tests for raising issues ",
"utilities: tests for 'utilities' functions ",
"apiversion: tests for api versioning ",
]

[tool.mypy]
ignore_missing_imports = true
disallow_untyped_defs = true
disallow_untyped_calls = true
disallow_untyped_defs = true
ignore_missing_imports = true
20 changes: 20 additions & 0 deletions tests/ext/find_csv.yaml
@@ -0,0 +1,20 @@
run_metadata:
  description: Write csv file
  local_data_registry_url: http://127.0.0.1:8000/api/
  remote_data_registry_url: https://data.scrc.uk/api/
  default_input_namespace: testing
  default_output_namespace: testing
  write_data_store: tmp
  local_repo: ./
  script: |-
    python3 py.test
  public: true
  latest_commit: 221bfe8b52bbfb3b2dbdc23037b7dd94b49aaa70
  remote_repo: https://github.com/FAIRDataPipeline/pyDataPipeline

write:
- data_product: find/csv
  description: test csv to test find data products
  file_type: csv
  use:
    version: 0.0.1
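A brief sketch of how a config like this is consumed (mirroring the new test below; `token` and `script` stand in for the suite's fixtures and the paths are illustrative):

```python
import shutil

import data_pipeline_api as pipeline

handle = pipeline.initialise(token, "tests/ext/find_csv.yaml", script)
link_write = pipeline.link_write(handle, "find/csv")  # path the write block points at
shutil.copy("tests/ext/test.csv", link_write)         # stage the csv to be registered
pipeline.finalise(token, handle)                      # records find/csv v0.0.1 in the registry
```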
77 changes: 75 additions & 2 deletions tests/test_raise_issue.py
@@ -1,3 +1,5 @@
import copy
import json
import os
import shutil

@@ -6,6 +8,8 @@
import data_pipeline_api as pipeline
import data_pipeline_api.fdp_utils as fdp_utils

TEST_CSV = "test.csv"


@pytest.fixture
def test_dir() -> str:
@@ -29,6 +33,11 @@ def config(test_dir: str) -> str:
return os.path.join(test_dir, "write_csv.yaml")


@pytest.fixture
def fconfig(test_dir: str) -> str:
return os.path.join(test_dir, "find_csv.yaml")


@pytest.mark.pipeline
def test_initialise(token: str, config: str, script: str) -> None:
    handle = pipeline.initialise(token, config, script)
@@ -109,7 +118,7 @@ def test_link_read(
    token: str, config: str, script: str, test_dir: str
) -> None:
    handle = pipeline.initialise(token, config, script)
    tmp_csv = os.path.join(test_dir, "test.csv")
    tmp_csv = os.path.join(test_dir, TEST_CSV)
    link_write = pipeline.link_write(handle, "test/csv")
    shutil.copy(tmp_csv, link_write)
    pipeline.finalise(token, handle)
@@ -122,12 +131,76 @@
    assert type(link_read_1) == str and type(link_read_2) == str


@pytest.mark.pipeline
def test_find_data_product(
    token: str,
    fconfig: str,
    script: str,
    test_dir: str,
    wildcard: str = "find",
    key: str = "name",
) -> None:

    handle = pipeline.initialise(token, fconfig, script)
    tmp_csv = os.path.join(test_dir, TEST_CSV)
    link_write = pipeline.link_write(handle, "find/csv")
    shutil.copy(tmp_csv, link_write)
    pipeline.finalise(token, handle)
    config = os.path.join(test_dir, "read_csv.yaml")
    handle = pipeline.initialise(token, config, script)

    results = pipeline.search_data_products(handle, wildcard)
    res = json.loads(results)
    assert len(res) == 1
    result = fdp_utils.get_first_entry(res)
    assert wildcard in result[key]


@pytest.mark.pipeline
def test_find_data_product_empty_handle(
    wildcard: str = "find",
) -> None:
    handle: dict = {}

    results = pipeline.search_data_products(handle, wildcard)
    res = json.loads(results)
    assert res is None


@pytest.mark.pipeline
def test_find_data_product_wrong_registry(
    token: str,
    fconfig: str,
    script: str,
    test_dir: str,
    wildcard: str = "find",
    key: str = "name",
) -> None:

    handle = pipeline.initialise(token, fconfig, script)
    tmp_csv = os.path.join(test_dir, TEST_CSV)
    link_write = pipeline.link_write(handle, "find/csv")
    shutil.copy(tmp_csv, link_write)
    pipeline.finalise(token, handle)
    config = os.path.join(test_dir, "read_csv.yaml")
    handle = pipeline.initialise(token, config, script)
    wrong_handle = copy.deepcopy(handle)
    wrong_handle["yaml"]["run_metadata"]["local_data_registry_url"] = ""
    results = pipeline.search_data_products(wrong_handle, wildcard)
    res = json.loads(results)
    assert res is None
    handle["yaml"]["run_metadata"]["local_data_registry_url"] += "-error"
    results = pipeline.search_data_products(handle, wildcard)
    res = json.loads(results)
    assert res is None
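Both failure paths exercised above — an empty `local_data_registry_url` and a URL pointing at a non-existent registry path — make `search()` return `json.dumps(None, ...)`, so `json.loads(results)` evaluates to `None` rather than raising.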


@pytest.mark.pipeline
def test_link_read_data_product_exists(
    token: str, config: str, script: str, test_dir: str
) -> None:
    handle = pipeline.initialise(token, config, script)
    tmp_csv = os.path.join(test_dir, "test.csv")
    tmp_csv = os.path.join(test_dir, TEST_CSV)
    link_write = pipeline.link_write(handle, "test/csv")
    shutil.copy(tmp_csv, link_write)
    pipeline.finalise(token, handle)