Skip to content
This repository was archived by the owner on Sep 24, 2025. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,4 +41,12 @@ To deploy a signed certificate in cluster follow [trusted cluster cert](signed-c
### Object Storage
This solution requires object storage to be in place either through S3 or using Noobaa.

If you are using Noobaa, apply the following [tuning parameters](noobaa/README.md).
If you are using Noobaa, apply the following [tuning parameters](noobaa/README.md).

## How to run 🏃🏼

1. Create a Kubernetes ConfigMap and a Kubernetes Secret containing the target model server's details. Use [kfp-model-server.yaml](./sdg/kfp-model-server.yaml) as the template.

2. Run `pipeline.py` to generate `pipeline.yaml`, which is used to create the RHOAI pipeline.

3. Create a run in RHOAI, providing the required input parameter values.
15 changes: 11 additions & 4 deletions pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@ def pipeline_wrapper(mock: List[Literal[MOCKED_STAGES]]):

# Imports for SDG stage
if "sdg" in mock:
from sdg.faked import git_clone_op, sdg_op
from sdg.faked import preflight_check_op, git_clone_op, sdg_op
else:
from sdg import git_clone_op, sdg_op
from sdg import preflight_check_op, git_clone_op, sdg_op

# Imports for Training stage
if "train" in mock:
Expand Down Expand Up @@ -70,11 +70,18 @@ def pipeline(
device: str = None,
):

# SDG stage
preflight_check_task = preflight_check_op(
repo_branch=repo_branch, repo_pr=repo_pr
)
use_config_map_as_env(preflight_check_task, K8S_NAME, dict(endpoint="endpoint", model="model"))
use_secret_as_env(preflight_check_task, K8S_NAME, {"api_key": "api_key"})

git_clone_task = git_clone_op(
repo_branch=repo_branch, repo_pr=repo_pr, repo_url=repo_url
)
).after(preflight_check_task)

# SDG stage

sdg_task = sdg_op(
num_instructions_to_generate=num_instructions_to_generate,
taxonomy=git_clone_task.outputs["taxonomy"],
Expand Down
2 changes: 2 additions & 0 deletions pipeline.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1049,6 +1049,8 @@ root:
enableCache: true
componentRef:
name: comp-git-clone-op
dependentTasks:
- preflight-check-op
inputs:
parameters:
repo_branch:
Expand Down
4 changes: 2 additions & 2 deletions sdg/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from .components import git_clone_op, sdg_op
from .components import preflight_check_op, git_clone_op, sdg_op
from . import faked

__all__ = ["git_clone_op", "sdg_op", "faked"]
__all__ = ["preflight_check_op", "git_clone_op", "sdg_op", "faked"]
20 changes: 20 additions & 0 deletions sdg/components.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,26 @@

IMAGE = "quay.io/tcoufal/ilab-sdg:latest"

@dsl.component(base_image=IMAGE)
def preflight_check_op(
repo_branch: str,
repo_pr: Optional[int],
):
from os import getenv

if (not repo_branch) and (repo_pr is None or repo_pr <= 0 ):
raise Exception("Both taxonomy repo branch and taxonomy pull request number cannot be empty")
Comment on lines +15 to +16
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This gets checked when you try to create the run. It will give an error if you try to proceed without a PR or branch.
Failed to create a new run: Failed to generate the ExecutionSpec: invalid pipeline job inputs: Invalid input error: input parameter repo_pr requires type double or integer, but the parameter value is not of number value type

Can remove this check

api_key = getenv("api_key")
model = getenv("model")
endpoint = getenv("endpoint")

if not api_key:
raise Exception("Model Server Auth Key is missing in kfp-model-server secret")
if not model:
raise Exception("Model name is missing in kfp-model-server configMap")
if not endpoint:
raise Exception("Model Server endpoint URL is missing in kfp-model-server configMap")

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This will fail at the first step anyway if these params are not set correctly. Do we also want to add checks here for conditions that could cause failures later on, during the training and eval steps?

@dsl.container_component
def git_clone_op(
taxonomy: dsl.Output[dsl.Dataset],
Expand Down
4 changes: 2 additions & 2 deletions sdg/faked/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
from .components import git_clone_op, sdg_op
from .components import preflight_check_op, git_clone_op, sdg_op

__all__ = ["git_clone_op", "sdg_op"]
__all__ = ["preflight_check_op", "git_clone_op", "sdg_op"]
11 changes: 10 additions & 1 deletion sdg/faked/components.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,16 @@
from kfp import dsl
from utils.consts import PYTHON_IMAGE

@dsl.component(base_image=PYTHON_IMAGE)
IMAGE = "registry.access.redhat.com/ubi9/python-311:latest"

# Mocked preflight check, imported in place of the real component when "sdg"
# is listed in the mocked stages. It accepts the same parameters as the real
# `preflight_check_op` but performs no validation, so it always succeeds.
@dsl.component(base_image=IMAGE)
def preflight_check_op(
    repo_branch: str,
    repo_pr: Optional[int],
):
    pass

@dsl.component(base_image=IMAGE)
def git_clone_op(
taxonomy: dsl.Output[dsl.Dataset],
repo_branch: str,
Expand Down