emdgroup · AVHopp · Jun 4, 2025 · Jun 4, 2025 · Jun 6, 2025 · Jun 6, 2025
@@ -21,10 +21,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Changed
 - Dataframe-to-tensor conversion now yields contiguous tensors, improving
   reproducibility of downstream operations
+- Transfer learning now uses BoTorch's `MultiTaskGP` instead of a custom construction
 
 ### Fixed
 - Random seed not entering simulation when explicitly passed to `simulate_scenarios`
- 
+
 ## [0.14.1] - 2025-10-01
 ### Added
 - `to_json` and `from_json` methods now also natively support (de)serialization to/from

@@ -87,6 +87,13 @@ class TaskParameter(CategoricalParameter):
     encoding: CategoricalEncoding = field(default=CategoricalEncoding.INT, init=False)
     # See base class.
 
+    @override
+    @cached_property
+    def comp_df(self) -> pd.DataFrame:
+        # Task parameters do not enter the regular kernel computation (which operates
+        # on floats) but are used for indexing purposes and are thus treated as integers
+        return super().comp_df.astype(int)
+
 
 # Collect leftover original slotted classes processed by `attrs.define`
 gc.collect()
@@ -240,15 +240,23 @@ def parameter_names(self) -> tuple[str, ...]:
         """Return tuple of parameter names."""
         return self.discrete.parameter_names + self.continuous.parameter_names
 
+    @property
+    def _task_parameter(self) -> TaskParameter | None:
+        """The (single) task parameter of the space, if it exists."""
+        # Kept private for now since this is only a temporary solution: it needs to
+        # be extended to support multiple task parameters
+        params = [p for p in self.parameters if isinstance(p, TaskParameter)]
+
+        if not params:
+            return None
+
+        assert len(params) == 1  # currently ensured by parameter validation step
+        return params[0]
+
     @property
     def task_idx(self) -> int | None:
         """The column index of the task parameter in computational representation."""
-        try:
-            # TODO [16932]: Redesign metadata handling
-            task_param = next(
-                p for p in self.parameters if isinstance(p, TaskParameter)
-            )
-        except StopIteration:
+        if (task_param := self._task_parameter) is None:
             return None
         # TODO[11611]: The current approach has three limitations:
         #   1.  It matches by column name and thus assumes that the parameter name
@@ -266,15 +274,10 @@ def n_tasks(self) -> int:
         #  multiple task parameters, we need to align what the output should even
         #  represent (e.g. number of combinatorial task combinations, number of
         #  tasks per task parameter, etc).
-        try:
-            task_param = next(
-                p for p in self.parameters if isinstance(p, TaskParameter)
-            )
-            return len(task_param.values)
-
-        # When there are no task parameters, we effectively have a single task
-        except StopIteration:
+        if (task_param := self._task_parameter) is None:
+            # When there are no task parameters, we effectively have a single task
             return 1
+        return len(task_param.values)
 
     def get_comp_rep_parameter_indices(self, name: str, /) -> tuple[int, ...]:
         """Find a parameter's column indices in the computational representation.

@@ -69,9 +69,14 @@ def parameter_bounds(self) -> Tensor:
 
         return torch.from_numpy(self.searchspace.scaling_bounds.values)
 
-    def get_numerical_indices(self, n_inputs: int) -> tuple[int, ...]:
-        """Get the indices of the regular numerical model inputs."""
-        return tuple(i for i in range(n_inputs) if i != self.task_idx)
+    @property
+    def numerical_indices(self) -> tuple[int, ...]:
+        """The indices of the regular numerical model inputs."""
+        return tuple(
+            i
+            for i in range(len(self.searchspace.comp_rep_columns))
+            if i != self.task_idx
+        )
 
 
 @define
@@ -83,7 +88,7 @@ class GaussianProcessSurrogate(Surrogate):
     # Note [Scaling Workaround]
     # -------------------------
     # For GPs, we deactivate the base class scaling and instead let the botorch
-    # model internally handle input/output scaling. The reasons is that we need to
+    # model internally handle input/output scaling. The reason is that we need to
     # make `to_botorch` expose the actual botorch GP object, instead of going
     # via the `AdapterModel`, because certain acquisition functions (like qNIPV)
     # require the capability to `fantasize`, which the `AdapterModel` does not support.
@@ -146,64 +151,58 @@ def _fit(self, train_x: Tensor, train_y: Tensor) -> None:
         import gpytorch
         import torch
 
-        # FIXME[typing]: It seems there is currently no better way to inform the type
-        #   checker that the attribute is available at the time of the function call
-        assert self._searchspace is not None
-
+        assert self._searchspace is not None  # provided by base class
         context = _ModelContext(self._searchspace)
 
-        numerical_idxs = context.get_numerical_indices(train_x.shape[-1])
-
-        # For GPs, we let botorch handle the scaling. See [Scaling Workaround] above.
+        # Input/output scaling
+        # NOTE: For GPs, we let BoTorch handle scaling (see [Scaling Workaround] above)
         input_transform = botorch.models.transforms.Normalize(
             train_x.shape[-1],
             bounds=context.parameter_bounds,
-            indices=list(numerical_idxs),
+            indices=list(context.numerical_indices),
         )
         outcome_transform = botorch.models.transforms.Standardize(train_y.shape[-1])
 
-        # extract the batch shape of the training data
-        batch_shape = train_x.shape[:-2]
+        # Mean function
+        mean_module = gpytorch.means.ConstantMean()
 
-        # create GP mean
-        mean_module = gpytorch.means.ConstantMean(batch_shape=batch_shape)
+        # Covariance function
+        kernel = self.kernel_factory(context.searchspace, train_x, train_y)
+        kernel_num_dims = train_x.shape[-1] - context.n_task_dimensions
+        covar_module = kernel.to_gpytorch(ard_num_dims=kernel_num_dims)
 
-        # define the covariance module for the numeric dimensions
-        base_covar_module = self.kernel_factory(
-            context.searchspace, train_x, train_y
-        ).to_gpytorch(
-            ard_num_dims=train_x.shape[-1] - context.n_task_dimensions,
-            active_dims=numerical_idxs,
-            batch_shape=batch_shape,
-        )
-
-        # create GP covariance
-        if not context.is_multitask:
-            covar_module = base_covar_module
-        else:
-            task_covar_module = gpytorch.kernels.IndexKernel(
-                num_tasks=context.n_tasks,
-                active_dims=context.task_idx,
-                rank=context.n_tasks,  # TODO: make controllable
-            )
-            covar_module = base_covar_module * task_covar_module
-
-        # create GP likelihood
+        # Likelihood model
         noise_prior = _default_noise_factory(context.searchspace, train_x, train_y)
         likelihood = gpytorch.likelihoods.GaussianLikelihood(
-            noise_prior=noise_prior[0].to_gpytorch(), batch_shape=batch_shape
+            noise_prior=noise_prior[0].to_gpytorch()
         )
         likelihood.noise = torch.tensor([noise_prior[1]])
 
-        # construct and fit the Gaussian process
-        self._model = botorch.models.SingleTaskGP(
+        # Model selection
+        model_cls: type[botorch.models.SingleTaskGP] | type[botorch.models.MultiTaskGP]
+        if (task_param := context.searchspace._task_parameter) is None:
+            model_cls = botorch.models.SingleTaskGP
+            model_kwargs = {}
+        else:
+            model_cls = botorch.models.MultiTaskGP
+            task_comp_rep = task_param.comp_df.iloc[:, 0]
+            model_kwargs = {
+                "task_feature": context.task_idx,
+                "output_tasks": task_comp_rep[list(task_param.active_values)],  # type: ignore[index]
+                "rank": context.n_tasks,
+                "all_tasks": task_comp_rep.to_list(),
+            }
+
+        # Model construction and fitting
+        self._model = model_cls(
             train_x,
             train_y,
             input_transform=input_transform,
             outcome_transform=outcome_transform,
             mean_module=mean_module,
             covar_module=covar_module,
             likelihood=likelihood,
+            **model_kwargs,  # type: ignore[arg-type]
         )
 
         # TODO: This is still a temporary workaround to avoid overfitting seen in

@@ -0,0 +1,68 @@
+"""Tests for transfer learning."""
+
+from typing import Literal
+
+import pandas as pd
+import pytest
+
+from baybe import Campaign
+from baybe.parameters import NumericalContinuousParameter, TaskParameter
+from baybe.recommenders.pure.bayesian.botorch import BotorchRecommender
+from baybe.searchspace import SearchSpace
+from baybe.targets import NumericalTarget
+
+
+@pytest.fixture
+def campaign(
+    training_data: Literal["source", "target", "both"],
+    active_tasks: Literal["target_only", "both"],
+) -> Campaign:
+    """A transfer-learning campaign with various active tasks and training data."""
+    assert training_data in ["source", "target", "both"]
+    assert active_tasks in ["target_only", "both"]
+
+    source = "B"
+    target = "A"
+    parameters = [
+        NumericalContinuousParameter("x", (0, 5)),
+        TaskParameter(
+            "task",
+            values=(target, source),
+            active_values=(
+                (target,) if active_tasks == "target_only" else (target, source)
+            ),
+        ),
+    ]
+    searchspace = SearchSpace.from_product(parameters=parameters)
+    objective = NumericalTarget(name="y").to_objective()
+    recommender = BotorchRecommender()
+    lookup = pd.DataFrame(
+        {
+            "x": [1.0, 2.0, 3.0, 4.0],
+            "y": [1.0, 2.0, 3.0, 4.0],
+            "task": [target] * 2 + [source] * 2,
+        }
+    )
+
+    if training_data == "source":
+        lookup = lookup[lookup["task"] == source]
+    elif training_data == "target":
+        lookup = lookup[lookup["task"] == target]
+
+    campaign = Campaign(
+        searchspace=searchspace,
+        objective=objective,
+        recommender=recommender,
+    )
+    campaign.add_measurements(lookup)
+
+    return campaign
+
+
+@pytest.mark.parametrize("active_tasks", ["target_only", "both"])
+@pytest.mark.parametrize("training_data", ["source", "target", "both"])
+def test_recommendation(campaign: Campaign):
+    """Transfer learning recommendation works regardless of which tasks are
+    present in the training data and which tasks are active.
+    """  # noqa: D205
+    campaign.recommend(1)