Allow TorchModuleWrapper to compute output shape (#21160)

pass-lin · web-flow · commit 6d52164c4b07 · 2025-04-14T11:32:56.000-07:00
* Allow TorchModuleWrapper to compute output shape

* modify
diff --git a/keras/src/utils/torch_utils.py b/keras/src/utils/torch_utils.py
@@ -25,6 +25,8 @@ class TorchModuleWrapper(Layer):
             instance, then its parameters must be initialized before
             passing the instance to `TorchModuleWrapper` (e.g. by calling
             it once).
+        output_shape: The shape of the output of this layer, including the
+            batch dimension. It helps Keras perform automatic shape inference.
         name: The name of the layer (string).
 
     Example:
@@ -80,7 +82,7 @@ def call(self, inputs):
     ```
     """
 
-    def __init__(self, module, name=None, **kwargs):
+    def __init__(self, module, name=None, output_shape=None, **kwargs):
         super().__init__(name=name, **kwargs)
         import torch.nn as nn
 
@@ -98,6 +100,7 @@ def __init__(self, module, name=None, **kwargs):
 
         self.module = module.to(get_device())
         self._track_module_parameters()
+        self.output_shape = output_shape
 
     def parameters(self, recurse=True):
         return self.module.parameters(recurse=recurse)
@@ -138,13 +141,21 @@ def load_own_variables(self, store):
             state_dict[key] = convert_to_tensor(store[key])
         self.module.load_state_dict(state_dict)
 
+    def compute_output_shape(self, input_shape):
+        if self.output_shape is None:
+            return super().compute_output_shape(input_shape)
+        return self.output_shape
+
     def get_config(self):
         base_config = super().get_config()
         import torch
 
         buffer = io.BytesIO()
         torch.save(self.module, buffer)
-        config = {"module": buffer.getvalue()}
+        config = {
+            "module": buffer.getvalue(),
+            "output_shape": self.output_shape,
+        }
         return {**base_config, **config}
 
     @classmethod
diff --git a/keras/src/utils/torch_utils_test.py b/keras/src/utils/torch_utils_test.py
@@ -5,6 +5,7 @@
 import torch
 from absl.testing import parameterized
 
+import keras
 from keras.src import backend
 from keras.src import layers
 from keras.src import models
@@ -235,3 +236,13 @@ def test_from_config(self):
         new_mw = TorchModuleWrapper.from_config(config)
         for ref_w, new_w in zip(mw.get_weights(), new_mw.get_weights()):
             self.assertAllClose(ref_w, new_w, atol=1e-5)
+
+    def test_build_model(self):
+        x = keras.Input([4])
+        z = TorchModuleWrapper(torch.nn.Linear(4, 8), output_shape=[None, 8])(x)
+        y = TorchModuleWrapper(torch.nn.Linear(8, 16), output_shape=[None, 16])(
+            z
+        )
+        model = keras.Model(x, y)
+        self.assertEqual(model.predict(np.zeros([5, 4])).shape, (5, 16))
+        self.assertEqual(model(np.zeros([5, 4])).shape, (5, 16))