Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
145 changes: 41 additions & 104 deletions difflogic/difflogic.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,21 @@
import torch
import difflogic_cuda
from pallas import autograd as pallas_autograd
import numpy as np
import jax
import jax.numpy as jnp
from jax.nn import softmax, one_hot
from .functional import bin_op_s, get_unique_connections, GradFactor
from .packbitstensor import PackBitsTensor


from pallas import compile as pallas_compile

@pallas_compile(backend="cuda")
def logic_layer_kernel(x, a, b, w, y):
    """
    Copy one element of ``x`` into ``y`` for the current program instance.

    Placeholder kernel: ``a``, ``b`` and ``w`` are accepted but never read, so
    no logic operation is applied to the input yet.

    NOTE(review): ``pallas_compile`` and ``pallas_autograd.program_id`` are
    imported from a top-level ``pallas`` package. Upstream Pallas is shipped as
    ``jax.experimental.pallas`` (``pallas_call`` / ``program_id``) -- confirm
    that these imports actually resolve.

    :param x: input buffer, read at the program index.
    :param a: unused here (the caller passes the first connection indices).
    :param b: unused here (the caller passes the second connection indices).
    :param w: unused here (the caller passes the layer weights).
    :param y: output buffer, written at the program index.
    """
    # Index of this program instance along launch axis 0.
    i = pallas_autograd.program_id(0)
    y[i] = x[i]


########################################################################################################################


Expand Down Expand Up @@ -53,30 +64,14 @@ def __init__(
assert self.connections in ['random', 'unique'], self.connections
self.indices = self.get_connections(self.connections, device)

if self.implementation == 'cuda':
"""
Defining additional indices for improving the efficiency of the backward of the CUDA implementation.
"""
given_x_indices_of_y = [[] for _ in range(in_dim)]
indices_0_np = self.indices[0].cpu().numpy()
indices_1_np = self.indices[1].cpu().numpy()
for y in range(out_dim):
given_x_indices_of_y[indices_0_np[y]].append(y)
given_x_indices_of_y[indices_1_np[y]].append(y)
self.given_x_indices_of_y_start = torch.tensor(
np.array([0] + [len(g) for g in given_x_indices_of_y]).cumsum(), device=device, dtype=torch.int64)
self.given_x_indices_of_y = torch.tensor(
[item for sublist in given_x_indices_of_y for item in sublist], dtype=torch.int64, device=device)

self.num_neurons = out_dim
self.num_weights = out_dim

def forward(self, x):
def forward(self, x, training: bool):
if isinstance(x, PackBitsTensor):
assert not self.training, 'PackBitsTensor is not supported for the differentiable training mode.'
assert self.device == 'cuda', 'PackBitsTensor is only supported for CUDA, not for {}. ' \
'If you want fast inference on CPU, please use CompiledDiffLogicModel.' \
''.format(self.device)
assert not training, 'PackBitsTensor is not supported for the differentiable training mode.'
assert self.device == 'cuda', 'PackBitsTensor is only supported for CUDA, not for {}. '.format(self.device) + \
'If you want fast inference on CPU, please use CompiledDiffLogicModel.'

else:
if self.grad_factor != 1.:
Expand All @@ -85,68 +80,41 @@ def forward(self, x):
if self.implementation == 'cuda':
if isinstance(x, PackBitsTensor):
return self.forward_cuda_eval(x)
return self.forward_cuda(x)
return self.forward_cuda(x, training)
elif self.implementation == 'python':
return self.forward_python(x)
return self.forward_python(x, training)
else:
raise ValueError(self.implementation)

def forward_python(self, x):
assert x.shape[-1] == self.in_dim, (x[0].shape[-1], self.in_dim)

if self.indices[0].dtype == torch.int64 or self.indices[1].dtype == torch.int64:
print(self.indices[0].dtype, self.indices[1].dtype)
self.indices = self.indices[0].long(), self.indices[1].long()
print(self.indices[0].dtype, self.indices[1].dtype)
def forward_python(self, x, training: bool):
assert x.shape[-1] == self.in_dim, (x.shape[-1], self.in_dim)

a, b = x[..., self.indices[0]], x[..., self.indices[1]]
if self.training:
x = bin_op_s(a, b, torch.nn.functional.softmax(self.weights, dim=-1))
weights = jnp.array(self.weights) # Convert to JAX array
if training:
x = bin_op_s(a, b, softmax(weights, axis=-1))
else:
weights = torch.nn.functional.one_hot(self.weights.argmax(-1), 16).to(torch.float32)
weights = one_hot(jnp.argmax(weights, axis=-1), 16).astype(jnp.float32)
x = bin_op_s(a, b, weights)
return x

def forward_cuda(self, x):
if self.training:
assert x.device.type == 'cuda', x.device
assert x.ndim == 2, x.ndim
def forward_cuda(self, x, training: bool):
x = jnp.array(x)
a = jnp.array(self.indices[0])
b = jnp.array(self.indices[1])
w = jnp.array(self.weights)
y = jnp.zeros((x.shape[0], self.out_dim), dtype=x.dtype)

x = x.transpose(0, 1)
x = x.contiguous()
grid_dim = (x.shape[0],)
block_dim = (min(x.shape[0], 1024),)

assert x.shape[0] == self.in_dim, (x.shape, self.in_dim)
logic_layer_kernel[grid_dim, block_dim](x[:,0], a, b, w, y)

a, b = self.indices

if self.training:
w = torch.nn.functional.softmax(self.weights, dim=-1).to(x.dtype)
return LogicLayerCudaFunction.apply(
x, a, b, w, self.given_x_indices_of_y_start, self.given_x_indices_of_y
).transpose(0, 1)
else:
w = torch.nn.functional.one_hot(self.weights.argmax(-1), 16).to(x.dtype)
with torch.no_grad():
return LogicLayerCudaFunction.apply(
x, a, b, w, self.given_x_indices_of_y_start, self.given_x_indices_of_y
).transpose(0, 1)
return y

def forward_cuda_eval(self, x: PackBitsTensor):
"""
WARNING: this is an in-place operation.

:param x:
:return:
"""
assert not self.training
assert isinstance(x, PackBitsTensor)
assert x.t.shape[0] == self.in_dim, (x.t.shape, self.in_dim)

a, b = self.indices
w = self.weights.argmax(-1).to(torch.uint8)
x.t = difflogic_cuda.eval(x.t, a, b, w)

return x
raise NotImplementedError("`forward_cuda_eval` is not yet implemented for the JAX version. "
"PackBitsTensor is currently not supported in JAX.")

def extra_repr(self):
return '{}, {}, {}'.format(self.in_dim, self.out_dim, 'train' if self.training else 'eval')
Expand All @@ -172,53 +140,22 @@ def get_connections(self, connections, device='cuda'):
########################################################################################################################


class GroupSum(torch.nn.Module):
class GroupSum:
"""
The GroupSum module.
"""
def __init__(self, k: int, tau: float = 1., device='cuda'):
"""

:param k: number of intended real valued outputs, e.g., number of classes
:param tau: the (softmax) temperature tau. The summed outputs are divided by tau.
:param device:
"""
super().__init__()
self.k = k
self.tau = tau
self.device = device

def forward(self, x):
if isinstance(x, PackBitsTensor):
return x.group_sum(self.k)
raise NotImplementedError("PackBitsTensor is not yet supported in JAX.")

assert x.shape[-1] % self.k == 0, (x.shape, self.k)
return x.reshape(*x.shape[:-1], self.k, x.shape[-1] // self.k).sum(-1) / self.tau

def extra_repr(self):
return 'k={}, tau={}'.format(self.k, self.tau)

return x.reshape(*x.shape[:-1], self.k, x.shape[-1] // self.k).sum(axis=-1) / self.tau

########################################################################################################################


class LogicLayerCudaFunction(torch.autograd.Function):
@staticmethod
def forward(ctx, x, a, b, w, given_x_indices_of_y_start, given_x_indices_of_y):
ctx.save_for_backward(x, a, b, w, given_x_indices_of_y_start, given_x_indices_of_y)
return difflogic_cuda.forward(x, a, b, w)

@staticmethod
def backward(ctx, grad_y):
x, a, b, w, given_x_indices_of_y_start, given_x_indices_of_y = ctx.saved_tensors
grad_y = grad_y.contiguous()
def __repr__(self):
return f'GroupSum(k={self.k}, tau={self.tau})'

grad_w = grad_x = None
if ctx.needs_input_grad[0]:
grad_x = difflogic_cuda.backward_x(x, a, b, w, grad_y, given_x_indices_of_y_start, given_x_indices_of_y)
if ctx.needs_input_grad[3]:
grad_w = difflogic_cuda.backward_w(x, a, b, grad_y)
return grad_x, None, None, grad_w, None, None, None


########################################################################################################################
8 changes: 4 additions & 4 deletions difflogic/functional.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import torch
import jax.numpy as jnp
import numpy as np

BITS_TO_NP_DTYPE = {8: np.int8, 16: np.int16, 32: np.int32, 64: np.int64}
Expand Down Expand Up @@ -29,7 +29,7 @@ def bin_op(a, b, i):
assert a[1].shape == b[1].shape, (a[1].shape, b[1].shape)

if i == 0:
return torch.zeros_like(a)
return jnp.zeros_like(a)
elif i == 1:
return a * b
elif i == 2:
Expand Down Expand Up @@ -59,11 +59,11 @@ def bin_op(a, b, i):
elif i == 14:
return 1 - a * b
elif i == 15:
return torch.ones_like(a)
return jnp.ones_like(a)


def bin_op_s(a, b, i_s):
r = torch.zeros_like(a)
r = jnp.zeros_like(a)
for i in range(16):
u = bin_op(a, b, i)
r = r + i_s[..., i] * u
Expand Down
26 changes: 26 additions & 0 deletions difflogic/tests/test_jax.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import jax
import jax.numpy as jnp
from jax.nn import softmax, one_hot
from difflogic import LogicLayer, GroupSum
from difflogic.functional import bin_op_s
import numpy as np

def test_logic_layer_forward():
    """Check that a python-implementation LogicLayer yields one output per neuron for every input row."""
    in_dim = 4
    out_dim = 2
    layer = LogicLayer(in_dim=in_dim, out_dim=out_dim, implementation="python", connections="unique")
    x = jnp.array([[0.1, 0.2, 0.3, 0.4], [0.5, 0.6, 0.7, 0.8]])
    # Seeded generator so a failing run can be reproduced; 16 weights per neuron.
    layer.weights = np.random.default_rng(0).standard_normal((out_dim, 16))
    output = layer(x, training=True)
    # Derive the expected shape from the inputs instead of hard-coding it.
    assert output.shape == (x.shape[0], out_dim), output.shape

def test_group_sum_forward():
    """Check that GroupSum sums k contiguous groups of the last axis."""
    k = 2
    group_sum = GroupSum(k=k)
    x = jnp.array([[1., 2., 3., 4.], [5., 6., 7., 8.]])
    output = group_sum.forward(x)
    # GroupSum reshapes the last axis to (k, n // k) and sums the innermost
    # axis, so neighbouring elements are grouped: (1+2, 3+4) and (5+6, 7+8).
    # tau defaults to 1, so no rescaling applies.
    expected = np.array([[3., 7.], [11., 15.]])
    # jax.numpy has no `testing` submodule; use NumPy's helper on a host copy.
    np.testing.assert_allclose(np.asarray(output), expected)


# Run the tests directly only when this file is executed as a script. On a
# plain import (e.g. pytest collection) the test runner calls them itself, so
# they must not execute as an import side effect.
if __name__ == "__main__":
    test_logic_layer_forward()
    test_group_sum_forward()
Loading