Description
Code to reproduce the error:
from diffusers import StableDiffusionImageVariationPipeline
from PIL import Image
from torchvision import transforms

device = "cuda:0"
sd_pipe = StableDiffusionImageVariationPipeline.from_pretrained(
    "lambdalabs/sd-image-variations-diffusers",
    revision="main",
)
sd_pipe = sd_pipe.to(device)

im = Image.open("path/to/image.jpg")

# Preprocessing for the CLIP image encoder (224x224, CLIP normalization stats)
tform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize(
        (224, 224),
        interpolation=transforms.InterpolationMode.BICUBIC,
        antialias=False,
    ),
    transforms.Normalize(
        [0.48145466, 0.4578275, 0.40821073],
        [0.26862954, 0.26130258, 0.27577711],
    ),
])

inp = tform(im).to(device)
out = sd_pipe(inp, guidance_scale=3)
Error Traceback:
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "/usr/local/lib/python3.8/dist-packages/torch/autograd/grad_mode.py", line 27, in decorate_context
    return func(*args, **kwargs)
  File "/usr/local/lib/python3.8/dist-packages/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py", line 367, in __call__
    image_embeddings = self._encode_image(image, device, num_images_per_prompt, do_classifier_free_guidance)
  File "/usr/local/lib/python3.8/dist-packages/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py", line 180, in _encode_image
    image_embeddings = self.image_encoder(image).image_embeds
  File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py", line 1190, in _call_impl
    return forward_call(*input, **kwargs)
  File "/usr/local/lib/python3.8/dist-packages/transformers/models/clip/modeling_clip.py", line 1299, in forward
    vision_outputs = self.vision_model(
  File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py", line 1190, in _call_impl
    return forward_call(*input, **kwargs)
  File "/usr/local/lib/python3.8/dist-packages/transformers/models/clip/modeling_clip.py", line 854, in forward
    hidden_states = self.embeddings(pixel_values)
  File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py", line 1190, in _call_impl
    return forward_call(*input, **kwargs)
  File "/usr/local/lib/python3.8/dist-packages/transformers/models/clip/modeling_clip.py", line 195, in forward
    embeddings = torch.cat([class_embeds, patch_embeds], dim=1)
RuntimeError: Sizes of tensors must match except in dimension 1. Expected size 3 but got size 1024 for tensor number 1 in the list.
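For context, the RuntimeError is raised by the torch.cat of class_embeds and patch_embeds inside the CLIP vision embeddings. A standalone sketch that raises the same message is below; the tensor shapes are hypothetical and chosen only to illustrate the mismatch, not taken from the pipeline:

import torch

# Hypothetical shapes: torch.cat checks every dimension except dim=1,
# and here dim 0 differs (3 vs 1024), producing the same error text.
class_embeds = torch.randn(3, 1, 1024)
patch_embeds = torch.randn(1024, 16, 16)
torch.cat([class_embeds, patch_embeds], dim=1)
# RuntimeError: Sizes of tensors must match except in dimension 1.
# Expected size 3 but got size 1024 for tensor number 1 in the list.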
The same image works with the v1.0 revision.
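For comparison, a minimal sketch of the working path, assuming "v1.0" refers to the model revision passed to from_pretrained; everything except the revision argument is identical to the failing snippet:

from diffusers import StableDiffusionImageVariationPipeline
from PIL import Image
from torchvision import transforms

device = "cuda:0"
sd_pipe = StableDiffusionImageVariationPipeline.from_pretrained(
    "lambdalabs/sd-image-variations-diffusers",
    revision="v1.0",  # assumption: the "v1.0 revision" mentioned above; only this line differs
)
sd_pipe = sd_pipe.to(device)

im = Image.open("path/to/image.jpg")
tform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize(
        (224, 224),
        interpolation=transforms.InterpolationMode.BICUBIC,
        antialias=False,
    ),
    transforms.Normalize(
        [0.48145466, 0.4578275, 0.40821073],
        [0.26862954, 0.26130258, 0.27577711],
    ),
])
inp = tform(im).to(device)
out = sd_pipe(inp, guidance_scale=3)  # reported to run without the size-mismatch error on v1.0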