Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions storybook_generation/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Storybook Generation
This workflow generates a video story from a paragraph of text. It uses `StableDiffusionWalker` on pairwise sentences to generate video clips, captions them with each sentence, and stitches them together into a video.

## Examples
Here's a good starter prompt: `Once upon a time, there was a small bird named Poppy. Poppy was a curious bird who loved to explore the world around her. One day, as she was flying over the fields, she noticed a beautiful flower in the distance. Poppy flew closer to the flower and was amazed by its vibrant colors and sweet fragrance. She landed on the flower and started to sip the nectar from its center.`

## Deploying
Follow our [getting started guide](https://www.sievedata.com/dashboard/welcome) to get your Sieve API key and install the Sieve Python client.

1. Export API keys & install Python client
```
export SIEVE_API_KEY={YOUR_API_KEY}
pip install https://mango.sievedata.com/v1/client_package/sievedata-0.0.1.1.2-py3-none-any.whl
```

2. Deploy a workflow to Sieve
```
git clone [email protected]:sieve-community/examples.git
cd examples/storybook_generation
sieve deploy
```
66 changes: 66 additions & 0 deletions storybook_generation/caption_combine.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import sieve

@sieve.function(
    name="video-captioner-combiner",
    gpu = False,
    python_packages=[
        "moviepy==1.0.3",
        "opencv-python==4.6.0.66",
        "uuid==1.30",
    ],
    python_version="3.8",
    iterator_input=True,
    persist_output=True
)
def caption_and_combine(videos, prompt_pairs) -> sieve.Video:
    """Burn each prompt's first sentence onto its clip and concatenate all clips.

    videos: iterator of sieve.Video objects, each carrying a `video_number`
        attribute used to restore generation order.
    prompt_pairs: iterator of (sentence, next_sentence) tuples; the first
        element of each pair is used as the caption for the matching clip.
    Returns a single sieve.Video with all captioned frames stitched together.
    """
    from moviepy.editor import ImageClip, concatenate_videoclips
    import cv2
    import textwrap
    import uuid

    # Sort videos by global ID so clips appear in story order regardless of
    # the order the iterator delivers them in.
    videos = sorted(videos, key=lambda video: video.video_number)

    # Add captions
    images = []
    for v, prompt in zip(videos, prompt_pairs):
        print("Creating video with caption: ", prompt[0])
        cap = cv2.VideoCapture(v.path)
        try:
            while cap.isOpened():
                # Capture frames in the video
                ret, frame = cap.read()
                if not ret:
                    break

                # OpenCV decodes frames as BGR, but MoviePy's ImageClip expects
                # RGB; convert here so the final video has correct colors.
                # Drawing the caption after the conversion also makes the
                # (255, 255, 0) color below render as yellow (RGB order).
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

                # Wrap the caption so long sentences span multiple lines.
                font = cv2.FONT_HERSHEY_SIMPLEX
                wrapped_text = textwrap.wrap(prompt[0], width=30)
                font_size = 1
                font_thickness = 2

                for i, line in enumerate(wrapped_text):
                    textsize = cv2.getTextSize(line, font, font_size, font_thickness)[0]

                    # Vertical gap between caption lines (text height + padding).
                    gap = textsize[1] + 10

                    # Center each line horizontally; stack lines downward from
                    # the vertical midpoint of the frame.
                    y = int((frame.shape[0] + textsize[1]) / 2) + i * gap
                    x = int((frame.shape[1] - textsize[0]) / 2)

                    cv2.putText(frame, line, (x, y), font,
                                font_size,
                                (255,255,0),
                                font_thickness,
                                lineType = cv2.LINE_AA)

                # Add the frame to the list of images
                images.append(frame)
        finally:
            # Release the capture handle even if decoding fails mid-video.
            cap.release()

    # Combine the images into a video
    print("Combining all frames into video...")
    clips = [ImageClip(m).set_duration(0.25) for m in images]
    video = concatenate_videoclips(clips)
    video_path = f"{uuid.uuid4()}.mp4"
    video.write_videofile(video_path, fps=30)
    return sieve.Video(path=video_path)

57 changes: 57 additions & 0 deletions storybook_generation/walker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import sieve

@sieve.Model(
    name="run_stable_diff_walk",
    python_packages=[
        "torch==1.13.1",
        "stable_diffusion_videos==0.8.1",
        "accelerate==0.16.0"
    ],
    system_packages=["libgl1-mesa-glx", "libglib2.0-0", "ffmpeg", "libavcodec58", "libsndfile1", "git-lfs"],
    gpu=True,
    machine_type="a100",
    run_commands=[
        "mkdir -p /root/.cache/models/stable-diffusion-v1-4",
        "git lfs install",
        "git clone https://huggingface.co/CompVis/stable-diffusion-v1-4 /root/.cache/models/stable-diffusion-v1-4",
    ],
    persist_output=True
)
class StableDiffusionVideo:
    """Generates a short video clip interpolating between two text prompts
    using the Stable Diffusion walk pipeline."""

    def __setup__(self):
        """One-time model load: build the fp16 walk pipeline on the GPU."""
        import torch
        from stable_diffusion_videos import StableDiffusionWalkPipeline

        # Load stable diffusion model from local cache
        self.pipeline = StableDiffusionWalkPipeline.from_pretrained(
            "/root/.cache/models/stable-diffusion-v1-4",
            torch_dtype=torch.float16,
            revision="fp16",
        ).to("cuda")

        # Keep global ID to sort outputs (downstream combiner sorts by this)
        self.video_number = 0

    def __predict__(self, prompt_pair: tuple) -> sieve.Video:
        """Walk the latent space between the two prompts in `prompt_pair` and
        yield the resulting clip, tagged with a monotonically increasing
        `video_number` so outputs can be re-ordered after parallel execution.
        """
        # Unpack prompt pair
        prompt1, prompt2 = prompt_pair[0], prompt_pair[1]

        # Generate and store video output.
        # Fixed seeds keep results reproducible across runs; 15 interpolation
        # steps at 5 fps yields a ~3-second clip per sentence pair.
        print("Generating video with prompts: " + prompt1 + " | " + prompt2)
        video_path = self.pipeline.walk(
            [prompt1, prompt2],
            [42, 1337],
            fps=5,
            num_interpolation_steps=15,
            height=512,
            width=768,
        )

        # Increment global id
        self.video_number += 1

        # Return video
        yield sieve.Video(path=video_path, video_number=self.video_number)
37 changes: 37 additions & 0 deletions storybook_generation/workflow.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
'''
Sieve workflow to generate a storybook video from a piece of writing.
'''

import sieve
from walker import StableDiffusionVideo
from caption_combine import caption_and_combine

# Creates a cleaned up list of sentences from a piece of writing
@sieve.function(name="prompt-to-script")
def prompt_to_script(prompt: str) -> list:
    """Split a paragraph into a list of trimmed sentences, each ending in a
    period; empty fragments (e.g. from a trailing period) are dropped."""
    fragments = (chunk.strip() for chunk in prompt.split("."))
    return [f"{fragment}." for fragment in fragments if fragment]

# Generates pairs of sentences from a list of sentences
@sieve.function(name="create-prompt-pairs")
def create_prompt_pairs(script: list) -> tuple:
    """Yield each consecutive (sentence, next_sentence) pair from the script,
    i.e. a sliding window of size two; an N-sentence script yields N-1 pairs."""
    yield from zip(script, script[1:])

@sieve.workflow(name="storybook_generation")
def storybook_generation(prompt: str) -> sieve.Video:
    """End-to-end storybook workflow: split the prompt into sentences, pair
    consecutive sentences, render a clip per pair with Stable Diffusion, then
    caption and concatenate the clips into one video."""
    print("Generating script and prompt pairs...")
    sentences = prompt_to_script(prompt)
    sentence_pairs = create_prompt_pairs(sentences)

    print("Generating videos...")
    clips = StableDiffusionVideo()(sentence_pairs)

    print("Generating storybook...")
    return caption_and_combine(clips, sentence_pairs)