From 5c4e6c63ef4f4f584d6ca927eea0b594f18cf958 Mon Sep 17 00:00:00 2001 From: cerashdan Date: Tue, 17 Jun 2025 16:02:09 +0000 Subject: [PATCH 01/18] switching region to us-east-1 as sdxl model is no longer available in us-west-2 --- src/strands_tools/generate_image.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/strands_tools/generate_image.py b/src/strands_tools/generate_image.py index ea8c2a16..4f60b265 100644 --- a/src/strands_tools/generate_image.py +++ b/src/strands_tools/generate_image.py @@ -170,7 +170,7 @@ def generate_image(tool: ToolUse, **kwargs: Any) -> ToolResult: steps = tool_input.get("steps", 30) # Create a Bedrock Runtime client - client = boto3.client("bedrock-runtime", region_name="us-west-2") + client = boto3.client("bedrock-runtime", region_name="us-east-1") # Format the request payload native_request = { From b95baf088bd1c665306ed235caf700d5dedde51b Mon Sep 17 00:00:00 2001 From: cerashdan Date: Wed, 18 Jun 2025 20:51:13 +0000 Subject: [PATCH 02/18] fix(image-gen): add support for Amazon Nova Canvas Fix image generation functionality by adding support for Amazon Nova Canvas model in addition to existing Stable Diffusion models. This enhancement allows users to generate images using both model types with appropriate parameters. --- src/strands_tools/generate_image.py | 308 +++++++++++++++++++--------- src/strands_tools/mem0_memory.py | 2 +- tests/test_generate_image.py | 22 +- 3 files changed, 225 insertions(+), 107 deletions(-) diff --git a/src/strands_tools/generate_image.py b/src/strands_tools/generate_image.py index 4f60b265..7c4f01a8 100644 --- a/src/strands_tools/generate_image.py +++ b/src/strands_tools/generate_image.py @@ -1,18 +1,21 @@ """ -Image generation tool for Strands Agent using Stable Diffusion. +Image generation tool for Strands Agent using Stable Diffusion and Nova Canvas models. 
This module provides functionality to generate high-quality images using Amazon Bedrock's -Stable Diffusion models based on text prompts. It handles the entire image generation +image generation models based on text prompts. It handles the entire image generation process including API integration, parameter management, response processing, and local storage of results. Key Features: 1. Image Generation: - • Text-to-image conversion using Stable Diffusion - • Support for multiple model variants (primarily stable-diffusion-xl-v1) - • Customizable generation parameters (seed, steps, cfg_scale) - • Style preset selection for consistent aesthetics + • Text-to-image conversion using multiple model providers + • Support for the following models: + • stability.sd3-5-large-v1:0 + • stability.stable-image-core-v1:1 + • stability.stable-image-ultra-v1:1 + • amazon.nova-canvas-v1:0 + • Customizable generation parameters (seed, aspect_ratio, output_format, negative_prompt) 2. Output Management: • Automatic local saving with intelligent filename generation @@ -36,14 +39,22 @@ # Basic usage with default parameters agent.tool.generate_image(prompt="A steampunk robot playing chess") -# Advanced usage with custom parameters +# Advanced usage with Stable Diffusion agent.tool.generate_image( prompt="A futuristic city with flying cars", - model_id="stability.stable-diffusion-xl-v1", - seed=42, - steps=50, - cfg_scale=12, - style_preset="cinematic" + model_id="stability.sd3-5-large-v1:0", + aspect_ratio="5:4", + output_format="jpeg", + negative_prompt="bad lighting, harsh lighting, abstract, surreal, twisted, multiple levels", +) + +# Using Nova Canvas +agent.tool.generate_image( + prompt="A photograph of a cup of coffee from the side", + model_id="amazon.nova-canvas-v1:0", + width=1024, + height=1024, + quality="premium", ) ``` @@ -60,9 +71,17 @@ import boto3 from strands.types.tools import ToolResult, ToolUse +STABLE_DIFFUSION_MODEL_ID = [ + "stability.sd3-5-large-v1:0", + 
"stability.stable-image-core-v1:1", + "stability.stable-image-ultra-v1:1", +] +NOVA_CANVAS_MODEL_ID = "amazon.nova-canvas-v1:0" + + TOOL_SPEC = { "name": "generate_image", - "description": "Generates an image using Stable Diffusion based on a given prompt", + "description": "Generates an image using Stable Diffusion or Nova Canvas based on a given prompt", "inputSchema": { "json": { "type": "object", @@ -73,23 +92,48 @@ }, "model_id": { "type": "string", - "description": "Model id for image model, stability.stable-diffusion-xl-v1.", + "description": "Model id for image model, stability.sd3-5-large-v1:0, \ + stability.stable-image-core-v1:1, stability.stable-image-ultra-v1:1, or amazon.nova-canvas-v1:0", }, "seed": { "type": "integer", "description": "Optional: Seed for random number generation (default: random)", }, - "steps": { + "aspect_ratio": { + "type": "string", + "description": "Optional: Controls the aspect ratio of the generated image for \ + Stable Diffusion models. Default 1:1. Enum: 16:9, 1:1, 21:9, 2:3, 3:2, 4:5, 5:4, 9:16, 9:21", + }, + "output_format": { + "type": "string", + "description": "Optional: Specifies the format of the output image for Stable Diffusion models. \ + Supported formats: JPEG, PNG.", + }, + "negative_prompt": { + "type": "string", + "description": "Optional: Keywords of what you do not wish to see in the output image. \ + Max: 10.000 characters.", + }, + "width": { "type": "integer", - "description": "Optional: Number of steps for image generation (default: 30)", + "description": "Optional: Width of the generated image for Nova Canvas model (default: 1024)", + }, + "height": { + "type": "integer", + "description": "Optional: Height of the generated image for Nova Canvas model (default: 1024)", + }, + "quality": { + "type": "string", + "description": "Optional: Quality setting for Nova Canvas model. 
Options: 'standard' or 'premium' \ + (default: 'standard')", }, "cfg_scale": { "type": "number", - "description": "Optional: CFG scale for image generation (default: 10)", + "description": "Optional: CFG scale for Nova Canvas model (default: 8.0)", }, - "style_preset": { - "type": "string", - "description": "Optional: Style preset for image generation (default: 'photographic')", + "number_of_images": { + "type": "integer", + "description": "Optional: Number of images to generate for Nova Canvas model (default: 1)", }, }, "required": ["prompt"], @@ -98,19 +142,28 @@ } +# Create a filename based on the prompt +def create_filename(prompt: str) -> str: + """Generate a filename from the prompt text.""" + words = re.findall(r"\w+", prompt.lower())[:5] + filename = "_".join(words) + filename = re.sub(r"[^\w\-_\.]", "_", filename) + return filename[:100] # Limit filename length + + def generate_image(tool: ToolUse, **kwargs: Any) -> ToolResult: """ - Generate images from text prompts using Stable Diffusion via Amazon Bedrock. + Generate images from text prompts using Stable Diffusion or Nova Canvas via Amazon Bedrock. This function transforms textual descriptions into high-quality images using - Stable Diffusion models available through Amazon Bedrock. It provides extensive + image generation models available through Amazon Bedrock. It provides extensive customization options and handles the complete process from API interaction to image storage and result formatting. How It Works: ------------ 1. Extracts and validates parameters from the tool input - 2. Configures the request payload with appropriate parameters + 2. Configures the request payload with appropriate parameters based on model type 3. Invokes the Bedrock image generation model through AWS SDK 4. Processes the response to extract the base64-encoded image 5. 
Creates an appropriate filename based on the prompt content @@ -120,11 +173,22 @@ def generate_image(tool: ToolUse, **kwargs: Any) -> ToolResult: Generation Parameters: -------------------- - prompt: The textual description of the desired image - - model_id: Specific model to use (defaults to stable-diffusion-xl-v1) + - model_id: Specific model to use (defaults to stability.stable-image-core-v1:1) + + For Stable Diffusion models: + - seed: Controls randomness for reproducible results + - aspect_ratio: Controls the aspect ratio of the generated image + - output_format: Specifies the format of the output image (e.g., png or jpeg) + - negative_prompt: Keywords of what you do not wish to see in the output image + + For Nova Canvas model: + - width: Width of the generated image (default: 1024) + - height: Height of the generated image (default: 1024) + - quality: Quality setting ('standard' or 'premium') + - cfg_scale: CFG scale value (default: 8.0) + - number_of_images: Number of images to generate (default: 1) - seed: Controls randomness for reproducible results - - style_preset: Artistic style to apply (e.g., photographic, cinematic) - - cfg_scale: Controls how closely the image follows the prompt - - steps: Number of diffusion steps (higher = more refined but slower) + - negative_prompt: Keywords of what you do not wish to see in the output image Common Usage Scenarios: --------------------- @@ -137,11 +201,8 @@ def generate_image(tool: ToolUse, **kwargs: Any) -> ToolResult: Args: tool: ToolUse object containing the parameters for image generation. - prompt: The text prompt describing the desired image. - - model_id: Optional model identifier (default: "stability.stable-diffusion-xl-v1"). - - seed: Optional random seed (default: random integer). - - style_preset: Optional style preset name (default: "photographic"). - - cfg_scale: Optional CFG scale value (default: 10). - - steps: Optional number of diffusion steps (default: 30). 
+ - model_id: Optional model identifier. + - Additional parameters specific to the chosen model type. **kwargs: Additional keyword arguments (unused). Returns: @@ -161,74 +222,131 @@ def generate_image(tool: ToolUse, **kwargs: Any) -> ToolResult: tool_use_id = tool["toolUseId"] tool_input = tool["input"] - # Extract input parameters + # Extract common input parameters prompt = tool_input.get("prompt", "A stylized picture of a cute old steampunk robot.") - model_id = tool_input.get("model_id", "stability.stable-diffusion-xl-v1") + model_id = tool_input.get("model_id", "stability.stable-image-core-v1:1") + region = ( + tool_input.get("region", "us-east-1") + if model_id not in STABLE_DIFFUSION_MODEL_ID + else tool_input.get("region", "us-west-2") + ) seed = tool_input.get("seed", random.randint(0, 4294967295)) - style_preset = tool_input.get("style_preset", "photographic") - cfg_scale = tool_input.get("cfg_scale", 10) - steps = tool_input.get("steps", 30) + negative_prompt = tool_input.get("negative_prompt", "bad lighting, harsh lighting") # Create a Bedrock Runtime client - client = boto3.client("bedrock-runtime", region_name="us-east-1") - - # Format the request payload - native_request = { - "text_prompts": [{"text": prompt}], - "style_preset": style_preset, - "seed": seed, - "cfg_scale": cfg_scale, - "steps": steps, - } - request = json.dumps(native_request) - - # Invoke the model - response = client.invoke_model(modelId=model_id, body=request) - - # Decode the response body - model_response = json.loads(response["body"].read()) - - # Extract the image data - base64_image_data = model_response["artifacts"][0]["base64"] - - # Create a filename based on the prompt - def create_filename(prompt: str) -> str: - """Generate a filename from the prompt text.""" - words = re.findall(r"\w+", prompt.lower())[:5] - filename = "_".join(words) - filename = re.sub(r"[^\w\-_\.]", "_", filename) - return filename[:100] # Limit filename length - - filename = 
create_filename(prompt) - - # Save the generated image to a local folder - output_dir = "output" - if not os.path.exists(output_dir): - os.makedirs(output_dir) - - i = 1 - base_image_path = os.path.join(output_dir, f"{filename}.png") - image_path = base_image_path - while os.path.exists(image_path): - image_path = os.path.join(output_dir, f"{filename}_{i}.png") - i += 1 - - with open(image_path, "wb") as file: - file.write(base64.b64decode(base64_image_data)) - - return { - "toolUseId": tool_use_id, - "status": "success", - "content": [ - {"text": f"The generated image has been saved locally to {image_path}. "}, - { - "image": { - "format": "png", - "source": {"bytes": base64.b64decode(base64_image_data)}, - } + client = boto3.client("bedrock-runtime", region_name=region) + + # Initialize variables for later use + base64_image_data = None + output_format = "jpeg" # Default format + + # Format the request payload based on model type + if ( + model_id == "stability.sd3-5-large-v1:0" + or model_id == "stability.stable-image-core-v1:1" + or model_id == "stability.stable-image-ultra-v1:1" + ): + # Stable Diffusion specific parameters + aspect_ratio = tool_input.get("aspect_ratio", "1:1") + output_format = tool_input.get("output_format", "jpeg") + + native_request = { + "prompt": prompt, + "aspect_ratio": aspect_ratio, + "seed": seed, + "output_format": output_format, + "negative_prompt": negative_prompt, + } + request = json.dumps(native_request) + + # Invoke the model + response = client.invoke_model(modelId=model_id, body=request) + + # Decode the response body + model_response = json.loads(response["body"].read().decode("utf-8")) + + # Extract the image data + base64_image_data = model_response["images"][0] + + elif model_id == "amazon.nova-canvas-v1:0": + # Nova Canvas specific parameters + width = tool_input.get("width", 1024) + height = tool_input.get("height", 1024) + quality = tool_input.get("quality", "standard") + cfg_scale = tool_input.get("cfg_scale", 8.0) + 
number_of_images = tool_input.get("number_of_images", 1) + + # Format the Nova Canvas request + nova_request = { + "taskType": "TEXT_IMAGE", + "textToImageParams": {"text": prompt, "negativeText": negative_prompt}, + "imageGenerationConfig": { + "width": width, + "height": height, + "quality": quality, + "cfgScale": cfg_scale, + "seed": seed, + "numberOfImages": number_of_images, }, - ], - } + } + request = json.dumps(nova_request) + + # Invoke the model + response = client.invoke_model(modelId=model_id, body=request) + + # Decode the response body + model_response = json.loads(response["body"].read().decode("utf-8")) + + # Extract the image data + base64_image_data = model_response["images"][0] + + else: + return { + "toolUseId": tool_use_id, + "status": "error", + "content": [ + { + "text": "Supported models for this tool are: \n \ + 1.stability.sd3-5-large-v1:0 \n \ + 2. stability.stable-image-core-v1:1 \n \ + 3. stability.stable-image-ultra-v1:1 \n \ + 4. amazon.nova-canvas-v1:0" + } + ], + } + + # If we have image data, process and save it + if base64_image_data: + filename = create_filename(prompt) + + # Save the generated image to a local folder + output_dir = "output" + if not os.path.exists(output_dir): + os.makedirs(output_dir) + + i = 1 + base_image_path = os.path.join(output_dir, f"{filename}.png") + image_path = base_image_path + while os.path.exists(image_path): + image_path = os.path.join(output_dir, f"{filename}_{i}.png") + i += 1 + + with open(image_path, "wb") as file: + file.write(base64.b64decode(base64_image_data)) + + return { + "toolUseId": tool_use_id, + "status": "success", + "content": [ + {"text": f"The generated image has been saved locally to {image_path}. 
"}, + { + "image": { + "format": output_format, + "source": {"bytes": base64.b64decode(base64_image_data)}, + } + }, + ], + } except Exception as e: return { diff --git a/src/strands_tools/mem0_memory.py b/src/strands_tools/mem0_memory.py index d9849814..5840deaa 100644 --- a/src/strands_tools/mem0_memory.py +++ b/src/strands_tools/mem0_memory.py @@ -140,7 +140,7 @@ "description": "Optional metadata to store with the memory", }, }, - "required": ["action"] + "required": ["action"], } }, } diff --git a/tests/test_generate_image.py b/tests/test_generate_image.py index dc2132f6..caf93bb9 100644 --- a/tests/test_generate_image.py +++ b/tests/test_generate_image.py @@ -31,7 +31,7 @@ def mock_boto3_client(): # Set up mock response mock_body = MagicMock() mock_body.read.return_value = json.dumps( - {"artifacts": [{"base64": base64.b64encode(b"mock_image_data").decode("utf-8")}]} + {"images": [base64.b64encode(b"mock_image_data").decode("utf-8")]} ).encode("utf-8") mock_client_instance = MagicMock() @@ -76,9 +76,9 @@ def test_generate_image_direct(mock_boto3_client, mock_os_path_exists, mock_os_m "input": { "prompt": "A cute robot", "seed": 123, - "steps": 30, - "cfg_scale": 10, - "style_preset": "photographic", + "aspect_ratio": "5:4", + "output_format": "png", + "negative_prompt": "blurry, low resolution, pixelated, grainy, unrealistic", }, } @@ -94,11 +94,11 @@ def test_generate_image_direct(mock_boto3_client, mock_os_path_exists, mock_os_m args, kwargs = mock_client_instance.invoke_model.call_args request_body = json.loads(kwargs["body"]) - assert request_body["text_prompts"][0]["text"] == "A cute robot" + assert request_body["prompt"] == "A cute robot" assert request_body["seed"] == 123 - assert request_body["steps"] == 30 - assert request_body["cfg_scale"] == 10 - assert request_body["style_preset"] == "photographic" + assert request_body["aspect_ratio"] == "5:4" + assert request_body["output_format"] == "png" + assert request_body["negative_prompt"] == "blurry, low 
resolution, pixelated, grainy, unrealistic" # Verify directory creation mock_os_makedirs.assert_called_once() @@ -128,9 +128,9 @@ def test_generate_image_default_params(mock_boto3_client, mock_os_path_exists, m request_body = json.loads(kwargs["body"]) assert request_body["seed"] == 42 # From our mocked random.randint - assert request_body["steps"] == 30 - assert request_body["cfg_scale"] == 10 - assert request_body["style_preset"] == "photographic" + assert request_body["aspect_ratio"] == "1:1" + assert request_body["output_format"] == "jpeg" + assert request_body["negative_prompt"] == "bad lighting, harsh lighting" assert result["status"] == "success" From c85dc3c668487b6a286016349e2ce12fd67c8f48 Mon Sep 17 00:00:00 2001 From: cerashdan Date: Sun, 22 Jun 2025 16:46:25 +0000 Subject: [PATCH 03/18] fix(gen_image): region selection and number of images parameters --- src/strands_tools/generate_image.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/src/strands_tools/generate_image.py b/src/strands_tools/generate_image.py index 7c4f01a8..239600e9 100644 --- a/src/strands_tools/generate_image.py +++ b/src/strands_tools/generate_image.py @@ -186,7 +186,7 @@ def generate_image(tool: ToolUse, **kwargs: Any) -> ToolResult: - height: Height of the generated image (default: 1024) - quality: Quality setting ('standard' or 'premium') - cfg_scale: CFG scale value (default: 8.0) - - number_of_images: Number of images to generate (default: 1) + - number_of_images: Number of images to generate (always: 1) - seed: Controls randomness for reproducible results - negative_prompt: Keywords of what you do not wish to see in the output image @@ -225,11 +225,7 @@ def generate_image(tool: ToolUse, **kwargs: Any) -> ToolResult: # Extract common input parameters prompt = tool_input.get("prompt", "A stylized picture of a cute old steampunk robot.") model_id = tool_input.get("model_id", "stability.stable-image-core-v1:1") - region = ( - tool_input.get("region",
"us-east-1") - if model_id not in STABLE_DIFFUSION_MODEL_ID - else tool_input.get("region", "us-west-2") - ) + region = tool_input.get("region", "us-west-2") seed = tool_input.get("seed", random.randint(0, 4294967295)) negative_prompt = tool_input.get("negative_prompt", "bad lighting, harsh lighting") @@ -274,7 +270,6 @@ def generate_image(tool: ToolUse, **kwargs: Any) -> ToolResult: height = tool_input.get("height", 1024) quality = tool_input.get("quality", "standard") cfg_scale = tool_input.get("cfg_scale", 8.0) - number_of_images = tool_input.get("number_of_images", 1) # Format the Nova Canvas request nova_request = { @@ -286,7 +281,7 @@ def generate_image(tool: ToolUse, **kwargs: Any) -> ToolResult: "quality": quality, "cfgScale": cfg_scale, "seed": seed, - "numberOfImages": number_of_images, + "numberOfImages": 1, }, } request = json.dumps(nova_request) From 65a7bf548043061865e245e54196a376a707e9b4 Mon Sep 17 00:00:00 2001 From: cerashdan Date: Sun, 22 Jun 2025 17:19:09 +0000 Subject: [PATCH 04/18] fix: updating toolspec to include region id and remove number of images parameter (defaulting the tool to generate one image always --- src/strands_tools/generate_image.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/strands_tools/generate_image.py b/src/strands_tools/generate_image.py index 239600e9..3a6c933d 100644 --- a/src/strands_tools/generate_image.py +++ b/src/strands_tools/generate_image.py @@ -95,6 +95,10 @@ "description": "Model id for image model, stability.sd3-5-large-v1:0, \ stability.stable-image-core-v1:1, stability.stable-image-ultra-v1:1, or amazon.nova-canvas-v1:0", }, + "region": { + "type": "string", + "description": "AWS region for the image generation model (default: us-west-2)", + }, "seed": { "type": "integer", "description": "Optional: Seed for random number generation (default: random)", @@ -131,10 +135,6 @@ "type": "number", "description": "Optional: CFG scale for Nova Canvas model (default: 8.0)", }, - 
"number_of_images": { - "type": "integer", - "description": "Optional: Number of images to generate for Nova Canvas model (default: 1)", - }, }, "required": ["prompt"], } From f7a363bba29a63ea832a19e1c0a15540e25c8cf8 Mon Sep 17 00:00:00 2001 From: cerashdan Date: Tue, 24 Jun 2025 11:58:18 +0000 Subject: [PATCH 05/18] fix(generate_image): update the tool to initial state where it only support stable diffusion models --- src/strands_tools/generate_image.py | 93 +++++------------------------ 1 file changed, 16 insertions(+), 77 deletions(-) diff --git a/src/strands_tools/generate_image.py b/src/strands_tools/generate_image.py index 3a6c933d..99e26e2e 100644 --- a/src/strands_tools/generate_image.py +++ b/src/strands_tools/generate_image.py @@ -1,20 +1,19 @@ """ -Image generation tool for Strands Agent using Stable Diffusion and Nova Canvas models. +Image generation tool for Strands Agent using Stable Diffusion. This module provides functionality to generate high-quality images using Amazon Bedrock's -image generation models based on text prompts. It handles the entire image generation +Stable Diffusion models based on text prompts. It handles the entire image generation process including API integration, parameter management, response processing, and local storage of results. Key Features: 1. Image Generation: - • Text-to-image conversion using multiple model providers + • Text-to-image conversion using Stable Diffusion models • Support for the following models: • stability.sd3-5-large-v1:0 • stability.stable-image-core-v1:1 • stability.stable-image-ultra-v1:1 - • amazon.nova-canvas-v1:0 • Customizable generation parameters (seed, aspect_ratio, output_format, negative_prompt) 2. 
Output Management: @@ -48,13 +47,13 @@ negative_prompt="bad lighting, harsh lighting, abstract, surreal, twisted, multiple levels", ) -# Using Nova Canvas +# Using another Stable Diffusion model agent.tool.generate_image( prompt="A photograph of a cup of coffee from the side", - model_id="amazon.nova-canvas-v1:0", - width=1024, - height=1024, - quality="premium", + model_id="stability.stable-image-ultra-v1:1", + aspect_ratio="1:1", + output_format="png", + negative_prompt="blurry, distorted", ) ``` @@ -76,12 +75,11 @@ "stability.stable-image-core-v1:1", "stability.stable-image-ultra-v1:1", ] -NOVA_CANVAS_MODEL_ID = "amazon.nova-canvas-v1:0" TOOL_SPEC = { "name": "generate_image", - "description": "Generates an image using Stable Diffusion or Nova Canvas based on a given prompt", + "description": "Generates an image using Stable Diffusion models based on a given prompt", "inputSchema": { "json": { "type": "object", @@ -93,7 +91,7 @@ "model_id": { "type": "string", "description": "Model id for image model, stability.sd3-5-large-v1:0, \ - stability.stable-image-core-v1:1, stability.stable-image-ultra-v1:1, or amazon.nova-canvas-v1:0", + stability.stable-image-core-v1:1, or stability.stable-image-ultra-v1:1", }, "region": { "type": "string", @@ -118,23 +116,6 @@ "description": "Optional: Keywords of what you do not wish to see in the output image. \ Max: 10.000 characters.", }, - "width": { - "type": "integer", - "description": "Optional: Width of the generated image for Nova Canvas model (default: 1024)", - }, - "height": { - "type": "integer", - "description": "Optional: Height of the generated image for Nova Canvas model (default: 1024)", - }, - "quality": { - "type": "string", - "description": "Optional: Quality setting for Nova Canvas model. 
Options: 'standard' or 'premium' \ - (default: 'standard')", - }, - "cfg_scale": { - "type": "number", - "description": "Optional: CFG scale for Nova Canvas model (default: 8.0)", - }, }, "required": ["prompt"], } @@ -153,7 +134,7 @@ def create_filename(prompt: str) -> str: def generate_image(tool: ToolUse, **kwargs: Any) -> ToolResult: """ - Generate images from text prompts using Stable Diffusion or Nova Canvas via Amazon Bedrock. + Generate images from text prompts using Stable Diffusion models via Amazon Bedrock. This function transforms textual descriptions into high-quality images using image generation models available through Amazon Bedrock. It provides extensive @@ -181,14 +162,7 @@ def generate_image(tool: ToolUse, **kwargs: Any) -> ToolResult: - output_format: Specifies the format of the output image (e.g., png or jpeg) - negative_prompt: Keywords of what you do not wish to see in the output image - For Nova Canvas model: - - width: Width of the generated image (default: 1024) - - height: Height of the generated image (default: 1024) - - quality: Quality setting ('standard' or 'premium') - - cfg_scale: CFG scale value (default: 8.0) - - number_of_images: Number of images to generate (always: 1) - - seed: Controls randomness for reproducible results - - negative_prompt: Keywords of what you do not wish to see in the output image + Common Usage Scenarios: --------------------- @@ -237,11 +211,8 @@ def generate_image(tool: ToolUse, **kwargs: Any) -> ToolResult: output_format = "jpeg" # Default format # Format the request payload based on model type - if ( - model_id == "stability.sd3-5-large-v1:0" - or model_id == "stability.stable-image-core-v1:1" - or model_id == "stability.stable-image-ultra-v1:1" - ): + # Validate model ID is a supported Stable Diffusion model + if model_id in STABLE_DIFFUSION_MODEL_ID: # Stable Diffusion specific parameters aspect_ratio = tool_input.get("aspect_ratio", "1:1") output_format = tool_input.get("output_format", "jpeg") @@ 
-264,37 +235,6 @@ def generate_image(tool: ToolUse, **kwargs: Any) -> ToolResult: # Extract the image data base64_image_data = model_response["images"][0] - elif model_id == "amazon.nova-canvas-v1:0": - # Nova Canvas specific parameters - width = tool_input.get("width", 1024) - height = tool_input.get("height", 1024) - quality = tool_input.get("quality", "standard") - cfg_scale = tool_input.get("cfg_scale", 8.0) - - # Format the Nova Canvas request - nova_request = { - "taskType": "TEXT_IMAGE", - "textToImageParams": {"text": prompt, "negativeText": negative_prompt}, - "imageGenerationConfig": { - "width": width, - "height": height, - "quality": quality, - "cfgScale": cfg_scale, - "seed": seed, - "numberOfImages": 1, - }, - } - request = json.dumps(nova_request) - - # Invoke the model - response = client.invoke_model(modelId=model_id, body=request) - - # Decode the response body - model_response = json.loads(response["body"].read().decode("utf-8")) - - # Extract the image data - base64_image_data = model_response["images"][0] - else: return { "toolUseId": tool_use_id, @@ -302,10 +242,9 @@ def generate_image(tool: ToolUse, **kwargs: Any) -> ToolResult: "content": [ { "text": "Supported models for this tool are: \n \ - 1.stability.sd3-5-large-v1:0 \n \ + 1. stability.sd3-5-large-v1:0 \n \ 2. stability.stable-image-core-v1:1 \n \ - 3. stability.stable-image-ultra-v1:1 \n \ - 4. amazon.nova-canvas-v1:0" + 3. 
stability.stable-image-ultra-v1:1" } ], } From d908779636c08912919ffe7845becf36c2b99b29 Mon Sep 17 00:00:00 2001 From: Abdullatif <54531553+cerashdan@users.noreply.github.com> Date: Wed, 25 Jun 2025 16:56:49 +0300 Subject: [PATCH 06/18] Update src/strands_tools/generate_image.py Co-authored-by: Mackenzie Zastrow <3211021+zastrowm@users.noreply.github.com> --- src/strands_tools/generate_image.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/strands_tools/generate_image.py b/src/strands_tools/generate_image.py index 99e26e2e..8f94b761 100644 --- a/src/strands_tools/generate_image.py +++ b/src/strands_tools/generate_image.py @@ -155,7 +155,6 @@ def generate_image(tool: ToolUse, **kwargs: Any) -> ToolResult: -------------------- - prompt: The textual description of the desired image - model_id: Specific model to use (defaults to stability.stable-image-core-v1:1) - For Stable Diffusion models: - seed: Controls randomness for reproducible results - aspect_ratio: Controls the aspect ratio of the generated image From 40c79bf8fcba34c3f27f1c16732271a53293a49c Mon Sep 17 00:00:00 2001 From: cerashdan Date: Wed, 25 Jun 2025 14:27:14 +0000 Subject: [PATCH 07/18] fix(generate_image): removing conditional for model specific --- src/strands_tools/generate_image.py | 70 +++++++++++++---------------- 1 file changed, 30 insertions(+), 40 deletions(-) diff --git a/src/strands_tools/generate_image.py b/src/strands_tools/generate_image.py index 8f94b761..9eef69b9 100644 --- a/src/strands_tools/generate_image.py +++ b/src/strands_tools/generate_image.py @@ -114,7 +114,8 @@ "negative_prompt": { "type": "string", "description": "Optional: Keywords of what you do not wish to see in the output image. \ - Max: 10.000 characters.", + Default: bad lighting, harsh lighting. 
\ + Max: 10.000 characters.", }, }, "required": ["prompt"], @@ -195,7 +196,9 @@ def generate_image(tool: ToolUse, **kwargs: Any) -> ToolResult: tool_use_id = tool["toolUseId"] tool_input = tool["input"] - # Extract common input parameters + # Extract common and Stable Diffusion input parameters + aspect_ratio = tool_input.get("aspect_ratio", "1:1") + output_format = tool_input.get("output_format", "jpeg") prompt = tool_input.get("prompt", "A stylized picture of a cute old steampunk robot.") model_id = tool_input.get("model_id", "stability.stable-image-core-v1:1") region = tool_input.get("region", "us-west-2") @@ -207,46 +210,25 @@ def generate_image(tool: ToolUse, **kwargs: Any) -> ToolResult: # Initialize variables for later use base64_image_data = None - output_format = "jpeg" # Default format - - # Format the request payload based on model type - # Validate model ID is a supported Stable Diffusion model - if model_id in STABLE_DIFFUSION_MODEL_ID: - # Stable Diffusion specific parameters - aspect_ratio = tool_input.get("aspect_ratio", "1:1") - output_format = tool_input.get("output_format", "jpeg") - - native_request = { - "prompt": prompt, - "aspect_ratio": aspect_ratio, - "seed": seed, - "output_format": output_format, - "negative_prompt": negative_prompt, - } - request = json.dumps(native_request) - # Invoke the model - response = client.invoke_model(modelId=model_id, body=request) + # create the request body + native_request = { + "prompt": prompt, + "aspect_ratio": aspect_ratio, + "seed": seed, + "output_format": output_format, + "negative_prompt": negative_prompt, + } + request = json.dumps(native_request) - # Decode the response body - model_response = json.loads(response["body"].read().decode("utf-8")) + # Invoke the model + response = client.invoke_model(modelId=model_id, body=request) - # Extract the image data - base64_image_data = model_response["images"][0] + # Decode the response body + model_response = 
json.loads(response["body"].read().decode("utf-8")) - else: - return { - "toolUseId": tool_use_id, - "status": "error", - "content": [ - { - "text": "Supported models for this tool are: \n \ - 1. stability.sd3-5-large-v1:0 \n \ - 2. stability.stable-image-core-v1:1 \n \ - 3. stability.stable-image-ultra-v1:1" - } - ], - } + # Extract the image data + base64_image_data = model_response["images"][0] # If we have image data, process and save it if base64_image_data: @@ -280,10 +262,18 @@ def generate_image(tool: ToolUse, **kwargs: Any) -> ToolResult: }, ], } - + else: + raise Exception("No image data found in the response.") except Exception as e: return { "toolUseId": tool_use_id, "status": "error", - "content": [{"text": f"Error generating image: {str(e)}"}], + "content": [ + { + "text": f"Error generating image: {str(e)} \n Try other supported models for this tool are: \n \ + 1. stability.sd3-5-large-v1:0 \n \ + 2. stability.stable-image-core-v1:1 \n \ + 3. stability.stable-image-ultra-v1:1" + } + ], } From aea61b7680f5a10ec5eac75392c014813a503ccc Mon Sep 17 00:00:00 2001 From: Abdullatif <54531553+cerashdan@users.noreply.github.com> Date: Thu, 26 Jun 2025 01:18:35 +0300 Subject: [PATCH 08/18] Update src/strands_tools/generate_image.py Co-authored-by: Mackenzie Zastrow <3211021+zastrowm@users.noreply.github.com> --- src/strands_tools/generate_image.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/strands_tools/generate_image.py b/src/strands_tools/generate_image.py index 9eef69b9..0fd30f35 100644 --- a/src/strands_tools/generate_image.py +++ b/src/strands_tools/generate_image.py @@ -156,7 +156,6 @@ def generate_image(tool: ToolUse, **kwargs: Any) -> ToolResult: -------------------- - prompt: The textual description of the desired image - model_id: Specific model to use (defaults to stability.stable-image-core-v1:1) - For Stable Diffusion models: - seed: Controls randomness for reproducible results - aspect_ratio: Controls the aspect ratio of the generated 
image - output_format: Specifies the format of the output image (e.g., png or jpeg) From 6d568071e6622e31425160719cfe368ecf508600 Mon Sep 17 00:00:00 2001 From: cerashdan Date: Tue, 15 Jul 2025 10:21:50 +0000 Subject: [PATCH 09/18] adding nova canvas tool --- src/strands_tools/nova_canvas.py | 617 +++++++++++++++++++++++++++++++ 1 file changed, 617 insertions(+) create mode 100644 src/strands_tools/nova_canvas.py diff --git a/src/strands_tools/nova_canvas.py b/src/strands_tools/nova_canvas.py new file mode 100644 index 00000000..3a8bb5cd --- /dev/null +++ b/src/strands_tools/nova_canvas.py @@ -0,0 +1,617 @@ +""" +Image generation tool for Strands Agent using Nova Canvas on Amazon Bedrock. + +This module provides functionality to generate high-quality images using Amazon Bedrock's +Nova Canvas model based on text prompts. It handles the entire image generation +process including API integration, parameter management, response processing, and +local storage of results. + +Key Features: + +1. Image Generation: + • Text-to-image generation using Amazon Nova Canvas + • Customizable generation parameters (height, width, quality, cfg_scale, + seed, style, negative_prompt) + • Support for pre-defined visual styles: + "3D_ANIMATED_FAMILY_FILM" - A style that alludes to 3D animated + films. Featuring realistic rendering and characters with cartoonish + or exaggerated physical features. + + "DESIGN_SKETCH" - A style featuring hand-drawn line-art without a + lot of wash or fill that is not too refined. This style is used to + convey concepts and ideas. + + "FLAT_VECTOR_ILLUSTRATION" - A flat-color illustration style that + is popular in business communications. + + "GRAPHIC_NOVEL_ILLUSTRATION" - A vivid ink illustration style. + Characters do not have exaggerated features, as with some other more + cartoon-ish styles. + + "MAXIMALISM" - Bright, elaborate, bold, and complex with strong + shapes, and rich details. 
+ + "MIDCENTURY_RETRO" - Alludes to graphic design trends from the + 1940s through 1960s. + + "PHOTOREALISM" - Realistic photography style, including different + repertoires such as stock photography, editorial photography, + journalistic photography, and more. + + "SOFT_DIGITAL_PAINTING" - This style has more finish and refinement + than a sketch. It includes shading, three dimensionality, and texture + that might be lacking in other styles. + +2. Virtual try-on: + • Virtual try-on is an image-guided use case of inpainting in which the + contents of a reference image are superimposed into a source image based + on the guidance of a mask image. + • Use case examples for Virtual try-on are: + 1. Adding a logo or text to an image + 2. Use a human and garment image to generate an image with that same + person wearing it + 3. Place a couch in a living room + +3. Background removal: + • Automatically remove the background of any image, replacing the + background with transparent pixels. + • Useful when you want to later composite the image with other elements + in an image editing app, presentation, or website. + +4. Output Management: + • Automatic local saving with intelligent filename generation + • Base64 encoding/decoding for transmission + • Duplicate filename detection and resolution + • Organized output directory structure + +5. Response Format: + • Rich response with both text and image data + • Status tracking and error handling + • Direct base64 image data for immediate display + • File path reference for local access + +Usage with Strands Agent: +```python +from strands import Agent +from strands_tools import nova_canvas +#TODO +1. add one example for standard generation +2. another for generation with extra parameters +3. 
virtual try-on garment example +agent = Agent(tools=[generate_image]) + +# Basic usage with default parameters +agent.tool.generate_image(prompt="A steampunk robot playing chess") + +# Advanced usage with Stable Diffusion +agent.tool.generate_image( + prompt="A futuristic city with flying cars", + model_id="stability.sd3-5-large-v1:0", + aspect_ratio="5:4", + output_format="jpeg", + negative_prompt="bad lighting, harsh lighting, abstract, surreal, twisted, multiple levels", +) + +# Using another Stable Diffusion model +agent.tool.generate_image( + prompt="A photograph of a cup of coffee from the side", + model_id="stability.stable-image-ultra-v1:1", + aspect_ratio="1:1", + output_format="png", + negative_prompt="blurry, distorted", +) +``` + +See the nova_canvas function docstring for more details on parameters and options. +""" + +import base64 +import json +import os +import random +import re +import time +from typing import Any + +import boto3 +from strands.types.tools import ToolResult, ToolUse + + +TOOL_SPEC = { + "name": "nova_canvas", + "description": "Use Amazon Nova Canvas for image generation, virtual try-on, and background removal tasks", + "inputSchema": { + "json": { + "type": "object", + "properties": { + "task_type": { + "type": "string", + "description": "The task type for Amazon Nova Canvas", + "enum": ["TEXT_IMAGE", "VIRTUAL_TRY_ON", "BACKGROUND_REMOVAL"], + "default": "TEXT_IMAGE" + }, + # TEXT_IMAGE parameters + "text": { + "type": "string", + "description": "Text prompt for image generation (required for TEXT_IMAGE)" + }, + "negative_text": { + "type": "string", + "description": "Negative text prompt (TEXT_IMAGE only)" + }, + "style": { + "type": "string", + "description": "Style for image generation (TEXT_IMAGE only)", + "enum": ["3D_ANIMATED_FAMILY_FILM", "DESIGN_SKETCH", "FLAT_VECTOR_ILLUSTRATION", "GRAPHIC_NOVEL_ILLUSTRATION", "MAXIMALISM", "MIDCENTURY_RETRO", "PHOTOREALISM", "SOFT_DIGITAL_PAINTING"] + }, + "width": { + "type": "integer", 
+ "description": "Image width in pixels (TEXT_IMAGE only)" + }, + "height": { + "type": "integer", + "description": "Image height in pixels (TEXT_IMAGE only)" + }, + # VIRTUAL_TRY_ON parameters + "image_path": { + "type": "string", + "description": "Path to source image file to modify (required for VIRTUAL_TRY_ON and BACKGROUND_REMOVAL)" + }, + "reference_image_path": { + "type": "string", + "description": "Path to reference image file containing the object to superimpose (required for VIRTUAL_TRY_ON)" + }, + "mask_type": { + "type": "string", + "description": "Specifies whether the mask is provided as an image, prompt, or garment mask (required for VIRTUAL_TRY_ON)", + "enum": ["IMAGE", "GARMENT", "PROMPT"] + }, + "mask_image_path": { + "type": "string", + "description": "Path to mask image file defining areas to edit (black) and ignore (white). Required when mask_type is IMAGE" + }, + "mask_shape": { + "type": "string", + "description": "Defines the shape of the mask bounding box, affecting how reference image is transferred", + "enum": ["CONTOUR", "BOUNDING_BOX", "DEFAULT"] + }, + "garment_class": { + "type": "string", + "description": "Defines the article of clothing being transferred. 
Required when mask_type is GARMENT", + "enum": ["UPPER_BODY", "LOWER_BODY", "FULL_BODY", "FOOTWEAR", "LONG_SLEEVE_SHIRT", "SHORT_SLEEVE_SHIRT", "NO_SLEEVE_SHIRT", "OTHER_UPPER_BODY", "LONG_PANTS", "SHORT_PANTS", "OTHER_LOWER_BODY", "LONG_DRESS", "SHORT_DRESS", "FULL_BODY_OUTFIT", "OTHER_FULL_BODY", "SHOES", "BOOTS", "OTHER_FOOTWEAR"] + }, + "long_sleeve_style": { + "type": "string", + "description": "Styling for long sleeve garments (applies only to upper body garments)", + "enum": ["SLEEVE_DOWN", "SLEEVE_UP"] + }, + "tucking_style": { + "type": "string", + "description": "Tucking style option (applies only to upper body garments)", + "enum": ["UNTUCKED", "TUCKED"] + }, + "outer_layer_style": { + "type": "string", + "description": "Styling for outer layer garments (applies only to outer layer, upper body garments)", + "enum": ["CLOSED", "OPEN"] + }, + "mask_prompt": { + "type": "string", + "description": "Natural language text prompt describing regions to edit. Required when mask_type is PROMPT" + }, + "preserve_body_pose": { + "type": "string", + "description": "Whether to preserve the body pose in the output image when a person is detected", + "enum": ["ON", "OFF", "DEFAULT"] + }, + "preserve_hands": { + "type": "string", + "description": "Whether to preserve hands in the output image when a person is detected", + "enum": ["ON", "OFF", "DEFAULT"] + }, + "preserve_face": { + "type": "string", + "description": "Whether to preserve the face in the output image when a person is detected", + "enum": ["OFF", "ON", "DEFAULT"] + }, + "merge_style": { + "type": "string", + "description": "Determines how source and reference images are stitched together", + "enum": ["BALANCED", "SEAMLESS", "DETAILED"], + "default": "BALANCED" + }, + "return_mask": { + "type": "boolean", + "description": "Whether to return the mask image with the output image", + "default": false + }, + # BACKGROUND_REMOVAL parameters + # (uses image_path parameter defined above) + + # Common parameters + 
"quality": { + "type": "string", + "description": "Image quality", + "enum": ["standard", "premium"], + "default": "standard" + }, + "cfg_scale": { + "type": "number", + "description": "How strictly to adhere to the prompt. Range: 1.1-10", + "minimum": 1.1, + "maximum": 10, + "default": 6.5 + }, + "seed": { + "type": "integer", + "description": "Seed for reproducible results" + }, + "model_id": { + "type": "string", + "description": "Model ID", + "default": "amazon.nova-canvas-v1:0" + }, + "region": { + "type": "string", + "description": "AWS region", + "default": "us-east-1" + } + }, + "required": [] + } + }, +} + + +def create_filename(prompt: str) -> str: + """Generate a filename from the prompt text.""" + words = re.findall(r"\w+", prompt.lower())[:5] + filename = "_".join(words) + filename = re.sub(r"[^\w\-_\.]", "_", filename) + return filename[:100] # Limit filename length + + +def nova_canvas(tool: ToolUse, **kwargs: Any) -> ToolResult: + """ + Use Amazon Nova Canvas for image generation, virtual try-on, and background removal. + + This function supports three main task types: + + 1. TEXT_IMAGE - Generate images from text prompts with optional style parameters + 2. VIRTUAL_TRY_ON - Superimpose objects from a reference image onto a source image + 3. 
BACKGROUND_REMOVAL - Remove the background from an image + """ + try: + tool_use_id = tool["toolUseId"] + tool_input = tool["input"] + + task_type = tool_input.get("task_type", "TEXT_IMAGE") + model_id = tool_input.get("model_id", "amazon.nova-canvas-v1:0") + region = tool_input.get("region", "us-east-1") + + client = boto3.client("bedrock-runtime", region_name=region) + + # Build request based on task type + if task_type == "TEXT_IMAGE": + request_body = { + "taskType": "TEXT_IMAGE", + "textToImageParams": { + "text": tool_input.get("text", "A beautiful landscape") + }, + "imageGenerationConfig": { + "quality": tool_input.get("quality", "standard") + } + } + + # Add optional TEXT_IMAGE parameters + if "negative_text" in tool_input: + request_body["textToImageParams"]["negativeText"] = tool_input["negative_text"] + if "style" in tool_input: + request_body["textToImageParams"]["style"] = tool_input["style"] + if "width" in tool_input: + request_body["imageGenerationConfig"]["width"] = tool_input["width"] + if "height" in tool_input: + request_body["imageGenerationConfig"]["height"] = tool_input["height"] + if "cfg_scale" in tool_input: + request_body["imageGenerationConfig"]["cfgScale"] = tool_input["cfg_scale"] + if "seed" in tool_input: + request_body["imageGenerationConfig"]["seed"] = tool_input["seed"] + + elif task_type == "VIRTUAL_TRY_ON": + # Validate required parameters + if "source_image" not in tool_input: + raise ValueError("source_image is required for VIRTUAL_TRY_ON") + if "reference_image" not in tool_input: + raise ValueError("reference_image is required for VIRTUAL_TRY_ON") + if "mask_type" not in tool_input: + raise ValueError("mask_type is required for VIRTUAL_TRY_ON") + + # Initialize request structure + request_body = { + "taskType": "VIRTUAL_TRY_ON", + "virtualTryOnParams": { + "sourceImage": tool_input["source_image"], + "referenceImage": tool_input["reference_image"], + "maskType": tool_input["mask_type"] + }, + "imageGenerationConfig": { + 
"quality": tool_input.get("quality", "standard") + } + } + + # Handle mask type specific parameters + mask_type = tool_input["mask_type"] + + if mask_type == "IMAGE": + if "mask_image" not in tool_input: + raise ValueError("mask_image is required when mask_type is IMAGE") + request_body["virtualTryOnParams"]["imageBasedMask"] = { + "maskImage": tool_input["mask_image"] + } + + elif mask_type == "GARMENT": + if "garment_class" not in tool_input: + raise ValueError("garment_class is required when mask_type is GARMENT") + + garment_mask = { + "garmentClass": tool_input["garment_class"] + } + + if "mask_shape" in tool_input: + garment_mask["maskShape"] = tool_input["mask_shape"] + + # Add garment styling if any styling options are provided + styling_params = ["long_sleeve_style", "tucking_style", "outer_layer_style"] + if any(param in tool_input for param in styling_params): + garment_mask["garmentStyling"] = {} + + if "long_sleeve_style" in tool_input: + garment_mask["garmentStyling"]["longSleeveStyle"] = tool_input["long_sleeve_style"] + if "tucking_style" in tool_input: + garment_mask["garmentStyling"]["tuckingStyle"] = tool_input["tucking_style"] + if "outer_layer_style" in tool_input: + garment_mask["garmentStyling"]["outerLayerStyle"] = tool_input["outer_layer_style"] + + request_body["virtualTryOnParams"]["garmentBasedMask"] = garment_mask + + elif mask_type == "PROMPT": + if "mask_prompt" not in tool_input: + raise ValueError("mask_prompt is required when mask_type is PROMPT") + + prompt_mask = { + "maskPrompt": tool_input["mask_prompt"] + } + + if "mask_shape" in tool_input: + prompt_mask["maskShape"] = tool_input["mask_shape"] + + request_body["virtualTryOnParams"]["promptBasedMask"] = prompt_mask + + # Add mask exclusions if any are provided + exclusion_params = ["preserve_body_pose", "preserve_hands", "preserve_face"] + if any(param in tool_input for param in exclusion_params): + request_body["virtualTryOnParams"]["maskExclusions"] = {} + + if 
"preserve_body_pose" in tool_input: + request_body["virtualTryOnParams"]["maskExclusions"]["preserveBodyPose"] = tool_input["preserve_body_pose"] + if "preserve_hands" in tool_input: + request_body["virtualTryOnParams"]["maskExclusions"]["preserveHands"] = tool_input["preserve_hands"] + if "preserve_face" in tool_input: + request_body["virtualTryOnParams"]["maskExclusions"]["preserveFace"] = tool_input["preserve_face"] + + # Add merge style and return mask options + if "merge_style" in tool_input: + request_body["virtualTryOnParams"]["mergeStyle"] = tool_input["merge_style"] + if "return_mask" in tool_input: + request_body["virtualTryOnParams"]["returnMask"] = tool_input["return_mask"] + + # Add common generation config parameters + if "cfg_scale" in tool_input: + request_body["imageGenerationConfig"]["cfgScale"] = tool_input["cfg_scale"] + if "seed" in tool_input: + request_body["imageGenerationConfig"]["seed"] = tool_input["seed"] + + elif task_type == "BACKGROUND_REMOVAL": + if "image" not in tool_input: + raise ValueError("image is required for BACKGROUND_REMOVAL") + + request_body = { + "taskType": "BACKGROUND_REMOVAL", + "backgroundRemovalParams": { + "image": tool_input["image"] + } + } + else: + raise ValueError(f"Unsupported task type: {task_type}") + + # Invoke the model + response = client.invoke_model( + modelId=model_id, + body=json.dumps(request_body) + ) + + # Process response + model_response = json.loads(response["body"].read().decode("utf-8")) + + # Extract image data + if "images" in model_response and len(model_response["images"]) > 0: + base64_image_data = model_response["images"][0] + + # Create filename based on task type + if task_type == "TEXT_IMAGE": + filename = create_filename(tool_input.get("text", "generated_image")) + elif task_type == "VIRTUAL_TRY_ON": + filename = f"virtual_try_on_{int(time.time())}" + else: # BACKGROUND_REMOVAL + filename = f"background_removal_{int(time.time())}" + + # Save image + output_dir = "output" + if not 
os.path.exists(output_dir): + os.makedirs(output_dir) + + i = 1 + base_image_path = os.path.join(output_dir, f"{filename}.png") + image_path = base_image_path + while os.path.exists(image_path): + image_path = os.path.join(output_dir, f"{filename}_{i}.png") + i += 1 + + with open(image_path, "wb") as file: + file.write(base64.b64decode(base64_image_data)) + + # Handle mask if returned + mask_message = "" + if task_type == "VIRTUAL_TRY_ON" and tool_input.get("return_mask") and "maskImage" in model_response: + mask_data = model_response["maskImage"] + mask_path = os.path.join(output_dir, f"{filename}_mask.png") + with open(mask_path, "wb") as file: + file.write(base64.b64decode(mask_data)) + mask_message = f" Mask saved to {mask_path}." + + return { + "toolUseId": tool_use_id, + "status": "success", + "content": [ + {"text": f"{task_type} task completed successfully. Image saved to {image_path}.{mask_message}"}, + { + "image": { + "format": "png", + "source": {"bytes": base64.b64decode(base64_image_data)}, + } + }, + ], + } + else: + raise ValueError("No image data found in the response") + + except Exception as e: + return { + "toolUseId": tool_use_id, + "status": "error", + "content": [{"text": f"Error: {str(e)}"}], + }style"] + if "width" in tool_input: + request_body["imageGenerationConfig"]["width"] = tool_input["width"] + if "height" in tool_input: + request_body["imageGenerationConfig"]["height"] = tool_input["height"] + if "cfg_scale" in tool_input: + request_body["imageGenerationConfig"]["cfgScale"] = tool_input["cfg_scale"] + if "seed" in tool_input: + request_body["imageGenerationConfig"]["seed"] = tool_input["seed"] + + elif task_type == "VIRTUAL_TRY_ON": + request_body = { + "taskType": "VIRTUAL_TRY_ON", + "virtualTryOnParams": { + "sourceImage": tool_input["source_image"], + "referenceImage": tool_input["reference_image"], + "maskType": tool_input.get("mask_type", "GARMENT") + }, + "imageGenerationConfig": { + "quality": tool_input.get("quality", 
"standard") + } + } + + # Add mask-specific parameters + if tool_input.get("mask_type") == "IMAGE" and "mask_image" in tool_input: + request_body["virtualTryOnParams"]["imageBasedMask"] = { + "maskImage": tool_input["mask_image"] + } + elif tool_input.get("mask_type") == "GARMENT": + garment_mask = {} + if "mask_shape" in tool_input: + garment_mask["maskShape"] = tool_input["mask_shape"] + if "garment_class" in tool_input: + garment_mask["garmentClass"] = tool_input["garment_class"] + if any(k in tool_input for k in ["long_sleeve_style", "tucking_style", "outer_layer_style"]): + garment_mask["garmentStyling"] = {} + if "long_sleeve_style" in tool_input: + garment_mask["garmentStyling"]["longSleeveStyle"] = tool_input["long_sleeve_style"] + if "tucking_style" in tool_input: + garment_mask["garmentStyling"]["tuckingStyle"] = tool_input["tucking_style"] + if "outer_layer_style" in tool_input: + garment_mask["garmentStyling"]["outerLayerStyle"] = tool_input["outer_layer_style"] + request_body["virtualTryOnParams"]["garmentBasedMask"] = garment_mask + elif tool_input.get("mask_type") == "PROMPT" and "mask_prompt" in tool_input: + prompt_mask = {"maskPrompt": tool_input["mask_prompt"]} + if "mask_shape" in tool_input: + prompt_mask["maskShape"] = tool_input["mask_shape"] + request_body["virtualTryOnParams"]["promptBasedMask"] = prompt_mask + + # Add mask exclusions + if any(k in tool_input for k in ["preserve_body_pose", "preserve_hands", "preserve_face"]): + request_body["virtualTryOnParams"]["maskExclusions"] = {} + if "preserve_body_pose" in tool_input: + request_body["virtualTryOnParams"]["maskExclusions"]["preserveBodyPose"] = tool_input["preserve_body_pose"] + if "preserve_hands" in tool_input: + request_body["virtualTryOnParams"]["maskExclusions"]["preserveHands"] = tool_input["preserve_hands"] + if "preserve_face" in tool_input: + request_body["virtualTryOnParams"]["maskExclusions"]["preserveFace"] = tool_input["preserve_face"] + + # Add other virtual try-on 
parameters + if "merge_style" in tool_input: + request_body["virtualTryOnParams"]["mergeStyle"] = tool_input["merge_style"] + if "return_mask" in tool_input: + request_body["virtualTryOnParams"]["returnMask"] = tool_input["return_mask"] + if "cfg_scale" in tool_input: + request_body["imageGenerationConfig"]["cfgScale"] = tool_input["cfg_scale"] + if "seed" in tool_input: + request_body["imageGenerationConfig"]["seed"] = tool_input["seed"] + + elif task_type == "BACKGROUND_REMOVAL": + request_body = { + "taskType": "BACKGROUND_REMOVAL", + "backgroundRemovalParams": { + "image": tool_input["image"] + } + } + else: + raise ValueError(f"Unsupported task type: {task_type}") + + # Invoke the model + response = client.invoke_model( + modelId=model_id, + body=json.dumps(request_body) + ) + + # Process response + model_response = json.loads(response["body"].read().decode("utf-8")) + base64_image_data = model_response["images"][0] + + # Save image + filename = create_filename(tool_input.get("text", task_type.lower())) + output_dir = "output" + if not os.path.exists(output_dir): + os.makedirs(output_dir) + + i = 1 + base_image_path = os.path.join(output_dir, f"{filename}.png") + image_path = base_image_path + while os.path.exists(image_path): + image_path = os.path.join(output_dir, f"{filename}_{i}.png") + i += 1 + + with open(image_path, "wb") as file: + file.write(base64.b64decode(base64_image_data)) + + return { + "toolUseId": tool_use_id, + "status": "success", + "content": [ + {"text": f"Task {task_type} completed. 
Image saved to {image_path}"}, + { + "image": { + "format": "png", + "source": {"bytes": base64.b64decode(base64_image_data)}, + } + }, + ], + } + + except Exception as e: + return { + "toolUseId": tool_use_id, + "status": "error", + "content": [{"text": f"Error: {str(e)}"}], + } From 23c46119b757447e16171326184a519aea0b5895 Mon Sep 17 00:00:00 2001 From: cerashdan Date: Tue, 15 Jul 2025 11:58:35 +0000 Subject: [PATCH 10/18] adding integration test for image generation and background removal --- tests-integ/test_nova_canvas.py | 118 ++++++++++++++++++++++++++++++++ 1 file changed, 118 insertions(+) create mode 100644 tests-integ/test_nova_canvas.py diff --git a/tests-integ/test_nova_canvas.py b/tests-integ/test_nova_canvas.py new file mode 100644 index 00000000..2e3e4fcd --- /dev/null +++ b/tests-integ/test_nova_canvas.py @@ -0,0 +1,118 @@ +import os + +import pytest +from strands import Agent +from strands_tools import nova_canvas, image_reader + + +@pytest.fixture +def agent(): + """Agent with image generation and reader tools.""" + return Agent(tools=[nova_canvas, image_reader]) + + +def test_generate_and_read_image(agent, tmp_path): + # 1. 
Generate a lovely dog picture + prompt = "A corgi riding a skateboard in Times Square" + image_gen_result = agent.tool.nova_canvas( + prompt=prompt, + task_type="TEXT_IMAGE", + model_id="amazon.nova-canvas-v1:0", + negative_prompt="blurry, low quality", + ) + assert image_gen_result["status"] == "success", str(image_gen_result) + content = image_gen_result["content"] + + # Extract and verify image bytes from result + found_image = None + for item in content: + if "image" in item and "source" in item["image"]: + found_image = item["image"]["source"]["bytes"] + assert isinstance(found_image, bytes), "Returned image bytes are not 'bytes' type" + assert len(found_image) > 1000, "Returned image is too small to be valid" + break + assert found_image is not None, "No image bytes found in result" + + # Save image to temp directory + image_path = tmp_path / "generated.png" + with open(image_path, "wb") as f: + f.write(found_image) + + # 2. use image_reader tool to verify it's a real image + assert os.path.exists(image_path), f"Image file not found at {image_path}" + read_result = agent.tool.image_reader(image_path=str(image_path)) + assert read_result["status"] == "success", str(read_result) + image_content = read_result["content"][0]["image"] + assert image_content["format"] == "png" + assert isinstance(image_content["source"]["bytes"], bytes) + assert len(image_content["source"]["bytes"]) > 1000 + + # 3. test semantic usage to check if it recognizes dog/corgi + semantic_result = agent(f"What is the image at `{image_path}`") + assert "dog" in str(semantic_result).lower() or "corgi" in str(semantic_result).lower() + +def test_remove_background(agent, tmp_path): + # 1. 
Generate an image + prompt = "A corgi riding a skateboard in Times Square" + image_gen_result = agent.tool.nova_canvas( + prompt=prompt, + task_type="TEXT_IMAGE", + model_id="amazon.nova-canvas-v1:0", + negative_prompt="blurry, low quality", + ) + assert image_gen_result["status"] == "success", str(image_gen_result) + content = image_gen_result["content"] + + # Extract and verify image bytes from result + found_image = None + for item in content: + if "image" in item and "source" in item["image"]: + found_image = item["image"]["source"]["bytes"] + assert isinstance(found_image, bytes), "Returned image bytes are not 'bytes' type" + assert len(found_image) > 1000, "Returned image is too small to be valid" + break + assert found_image is not None, "No image bytes found in result" + + # Save image to temp directory + image_path = tmp_path / "generated.png" + with open(image_path, "wb") as f: + f.write(found_image) + + #2. Remove the background from the generated image + image_gen_result = agent.tool.nova_canvas( + task_type="BACKGROUND_REMOVAL", + model_id="amazon.nova-canvas-v1:0", + image_path=str(image_path), + ) + assert image_gen_result["status"] == "success", str(image_gen_result) + content = image_gen_result["content"] + + # Extract and verify the image with removed background bytes from result + found_image = None + for item in content: + if "image" in item and "source" in item["image"]: + found_image = item["image"]["source"]["bytes"] + assert isinstance(found_image, bytes), "Returned image bytes are not 'bytes' type" + assert len(found_image) > 1000, "Returned image is too small to be valid" + break + assert found_image is not None, "No image bytes found in result" + + # Save image to temp directory + image_path_no_bg = tmp_path / "generated_no_bg.png" + with open(image_path_no_bg, "wb") as f: + f.write(found_image) + + # 2. 
use image_reader tool to verify it's a real image + assert os.path.exists(image_path_no_bg), f"Image file not found at {image_path_no_bg}" + read_result = agent.tool.image_reader(image_path=str(image_path_no_bg)) + assert read_result["status"] == "success", str(read_result) + image_content = read_result["content"][0]["image"] + assert image_content["format"] == "png" + assert isinstance(image_content["source"]["bytes"], bytes) + assert len(image_content["source"]["bytes"]) > 1000 + + # 3. test semantic usage to check if it recognizes dog/corgi + semantic_result = agent(f"Has the background been removed from the image at `{image_path_no_bg} - compare with image at {image_path}` \ + respond with yes or no first") + print(f"Agent response: {semantic_result}") + assert "yes" in str(semantic_result).lower() \ No newline at end of file From 3095ca6d2ebad2f20663175d7bd86f70b4dd67a9 Mon Sep 17 00:00:00 2001 From: cerashdan Date: Tue, 15 Jul 2025 11:58:58 +0000 Subject: [PATCH 11/18] updating the main tool to pass integ tests --- src/strands_tools/nova_canvas.py | 217 ++++++++----------------------- 1 file changed, 53 insertions(+), 164 deletions(-) diff --git a/src/strands_tools/nova_canvas.py b/src/strands_tools/nova_canvas.py index 3a8bb5cd..0f50870a 100644 --- a/src/strands_tools/nova_canvas.py +++ b/src/strands_tools/nova_canvas.py @@ -126,7 +126,7 @@ "properties": { "task_type": { "type": "string", - "description": "The task type for Amazon Nova Canvas", + "description": "Required: the task type for Amazon Nova Canvas", "enum": ["TEXT_IMAGE", "VIRTUAL_TRY_ON", "BACKGROUND_REMOVAL"], "default": "TEXT_IMAGE" }, @@ -137,20 +137,20 @@ }, "negative_text": { "type": "string", - "description": "Negative text prompt (TEXT_IMAGE only)" + "description": "Optional: negative text prompt (TEXT_IMAGE only)" }, "style": { "type": "string", - "description": "Style for image generation (TEXT_IMAGE only)", + "description": "Optional: style for image generation (TEXT_IMAGE only)", 
"enum": ["3D_ANIMATED_FAMILY_FILM", "DESIGN_SKETCH", "FLAT_VECTOR_ILLUSTRATION", "GRAPHIC_NOVEL_ILLUSTRATION", "MAXIMALISM", "MIDCENTURY_RETRO", "PHOTOREALISM", "SOFT_DIGITAL_PAINTING"] }, "width": { "type": "integer", - "description": "Image width in pixels (TEXT_IMAGE only)" + "description": "Optional: image width in pixels (TEXT_IMAGE only)" }, "height": { "type": "integer", - "description": "Image height in pixels (TEXT_IMAGE only)" + "description": "Optional: image height in pixels (TEXT_IMAGE only)" }, # VIRTUAL_TRY_ON parameters "image_path": { @@ -163,16 +163,12 @@ }, "mask_type": { "type": "string", - "description": "Specifies whether the mask is provided as an image, prompt, or garment mask (required for VIRTUAL_TRY_ON)", - "enum": ["IMAGE", "GARMENT", "PROMPT"] - }, - "mask_image_path": { - "type": "string", - "description": "Path to mask image file defining areas to edit (black) and ignore (white). Required when mask_type is IMAGE" + "description": "Specifies whether the mask is provided as prompt, or garment mask (required for VIRTUAL_TRY_ON)", + "enum": ["GARMENT", "PROMPT"] }, "mask_shape": { "type": "string", - "description": "Defines the shape of the mask bounding box, affecting how reference image is transferred", + "description": "Defines the shape of the mask bounding box, affecting how reference image is transferred (optional for mask_type GARMET and PROMPT)", "enum": ["CONTOUR", "BOUNDING_BOX", "DEFAULT"] }, "garment_class": { @@ -182,17 +178,17 @@ }, "long_sleeve_style": { "type": "string", - "description": "Styling for long sleeve garments (applies only to upper body garments)", + "description": "Styling for long sleeve garments (optional for GARMET mask_type and applies only to upper body garments)", "enum": ["SLEEVE_DOWN", "SLEEVE_UP"] }, "tucking_style": { "type": "string", - "description": "Tucking style option (applies only to upper body garments)", + "description": "Tucking style option (optional for GARMET mask_type and applies only 
to upper body garments)", "enum": ["UNTUCKED", "TUCKED"] }, "outer_layer_style": { "type": "string", - "description": "Styling for outer layer garments (applies only to outer layer, upper body garments)", + "description": "Styling for outer layer garments (optional for GARMET mask_type and applies only to outer layer, upper body garments)", "enum": ["CLOSED", "OPEN"] }, "mask_prompt": { @@ -201,30 +197,25 @@ }, "preserve_body_pose": { "type": "string", - "description": "Whether to preserve the body pose in the output image when a person is detected", + "description": "Optional: whether to preserve the body pose in the output image when a person is detected", "enum": ["ON", "OFF", "DEFAULT"] }, "preserve_hands": { "type": "string", - "description": "Whether to preserve hands in the output image when a person is detected", + "description": "Optional: whether to preserve hands in the output image when a person is detected", "enum": ["ON", "OFF", "DEFAULT"] }, "preserve_face": { "type": "string", - "description": "Whether to preserve the face in the output image when a person is detected", + "description": "Optional: whether to preserve the face in the output image when a person is detected", "enum": ["OFF", "ON", "DEFAULT"] }, "merge_style": { "type": "string", - "description": "Determines how source and reference images are stitched together", + "description": "Optional: determines how source and reference images are stitched together", "enum": ["BALANCED", "SEAMLESS", "DETAILED"], "default": "BALANCED" }, - "return_mask": { - "type": "boolean", - "description": "Whether to return the mask image with the output image", - "default": false - }, # BACKGROUND_REMOVAL parameters # (uses image_path parameter defined above) @@ -271,6 +262,12 @@ def create_filename(prompt: str) -> str: return filename[:100] # Limit filename length +def encode_image_file(file_path): + """Read an image file and return its base64 encoded string.""" + with open(file_path, "rb") as image_file: + 
return base64.b64encode(image_file.read()).decode("utf-8") + + def nova_canvas(tool: ToolUse, **kwargs: Any) -> ToolResult: """ Use Amazon Nova Canvas for image generation, virtual try-on, and background removal. @@ -296,7 +293,7 @@ def nova_canvas(tool: ToolUse, **kwargs: Any) -> ToolResult: request_body = { "taskType": "TEXT_IMAGE", "textToImageParams": { - "text": tool_input.get("text", "A beautiful landscape") + "text": tool_input.get("prompt", "A beautiful landscape") }, "imageGenerationConfig": { "quality": tool_input.get("quality", "standard") @@ -305,7 +302,7 @@ def nova_canvas(tool: ToolUse, **kwargs: Any) -> ToolResult: # Add optional TEXT_IMAGE parameters if "negative_text" in tool_input: - request_body["textToImageParams"]["negativeText"] = tool_input["negative_text"] + request_body["textToImageParams"]["negativeText"] = tool_input["negative_prompt"] if "style" in tool_input: request_body["textToImageParams"]["style"] = tool_input["style"] if "width" in tool_input: @@ -319,19 +316,23 @@ def nova_canvas(tool: ToolUse, **kwargs: Any) -> ToolResult: elif task_type == "VIRTUAL_TRY_ON": # Validate required parameters - if "source_image" not in tool_input: - raise ValueError("source_image is required for VIRTUAL_TRY_ON") - if "reference_image" not in tool_input: - raise ValueError("reference_image is required for VIRTUAL_TRY_ON") + if "image_path" not in tool_input: + raise ValueError("image_path is required for VIRTUAL_TRY_ON") + if "reference_image_path" not in tool_input: + raise ValueError("reference_image_path is required for VIRTUAL_TRY_ON") if "mask_type" not in tool_input: raise ValueError("mask_type is required for VIRTUAL_TRY_ON") + + # Read and encode images + source_image_b64 = encode_image_file(tool_input["image_path"]) + reference_image_b64 = encode_image_file(tool_input["reference_image_path"]) # Initialize request structure request_body = { "taskType": "VIRTUAL_TRY_ON", "virtualTryOnParams": { - "sourceImage": tool_input["source_image"], - 
"referenceImage": tool_input["reference_image"], + "sourceImage": source_image_b64, + "referenceImage": reference_image_b64, "maskType": tool_input["mask_type"] }, "imageGenerationConfig": { @@ -343,10 +344,12 @@ def nova_canvas(tool: ToolUse, **kwargs: Any) -> ToolResult: mask_type = tool_input["mask_type"] if mask_type == "IMAGE": - if "mask_image" not in tool_input: - raise ValueError("mask_image is required when mask_type is IMAGE") + if "mask_image_path" not in tool_input: + raise ValueError("mask_image_path is required when mask_type is IMAGE") + + mask_image_b64 = encode_image_file(tool_input["mask_image_path"]) request_body["virtualTryOnParams"]["imageBasedMask"] = { - "maskImage": tool_input["mask_image"] + "maskImage": mask_image_b64 } elif mask_type == "GARMENT": @@ -412,13 +415,16 @@ def nova_canvas(tool: ToolUse, **kwargs: Any) -> ToolResult: request_body["imageGenerationConfig"]["seed"] = tool_input["seed"] elif task_type == "BACKGROUND_REMOVAL": - if "image" not in tool_input: - raise ValueError("image is required for BACKGROUND_REMOVAL") + if "image_path" not in tool_input: + raise ValueError("image_path is required for BACKGROUND_REMOVAL") + + # Read and encode image + image_b64 = encode_image_file(tool_input["image_path"]) request_body = { "taskType": "BACKGROUND_REMOVAL", "backgroundRemovalParams": { - "image": tool_input["image"] + "image": image_b64 } } else: @@ -441,9 +447,15 @@ def nova_canvas(tool: ToolUse, **kwargs: Any) -> ToolResult: if task_type == "TEXT_IMAGE": filename = create_filename(tool_input.get("text", "generated_image")) elif task_type == "VIRTUAL_TRY_ON": - filename = f"virtual_try_on_{int(time.time())}" + # Extract filename from source image path + source_filename = os.path.basename(tool_input["image_path"]) + base_name = os.path.splitext(source_filename)[0] + filename = f"{base_name}_try_on" else: # BACKGROUND_REMOVAL - filename = f"background_removal_{int(time.time())}" + # Extract filename from image path + source_filename 
= os.path.basename(tool_input["image_path"]) + base_name = os.path.splitext(source_filename)[0] + filename = f"{base_name}_no_bg" # Save image output_dir = "output" @@ -485,133 +497,10 @@ def nova_canvas(tool: ToolUse, **kwargs: Any) -> ToolResult: else: raise ValueError("No image data found in the response") - except Exception as e: - return { - "toolUseId": tool_use_id, - "status": "error", - "content": [{"text": f"Error: {str(e)}"}], - }style"] - if "width" in tool_input: - request_body["imageGenerationConfig"]["width"] = tool_input["width"] - if "height" in tool_input: - request_body["imageGenerationConfig"]["height"] = tool_input["height"] - if "cfg_scale" in tool_input: - request_body["imageGenerationConfig"]["cfgScale"] = tool_input["cfg_scale"] - if "seed" in tool_input: - request_body["imageGenerationConfig"]["seed"] = tool_input["seed"] - - elif task_type == "VIRTUAL_TRY_ON": - request_body = { - "taskType": "VIRTUAL_TRY_ON", - "virtualTryOnParams": { - "sourceImage": tool_input["source_image"], - "referenceImage": tool_input["reference_image"], - "maskType": tool_input.get("mask_type", "GARMENT") - }, - "imageGenerationConfig": { - "quality": tool_input.get("quality", "standard") - } - } - - # Add mask-specific parameters - if tool_input.get("mask_type") == "IMAGE" and "mask_image" in tool_input: - request_body["virtualTryOnParams"]["imageBasedMask"] = { - "maskImage": tool_input["mask_image"] - } - elif tool_input.get("mask_type") == "GARMENT": - garment_mask = {} - if "mask_shape" in tool_input: - garment_mask["maskShape"] = tool_input["mask_shape"] - if "garment_class" in tool_input: - garment_mask["garmentClass"] = tool_input["garment_class"] - if any(k in tool_input for k in ["long_sleeve_style", "tucking_style", "outer_layer_style"]): - garment_mask["garmentStyling"] = {} - if "long_sleeve_style" in tool_input: - garment_mask["garmentStyling"]["longSleeveStyle"] = tool_input["long_sleeve_style"] - if "tucking_style" in tool_input: - 
garment_mask["garmentStyling"]["tuckingStyle"] = tool_input["tucking_style"] - if "outer_layer_style" in tool_input: - garment_mask["garmentStyling"]["outerLayerStyle"] = tool_input["outer_layer_style"] - request_body["virtualTryOnParams"]["garmentBasedMask"] = garment_mask - elif tool_input.get("mask_type") == "PROMPT" and "mask_prompt" in tool_input: - prompt_mask = {"maskPrompt": tool_input["mask_prompt"]} - if "mask_shape" in tool_input: - prompt_mask["maskShape"] = tool_input["mask_shape"] - request_body["virtualTryOnParams"]["promptBasedMask"] = prompt_mask - - # Add mask exclusions - if any(k in tool_input for k in ["preserve_body_pose", "preserve_hands", "preserve_face"]): - request_body["virtualTryOnParams"]["maskExclusions"] = {} - if "preserve_body_pose" in tool_input: - request_body["virtualTryOnParams"]["maskExclusions"]["preserveBodyPose"] = tool_input["preserve_body_pose"] - if "preserve_hands" in tool_input: - request_body["virtualTryOnParams"]["maskExclusions"]["preserveHands"] = tool_input["preserve_hands"] - if "preserve_face" in tool_input: - request_body["virtualTryOnParams"]["maskExclusions"]["preserveFace"] = tool_input["preserve_face"] - - # Add other virtual try-on parameters - if "merge_style" in tool_input: - request_body["virtualTryOnParams"]["mergeStyle"] = tool_input["merge_style"] - if "return_mask" in tool_input: - request_body["virtualTryOnParams"]["returnMask"] = tool_input["return_mask"] - if "cfg_scale" in tool_input: - request_body["imageGenerationConfig"]["cfgScale"] = tool_input["cfg_scale"] - if "seed" in tool_input: - request_body["imageGenerationConfig"]["seed"] = tool_input["seed"] - - elif task_type == "BACKGROUND_REMOVAL": - request_body = { - "taskType": "BACKGROUND_REMOVAL", - "backgroundRemovalParams": { - "image": tool_input["image"] - } - } - else: - raise ValueError(f"Unsupported task type: {task_type}") - - # Invoke the model - response = client.invoke_model( - modelId=model_id, - body=json.dumps(request_body) - ) 
- - # Process response - model_response = json.loads(response["body"].read().decode("utf-8")) - base64_image_data = model_response["images"][0] - - # Save image - filename = create_filename(tool_input.get("text", task_type.lower())) - output_dir = "output" - if not os.path.exists(output_dir): - os.makedirs(output_dir) - - i = 1 - base_image_path = os.path.join(output_dir, f"{filename}.png") - image_path = base_image_path - while os.path.exists(image_path): - image_path = os.path.join(output_dir, f"{filename}_{i}.png") - i += 1 - - with open(image_path, "wb") as file: - file.write(base64.b64decode(base64_image_data)) - - return { - "toolUseId": tool_use_id, - "status": "success", - "content": [ - {"text": f"Task {task_type} completed. Image saved to {image_path}"}, - { - "image": { - "format": "png", - "source": {"bytes": base64.b64decode(base64_image_data)}, - } - }, - ], - } - except Exception as e: return { "toolUseId": tool_use_id, "status": "error", "content": [{"text": f"Error: {str(e)}"}], } + \ No newline at end of file From ab095a7c0260ac5db5c908e85fbedccd8546f4a5 Mon Sep 17 00:00:00 2001 From: cerashdan Date: Tue, 15 Jul 2025 13:56:28 +0000 Subject: [PATCH 12/18] completing nova canvas integrating tests --- src/strands_tools/nova_canvas.py | 25 +--- tests-integ/test_nova_canvas.py | 196 ++++++++++++++++++++++++++++++- 2 files changed, 198 insertions(+), 23 deletions(-) diff --git a/src/strands_tools/nova_canvas.py b/src/strands_tools/nova_canvas.py index 0f50870a..366e9722 100644 --- a/src/strands_tools/nova_canvas.py +++ b/src/strands_tools/nova_canvas.py @@ -343,16 +343,8 @@ def nova_canvas(tool: ToolUse, **kwargs: Any) -> ToolResult: # Handle mask type specific parameters mask_type = tool_input["mask_type"] - if mask_type == "IMAGE": - if "mask_image_path" not in tool_input: - raise ValueError("mask_image_path is required when mask_type is IMAGE") - - mask_image_b64 = encode_image_file(tool_input["mask_image_path"]) - 
request_body["virtualTryOnParams"]["imageBasedMask"] = { - "maskImage": mask_image_b64 - } - - elif mask_type == "GARMENT": + + if mask_type == "GARMENT": if "garment_class" not in tool_input: raise ValueError("garment_class is required when mask_type is GARMENT") @@ -445,7 +437,7 @@ def nova_canvas(tool: ToolUse, **kwargs: Any) -> ToolResult: # Create filename based on task type if task_type == "TEXT_IMAGE": - filename = create_filename(tool_input.get("text", "generated_image")) + filename = create_filename(tool_input.get("prompt", "generated_image")) elif task_type == "VIRTUAL_TRY_ON": # Extract filename from source image path source_filename = os.path.basename(tool_input["image_path"]) @@ -472,20 +464,11 @@ def nova_canvas(tool: ToolUse, **kwargs: Any) -> ToolResult: with open(image_path, "wb") as file: file.write(base64.b64decode(base64_image_data)) - # Handle mask if returned - mask_message = "" - if task_type == "VIRTUAL_TRY_ON" and tool_input.get("return_mask") and "maskImage" in model_response: - mask_data = model_response["maskImage"] - mask_path = os.path.join(output_dir, f"{filename}_mask.png") - with open(mask_path, "wb") as file: - file.write(base64.b64decode(mask_data)) - mask_message = f" Mask saved to {mask_path}." - return { "toolUseId": tool_use_id, "status": "success", "content": [ - {"text": f"{task_type} task completed successfully. Image saved to {image_path}.{mask_message}"}, + {"text": f"{task_type} task completed successfully. Image saved to {image_path}"}, { "image": { "format": "png", diff --git a/tests-integ/test_nova_canvas.py b/tests-integ/test_nova_canvas.py index 2e3e4fcd..56155059 100644 --- a/tests-integ/test_nova_canvas.py +++ b/tests-integ/test_nova_canvas.py @@ -114,5 +114,197 @@ def test_remove_background(agent, tmp_path): # 3. 
test semantic usage to check if it recognizes dog/corgi semantic_result = agent(f"Has the background been removed from the image at `{image_path_no_bg} - compare with image at {image_path}` \ respond with yes or no first") - print(f"Agent response: {semantic_result}") - assert "yes" in str(semantic_result).lower() \ No newline at end of file + assert "yes" in str(semantic_result).lower() + +def test_virtual_try_on_mask_garment(agent, tmp_path): + # 1. Generate an image of an empty living room + prompt = "full body person with a warm, genuine smile standing facing directly at the camera. \ + in a sunny neighberhood with green nature." + image_gen_result = agent.tool.nova_canvas( + prompt=prompt, + task_type="TEXT_IMAGE", + model_id="amazon.nova-canvas-v1:0", + negative_prompt="blurry, low quality", + ) + assert image_gen_result["status"] == "success", str(image_gen_result) + content = image_gen_result["content"] + + # Extract and verify image bytes from result + found_image = None + for item in content: + if "image" in item and "source" in item["image"]: + found_image = item["image"]["source"]["bytes"] + assert isinstance(found_image, bytes), "Returned image bytes are not 'bytes' type" + assert len(found_image) > 1000, "Returned image is too small to be valid" + break + assert found_image is not None, "No image bytes found in result" + + # Save the empty living room with red couch image to temp directory + living_room_image_path = tmp_path / "human_standing.png" + with open(living_room_image_path, "wb") as f: + f.write(found_image) + + # 2. 
Generate an image of a yellow couch + prompt = "Generate a vibrant tech hoodie with AWS written on it" + image_gen_result = agent.tool.nova_canvas( + prompt=prompt, + task_type="TEXT_IMAGE", + model_id="amazon.nova-canvas-v1:0", + negative_prompt="blurry, low quality", + ) + assert image_gen_result["status"] == "success", str(image_gen_result) + content = image_gen_result["content"] + + # Extract and verify image bytes from result + found_image = None + for item in content: + if "image" in item and "source" in item["image"]: + found_image = item["image"]["source"]["bytes"] + assert isinstance(found_image, bytes), "Returned image bytes are not 'bytes' type" + assert len(found_image) > 1000, "Returned image is too small to be valid" + break + assert found_image is not None, "No image bytes found in result" + + # Save the couch image to temp directory + couch_image_path = tmp_path / "ai_hoodie_aws.png" + with open(couch_image_path, "wb") as f: + f.write(found_image) + + # 3. Virtual try on the couch on the empty living room + image_gen_result = agent.tool.nova_canvas( + task_type="VIRTUAL_TRY_ON", + model_id="amazon.nova-canvas-v1:0", + image_path=str(living_room_image_path), + reference_image_path=str(couch_image_path), + mask_type="GARMENT", + garment_class="UPPER_BODY", + longSleeveStyle="SLEEVE_DOWN" + ) + + assert image_gen_result["status"] == "success", str(image_gen_result) + content = image_gen_result["content"] + + # Extract and verify image bytes from result + found_image = None + for item in content: + if "image" in item and "source" in item["image"]: + found_image = item["image"]["source"]["bytes"] + assert isinstance(found_image, bytes), "Returned image bytes are not 'bytes' type" + assert len(found_image) > 1000, "Returned image is too small to be valid" + break + assert found_image is not None, "No image bytes found in result" + + # Save image to temp directory + image_path = tmp_path / "hoodie_ai_garmet_try_on.png" + with open(image_path, "wb") as f: + 
f.write(found_image) + + # 2. use image_reader tool to verify it's a real image + assert os.path.exists(image_path), f"Image file not found at {image_path}" + read_result = agent.tool.image_reader(image_path=str(image_path)) + assert read_result["status"] == "success", str(read_result) + image_content = read_result["content"][0]["image"] + assert image_content["format"] == "png" + assert isinstance(image_content["source"]["bytes"], bytes) + assert len(image_content["source"]["bytes"]) > 1000 + + # 3. test semantic usage to check if it recognizes dog/corgi + semantic_result = agent(f"Does the image at path `{image_path}` contain a a person wearing an AWS hoodie?\ + respond with yes or no first") + assert "yes" in str(semantic_result).lower() + +# def test_virtual_try_on_prompt_mask(agent, tmp_path): + # 1. Generate an image of an empty living room + prompt = "a living room with a white background and a purple couch in the middle" + image_gen_result = agent.tool.nova_canvas( + prompt=prompt, + task_type="TEXT_IMAGE", + model_id="amazon.nova-canvas-v1:0", + negative_prompt="blurry, low quality", + ) + assert image_gen_result["status"] == "success", str(image_gen_result) + content = image_gen_result["content"] + + # Extract and verify image bytes from result + found_image = None + for item in content: + if "image" in item and "source" in item["image"]: + found_image = item["image"]["source"]["bytes"] + assert isinstance(found_image, bytes), "Returned image bytes are not 'bytes' type" + assert len(found_image) > 1000, "Returned image is too small to be valid" + break + assert found_image is not None, "No image bytes found in result" + + # Save the empty living room with red couch image to temp directory + living_room_image_path = tmp_path / "empty_room_blue_couch.png" + with open(living_room_image_path, "wb") as f: + f.write(found_image) + + # 2. 
Generate an image of a yellow couch + prompt = "Generate a green couch with white background" + image_gen_result = agent.tool.nova_canvas( + prompt=prompt, + task_type="TEXT_IMAGE", + model_id="amazon.nova-canvas-v1:0", + negative_prompt="blurry, low quality", + ) + assert image_gen_result["status"] == "success", str(image_gen_result) + content = image_gen_result["content"] + + # Extract and verify image bytes from result + found_image = None + for item in content: + if "image" in item and "source" in item["image"]: + found_image = item["image"]["source"]["bytes"] + assert isinstance(found_image, bytes), "Returned image bytes are not 'bytes' type" + assert len(found_image) > 1000, "Returned image is too small to be valid" + break + assert found_image is not None, "No image bytes found in result" + + # Save the couch image to temp directory + couch_image_path = tmp_path / "couch.png" + with open(couch_image_path, "wb") as f: + f.write(found_image) + + # 3. Virtual try on the couch on the empty living room + image_gen_result = agent.tool.nova_canvas( + task_type="VIRTUAL_TRY_ON", + model_id="amazon.nova-canvas-v1:0", + image_path=str(living_room_image_path), + reference_image_path=str(couch_image_path), + mask_type="PROMPT", + mask_prompt="replace the couch with yellow couch" + ) + + assert image_gen_result["status"] == "success", str(image_gen_result) + content = image_gen_result["content"] + + # Extract and verify image bytes from result + found_image = None + for item in content: + if "image" in item and "source" in item["image"]: + found_image = item["image"]["source"]["bytes"] + assert isinstance(found_image, bytes), "Returned image bytes are not 'bytes' type" + assert len(found_image) > 1000, "Returned image is too small to be valid" + break + assert found_image is not None, "No image bytes found in result" + + # Save image to temp directory + image_path = tmp_path / "living_room_couch_try_on.png" + with open(image_path, "wb") as f: + f.write(found_image) + + # 
2. use image_reader tool to verify it's a real image + assert os.path.exists(image_path), f"Image file not found at {image_path}" + read_result = agent.tool.image_reader(image_path=str(image_path)) + assert read_result["status"] == "success", str(read_result) + image_content = read_result["content"][0]["image"] + assert image_content["format"] == "png" + assert isinstance(image_content["source"]["bytes"], bytes) + assert len(image_content["source"]["bytes"]) > 1000 + + # 3. test semantic usage to check if it recognizes dog/corgi + semantic_result = agent(f"Does the image at path `{image_path}` contain a green couch in an empty living room?\ + respond with yes or no first") + print(f"\n Agent response: {semantic_result}") + assert "yes" in str(semantic_result).lower() and "green" in str(semantic_result).lower() \ No newline at end of file From 1d684108e4b15c78b27e1767033c29063886327c Mon Sep 17 00:00:00 2001 From: cerashdan Date: Tue, 22 Jul 2025 09:33:18 +0000 Subject: [PATCH 13/18] editing tool usage in docstring --- src/strands_tools/nova_canvas.py | 46 +++++++++++++++++--------------- 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/src/strands_tools/nova_canvas.py b/src/strands_tools/nova_canvas.py index 366e9722..786d2721 100644 --- a/src/strands_tools/nova_canvas.py +++ b/src/strands_tools/nova_canvas.py @@ -74,31 +74,35 @@ ```python from strands import Agent from strands_tools import nova_canvas -#TODO -1. add one example for standard generation -2. another for generation with extra parameters -3. 
virutal try on garmen example -agent = Agent(tools=[generate_image]) -# Basic usage with default parameters -agent.tool.generate_image(prompt="A steampunk robot playing chess") +agent = Agent(tools=[nova_canvas]) -# Advanced usage with Stable Diffusion -agent.tool.generate_image( - prompt="A futuristic city with flying cars", - model_id="stability.sd3-5-large-v1:0", - aspect_ratio="5:4", - output_format="jpeg", - negative_prompt="bad lighting, harsh lighting, abstract, surreal, twisted, multiple levels", +# Basic text-to-image generation +agent.tool.nova_canvas( + task_type="TEXT_IMAGE", + text="A steampunk robot playing chess" ) -# Using another Stable Diffusion model -agent.tool.generate_image( - prompt="A photograph of a cup of coffee from the side", - model_id="stability.stable-image-ultra-v1:1", - aspect_ratio="1:1", - output_format="png", - negative_prompt="blurry, distorted", +# Advanced text-to-image with style and parameters +agent.tool.nova_canvas( + task_type="TEXT_IMAGE", + text="A futuristic city with flying cars", + style="PHOTOREALISM", + width=1024, + height=768, + negative_text="bad lighting, harsh lighting, abstract", + cfg_scale=7.5, + quality="premium" +) + +# Virtual try-on with garment +agent.tool.nova_canvas( + task_type="VIRTUAL_TRY_ON", + image_path="person.jpg", + reference_image_path="shirt.jpg", + mask_type="GARMENT", + garment_class="SHORT_SLEEVE_SHIRT", + preserve_face="ON" ) ``` From 6ba6b735c7178b11ca643e76ff7f03518ce36234 Mon Sep 17 00:00:00 2001 From: cerashdan Date: Tue, 22 Jul 2025 09:34:11 +0000 Subject: [PATCH 14/18] updating unit and integ tests for nova canvas --- tests-integ/test_nova_canvas.py | 2 +- tests/test_nova_canvas.py | 185 ++++++++++++++++++++++++++++++++ 2 files changed, 186 insertions(+), 1 deletion(-) create mode 100644 tests/test_nova_canvas.py diff --git a/tests-integ/test_nova_canvas.py b/tests-integ/test_nova_canvas.py index 56155059..28a38342 100644 --- a/tests-integ/test_nova_canvas.py +++ 
b/tests-integ/test_nova_canvas.py @@ -213,7 +213,7 @@ def test_virtual_try_on_mask_garment(agent, tmp_path): respond with yes or no first") assert "yes" in str(semantic_result).lower() -# def test_virtual_try_on_prompt_mask(agent, tmp_path): +def test_virtual_try_on_prompt_mask(agent, tmp_path): # 1. Generate an image of an empty living room prompt = "a living room with a white background and a purple couch in the middle" image_gen_result = agent.tool.nova_canvas( diff --git a/tests/test_nova_canvas.py b/tests/test_nova_canvas.py new file mode 100644 index 00000000..78dd664e --- /dev/null +++ b/tests/test_nova_canvas.py @@ -0,0 +1,185 @@ +""" +Tests for the generate_image tool. +""" + +import base64 +import json +from unittest.mock import MagicMock, patch + +import pytest +from strands import Agent +from strands_tools import nova_canvas + + +@pytest.fixture +def agent(): + """Create an agent with the generate_image tool loaded.""" + return Agent(tools=[nova_canvas]) + + +def extract_result_text(result): + """Extract the result text from the agent response.""" + if isinstance(result, dict) and "content" in result and isinstance(result["content"], list): + return result["content"][0]["text"] + return str(result) + + +@pytest.fixture +def mock_boto3_client(): + """Mock boto3 client for testing.""" + with patch("boto3.client") as mock_client: + # Set up mock response + mock_body = MagicMock() + mock_body.read.return_value = json.dumps( + {"images": [base64.b64encode(b"mock_image_data").decode("utf-8")]} + ).encode("utf-8") + + mock_client_instance = MagicMock() + mock_client_instance.invoke_model.return_value = {"body": mock_body} + mock_client.return_value = mock_client_instance + + yield mock_client + + +@pytest.fixture +def mock_os_path_exists(): + """Mock os.path.exists for testing.""" + with patch("os.path.exists") as mock_exists: + # First return False for output directory check, then True for file check to test filename incrementing + mock_exists.side_effect = 
[False, True, True, False] + yield mock_exists + + +@pytest.fixture +def mock_os_makedirs(): + """Mock os.makedirs for testing.""" + with patch("os.makedirs") as mock_makedirs: + yield mock_makedirs + + +@pytest.fixture +def mock_file_open(): + """Mock file open for testing.""" + mock_file = MagicMock() + mock_context = MagicMock() + mock_context.__enter__.return_value = mock_file + + with patch("builtins.open", return_value=mock_context) as mock_open: + yield mock_open, mock_file + + +def test_generate_image_direct(mock_boto3_client, mock_os_path_exists, mock_os_makedirs, mock_file_open): + """Test direct invocation of the generate_image tool.""" + # Create a tool use dictionary similar to how the agent would call it + tool_use = { + "toolUseId": "test-tool-use-id", + "input": { + "prompt": "A cute robot", + "seed": 123, + "aspect_ratio": "5:4", + "output_format": "png", + "negative_prompt": "blurry, low resolution, pixelated, grainy, unrealistic", + }, + } + + # Call the generate_image function directly + result = nova_canvas.nova_canvas(tool=tool_use) + + # Verify the function was called with correct parameters + mock_boto3_client.assert_called_once_with("bedrock-runtime", region_name="us-east-1") + mock_client_instance = mock_boto3_client.return_value + mock_client_instance.invoke_model.assert_called_once() + + # Check the parameters passed to invoke_model + args, kwargs = mock_client_instance.invoke_model.call_args + request_body = json.loads(kwargs["body"]) + + assert request_body["prompt"] == "A cute robot" + assert request_body["seed"] == 123 + assert request_body["aspect_ratio"] == "5:4" + assert request_body["output_format"] == "png" + assert request_body["negative_prompt"] == "blurry, low resolution, pixelated, grainy, unrealistic" + + # Verify directory creation + mock_os_makedirs.assert_called_once() + + # Verify file operations + mock_open, mock_file = mock_file_open + mock_file.write.assert_called_once() + + # Check the result + assert 
result["toolUseId"] == "test-tool-use-id" + assert result["status"] == "success" + assert "The generated image has been saved locally" in result["content"][0]["text"] + assert result["content"][1]["image"]["format"] == "png" + assert isinstance(result["content"][1]["image"]["source"]["bytes"], bytes) + + +def test_generate_image_default_params(mock_boto3_client, mock_os_path_exists, mock_os_makedirs, mock_file_open): + """Test generate_image with default parameters.""" + tool_use = {"toolUseId": "test-tool-use-id", "input": {"prompt": "A cute robot"}} + + with patch("random.randint", return_value=42): + result = nova_canvas.nova_canvas(tool=tool_use) + + # Check the default parameters were used + mock_client_instance = mock_boto3_client.return_value + args, kwargs = mock_client_instance.invoke_model.call_args + request_body = json.loads(kwargs["body"]) + + assert request_body["seed"] == 42 # From our mocked random.randint + assert request_body["aspect_ratio"] == "1:1" + assert request_body["output_format"] == "jpeg" + assert request_body["negative_prompt"] == "bad lighting, harsh lighting" + + assert result["status"] == "success" + + +def test_generate_image_error_handling(mock_boto3_client): + """Test error handling in generate_image.""" + # Setup boto3 client to raise an exception + mock_client_instance = mock_boto3_client.return_value + mock_client_instance.invoke_model.side_effect = Exception("API error") + + tool_use = {"toolUseId": "test-tool-use-id", "input": {"prompt": "A cute robot"}} + + result = nova_canvas.nova_canvas(tool=tool_use) + + # Verify error handling + assert result["status"] == "error" + assert "Error generating image: API error" in result["content"][0]["text"] + + +def test_filename_creation(): + """Test the filename creation logic using regex patterns similar to create_filename.""" + + # Since create_filename is defined inside the function, we'll replicate its functionality + def create_filename_test(prompt: str) -> str: + import re + + 
words = re.findall(r"\w+", prompt.lower())[:5] + filename = "_".join(words) + filename = re.sub(r"[^\w\-_\.]", "_", filename) + return filename[:100] + + # Test normal prompt + filename = create_filename_test("A cute robot dancing in the rain") + assert filename == "a_cute_robot_dancing_in" + + # Test prompt with special characters + filename = create_filename_test("A cute robot! With @#$% special chars") + assert filename == "a_cute_robot_with_special" + + # Test long prompt + long_prompt = "This is a very long prompt " + "word " * 50 + filename = create_filename_test(long_prompt) + assert len(filename) <= 100 + + +def test_generate_image_via_agent(agent, mock_boto3_client, mock_os_path_exists, mock_os_makedirs, mock_file_open): + """Test image generation via the agent interface.""" + # This simulates how the tool would be used through the Agent interface + result = agent.tool.nova_canvas(prompt="Test via agent") + + result_text = extract_result_text(result) + assert "The generated image has been saved locally" in result_text From 4996d6d4618c1ad9f626d74d0c416912689e3493 Mon Sep 17 00:00:00 2001 From: cerashdan Date: Tue, 22 Jul 2025 14:10:13 +0000 Subject: [PATCH 15/18] feat(nova_canvas): adding nova canvas tool with image generation, background removal and virtual-try-on features --- src/strands_tools/nova_canvas.py | 264 +++++++++++++------------- tests-integ/test_nova_canvas.py | 310 ------------------------------ tests/test_nova_canvas.py | 93 +++++++-- tests_integ/test_nova_canvas.py | 313 +++++++++++++++++++++++++++++++ 4 files changed, 520 insertions(+), 460 deletions(-) delete mode 100644 tests-integ/test_nova_canvas.py create mode 100644 tests_integ/test_nova_canvas.py diff --git a/src/strands_tools/nova_canvas.py b/src/strands_tools/nova_canvas.py index 786d2721..089c2cac 100644 --- a/src/strands_tools/nova_canvas.py +++ b/src/strands_tools/nova_canvas.py @@ -11,33 +11,33 @@ 1. 
Image Generation: • Text-to-image generation using Amazon Nova Canvas • Customizable generation parameters (height, width, quality, cfg_scale, - seed, style, negative_prompt) + seed, style, negative_text) • Support for pre-defined visual styles: "3D_ANIMATED_FAMILY_FILM" - A style that alludes to 3D animated films. Featuring realistic rendering and characters with cartoonish or exaggerated physical features. - + "DESIGN_SKETCH" - A style featuring hand-drawn line-art without a lot of wash or fill that is not too refined. This style is used to convey concepts and ideas. - + "FLAT_VECTOR_ILLUSTRATION" - A flat-color illustration style that is popular in business communications. - + "GRAPHIC_NOVEL_ILLUSTRATION" - A vivid ink illustration style. Characters do not have exaggerated features, as with some other more cartoon-ish styles. - + "MAXIMALISM" - Bright, elaborate, bold, and complex with strong shapes, and rich details. - + "MIDCENTURY_RETRO" - Alludes to graphic design trends from the 1940s through 1960s. - + "PHOTOREALISM" - Realistic photography style, including different repertoires such as stock photography, editorial photography, journalistic photography, and more. - + "SOFT_DIGITAL_PAINTING" - This style has more finish and refinement than a sketch. It includes shading, three dimensionality, and texture that might be lacking in other styles. @@ -56,7 +56,7 @@ • Automatically remove the background of any image, replacing the background with transparent pixels. • Useful when you want to later composite the image with other elements - in an image editing app, presentation, or website. + in an image editing app, presentation, or website. 4. 
Output Management: • Automatic local saving with intelligent filename generation @@ -114,13 +114,11 @@ import os import random import re -import time from typing import Any import boto3 from strands.types.tools import ToolResult, ToolUse - TOOL_SPEC = { "name": "nova_canvas", "description": "Use Amazon Nova Canvas for image generation, virtual try-on, and background removal tasks", @@ -132,127 +130,142 @@ "type": "string", "description": "Required: the task type for Amazon Nova Canvas", "enum": ["TEXT_IMAGE", "VIRTUAL_TRY_ON", "BACKGROUND_REMOVAL"], - "default": "TEXT_IMAGE" + "default": "TEXT_IMAGE", }, # TEXT_IMAGE parameters - "text": { - "type": "string", - "description": "Text prompt for image generation (required for TEXT_IMAGE)" - }, - "negative_text": { - "type": "string", - "description": "Optional: negative text prompt (TEXT_IMAGE only)" - }, + "text": {"type": "string", "description": "Text prompt for image generation (required for TEXT_IMAGE)"}, + "negative_text": {"type": "string", "description": "Optional: negative text prompt (TEXT_IMAGE only)"}, "style": { "type": "string", "description": "Optional: style for image generation (TEXT_IMAGE only)", - "enum": ["3D_ANIMATED_FAMILY_FILM", "DESIGN_SKETCH", "FLAT_VECTOR_ILLUSTRATION", "GRAPHIC_NOVEL_ILLUSTRATION", "MAXIMALISM", "MIDCENTURY_RETRO", "PHOTOREALISM", "SOFT_DIGITAL_PAINTING"] - }, - "width": { - "type": "integer", - "description": "Optional: image width in pixels (TEXT_IMAGE only)" - }, - "height": { - "type": "integer", - "description": "Optional: image height in pixels (TEXT_IMAGE only)" + "enum": [ + "3D_ANIMATED_FAMILY_FILM", + "DESIGN_SKETCH", + "FLAT_VECTOR_ILLUSTRATION", + "GRAPHIC_NOVEL_ILLUSTRATION", + "MAXIMALISM", + "MIDCENTURY_RETRO", + "PHOTOREALISM", + "SOFT_DIGITAL_PAINTING", + ], }, + "width": {"type": "integer", "description": "Optional: image width in pixels (TEXT_IMAGE only)"}, + "height": {"type": "integer", "description": "Optional: image height in pixels (TEXT_IMAGE 
only)"}, # VIRTUAL_TRY_ON parameters "image_path": { "type": "string", - "description": "Path to source image file to modify (required for VIRTUAL_TRY_ON and BACKGROUND_REMOVAL)" + "description": "Path to source image file to modify (required for VIRTUAL_TRY_ON and \ + BACKGROUND_REMOVAL)", }, "reference_image_path": { "type": "string", - "description": "Path to reference image file containing the object to superimpose (required for VIRTUAL_TRY_ON)" + "description": "Path to reference image file containing the object to superimpose \ + (required for VIRTUAL_TRY_ON)", }, "mask_type": { "type": "string", - "description": "Specifies whether the mask is provided as prompt, or garment mask (required for VIRTUAL_TRY_ON)", - "enum": ["GARMENT", "PROMPT"] + "description": "Specifies whether the mask is provided as prompt, or garment mask \ + (required for VIRTUAL_TRY_ON)", + "enum": ["GARMENT", "PROMPT"], }, "mask_shape": { "type": "string", - "description": "Defines the shape of the mask bounding box, affecting how reference image is transferred (optional for mask_type GARMET and PROMPT)", - "enum": ["CONTOUR", "BOUNDING_BOX", "DEFAULT"] + "description": "Defines the shape of the mask bounding box, affecting how reference image \ + is transferred (optional for mask_type GARMET and PROMPT)", + "enum": ["CONTOUR", "BOUNDING_BOX", "DEFAULT"], }, "garment_class": { "type": "string", - "description": "Defines the article of clothing being transferred. Required when mask_type is GARMENT", - "enum": ["UPPER_BODY", "LOWER_BODY", "FULL_BODY", "FOOTWEAR", "LONG_SLEEVE_SHIRT", "SHORT_SLEEVE_SHIRT", "NO_SLEEVE_SHIRT", "OTHER_UPPER_BODY", "LONG_PANTS", "SHORT_PANTS", "OTHER_LOWER_BODY", "LONG_DRESS", "SHORT_DRESS", "FULL_BODY_OUTFIT", "OTHER_FULL_BODY", "SHOES", "BOOTS", "OTHER_FOOTWEAR"] + "description": "Defines the article of clothing being transferred. 
Required when mask_type \ + is GARMENT", + "enum": [ + "UPPER_BODY", + "LOWER_BODY", + "FULL_BODY", + "FOOTWEAR", + "LONG_SLEEVE_SHIRT", + "SHORT_SLEEVE_SHIRT", + "NO_SLEEVE_SHIRT", + "OTHER_UPPER_BODY", + "LONG_PANTS", + "SHORT_PANTS", + "OTHER_LOWER_BODY", + "LONG_DRESS", + "SHORT_DRESS", + "FULL_BODY_OUTFIT", + "OTHER_FULL_BODY", + "SHOES", + "BOOTS", + "OTHER_FOOTWEAR", + ], }, "long_sleeve_style": { "type": "string", - "description": "Styling for long sleeve garments (optional for GARMET mask_type and applies only to upper body garments)", - "enum": ["SLEEVE_DOWN", "SLEEVE_UP"] + "description": "Styling for long sleeve garments (optional for GARMET mask_type and applies \ + only to upper body garments)", + "enum": ["SLEEVE_DOWN", "SLEEVE_UP"], }, "tucking_style": { "type": "string", - "description": "Tucking style option (optional for GARMET mask_type and applies only to upper body garments)", - "enum": ["UNTUCKED", "TUCKED"] + "description": "Tucking style option (optional for GARMET mask_type and applies only to upper \ + body garments)", + "enum": ["UNTUCKED", "TUCKED"], }, "outer_layer_style": { "type": "string", - "description": "Styling for outer layer garments (optional for GARMET mask_type and applies only to outer layer, upper body garments)", - "enum": ["CLOSED", "OPEN"] + "description": "Styling for outer layer garments (optional for GARMET mask_type and applies only \ + to outer layer, upper body garments)", + "enum": ["CLOSED", "OPEN"], }, "mask_prompt": { "type": "string", - "description": "Natural language text prompt describing regions to edit. Required when mask_type is PROMPT" + "description": "Natural language text prompt describing regions to edit. 
Required when mask_type \ + is PROMPT", }, "preserve_body_pose": { "type": "string", - "description": "Optional: whether to preserve the body pose in the output image when a person is detected", - "enum": ["ON", "OFF", "DEFAULT"] + "description": "Optional: whether to preserve the body pose in the output image when a person is \ + detected", + "enum": ["ON", "OFF", "DEFAULT"], }, "preserve_hands": { "type": "string", "description": "Optional: whether to preserve hands in the output image when a person is detected", - "enum": ["ON", "OFF", "DEFAULT"] + "enum": ["ON", "OFF", "DEFAULT"], }, "preserve_face": { "type": "string", - "description": "Optional: whether to preserve the face in the output image when a person is detected", - "enum": ["OFF", "ON", "DEFAULT"] + "description": "Optional: whether to preserve the face in the output image when a person is \ + detected", + "enum": ["OFF", "ON", "DEFAULT"], }, "merge_style": { "type": "string", "description": "Optional: determines how source and reference images are stitched together", "enum": ["BALANCED", "SEAMLESS", "DETAILED"], - "default": "BALANCED" + "default": "BALANCED", }, # BACKGROUND_REMOVAL parameters # (uses image_path parameter defined above) - # Common parameters "quality": { "type": "string", "description": "Image quality", "enum": ["standard", "premium"], - "default": "standard" + "default": "standard", }, "cfg_scale": { "type": "number", "description": "How strictly to adhere to the prompt. 
Range: 1.1-10", "minimum": 1.1, "maximum": 10, - "default": 6.5 - }, - "seed": { - "type": "integer", - "description": "Seed for reproducible results" + "default": 6.5, }, - "model_id": { - "type": "string", - "description": "Model ID", - "default": "amazon.nova-canvas-v1:0" - }, - "region": { - "type": "string", - "description": "AWS region", - "default": "us-east-1" - } + "seed": {"type": "integer", "description": "Seed for reproducible results"}, + "model_id": {"type": "string", "description": "Model ID", "default": "amazon.nova-canvas-v1:0"}, + "region": {"type": "string", "description": "AWS region", "default": "us-east-1"}, }, - "required": [] + "required": [], } }, } @@ -275,9 +288,9 @@ def encode_image_file(file_path): def nova_canvas(tool: ToolUse, **kwargs: Any) -> ToolResult: """ Use Amazon Nova Canvas for image generation, virtual try-on, and background removal. - + This function supports three main task types: - + 1. TEXT_IMAGE - Generate images from text prompts with optional style parameters 2. VIRTUAL_TRY_ON - Superimpose objects from a reference image onto a source image 3. 
BACKGROUND_REMOVAL - Remove the background from an image @@ -285,28 +298,29 @@ def nova_canvas(tool: ToolUse, **kwargs: Any) -> ToolResult: try: tool_use_id = tool["toolUseId"] tool_input = tool["input"] - + task_type = tool_input.get("task_type", "TEXT_IMAGE") model_id = tool_input.get("model_id", "amazon.nova-canvas-v1:0") region = tool_input.get("region", "us-east-1") - + client = boto3.client("bedrock-runtime", region_name=region) - + # Build request based on task type if task_type == "TEXT_IMAGE": request_body = { "taskType": "TEXT_IMAGE", "textToImageParams": { - "text": tool_input.get("prompt", "A beautiful landscape") + "text": tool_input.get("text", "A beautiful landscape"), }, "imageGenerationConfig": { - "quality": tool_input.get("quality", "standard") - } + "quality": tool_input.get("quality", "standard"), + "seed": tool_input.get("seed", random.randint(0, 2147483646)), + }, } - + # Add optional TEXT_IMAGE parameters if "negative_text" in tool_input: - request_body["textToImageParams"]["negativeText"] = tool_input["negative_prompt"] + request_body["textToImageParams"]["negativeText"] = tool_input["negative_text"] if "style" in tool_input: request_body["textToImageParams"]["style"] = tool_input["style"] if "width" in tool_input: @@ -315,9 +329,7 @@ def nova_canvas(tool: ToolUse, **kwargs: Any) -> ToolResult: request_body["imageGenerationConfig"]["height"] = tool_input["height"] if "cfg_scale" in tool_input: request_body["imageGenerationConfig"]["cfgScale"] = tool_input["cfg_scale"] - if "seed" in tool_input: - request_body["imageGenerationConfig"]["seed"] = tool_input["seed"] - + elif task_type == "VIRTUAL_TRY_ON": # Validate required parameters if "image_path" not in tool_input: @@ -326,119 +338,106 @@ def nova_canvas(tool: ToolUse, **kwargs: Any) -> ToolResult: raise ValueError("reference_image_path is required for VIRTUAL_TRY_ON") if "mask_type" not in tool_input: raise ValueError("mask_type is required for VIRTUAL_TRY_ON") - + # Read and encode 
images source_image_b64 = encode_image_file(tool_input["image_path"]) reference_image_b64 = encode_image_file(tool_input["reference_image_path"]) - + # Initialize request structure request_body = { "taskType": "VIRTUAL_TRY_ON", "virtualTryOnParams": { "sourceImage": source_image_b64, "referenceImage": reference_image_b64, - "maskType": tool_input["mask_type"] + "maskType": tool_input["mask_type"], }, - "imageGenerationConfig": { - "quality": tool_input.get("quality", "standard") - } + "imageGenerationConfig": {"quality": tool_input.get("quality", "standard")}, } - + # Handle mask type specific parameters mask_type = tool_input["mask_type"] - if mask_type == "GARMENT": if "garment_class" not in tool_input: raise ValueError("garment_class is required when mask_type is GARMENT") - - garment_mask = { - "garmentClass": tool_input["garment_class"] - } - + + garment_mask = {"garmentClass": tool_input["garment_class"]} + if "mask_shape" in tool_input: garment_mask["maskShape"] = tool_input["mask_shape"] - + # Add garment styling if any styling options are provided styling_params = ["long_sleeve_style", "tucking_style", "outer_layer_style"] if any(param in tool_input for param in styling_params): garment_mask["garmentStyling"] = {} - + if "long_sleeve_style" in tool_input: garment_mask["garmentStyling"]["longSleeveStyle"] = tool_input["long_sleeve_style"] if "tucking_style" in tool_input: garment_mask["garmentStyling"]["tuckingStyle"] = tool_input["tucking_style"] if "outer_layer_style" in tool_input: garment_mask["garmentStyling"]["outerLayerStyle"] = tool_input["outer_layer_style"] - + request_body["virtualTryOnParams"]["garmentBasedMask"] = garment_mask - + elif mask_type == "PROMPT": if "mask_prompt" not in tool_input: raise ValueError("mask_prompt is required when mask_type is PROMPT") - - prompt_mask = { - "maskPrompt": tool_input["mask_prompt"] - } - + + prompt_mask = {"maskPrompt": tool_input["mask_prompt"]} + if "mask_shape" in tool_input: prompt_mask["maskShape"] 
= tool_input["mask_shape"] - + request_body["virtualTryOnParams"]["promptBasedMask"] = prompt_mask - + # Add mask exclusions if any are provided exclusion_params = ["preserve_body_pose", "preserve_hands", "preserve_face"] if any(param in tool_input for param in exclusion_params): request_body["virtualTryOnParams"]["maskExclusions"] = {} - + if "preserve_body_pose" in tool_input: - request_body["virtualTryOnParams"]["maskExclusions"]["preserveBodyPose"] = tool_input["preserve_body_pose"] + request_body["virtualTryOnParams"]["maskExclusions"]["preserveBodyPose"] = tool_input[ + "preserve_body_pose" + ] if "preserve_hands" in tool_input: request_body["virtualTryOnParams"]["maskExclusions"]["preserveHands"] = tool_input["preserve_hands"] if "preserve_face" in tool_input: request_body["virtualTryOnParams"]["maskExclusions"]["preserveFace"] = tool_input["preserve_face"] - + # Add merge style and return mask options if "merge_style" in tool_input: request_body["virtualTryOnParams"]["mergeStyle"] = tool_input["merge_style"] if "return_mask" in tool_input: request_body["virtualTryOnParams"]["returnMask"] = tool_input["return_mask"] - + # Add common generation config parameters if "cfg_scale" in tool_input: request_body["imageGenerationConfig"]["cfgScale"] = tool_input["cfg_scale"] if "seed" in tool_input: request_body["imageGenerationConfig"]["seed"] = tool_input["seed"] - + elif task_type == "BACKGROUND_REMOVAL": if "image_path" not in tool_input: raise ValueError("image_path is required for BACKGROUND_REMOVAL") - + # Read and encode image image_b64 = encode_image_file(tool_input["image_path"]) - - request_body = { - "taskType": "BACKGROUND_REMOVAL", - "backgroundRemovalParams": { - "image": image_b64 - } - } + + request_body = {"taskType": "BACKGROUND_REMOVAL", "backgroundRemovalParams": {"image": image_b64}} else: raise ValueError(f"Unsupported task type: {task_type}") - + # Invoke the model - response = client.invoke_model( - modelId=model_id, - 
body=json.dumps(request_body) - ) - + response = client.invoke_model(modelId=model_id, body=json.dumps(request_body)) + # Process response model_response = json.loads(response["body"].read().decode("utf-8")) - + # Extract image data if "images" in model_response and len(model_response["images"]) > 0: base64_image_data = model_response["images"][0] - + # Create filename based on task type if task_type == "TEXT_IMAGE": filename = create_filename(tool_input.get("prompt", "generated_image")) @@ -452,22 +451,22 @@ def nova_canvas(tool: ToolUse, **kwargs: Any) -> ToolResult: source_filename = os.path.basename(tool_input["image_path"]) base_name = os.path.splitext(source_filename)[0] filename = f"{base_name}_no_bg" - + # Save image output_dir = "output" if not os.path.exists(output_dir): os.makedirs(output_dir) - + i = 1 base_image_path = os.path.join(output_dir, f"{filename}.png") image_path = base_image_path while os.path.exists(image_path): image_path = os.path.join(output_dir, f"{filename}_{i}.png") i += 1 - + with open(image_path, "wb") as file: file.write(base64.b64decode(base64_image_data)) - + return { "toolUseId": tool_use_id, "status": "success", @@ -483,11 +482,10 @@ def nova_canvas(tool: ToolUse, **kwargs: Any) -> ToolResult: } else: raise ValueError("No image data found in the response") - + except Exception as e: return { "toolUseId": tool_use_id, "status": "error", - "content": [{"text": f"Error: {str(e)}"}], + "content": [{"text": f"Error generating image: {str(e)}"}], } - \ No newline at end of file diff --git a/tests-integ/test_nova_canvas.py b/tests-integ/test_nova_canvas.py deleted file mode 100644 index 28a38342..00000000 --- a/tests-integ/test_nova_canvas.py +++ /dev/null @@ -1,310 +0,0 @@ -import os - -import pytest -from strands import Agent -from strands_tools import nova_canvas, image_reader - - -@pytest.fixture -def agent(): - """Agent with image generation and reader tools.""" - return Agent(tools=[nova_canvas, image_reader]) - - -def 
test_generate_and_read_image(agent, tmp_path): - # 1. Generate a lovely dog picture - prompt = "A corgi riding a skateboard in Times Square" - image_gen_result = agent.tool.nova_canvas( - prompt=prompt, - task_type="TEXT_IMAGE", - model_id="amazon.nova-canvas-v1:0", - negative_prompt="blurry, low quality", - ) - assert image_gen_result["status"] == "success", str(image_gen_result) - content = image_gen_result["content"] - - # Extract and verify image bytes from result - found_image = None - for item in content: - if "image" in item and "source" in item["image"]: - found_image = item["image"]["source"]["bytes"] - assert isinstance(found_image, bytes), "Returned image bytes are not 'bytes' type" - assert len(found_image) > 1000, "Returned image is too small to be valid" - break - assert found_image is not None, "No image bytes found in result" - - # Save image to temp directory - image_path = tmp_path / "generated.png" - with open(image_path, "wb") as f: - f.write(found_image) - - # 2. use image_reader tool to verify it's a real image - assert os.path.exists(image_path), f"Image file not found at {image_path}" - read_result = agent.tool.image_reader(image_path=str(image_path)) - assert read_result["status"] == "success", str(read_result) - image_content = read_result["content"][0]["image"] - assert image_content["format"] == "png" - assert isinstance(image_content["source"]["bytes"], bytes) - assert len(image_content["source"]["bytes"]) > 1000 - - # 3. test semantic usage to check if it recognizes dog/corgi - semantic_result = agent(f"What is the image at `{image_path}`") - assert "dog" in str(semantic_result).lower() or "corgi" in str(semantic_result).lower() - -def test_remove_background(agent, tmp_path): - # 1. 
Generate an image - prompt = "A corgi riding a skateboard in Times Square" - image_gen_result = agent.tool.nova_canvas( - prompt=prompt, - task_type="TEXT_IMAGE", - model_id="amazon.nova-canvas-v1:0", - negative_prompt="blurry, low quality", - ) - assert image_gen_result["status"] == "success", str(image_gen_result) - content = image_gen_result["content"] - - # Extract and verify image bytes from result - found_image = None - for item in content: - if "image" in item and "source" in item["image"]: - found_image = item["image"]["source"]["bytes"] - assert isinstance(found_image, bytes), "Returned image bytes are not 'bytes' type" - assert len(found_image) > 1000, "Returned image is too small to be valid" - break - assert found_image is not None, "No image bytes found in result" - - # Save image to temp directory - image_path = tmp_path / "generated.png" - with open(image_path, "wb") as f: - f.write(found_image) - - #2. Remove the background from the generated image - image_gen_result = agent.tool.nova_canvas( - task_type="BACKGROUND_REMOVAL", - model_id="amazon.nova-canvas-v1:0", - image_path=str(image_path), - ) - assert image_gen_result["status"] == "success", str(image_gen_result) - content = image_gen_result["content"] - - # Extract and verify the image with removed background bytes from result - found_image = None - for item in content: - if "image" in item and "source" in item["image"]: - found_image = item["image"]["source"]["bytes"] - assert isinstance(found_image, bytes), "Returned image bytes are not 'bytes' type" - assert len(found_image) > 1000, "Returned image is too small to be valid" - break - assert found_image is not None, "No image bytes found in result" - - # Save image to temp directory - image_path_no_bg = tmp_path / "generated_no_bg.png" - with open(image_path_no_bg, "wb") as f: - f.write(found_image) - - # 2. 
use image_reader tool to verify it's a real image - assert os.path.exists(image_path_no_bg), f"Image file not found at {image_path_no_bg}" - read_result = agent.tool.image_reader(image_path=str(image_path_no_bg)) - assert read_result["status"] == "success", str(read_result) - image_content = read_result["content"][0]["image"] - assert image_content["format"] == "png" - assert isinstance(image_content["source"]["bytes"], bytes) - assert len(image_content["source"]["bytes"]) > 1000 - - # 3. test semantic usage to check if it recognizes dog/corgi - semantic_result = agent(f"Has the background been removed from the image at `{image_path_no_bg} - compare with image at {image_path}` \ - respond with yes or no first") - assert "yes" in str(semantic_result).lower() - -def test_virtual_try_on_mask_garment(agent, tmp_path): - # 1. Generate an image of an empty living room - prompt = "full body person with a warm, genuine smile standing facing directly at the camera. \ - in a sunny neighberhood with green nature." - image_gen_result = agent.tool.nova_canvas( - prompt=prompt, - task_type="TEXT_IMAGE", - model_id="amazon.nova-canvas-v1:0", - negative_prompt="blurry, low quality", - ) - assert image_gen_result["status"] == "success", str(image_gen_result) - content = image_gen_result["content"] - - # Extract and verify image bytes from result - found_image = None - for item in content: - if "image" in item and "source" in item["image"]: - found_image = item["image"]["source"]["bytes"] - assert isinstance(found_image, bytes), "Returned image bytes are not 'bytes' type" - assert len(found_image) > 1000, "Returned image is too small to be valid" - break - assert found_image is not None, "No image bytes found in result" - - # Save the empty living room with red couch image to temp directory - living_room_image_path = tmp_path / "human_standing.png" - with open(living_room_image_path, "wb") as f: - f.write(found_image) - - # 2. 
Generate an image of a yellow couch - prompt = "Generate a vibrant tech hoodie with AWS written on it" - image_gen_result = agent.tool.nova_canvas( - prompt=prompt, - task_type="TEXT_IMAGE", - model_id="amazon.nova-canvas-v1:0", - negative_prompt="blurry, low quality", - ) - assert image_gen_result["status"] == "success", str(image_gen_result) - content = image_gen_result["content"] - - # Extract and verify image bytes from result - found_image = None - for item in content: - if "image" in item and "source" in item["image"]: - found_image = item["image"]["source"]["bytes"] - assert isinstance(found_image, bytes), "Returned image bytes are not 'bytes' type" - assert len(found_image) > 1000, "Returned image is too small to be valid" - break - assert found_image is not None, "No image bytes found in result" - - # Save the couch image to temp directory - couch_image_path = tmp_path / "ai_hoodie_aws.png" - with open(couch_image_path, "wb") as f: - f.write(found_image) - - # 3. Virtual try on the couch on the empty living room - image_gen_result = agent.tool.nova_canvas( - task_type="VIRTUAL_TRY_ON", - model_id="amazon.nova-canvas-v1:0", - image_path=str(living_room_image_path), - reference_image_path=str(couch_image_path), - mask_type="GARMENT", - garment_class="UPPER_BODY", - longSleeveStyle="SLEEVE_DOWN" - ) - - assert image_gen_result["status"] == "success", str(image_gen_result) - content = image_gen_result["content"] - - # Extract and verify image bytes from result - found_image = None - for item in content: - if "image" in item and "source" in item["image"]: - found_image = item["image"]["source"]["bytes"] - assert isinstance(found_image, bytes), "Returned image bytes are not 'bytes' type" - assert len(found_image) > 1000, "Returned image is too small to be valid" - break - assert found_image is not None, "No image bytes found in result" - - # Save image to temp directory - image_path = tmp_path / "hoodie_ai_garmet_try_on.png" - with open(image_path, "wb") as f: - 
f.write(found_image) - - # 2. use image_reader tool to verify it's a real image - assert os.path.exists(image_path), f"Image file not found at {image_path}" - read_result = agent.tool.image_reader(image_path=str(image_path)) - assert read_result["status"] == "success", str(read_result) - image_content = read_result["content"][0]["image"] - assert image_content["format"] == "png" - assert isinstance(image_content["source"]["bytes"], bytes) - assert len(image_content["source"]["bytes"]) > 1000 - - # 3. test semantic usage to check if it recognizes dog/corgi - semantic_result = agent(f"Does the image at path `{image_path}` contain a a person wearing an AWS hoodie?\ - respond with yes or no first") - assert "yes" in str(semantic_result).lower() - -def test_virtual_try_on_prompt_mask(agent, tmp_path): - # 1. Generate an image of an empty living room - prompt = "a living room with a white background and a purple couch in the middle" - image_gen_result = agent.tool.nova_canvas( - prompt=prompt, - task_type="TEXT_IMAGE", - model_id="amazon.nova-canvas-v1:0", - negative_prompt="blurry, low quality", - ) - assert image_gen_result["status"] == "success", str(image_gen_result) - content = image_gen_result["content"] - - # Extract and verify image bytes from result - found_image = None - for item in content: - if "image" in item and "source" in item["image"]: - found_image = item["image"]["source"]["bytes"] - assert isinstance(found_image, bytes), "Returned image bytes are not 'bytes' type" - assert len(found_image) > 1000, "Returned image is too small to be valid" - break - assert found_image is not None, "No image bytes found in result" - - # Save the empty living room with red couch image to temp directory - living_room_image_path = tmp_path / "empty_room_blue_couch.png" - with open(living_room_image_path, "wb") as f: - f.write(found_image) - - # 2. 
Generate an image of a yellow couch - prompt = "Generate a green couch with white background" - image_gen_result = agent.tool.nova_canvas( - prompt=prompt, - task_type="TEXT_IMAGE", - model_id="amazon.nova-canvas-v1:0", - negative_prompt="blurry, low quality", - ) - assert image_gen_result["status"] == "success", str(image_gen_result) - content = image_gen_result["content"] - - # Extract and verify image bytes from result - found_image = None - for item in content: - if "image" in item and "source" in item["image"]: - found_image = item["image"]["source"]["bytes"] - assert isinstance(found_image, bytes), "Returned image bytes are not 'bytes' type" - assert len(found_image) > 1000, "Returned image is too small to be valid" - break - assert found_image is not None, "No image bytes found in result" - - # Save the couch image to temp directory - couch_image_path = tmp_path / "couch.png" - with open(couch_image_path, "wb") as f: - f.write(found_image) - - # 3. Virtual try on the couch on the empty living room - image_gen_result = agent.tool.nova_canvas( - task_type="VIRTUAL_TRY_ON", - model_id="amazon.nova-canvas-v1:0", - image_path=str(living_room_image_path), - reference_image_path=str(couch_image_path), - mask_type="PROMPT", - mask_prompt="replace the couch with yellow couch" - ) - - assert image_gen_result["status"] == "success", str(image_gen_result) - content = image_gen_result["content"] - - # Extract and verify image bytes from result - found_image = None - for item in content: - if "image" in item and "source" in item["image"]: - found_image = item["image"]["source"]["bytes"] - assert isinstance(found_image, bytes), "Returned image bytes are not 'bytes' type" - assert len(found_image) > 1000, "Returned image is too small to be valid" - break - assert found_image is not None, "No image bytes found in result" - - # Save image to temp directory - image_path = tmp_path / "living_room_couch_try_on.png" - with open(image_path, "wb") as f: - f.write(found_image) - - # 
2. use image_reader tool to verify it's a real image - assert os.path.exists(image_path), f"Image file not found at {image_path}" - read_result = agent.tool.image_reader(image_path=str(image_path)) - assert read_result["status"] == "success", str(read_result) - image_content = read_result["content"][0]["image"] - assert image_content["format"] == "png" - assert isinstance(image_content["source"]["bytes"], bytes) - assert len(image_content["source"]["bytes"]) > 1000 - - # 3. test semantic usage to check if it recognizes dog/corgi - semantic_result = agent(f"Does the image at path `{image_path}` contain a green couch in an empty living room?\ - respond with yes or no first") - print(f"\n Agent response: {semantic_result}") - assert "yes" in str(semantic_result).lower() and "green" in str(semantic_result).lower() \ No newline at end of file diff --git a/tests/test_nova_canvas.py b/tests/test_nova_canvas.py index 78dd664e..2565041e 100644 --- a/tests/test_nova_canvas.py +++ b/tests/test_nova_canvas.py @@ -74,11 +74,11 @@ def test_generate_image_direct(mock_boto3_client, mock_os_path_exists, mock_os_m tool_use = { "toolUseId": "test-tool-use-id", "input": { - "prompt": "A cute robot", + "task_type": "TEXT_IMAGE", + "text": "A cute robot", "seed": 123, - "aspect_ratio": "5:4", - "output_format": "png", - "negative_prompt": "blurry, low resolution, pixelated, grainy, unrealistic", + "negative_text": "blurry, low resolution, pixelated, grainy, unrealistic", + "style": "DESIGN_SKETCH", }, } @@ -94,11 +94,10 @@ def test_generate_image_direct(mock_boto3_client, mock_os_path_exists, mock_os_m args, kwargs = mock_client_instance.invoke_model.call_args request_body = json.loads(kwargs["body"]) - assert request_body["prompt"] == "A cute robot" - assert request_body["seed"] == 123 - assert request_body["aspect_ratio"] == "5:4" - assert request_body["output_format"] == "png" - assert request_body["negative_prompt"] == "blurry, low resolution, pixelated, grainy, unrealistic" + 
assert request_body["textToImageParams"]["text"] == "A cute robot" + assert request_body["textToImageParams"]["style"] == "DESIGN_SKETCH" + assert request_body["textToImageParams"]["negativeText"] == "blurry, low resolution, pixelated, grainy, unrealistic" + assert request_body["imageGenerationConfig"]["seed"] == 123 # Verify directory creation mock_os_makedirs.assert_called_once() @@ -110,7 +109,7 @@ def test_generate_image_direct(mock_boto3_client, mock_os_path_exists, mock_os_m # Check the result assert result["toolUseId"] == "test-tool-use-id" assert result["status"] == "success" - assert "The generated image has been saved locally" in result["content"][0]["text"] + assert "TEXT_IMAGE task completed successfully." in result["content"][0]["text"] assert result["content"][1]["image"]["format"] == "png" assert isinstance(result["content"][1]["image"]["source"]["bytes"], bytes) @@ -127,11 +126,8 @@ def test_generate_image_default_params(mock_boto3_client, mock_os_path_exists, m args, kwargs = mock_client_instance.invoke_model.call_args request_body = json.loads(kwargs["body"]) - assert request_body["seed"] == 42 # From our mocked random.randint - assert request_body["aspect_ratio"] == "1:1" - assert request_body["output_format"] == "jpeg" - assert request_body["negative_prompt"] == "bad lighting, harsh lighting" - + assert request_body["imageGenerationConfig"]["seed"] == 42 # From our mocked random.randint + assert request_body["imageGenerationConfig"]["quality"] == "standard" assert result["status"] == "success" @@ -150,6 +146,69 @@ def test_generate_image_error_handling(mock_boto3_client): assert "Error generating image: API error" in result["content"][0]["text"] +def test_virtual_try_on(mock_boto3_client, mock_os_path_exists, mock_os_makedirs, mock_file_open): + """Test virtual try-on functionality.""" + # Mock file reading for images + with patch("builtins.open", mock_file_open[0]): + with patch("strands_tools.nova_canvas.encode_image_file") as mock_encode: + 
mock_encode.side_effect = ["source_image_b64", "reference_image_b64"] + + tool_use = { + "toolUseId": "test-tool-use-id", + "input": { + "task_type": "VIRTUAL_TRY_ON", + "image_path": "person.jpg", + "reference_image_path": "shirt.jpg", + "mask_type": "GARMENT", + "garment_class": "SHORT_SLEEVE_SHIRT", + "preserve_face": "ON", + }, + } + + result = nova_canvas.nova_canvas(tool=tool_use) + + # Verify the function was called with correct parameters + mock_client_instance = mock_boto3_client.return_value + args, kwargs = mock_client_instance.invoke_model.call_args + request_body = json.loads(kwargs["body"]) + + assert request_body["taskType"] == "VIRTUAL_TRY_ON" + assert request_body["virtualTryOnParams"]["sourceImage"] == "source_image_b64" + assert request_body["virtualTryOnParams"]["referenceImage"] == "reference_image_b64" + assert request_body["virtualTryOnParams"]["maskType"] == "GARMENT" + assert request_body["virtualTryOnParams"]["garmentBasedMask"]["garmentClass"] == "SHORT_SLEEVE_SHIRT" + assert request_body["virtualTryOnParams"]["maskExclusions"]["preserveFace"] == "ON" + + assert result["status"] == "success" + assert "VIRTUAL_TRY_ON task completed successfully" in result["content"][0]["text"] + + +def test_background_removal(mock_boto3_client, mock_os_path_exists, mock_os_makedirs, mock_file_open): + """Test background removal functionality.""" + # Mock file reading for image + with patch("builtins.open", mock_file_open[0]): + with patch("strands_tools.nova_canvas.encode_image_file") as mock_encode: + mock_encode.return_value = "image_b64_data" + + tool_use = { + "toolUseId": "test-tool-use-id", + "input": {"task_type": "BACKGROUND_REMOVAL", "image_path": "photo.jpg"}, + } + + result = nova_canvas.nova_canvas(tool=tool_use) + + # Verify the function was called with correct parameters + mock_client_instance = mock_boto3_client.return_value + args, kwargs = mock_client_instance.invoke_model.call_args + request_body = json.loads(kwargs["body"]) + + assert 
request_body["taskType"] == "BACKGROUND_REMOVAL" + assert request_body["backgroundRemovalParams"]["image"] == "image_b64_data" + + assert result["status"] == "success" + assert "BACKGROUND_REMOVAL task completed successfully" in result["content"][0]["text"] + + def test_filename_creation(): """Test the filename creation logic using regex patterns similar to create_filename.""" @@ -177,9 +236,9 @@ def create_filename_test(prompt: str) -> str: def test_generate_image_via_agent(agent, mock_boto3_client, mock_os_path_exists, mock_os_makedirs, mock_file_open): - """Test image generation via the agent interface.""" + """Test image generation (default tool) via the agent interface.""" # This simulates how the tool would be used through the Agent interface result = agent.tool.nova_canvas(prompt="Test via agent") result_text = extract_result_text(result) - assert "The generated image has been saved locally" in result_text + assert "TEXT_IMAGE task completed successfully." in result_text diff --git a/tests_integ/test_nova_canvas.py b/tests_integ/test_nova_canvas.py new file mode 100644 index 00000000..bcdcba3b --- /dev/null +++ b/tests_integ/test_nova_canvas.py @@ -0,0 +1,313 @@ +import os + +import pytest +from strands import Agent +from strands_tools import image_reader, nova_canvas + + +@pytest.fixture +def agent(): + """Agent with image generation and reader tools.""" + return Agent(tools=[nova_canvas, image_reader]) + + +# def test_generate_and_read_image(agent, tmp_path): +# # 1. 
Generate a lovely dog picture +# prompt = "A corgi riding a skateboard in Times Square" +# image_gen_result = agent.tool.nova_canvas( +# text=prompt, +# task_type="TEXT_IMAGE", +# model_id="amazon.nova-canvas-v1:0", +# negative_prompt="blurry, low quality", +# ) +# assert image_gen_result["status"] == "success", str(image_gen_result) +# content = image_gen_result["content"] + +# # Extract and verify image bytes from result +# found_image = None +# for item in content: +# if "image" in item and "source" in item["image"]: +# found_image = item["image"]["source"]["bytes"] +# assert isinstance(found_image, bytes), "Returned image bytes are not 'bytes' type" +# assert len(found_image) > 1000, "Returned image is too small to be valid" +# break +# assert found_image is not None, "No image bytes found in result" + +# # Save image to temp directory +# image_path = tmp_path / "generated.png" +# with open(image_path, "wb") as f: +# f.write(found_image) + +# # 2. use image_reader tool to verify it's a real image +# assert os.path.exists(image_path), f"Image file not found at {image_path}" +# read_result = agent.tool.image_reader(image_path=str(image_path)) +# assert read_result["status"] == "success", str(read_result) +# image_content = read_result["content"][0]["image"] +# assert image_content["format"] == "png" +# assert isinstance(image_content["source"]["bytes"], bytes) +# assert len(image_content["source"]["bytes"]) > 1000 + +# # 3. test semantic usage to check if it recognizes dog/corgi +# semantic_result = agent(f"What is the image at `{image_path}`") +# assert "dog" in str(semantic_result).lower() or "corgi" in str(semantic_result).lower() + + +# def test_remove_background(agent, tmp_path): +# # 1. 
Generate an image +# prompt = "A corgi riding a skateboard in Times Square" +# image_gen_result = agent.tool.nova_canvas( +# text=prompt, +# task_type="TEXT_IMAGE", +# model_id="amazon.nova-canvas-v1:0", +# negative_prompt="blurry, low quality", +# ) +# assert image_gen_result["status"] == "success", str(image_gen_result) +# content = image_gen_result["content"] + +# # Extract and verify image bytes from result +# found_image = None +# for item in content: +# if "image" in item and "source" in item["image"]: +# found_image = item["image"]["source"]["bytes"] +# assert isinstance(found_image, bytes), "Returned image bytes are not 'bytes' type" +# assert len(found_image) > 1000, "Returned image is too small to be valid" +# break +# assert found_image is not None, "No image bytes found in result" + +# # Save image to temp directory +# image_path = tmp_path / "generated.png" +# with open(image_path, "wb") as f: +# f.write(found_image) + +# # 2. Remove the background from the generated image +# image_gen_result = agent.tool.nova_canvas( +# task_type="BACKGROUND_REMOVAL", +# model_id="amazon.nova-canvas-v1:0", +# image_path=str(image_path), +# ) +# assert image_gen_result["status"] == "success", str(image_gen_result) +# content = image_gen_result["content"] + +# # Extract and verify the image with removed background bytes from result +# found_image = None +# for item in content: +# if "image" in item and "source" in item["image"]: +# found_image = item["image"]["source"]["bytes"] +# assert isinstance(found_image, bytes), "Returned image bytes are not 'bytes' type" +# assert len(found_image) > 1000, "Returned image is too small to be valid" +# break +# assert found_image is not None, "No image bytes found in result" + +# # Save image to temp directory +# image_path_no_bg = tmp_path / "generated_no_bg.png" +# with open(image_path_no_bg, "wb") as f: +# f.write(found_image) + +# # 2. 
use image_reader tool to verify it's a real image +# assert os.path.exists(image_path_no_bg), f"Image file not found at {image_path_no_bg}" +# read_result = agent.tool.image_reader(image_path=str(image_path_no_bg)) +# assert read_result["status"] == "success", str(read_result) +# image_content = read_result["content"][0]["image"] +# assert image_content["format"] == "png" +# assert isinstance(image_content["source"]["bytes"], bytes) +# assert len(image_content["source"]["bytes"]) > 1000 + +# # 3. test semantic usage to check if it recognizes dog/corgi +# semantic_result = agent(f"Has the background been removed from the image at `{image_path_no_bg} \ +# - compare with image at {image_path}` respond with yes or no first") +# assert "yes" in str(semantic_result).lower() + + +# def test_virtual_try_on_mask_garment(agent, tmp_path): +# # 1. Generate an image of an empty living room +# prompt = "full body person with a warm, genuine smile standing facing directly at the camera. \ +# in a sunny neighberhood with green nature." +# image_gen_result = agent.tool.nova_canvas( +# text=prompt, +# task_type="TEXT_IMAGE", +# model_id="amazon.nova-canvas-v1:0", +# negative_prompt="blurry, low quality", +# ) +# assert image_gen_result["status"] == "success", str(image_gen_result) +# content = image_gen_result["content"] + +# # Extract and verify image bytes from result +# found_image = None +# for item in content: +# if "image" in item and "source" in item["image"]: +# found_image = item["image"]["source"]["bytes"] +# assert isinstance(found_image, bytes), "Returned image bytes are not 'bytes' type" +# assert len(found_image) > 1000, "Returned image is too small to be valid" +# break +# assert found_image is not None, "No image bytes found in result" + +# # Save the empty living room with red couch image to temp directory +# living_room_image_path = tmp_path / "human_standing.png" +# with open(living_room_image_path, "wb") as f: +# f.write(found_image) + +# # 2. 
Generate an image of a yellow couch +# prompt = "Generate a vibrant tech hoodie with AWS written on it" +# image_gen_result = agent.tool.nova_canvas( +# text=prompt, +# task_type="TEXT_IMAGE", +# model_id="amazon.nova-canvas-v1:0", +# negative_prompt="blurry, low quality", +# ) +# assert image_gen_result["status"] == "success", str(image_gen_result) +# content = image_gen_result["content"] + +# # Extract and verify image bytes from result +# found_image = None +# for item in content: +# if "image" in item and "source" in item["image"]: +# found_image = item["image"]["source"]["bytes"] +# assert isinstance(found_image, bytes), "Returned image bytes are not 'bytes' type" +# assert len(found_image) > 1000, "Returned image is too small to be valid" +# break +# assert found_image is not None, "No image bytes found in result" + +# # Save the couch image to temp directory +# couch_image_path = tmp_path / "ai_hoodie_aws.png" +# with open(couch_image_path, "wb") as f: +# f.write(found_image) + +# # 3. 
Virtual try on the couch on the empty living room +# image_gen_result = agent.tool.nova_canvas( +# task_type="VIRTUAL_TRY_ON", +# model_id="amazon.nova-canvas-v1:0", +# image_path=str(living_room_image_path), +# reference_image_path=str(couch_image_path), +# mask_type="GARMENT", +# garment_class="UPPER_BODY", +# longSleeveStyle="SLEEVE_DOWN", +# ) + +# assert image_gen_result["status"] == "success", str(image_gen_result) +# content = image_gen_result["content"] + +# # Extract and verify image bytes from result +# found_image = None +# for item in content: +# if "image" in item and "source" in item["image"]: +# found_image = item["image"]["source"]["bytes"] +# assert isinstance(found_image, bytes), "Returned image bytes are not 'bytes' type" +# assert len(found_image) > 1000, "Returned image is too small to be valid" +# break +# assert found_image is not None, "No image bytes found in result" + +# # Save image to temp directory +# image_path = tmp_path / "hoodie_ai_garmet_try_on.png" +# with open(image_path, "wb") as f: +# f.write(found_image) + +# # 2. use image_reader tool to verify it's a real image +# assert os.path.exists(image_path), f"Image file not found at {image_path}" +# read_result = agent.tool.image_reader(image_path=str(image_path)) +# assert read_result["status"] == "success", str(read_result) +# image_content = read_result["content"][0]["image"] +# assert image_content["format"] == "png" +# assert isinstance(image_content["source"]["bytes"], bytes) +# assert len(image_content["source"]["bytes"]) > 1000 + +# # 3. test semantic usage to check if it recognizes dog/corgi +# semantic_result = agent(f"Does the image at path `{image_path}` contain a a person wearing an AWS hoodie?\ +# respond with yes or no first") +# assert "yes" in str(semantic_result).lower() + + +def test_virtual_try_on_prompt_mask(agent, tmp_path): + # 1. 
Generate an image of an empty living room + prompt = "an empty room with a white background and a purple couch in the middle" + image_gen_result = agent.tool.nova_canvas( + text=prompt, + task_type="TEXT_IMAGE", + model_id="amazon.nova-canvas-v1:0", + negative_prompt="blurry, low quality", + ) + assert image_gen_result["status"] == "success", str(image_gen_result) + content = image_gen_result["content"] + + # Extract and verify image bytes from result + found_image = None + for item in content: + if "image" in item and "source" in item["image"]: + found_image = item["image"]["source"]["bytes"] + assert isinstance(found_image, bytes), "Returned image bytes are not 'bytes' type" + assert len(found_image) > 1000, "Returned image is too small to be valid" + break + assert found_image is not None, "No image bytes found in result" + + # Save the empty living room with red couch image to temp directory + living_room_image_path = tmp_path / "empty_room_purple_couch.png" + with open(living_room_image_path, "wb") as f: + f.write(found_image) + + # 2. 
Generate an image of a yellow couch + prompt = "Generate a green couch with white background" + image_gen_result = agent.tool.nova_canvas( + text=prompt, + task_type="TEXT_IMAGE", + model_id="amazon.nova-canvas-v1:0", + negative_prompt="blurry, low quality", + ) + assert image_gen_result["status"] == "success", str(image_gen_result) + content = image_gen_result["content"] + + # Extract and verify image bytes from result + found_image = None + for item in content: + if "image" in item and "source" in item["image"]: + found_image = item["image"]["source"]["bytes"] + assert isinstance(found_image, bytes), "Returned image bytes are not 'bytes' type" + assert len(found_image) > 1000, "Returned image is too small to be valid" + break + assert found_image is not None, "No image bytes found in result" + + # Save the couch image to temp directory + couch_image_path = tmp_path / "green_couch.png" + with open(couch_image_path, "wb") as f: + f.write(found_image) + + # 3. Virtual try on the couch on the empty living room + image_gen_result = agent.tool.nova_canvas( + task_type="VIRTUAL_TRY_ON", + model_id="amazon.nova-canvas-v1:0", + image_path=str(living_room_image_path), + reference_image_path=str(couch_image_path), + mask_type="PROMPT", + mask_prompt="purple couch", + ) + + assert image_gen_result["status"] == "success", str(image_gen_result) + content = image_gen_result["content"] + + # Extract and verify image bytes from result + found_image = None + for item in content: + if "image" in item and "source" in item["image"]: + found_image = item["image"]["source"]["bytes"] + assert isinstance(found_image, bytes), "Returned image bytes are not 'bytes' type" + assert len(found_image) > 1000, "Returned image is too small to be valid" + break + assert found_image is not None, "No image bytes found in result" + + # Save image to temp directory + image_path = tmp_path / "living_room_couch_try_on.png" + with open(image_path, "wb") as f: + f.write(found_image) + + # 2. 
use image_reader tool to verify it's a real image + assert os.path.exists(image_path), f"Image file not found at {image_path}" + read_result = agent.tool.image_reader(image_path=str(image_path)) + assert read_result["status"] == "success", str(read_result) + image_content = read_result["content"][0]["image"] + assert image_content["format"] == "png" + assert isinstance(image_content["source"]["bytes"], bytes) + assert len(image_content["source"]["bytes"]) > 1000 + + # 3. test semantic usage to check if it recognizes dog/corgi + semantic_result = agent(f"Does the image at path `{image_path}` contain a green couch in an empty living room?\ + respond with yes or no first") + print(f"\n Agent response: {semantic_result}") + assert "yes" in str(semantic_result).lower() and "green" in str(semantic_result).lower() From 41adf784f379dbdc8f679563d1a7348695d70e8d Mon Sep 17 00:00:00 2001 From: cerashdan Date: Sun, 3 Aug 2025 17:12:14 +0000 Subject: [PATCH 16/18] uncommenting commented lines --- tests_integ/test_nova_canvas.py | 406 ++++++++++++++++---------------- 1 file changed, 203 insertions(+), 203 deletions(-) diff --git a/tests_integ/test_nova_canvas.py b/tests_integ/test_nova_canvas.py index bcdcba3b..63c33b6e 100644 --- a/tests_integ/test_nova_canvas.py +++ b/tests_integ/test_nova_canvas.py @@ -11,209 +11,209 @@ def agent(): return Agent(tools=[nova_canvas, image_reader]) -# def test_generate_and_read_image(agent, tmp_path): -# # 1. 
Generate a lovely dog picture -# prompt = "A corgi riding a skateboard in Times Square" -# image_gen_result = agent.tool.nova_canvas( -# text=prompt, -# task_type="TEXT_IMAGE", -# model_id="amazon.nova-canvas-v1:0", -# negative_prompt="blurry, low quality", -# ) -# assert image_gen_result["status"] == "success", str(image_gen_result) -# content = image_gen_result["content"] - -# # Extract and verify image bytes from result -# found_image = None -# for item in content: -# if "image" in item and "source" in item["image"]: -# found_image = item["image"]["source"]["bytes"] -# assert isinstance(found_image, bytes), "Returned image bytes are not 'bytes' type" -# assert len(found_image) > 1000, "Returned image is too small to be valid" -# break -# assert found_image is not None, "No image bytes found in result" - -# # Save image to temp directory -# image_path = tmp_path / "generated.png" -# with open(image_path, "wb") as f: -# f.write(found_image) - -# # 2. use image_reader tool to verify it's a real image -# assert os.path.exists(image_path), f"Image file not found at {image_path}" -# read_result = agent.tool.image_reader(image_path=str(image_path)) -# assert read_result["status"] == "success", str(read_result) -# image_content = read_result["content"][0]["image"] -# assert image_content["format"] == "png" -# assert isinstance(image_content["source"]["bytes"], bytes) -# assert len(image_content["source"]["bytes"]) > 1000 - -# # 3. test semantic usage to check if it recognizes dog/corgi -# semantic_result = agent(f"What is the image at `{image_path}`") -# assert "dog" in str(semantic_result).lower() or "corgi" in str(semantic_result).lower() - - -# def test_remove_background(agent, tmp_path): -# # 1. 
Generate an image -# prompt = "A corgi riding a skateboard in Times Square" -# image_gen_result = agent.tool.nova_canvas( -# text=prompt, -# task_type="TEXT_IMAGE", -# model_id="amazon.nova-canvas-v1:0", -# negative_prompt="blurry, low quality", -# ) -# assert image_gen_result["status"] == "success", str(image_gen_result) -# content = image_gen_result["content"] - -# # Extract and verify image bytes from result -# found_image = None -# for item in content: -# if "image" in item and "source" in item["image"]: -# found_image = item["image"]["source"]["bytes"] -# assert isinstance(found_image, bytes), "Returned image bytes are not 'bytes' type" -# assert len(found_image) > 1000, "Returned image is too small to be valid" -# break -# assert found_image is not None, "No image bytes found in result" - -# # Save image to temp directory -# image_path = tmp_path / "generated.png" -# with open(image_path, "wb") as f: -# f.write(found_image) - -# # 2. Remove the background from the generated image -# image_gen_result = agent.tool.nova_canvas( -# task_type="BACKGROUND_REMOVAL", -# model_id="amazon.nova-canvas-v1:0", -# image_path=str(image_path), -# ) -# assert image_gen_result["status"] == "success", str(image_gen_result) -# content = image_gen_result["content"] - -# # Extract and verify the image with removed background bytes from result -# found_image = None -# for item in content: -# if "image" in item and "source" in item["image"]: -# found_image = item["image"]["source"]["bytes"] -# assert isinstance(found_image, bytes), "Returned image bytes are not 'bytes' type" -# assert len(found_image) > 1000, "Returned image is too small to be valid" -# break -# assert found_image is not None, "No image bytes found in result" - -# # Save image to temp directory -# image_path_no_bg = tmp_path / "generated_no_bg.png" -# with open(image_path_no_bg, "wb") as f: -# f.write(found_image) - -# # 2. 
use image_reader tool to verify it's a real image -# assert os.path.exists(image_path_no_bg), f"Image file not found at {image_path_no_bg}" -# read_result = agent.tool.image_reader(image_path=str(image_path_no_bg)) -# assert read_result["status"] == "success", str(read_result) -# image_content = read_result["content"][0]["image"] -# assert image_content["format"] == "png" -# assert isinstance(image_content["source"]["bytes"], bytes) -# assert len(image_content["source"]["bytes"]) > 1000 - -# # 3. test semantic usage to check if it recognizes dog/corgi -# semantic_result = agent(f"Has the background been removed from the image at `{image_path_no_bg} \ -# - compare with image at {image_path}` respond with yes or no first") -# assert "yes" in str(semantic_result).lower() - - -# def test_virtual_try_on_mask_garment(agent, tmp_path): -# # 1. Generate an image of an empty living room -# prompt = "full body person with a warm, genuine smile standing facing directly at the camera. \ -# in a sunny neighberhood with green nature." -# image_gen_result = agent.tool.nova_canvas( -# text=prompt, -# task_type="TEXT_IMAGE", -# model_id="amazon.nova-canvas-v1:0", -# negative_prompt="blurry, low quality", -# ) -# assert image_gen_result["status"] == "success", str(image_gen_result) -# content = image_gen_result["content"] - -# # Extract and verify image bytes from result -# found_image = None -# for item in content: -# if "image" in item and "source" in item["image"]: -# found_image = item["image"]["source"]["bytes"] -# assert isinstance(found_image, bytes), "Returned image bytes are not 'bytes' type" -# assert len(found_image) > 1000, "Returned image is too small to be valid" -# break -# assert found_image is not None, "No image bytes found in result" - -# # Save the empty living room with red couch image to temp directory -# living_room_image_path = tmp_path / "human_standing.png" -# with open(living_room_image_path, "wb") as f: -# f.write(found_image) - -# # 2. 
Generate an image of a yellow couch -# prompt = "Generate a vibrant tech hoodie with AWS written on it" -# image_gen_result = agent.tool.nova_canvas( -# text=prompt, -# task_type="TEXT_IMAGE", -# model_id="amazon.nova-canvas-v1:0", -# negative_prompt="blurry, low quality", -# ) -# assert image_gen_result["status"] == "success", str(image_gen_result) -# content = image_gen_result["content"] - -# # Extract and verify image bytes from result -# found_image = None -# for item in content: -# if "image" in item and "source" in item["image"]: -# found_image = item["image"]["source"]["bytes"] -# assert isinstance(found_image, bytes), "Returned image bytes are not 'bytes' type" -# assert len(found_image) > 1000, "Returned image is too small to be valid" -# break -# assert found_image is not None, "No image bytes found in result" - -# # Save the couch image to temp directory -# couch_image_path = tmp_path / "ai_hoodie_aws.png" -# with open(couch_image_path, "wb") as f: -# f.write(found_image) - -# # 3. 
Virtual try on the couch on the empty living room -# image_gen_result = agent.tool.nova_canvas( -# task_type="VIRTUAL_TRY_ON", -# model_id="amazon.nova-canvas-v1:0", -# image_path=str(living_room_image_path), -# reference_image_path=str(couch_image_path), -# mask_type="GARMENT", -# garment_class="UPPER_BODY", -# longSleeveStyle="SLEEVE_DOWN", -# ) - -# assert image_gen_result["status"] == "success", str(image_gen_result) -# content = image_gen_result["content"] - -# # Extract and verify image bytes from result -# found_image = None -# for item in content: -# if "image" in item and "source" in item["image"]: -# found_image = item["image"]["source"]["bytes"] -# assert isinstance(found_image, bytes), "Returned image bytes are not 'bytes' type" -# assert len(found_image) > 1000, "Returned image is too small to be valid" -# break -# assert found_image is not None, "No image bytes found in result" - -# # Save image to temp directory -# image_path = tmp_path / "hoodie_ai_garmet_try_on.png" -# with open(image_path, "wb") as f: -# f.write(found_image) - -# # 2. use image_reader tool to verify it's a real image -# assert os.path.exists(image_path), f"Image file not found at {image_path}" -# read_result = agent.tool.image_reader(image_path=str(image_path)) -# assert read_result["status"] == "success", str(read_result) -# image_content = read_result["content"][0]["image"] -# assert image_content["format"] == "png" -# assert isinstance(image_content["source"]["bytes"], bytes) -# assert len(image_content["source"]["bytes"]) > 1000 - -# # 3. test semantic usage to check if it recognizes dog/corgi -# semantic_result = agent(f"Does the image at path `{image_path}` contain a a person wearing an AWS hoodie?\ -# respond with yes or no first") -# assert "yes" in str(semantic_result).lower() +def test_generate_and_read_image(agent, tmp_path): + # 1. 
Generate a lovely dog picture + prompt = "A corgi riding a skateboard in Times Square" + image_gen_result = agent.tool.nova_canvas( + text=prompt, + task_type="TEXT_IMAGE", + model_id="amazon.nova-canvas-v1:0", + negative_prompt="blurry, low quality", + ) + assert image_gen_result["status"] == "success", str(image_gen_result) + content = image_gen_result["content"] + + # Extract and verify image bytes from result + found_image = None + for item in content: + if "image" in item and "source" in item["image"]: + found_image = item["image"]["source"]["bytes"] + assert isinstance(found_image, bytes), "Returned image bytes are not 'bytes' type" + assert len(found_image) > 1000, "Returned image is too small to be valid" + break + assert found_image is not None, "No image bytes found in result" + + # Save image to temp directory + image_path = tmp_path / "generated.png" + with open(image_path, "wb") as f: + f.write(found_image) + + # 2. use image_reader tool to verify it's a real image + assert os.path.exists(image_path), f"Image file not found at {image_path}" + read_result = agent.tool.image_reader(image_path=str(image_path)) + assert read_result["status"] == "success", str(read_result) + image_content = read_result["content"][0]["image"] + assert image_content["format"] == "png" + assert isinstance(image_content["source"]["bytes"], bytes) + assert len(image_content["source"]["bytes"]) > 1000 + + # 3. test semantic usage to check if it recognizes dog/corgi + semantic_result = agent(f"What is the image at `{image_path}`") + assert "dog" in str(semantic_result).lower() or "corgi" in str(semantic_result).lower() + + +def test_remove_background(agent, tmp_path): + # 1. 
Generate an image + prompt = "A corgi riding a skateboard in Times Square" + image_gen_result = agent.tool.nova_canvas( + text=prompt, + task_type="TEXT_IMAGE", + model_id="amazon.nova-canvas-v1:0", + negative_prompt="blurry, low quality", + ) + assert image_gen_result["status"] == "success", str(image_gen_result) + content = image_gen_result["content"] + + # Extract and verify image bytes from result + found_image = None + for item in content: + if "image" in item and "source" in item["image"]: + found_image = item["image"]["source"]["bytes"] + assert isinstance(found_image, bytes), "Returned image bytes are not 'bytes' type" + assert len(found_image) > 1000, "Returned image is too small to be valid" + break + assert found_image is not None, "No image bytes found in result" + + # Save image to temp directory + image_path = tmp_path / "generated.png" + with open(image_path, "wb") as f: + f.write(found_image) + + # 2. Remove the background from the generated image + image_gen_result = agent.tool.nova_canvas( + task_type="BACKGROUND_REMOVAL", + model_id="amazon.nova-canvas-v1:0", + image_path=str(image_path), + ) + assert image_gen_result["status"] == "success", str(image_gen_result) + content = image_gen_result["content"] + + # Extract and verify the image with removed background bytes from result + found_image = None + for item in content: + if "image" in item and "source" in item["image"]: + found_image = item["image"]["source"]["bytes"] + assert isinstance(found_image, bytes), "Returned image bytes are not 'bytes' type" + assert len(found_image) > 1000, "Returned image is too small to be valid" + break + assert found_image is not None, "No image bytes found in result" + + # Save image to temp directory + image_path_no_bg = tmp_path / "generated_no_bg.png" + with open(image_path_no_bg, "wb") as f: + f.write(found_image) + + # 2. 
use image_reader tool to verify it's a real image + assert os.path.exists(image_path_no_bg), f"Image file not found at {image_path_no_bg}" + read_result = agent.tool.image_reader(image_path=str(image_path_no_bg)) + assert read_result["status"] == "success", str(read_result) + image_content = read_result["content"][0]["image"] + assert image_content["format"] == "png" + assert isinstance(image_content["source"]["bytes"], bytes) + assert len(image_content["source"]["bytes"]) > 1000 + + # 3. test semantic usage to check if it recognizes dog/corgi + semantic_result = agent(f"Has the background been removed from the image at `{image_path_no_bg} \ + - compare with image at {image_path}` respond with yes or no first") + assert "yes" in str(semantic_result).lower() + + +def test_virtual_try_on_mask_garment(agent, tmp_path): + # 1. Generate an image of an empty living room + prompt = "full body person with a warm, genuine smile standing facing directly at the camera. \ + in a sunny neighberhood with green nature." + image_gen_result = agent.tool.nova_canvas( + text=prompt, + task_type="TEXT_IMAGE", + model_id="amazon.nova-canvas-v1:0", + negative_prompt="blurry, low quality", + ) + assert image_gen_result["status"] == "success", str(image_gen_result) + content = image_gen_result["content"] + + # Extract and verify image bytes from result + found_image = None + for item in content: + if "image" in item and "source" in item["image"]: + found_image = item["image"]["source"]["bytes"] + assert isinstance(found_image, bytes), "Returned image bytes are not 'bytes' type" + assert len(found_image) > 1000, "Returned image is too small to be valid" + break + assert found_image is not None, "No image bytes found in result" + + # Save the empty living room with red couch image to temp directory + living_room_image_path = tmp_path / "human_standing.png" + with open(living_room_image_path, "wb") as f: + f.write(found_image) + + # 2. 
Generate an image of a yellow couch + prompt = "Generate a vibrant tech hoodie with AWS written on it" + image_gen_result = agent.tool.nova_canvas( + text=prompt, + task_type="TEXT_IMAGE", + model_id="amazon.nova-canvas-v1:0", + negative_prompt="blurry, low quality", + ) + assert image_gen_result["status"] == "success", str(image_gen_result) + content = image_gen_result["content"] + + # Extract and verify image bytes from result + found_image = None + for item in content: + if "image" in item and "source" in item["image"]: + found_image = item["image"]["source"]["bytes"] + assert isinstance(found_image, bytes), "Returned image bytes are not 'bytes' type" + assert len(found_image) > 1000, "Returned image is too small to be valid" + break + assert found_image is not None, "No image bytes found in result" + + # Save the couch image to temp directory + couch_image_path = tmp_path / "ai_hoodie_aws.png" + with open(couch_image_path, "wb") as f: + f.write(found_image) + + # 3. Virtual try on the couch on the empty living room + image_gen_result = agent.tool.nova_canvas( + task_type="VIRTUAL_TRY_ON", + model_id="amazon.nova-canvas-v1:0", + image_path=str(living_room_image_path), + reference_image_path=str(couch_image_path), + mask_type="GARMENT", + garment_class="UPPER_BODY", + longSleeveStyle="SLEEVE_DOWN", + ) + + assert image_gen_result["status"] == "success", str(image_gen_result) + content = image_gen_result["content"] + + # Extract and verify image bytes from result + found_image = None + for item in content: + if "image" in item and "source" in item["image"]: + found_image = item["image"]["source"]["bytes"] + assert isinstance(found_image, bytes), "Returned image bytes are not 'bytes' type" + assert len(found_image) > 1000, "Returned image is too small to be valid" + break + assert found_image is not None, "No image bytes found in result" + + # Save image to temp directory + image_path = tmp_path / "hoodie_ai_garmet_try_on.png" + with open(image_path, "wb") as f: + 
f.write(found_image) + + # 2. use image_reader tool to verify it's a real image + assert os.path.exists(image_path), f"Image file not found at {image_path}" + read_result = agent.tool.image_reader(image_path=str(image_path)) + assert read_result["status"] == "success", str(read_result) + image_content = read_result["content"][0]["image"] + assert image_content["format"] == "png" + assert isinstance(image_content["source"]["bytes"], bytes) + assert len(image_content["source"]["bytes"]) > 1000 + + # 3. test semantic usage to check if it recognizes dog/corgi + semantic_result = agent(f"Does the image at path `{image_path}` contain a a person wearing an AWS hoodie?\ + respond with yes or no first") + assert "yes" in str(semantic_result).lower() def test_virtual_try_on_prompt_mask(agent, tmp_path): From 827bd28758a6eae7bbbf3f2725e227fd1650cb89 Mon Sep 17 00:00:00 2001 From: cerashdan Date: Sun, 3 Aug 2025 17:20:24 +0000 Subject: [PATCH 17/18] editing variable names for virtual-try on garmet mode --- tests_integ/test_nova_canvas.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests_integ/test_nova_canvas.py b/tests_integ/test_nova_canvas.py index 63c33b6e..239b3e0c 100644 --- a/tests_integ/test_nova_canvas.py +++ b/tests_integ/test_nova_canvas.py @@ -142,8 +142,8 @@ def test_virtual_try_on_mask_garment(agent, tmp_path): assert found_image is not None, "No image bytes found in result" # Save the empty living room with red couch image to temp directory - living_room_image_path = tmp_path / "human_standing.png" - with open(living_room_image_path, "wb") as f: + human_image_path = tmp_path / "human_standing.png" + with open(human_image_path, "wb") as f: f.write(found_image) # 2. 
Generate an image of a yellow couch @@ -168,16 +168,16 @@ def test_virtual_try_on_mask_garment(agent, tmp_path): assert found_image is not None, "No image bytes found in result" # Save the couch image to temp directory - couch_image_path = tmp_path / "ai_hoodie_aws.png" - with open(couch_image_path, "wb") as f: + hoodie_image_path = tmp_path / "ai_hoodie_aws.png" + with open(hoodie_image_path, "wb") as f: f.write(found_image) # 3. Virtual try on the couch on the empty living room image_gen_result = agent.tool.nova_canvas( task_type="VIRTUAL_TRY_ON", model_id="amazon.nova-canvas-v1:0", - image_path=str(living_room_image_path), - reference_image_path=str(couch_image_path), + image_path=str(human_image_path), + reference_image_path=str(hoodie_image_path), mask_type="GARMENT", garment_class="UPPER_BODY", longSleeveStyle="SLEEVE_DOWN", From 72f54d68351a4546cd66fbbe0d78bf99a5372cee Mon Sep 17 00:00:00 2001 From: cerashdan Date: Sun, 3 Aug 2025 17:43:41 +0000 Subject: [PATCH 18/18] updating comments in integration tests --- tests_integ/test_nova_canvas.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/tests_integ/test_nova_canvas.py b/tests_integ/test_nova_canvas.py index 239b3e0c..3cf625ca 100644 --- a/tests_integ/test_nova_canvas.py +++ b/tests_integ/test_nova_canvas.py @@ -119,7 +119,7 @@ def test_remove_background(agent, tmp_path): def test_virtual_try_on_mask_garment(agent, tmp_path): - # 1. Generate an image of an empty living room + # 1. Generate an image of a human standing prompt = "full body person with a warm, genuine smile standing facing directly at the camera. \ in a sunny neighberhood with green nature."
image_gen_result = agent.tool.nova_canvas( @@ -141,12 +141,12 @@ def test_virtual_try_on_mask_garment(agent, tmp_path): break assert found_image is not None, "No image bytes found in result" - # Save the empty living room with red couch image to temp directory + # Save the human standing image to temp directory human_image_path = tmp_path / "human_standing.png" with open(human_image_path, "wb") as f: f.write(found_image) - # 2. Generate an image of a yellow couch + # 2. Generate an image of a vibrant tech hoodie with AWS written on it prompt = "Generate a vibrant tech hoodie with AWS written on it" image_gen_result = agent.tool.nova_canvas( text=prompt, @@ -167,12 +167,12 @@ def test_virtual_try_on_mask_garment(agent, tmp_path): break assert found_image is not None, "No image bytes found in result" - # Save the couch image to temp directory + # Save the hoodie image to temp directory hoodie_image_path = tmp_path / "ai_hoodie_aws.png" with open(hoodie_image_path, "wb") as f: f.write(found_image) - # 3. Virtual try on the couch on the empty living room + # 3. Virtual try-on of the hoodie on the generated human image image_gen_result = agent.tool.nova_canvas( task_type="VIRTUAL_TRY_ON", model_id="amazon.nova-canvas-v1:0", @@ -238,12 +238,12 @@ def test_virtual_try_on_prompt_mask(agent, tmp_path): break assert found_image is not None, "No image bytes found in result" - # Save the empty living room with red couch image to temp directory + # Save the empty living room with purple couch image to temp directory living_room_image_path = tmp_path / "empty_room_purple_couch.png" with open(living_room_image_path, "wb") as f: f.write(found_image) - # 2. Generate an image of a yellow couch + # 2. Generate an image of a green couch prompt = "Generate a green couch with white background" image_gen_result = agent.tool.nova_canvas( text=prompt, @@ -269,7 +269,8 @@ def test_virtual_try_on_prompt_mask(agent, tmp_path): with open(couch_image_path, "wb") as f: f.write(found_image) - # 3.
Virtual try on the couch on the empty living room + # 3. Use virtual try-on to superimpose the green couch in place of the purple couch + # in the living room. image_gen_result = agent.tool.nova_canvas( task_type="VIRTUAL_TRY_ON", model_id="amazon.nova-canvas-v1:0",