vibing-ai · theoteske · Apr 16, 2025 · Apr 17, 2025 · Apr 17, 2025 · Apr 17, 2025
diff --git a/app/services/schemas.py b/app/services/schemas.py
@@ -8,7 +8,7 @@ class User(BaseModel):
     id: str
     fullName: str
     email: str
-    
+
 class Role(str, Enum):
     human = "human"
     ai = "ai"
@@ -28,30 +28,30 @@ class Message(BaseModel):
     type: MessageType
     timestamp: Optional[Any] = None
     payload: MessagePayload
-    
+
 class RequestType(str, Enum):
     chat = "chat"
     tool = "tool"
 
 class GenericRequest(BaseModel):
     user: User
     type: RequestType
-    
+
 class ChatRequest(GenericRequest):
     messages: List[Message]
 
 class GenericAssistantRequest(BaseModel):
     assistant_inputs: AssistantInputs
-    
+
 class ToolRequest(GenericRequest):
     tool_data: BaseTool
-    
+
 class ChatResponse(BaseModel):
     data: List[Message]
 
 class ToolResponse(BaseModel):
     data: Any
-    
+
 class ChatMessage(BaseModel):
     role: str
     type: str
@@ -67,7 +67,7 @@ class QuizzifyArgs(BaseModel):
 class WorksheetQuestion(BaseModel):
     question_type: str
     number: int
-    
+
 class WorksheetQuestionModel(BaseModel):
     worksheet_question_list: List[WorksheetQuestion]
 
@@ -78,7 +78,7 @@ class WorksheetGeneratorArgs(BaseModel):
     file_url: str
     file_type: str
     lang: Optional[str] = "en"
-    
+
 class SyllabusGeneratorArgsModel(BaseModel):
     grade_level: str
     subject: str
@@ -92,20 +92,20 @@ class SyllabusGeneratorArgsModel(BaseModel):
     file_url: str
     file_type: str
     lang: Optional[str] = "en"
-    
+
 class AIResistantArgs(BaseModel):
     assignment: str = Field(..., max_length=255, description="The given assignment")
     grade_level: Literal["pre-k", "kindergarten", "elementary", "middle", "high", "university", "professional"] = Field(..., description="Educational level to which the content is directed")
     file_type: str = Field(..., description="Type of file being handled, according to the defined enumeration")
     file_url: str = Field(..., description="URL or path of the file to be processed")
     lang: str = Field(..., description="Language in which the file or content is written")
-    
+
 class ConnectWithThemArgs(BaseModel):
     grade_level: str = Field(..., description="The grade level the teacher is instructing.")
     task_description: str = Field(..., description="A brief description of the subject or topic the teacher is instructing.")
     students_description: str = Field(..., description="A description of the students including age group, interests, location, and any relevant cultural or social factors.")
-    task_description_file_url: str 
-    task_description_file_type: str 
+    task_description_file_url: str
+    task_description_file_type: str
     student_description_file_url: str
     student_description_file_type: str
     lang: str = Field(..., description="The language in which the subject is being taught.")
@@ -175,4 +175,10 @@ class SlideGeneratorInput(BaseModel):
     slides_titles: List[str]
     instructional_level: str
     topic: str
-    lang: Optional[str] = "en"
+    lang: Optional[str] = "en"
+
+class ImageGeneratorArgs(BaseModel):
+    prompt: str = Field(..., description="The text prompt to generate an image from")
+    subject: Optional[str] = Field(None, description="The educational subject (e.g., 'math', 'science')")
+    grade_level: Optional[str] = Field(None, description="The grade level (e.g., 'elementary', 'middle school', 'high school')")
+    lang: str = Field("en", description="The language for text in the image")
diff --git a/app/tools/image_generator/README.md b/app/tools/image_generator/README.md
@@ -0,0 +1,159 @@
+# Image Generator
+
+This tool generates high-quality educational images from text prompts using Black Forest Labs' Flux 1.1 Pro model and, when Google Cloud Storage is configured, automatically uploads them to a bucket for persistent access.
+
+## Features
+
+- Generate educational images from text prompts
+- Enhance prompts with educational context
+- Safety filtering to ensure appropriate content
+- Integration with Black Forest Labs Flux 1.1 Pro API
+- Automatic storage in Google Cloud Storage (when configured)
+- Content type detection (diagrams, concepts, processes, etc.)
+
+## Setup
+
+1. Install the required dependencies:
+   ```bash
+   # From the marvel-ai-backend directory
+   pip install -r requirements.txt
+   ```
+
+   Note: All required dependencies are included in the main project's requirements.txt file.
+
+2. Set up your Black Forest Labs API key in the .env file:
+
+   Add the following line to your `.env` file in the `marvel-ai-backend/app/` directory:
+   ```
+   BFL_API_KEY=your_api_key_here
+   ```
+
+   You can obtain an API key by registering at [api.bfl.ml](https://api.bfl.ml/).
+
+3. (Optional) Set up Google Cloud Storage for image persistence:
+
+   a. Create a storage bucket in the GCP project associated with the PROJECT_ID environment variable in your .env file (see GCP Storage Configuration below)
+
+   b. Add the following to your `.env` file:
+   ```
+   GCP_STORAGE_BUCKET=your-gcp-bucket-name
+   GOOGLE_APPLICATION_CREDENTIALS=/absolute/path/to/your/credentials.json
+   ```
+
+4. When running in Docker, mount the credentials file:
+
+   ```bash
+   docker run \
+     -v /path/to/credentials.json:/app/credentials.json:ro \
+     -e GOOGLE_APPLICATION_CREDENTIALS=/app/credentials.json \
+     -p 8000:8000 \
+     --env-file ./app/.env \
+     your-image-name
+   ```
+
+## Usage
+
+### API Request Format
+
+```json
+{
+  "user": {
+    "id": "string",
+    "fullName": "string",
+    "email": "string"
+  },
+  "type": "tool",
+  "tool_data": {
+    "tool_id": "image-generator",
+    "inputs": [
+      {
+        "name": "prompt",
+        "value": "A diagram of the solar system"
+      },
+      {
+        "name": "subject",
+        "value": "astronomy"
+      },
+      {
+        "name": "grade_level",
+        "value": "middle school"
+      },
+      {
+        "name": "lang",
+        "value": "en"
+      }
+    ]
+  }
+}
+```
+
+### Input Parameters
+
+- `prompt` (required): The text prompt to generate an image from
+- `subject` (optional): The educational subject (e.g., 'math', 'science')
+- `grade_level` (optional): The grade level (e.g., 'elementary', 'middle school', 'high school')
+- `lang` (optional, default: "en"): The language for text in the image
+
+### Response Format
+
+```json
+{
+  "image_b64": "base64_encoded_image_data",
+  "prompt_used": "A diagram of the solar system, educational context: astronomy for middle school level",
+  "educational_context": "astronomy for middle school level",
+  "safety_applied": true,
+  "gcp_url": "https://storage.googleapis.com/your-bucket/generated_images/image_20250422_123456_solar_system_abcd1234.png"
+}
+```
+
+The `gcp_url` field will be included if GCP storage is configured and the image was successfully uploaded.
+
+## Implementation Details
+
+The image generator uses Black Forest Labs' Flux 1.1 Pro model, which is a state-of-the-art text-to-image model. The tool enhances the prompt with educational context and applies safety filtering to ensure the generated images are appropriate for educational use.
+
+## Dependencies
+
+- requests
+- Pillow
+- langchain-google-genai
+- pydantic
+- google-cloud-storage (for GCP integration)
+
+## GCP Storage Configuration
+
+### Creating a GCP Bucket
+
+1. Go to the [Google Cloud Console](https://console.cloud.google.com/)
+2. Navigate to "Cloud Storage" > "Buckets"
+3. Click "CREATE BUCKET"
+4. Enter a globally unique name
+5. Choose your preferred region
+6. Set access control to "Fine-grained"
+7. Click "CREATE"
+
+### Setting Bucket Permissions
+
+1. Click on your newly created bucket
+2. Go to the "Permissions" tab
+3. Click "GRANT ACCESS"
+4. Enter `allUsers` in the "New principals" field
+5. Select "Cloud Storage" > "Storage Object Viewer" for the role
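+6. Click "SAVE"
+
+> **Warning:** Granting `allUsers` the "Storage Object Viewer" role makes every object in the bucket publicly readable by anyone on the internet. Use a bucket dedicated to generated images and do not store sensitive data in it.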
+
+### Creating a Service Account
+
+1. Navigate to "IAM & Admin" > "Service Accounts"
+2. Click "CREATE SERVICE ACCOUNT"
+3. Enter a name and description
+4. Add the "Storage Object Admin" role
+5. Create a key (JSON format)
+6. Download the key file
+
+### Troubleshooting GCP Storage
+
+- Check that your service account has the correct permissions
+- Verify that the credentials file path is correct and accessible
+- Ensure the bucket exists and is publicly readable
+- Check the logs for detailed error messages
+- When using Docker, make sure the credentials file is mounted correctly
diff --git a/app/tools/image_generator/__init__.py b/app/tools/image_generator/__init__.py
diff --git a/app/tools/image_generator/core.py b/app/tools/image_generator/core.py
@@ -0,0 +1,69 @@
+from app.services.logger import setup_logger
+from app.tools.image_generator.tools import ImageGenerator, ImageGeneratorArgs
+from app.api.error_utilities import ImageHandlerError, ToolExecutorError
+
+logger = setup_logger(__name__)
+
+def executor(
+    prompt: str,
+    subject: str = None,
+    grade_level: str = None,
+    lang: str = "en",
+    verbose: bool = False
+):
+    """
+    Executor function for the Image Generator tool.
+
+    Args:
+        prompt (str): The text prompt to generate an image from.
+        subject (str, optional): The educational subject (e.g., 'math', 'science').
+        grade_level (str, optional): The grade level (e.g., 'elementary', 'middle school', 'high school').
+        lang (str, optional): The language for text in the image. Defaults to "en".
+        verbose (bool, optional): Flag for verbose logging. Defaults to False.
+
+    Returns:
+        dict: Generated image data including base64 encoded image and metadata.
+              If GCP storage is configured, the result will also include a gcp_url field.
+
+    Raises:
+        ToolExecutorError: If there's an error in the image generation process.
+    """
+    try:
+        if verbose:
+            logger.info(f"Generating image with prompt: {prompt}")
+            if subject:
+                logger.info(f"Subject: {subject}")
+            if grade_level:
+                logger.info(f"Grade level: {grade_level}")
+            logger.info(f"Language: {lang}")
+
+        # Create arguments for the image generator
+        image_generator_args = ImageGeneratorArgs(
+            prompt=prompt,
+            subject=subject,
+            grade_level=grade_level,
+            lang=lang
+        )
+
+        # Initialize the image generator
+        generator = ImageGenerator(args=image_generator_args, verbose=verbose)
+
+        # Generate the image
+        result = generator.generate_educational_image()
+
+        # Log success
+        logger.info(f"Image generated successfully for prompt: {prompt}")
+
+        # Return the result as a dictionary
+        # Use model_dump() instead of dict() for Pydantic v2 compatibility
+        return result.model_dump()
+
+    except ImageHandlerError as e:
+        error_message = str(e)
+        logger.error(f"Image Handler Error: {error_message}")
+        raise ToolExecutorError(error_message)
+
+    except Exception as e:
+        error_message = f"Error in Image Generator: {str(e)}"
+        logger.error(error_message)
+        raise ToolExecutorError(error_message)
diff --git a/app/tools/image_generator/metadata.json b/app/tools/image_generator/metadata.json
@@ -0,0 +1,36 @@
+{
+    "name": "Image Generator",
+    "description": "Generate educational images from text prompts using Black Forest Labs Flux 1.1 Pro API.",
+    "version": "1.0.0",
+    "inputs": [
+        {
+            "name": "prompt",
+            "type": "string",
+            "description": "The text prompt to generate an image from",
+            "required": true
+        },
+        {
+            "name": "subject",
+            "type": "string",
+            "description": "The educational subject (e.g., 'math', 'science')",
+            "required": false
+        },
+        {
+            "name": "grade_level",
+            "type": "string",
+            "description": "The grade level (e.g., 'elementary', 'middle school', 'high school')",
+            "required": false
+        },
+        {
+            "name": "lang",
+            "type": "string",
+            "description": "The language for text in the image",
+            "required": false,
+            "default": "en"
+        }
+    ],
+    "output": {
+        "type": "object",
+        "description": "Generated image data including base64 encoded image, metadata, and optional GCP storage URL"
+    }
+}
diff --git a/app/tools/image_generator/prompt/image-generator-prompt.txt b/app/tools/image_generator/prompt/image-generator-prompt.txt
@@ -0,0 +1,28 @@
+You are an expert educational visual designer specializing in creating high-quality images for classroom instruction. Your task is to generate clear, precise, pedagogically effective and high-quality images based on the provided prompt.
+
+INSTRUCTIONS:
+1. CLARITY: Create images with clear visual hierarchy, proper labeling, and appropriate text size for classroom visibility.
+2. EDUCATIONAL ACCURACY: Ensure all content is factually correct and aligned with educational standards.
+3. PEDAGOGICAL EFFECTIVENESS: Design images that support specific learning objectives and cognitive processes and are helpful for teaching or learning the subject matter.
+4. ACCESSIBILITY: Use high contrast, colorblind-friendly palettes, and clear distinctions between elements.
+5. AGE APPROPRIATENESS: Adjust complexity and style to match the developmental stage of the specified grade level.
+6. SAFETY: Ensure the image does not contain any inappropriate content.
+7. FOCUS: Keep the design clean and free of unnecessary elements by focusing on the core learning objective.
+8. TEXT CORRECTNESS: Ensure that all text is correctly spelled and grammatically correct.
+
+PROMPT: {prompt}
+
+EDUCATIONAL CONTEXT: {educational_context}
+
+LANGUAGE: {lang}
+
+DESIGN CHECKLIST:
+- Does the image directly address the learning objective?
+- Are all visual elements necessary and purposeful?
+- Is text clear, concise, and appropriately sized for classroom viewing?
+- Does the design use color strategically to enhance understanding?
+- Are relationships between concepts clearly visualized?
+- Does the image avoid visual clutter and unnecessary decoration?
+- Is the content developmentally appropriate for the specified grade level?
+
+Generate an image that educators can effectively use to explain concepts, demonstrate processes, or illustrate examples in their classroom teaching.