From 16ab0c1c2861d02b84f5cb7102c54b8b0ada57b0 Mon Sep 17 00:00:00 2001 From: melisa48 <60372223+melisa48@users.noreply.github.com> Date: Mon, 24 Feb 2025 15:19:38 -0800 Subject: [PATCH 1/2] Enhance syllabus generator with hybrid model and improved prompts --- app/services/schemas.py | 3 + app/tools/syllabus_generator/core.py | 45 +-- .../syllabus_generator/tests/test_core.py | 266 +++++----------- app/tools/syllabus_generator/tools.py | 294 ++++++++---------- 4 files changed, 238 insertions(+), 370 deletions(-) diff --git a/app/services/schemas.py b/app/services/schemas.py index c3c69634..dda47587 100644 --- a/app/services/schemas.py +++ b/app/services/schemas.py @@ -92,6 +92,9 @@ class SyllabusGeneratorArgsModel(BaseModel): file_url: str file_type: str lang: Optional[str] = "en" + unit_time: Optional[str] = "Week" # New + unit_time_value: Optional[int] = 10 # New + start_date: Optional[str] = "2025-03-01" # New class AIResistantArgs(BaseModel): assignment: str = Field(..., max_length=255, description="The given assignment") diff --git a/app/tools/syllabus_generator/core.py b/app/tools/syllabus_generator/core.py index c3d73d66..ab415df8 100644 --- a/app/tools/syllabus_generator/core.py +++ b/app/tools/syllabus_generator/core.py @@ -2,8 +2,8 @@ from app.tools.syllabus_generator.tools import SyllabusRequestArgs from app.tools.syllabus_generator.tools import generate_syllabus from app.utils.document_loaders_summarization import ( - generate_summary_from_img, - summarize_transcript_youtube_url, + generate_summary_from_img, + summarize_transcript_youtube_url, get_summary ) from app.api.error_utilities import SyllabusGeneratorError @@ -23,13 +23,16 @@ def executor(grade_level: str, file_url: str, file_type: str, lang: str, + unit_time: str = "Week", # New: Default unit of time + unit_time_value: int = 10, # New: Default unit value + start_date: str = "2025-03-01", # New: Default start date verbose: bool = True): if verbose: logger.info(f"File URL loaded: {file_url}") 
try: - + # Generate summary based on file type if file_type == 'img': summary = generate_summary_from_img(file_url) elif file_type == 'youtube_url': @@ -37,24 +40,30 @@ def executor(grade_level: str, else: summary = get_summary(file_url, file_type, verbose=verbose) + # Update SyllabusGeneratorArgsModel to include new fields syllabus_args_model = SyllabusGeneratorArgsModel( - grade_level = grade_level, - subject = subject, - course_description = course_description, - objectives = objectives, - required_materials = required_materials, - grading_policy = grading_policy, - policies_expectations = policies_expectations, - course_outline = course_outline, - additional_notes = additional_notes, - file_url = file_url, - file_type = file_type, - lang = lang + grade_level=grade_level, + subject=subject, + course_description=course_description, + objectives=objectives, + required_materials=required_materials, + grading_policy=grading_policy, + policies_expectations=policies_expectations, + course_outline=course_outline, + additional_notes=additional_notes, + file_url=file_url, + file_type=file_type, + lang=lang, + # Pass new fields to the model (assumes model is updated) + unit_time=unit_time, + unit_time_value=unit_time_value, + start_date=start_date ) request_args = SyllabusRequestArgs( - syllabus_args_model, - summary) + syllabus_args_model, + summary + ) syllabus = generate_syllabus(request_args, verbose=verbose) @@ -62,4 +71,4 @@ def executor(grade_level: str, logger.error(f"Failed to generate syllabus: {str(e)}") raise SyllabusGeneratorError(f"Failed to generate syllabus: {str(e)}") from e - return syllabus + return syllabus \ No newline at end of file diff --git a/app/tools/syllabus_generator/tests/test_core.py b/app/tools/syllabus_generator/tests/test_core.py index 38584dcb..b8586f62 100644 --- a/app/tools/syllabus_generator/tests/test_core.py +++ b/app/tools/syllabus_generator/tests/test_core.py @@ -1,11 +1,10 @@ import pytest from app.api.error_utilities import 
SyllabusGeneratorError from app.tools.syllabus_generator.core import executor - from dotenv import load_dotenv, find_dotenv + load_dotenv(find_dotenv()) -# Base attributes reused across all tests base_attributes = { "grade_level": "5th grade", "subject": "Math", @@ -19,7 +18,7 @@ "lang": "en" } -# PDF Tests +# Test 1: Basic functionality with PDF def test_executor_pdf_url_valid(): syllabus = executor( **base_attributes, @@ -27,206 +26,95 @@ def test_executor_pdf_url_valid(): file_type="pdf" ) assert isinstance(syllabus, dict) - -def test_executor_pdf_url_invalid(): - with pytest.raises(SyllabusGeneratorError) as exc_info: - executor( - **base_attributes, - file_url="https://filesamples.com/samples/document/pdf/sample1.pdf", - file_type=1 - ) - assert isinstance(exc_info.value, SyllabusGeneratorError) - -# CSV Tests -def test_executor_csv_url_valid(): - syllabus = executor( - **base_attributes, - file_url="https://filesamples.com/samples/document/csv/sample1.csv", - file_type="csv" - ) - assert isinstance(syllabus, dict) - -def test_executor_csv_url_invalid(): - with pytest.raises(SyllabusGeneratorError) as exc_info: - executor( - **base_attributes, - file_url="https://filesamples.com/samples/document/csv/sample1.csv", - file_type=1 - ) - assert isinstance(exc_info.value, SyllabusGeneratorError) - -# TXT Tests -def test_executor_txt_url_valid(): + # Check all expected sections are present + expected_sections = [ + "course_information", "learning_outcomes", "course_content", + "assessment_criteria", "course_schedule", "learning_resources", + "policies_procedures" + ] + for section in expected_sections: + assert section in syllabus, f"Missing section: {section}" + # Basic content check + assert "arithmetic" in syllabus["course_information"].lower(), "Course info should mention arithmetic" + +# Test 2: Default values for new fields +def test_executor_with_defaults(): syllabus = executor( **base_attributes, 
file_url="https://filesamples.com/samples/document/txt/sample1.txt", file_type="txt" ) assert isinstance(syllabus, dict) - -def test_executor_txt_url_invalid(): - with pytest.raises(SyllabusGeneratorError) as exc_info: - executor( - **base_attributes, - file_url="https://filesamples.com/samples/document/txt/sample1.txt", - file_type=1 - ) - assert isinstance(exc_info.value, SyllabusGeneratorError) - -# MD Tests -def test_executor_md_url_valid(): + assert "course_schedule" in syllabus + # Validate defaults in course_schedule + schedule_str = str(syllabus["course_schedule"]).lower() + assert "week" in schedule_str, "Should use default unit_time 'Week'" + assert "2025-03-01" in schedule_str, "Should use default start_date '2025-03-01'" + # Check learning outcomes (5-7 expected) + assert "learning_outcomes" in syllabus + outcomes = syllabus["learning_outcomes"] + assert isinstance(outcomes, list) and 5 <= len(outcomes) <= 7, "Should have 5-7 outcomes" + +# Test 3: Custom values for new fields +def test_executor_with_custom_time(): syllabus = executor( **base_attributes, - file_url="https://github.com/radicalxdev/kai-ai-backend/blob/main/README.md", - file_type="md" - ) - assert isinstance(syllabus, dict) - -def test_executor_md_url_invalid(): - with pytest.raises(SyllabusGeneratorError) as exc_info: - executor( - **base_attributes, - file_url="https://github.com/radicalxdev/kai-ai-backend/blob/main/README.md", - file_type=1 - ) - assert isinstance(exc_info.value, SyllabusGeneratorError) - -# PPTX Tests -def test_executor_pptx_url_valid(): - syllabus = executor( - **base_attributes, - file_url="https://scholar.harvard.edu/files/torman_personal/files/samplepptx.pptx", - file_type="pptx" - ) - assert isinstance(syllabus, dict) - -def test_executor_pptx_url_invalid(): - with pytest.raises(SyllabusGeneratorError) as exc_info: - executor( - **base_attributes, - file_url="https://scholar.harvard.edu/files/torman_personal/files/samplepptx.pptx", - file_type=1 - ) - assert 
isinstance(exc_info.value, SyllabusGeneratorError) - -# DOCX Tests -def test_executor_docx_url_valid(): - syllabus = executor( - **base_attributes, - file_url="https://filesamples.com/samples/document/docx/sample1.docx", - file_type="docx" - ) - assert isinstance(syllabus, dict) - -def test_executor_docx_url_invalid(): - with pytest.raises(SyllabusGeneratorError) as exc_info: - executor( - **base_attributes, - file_url="https://filesamples.com/samples/document/docx/sample1.docx", - file_type=1 - ) - assert isinstance(exc_info.value, SyllabusGeneratorError) - -# XLS Tests -def test_executor_xls_url_valid(): - syllabus = executor( - **base_attributes, - file_url="https://filesamples.com/samples/document/xls/sample1.xls", - file_type="xls" - ) - assert isinstance(syllabus, dict) - -def test_executor_xls_url_invalid(): - with pytest.raises(SyllabusGeneratorError) as exc_info: - executor( - **base_attributes, - file_url="https://filesamples.com/samples/document/xls/sample1.xls", - file_type=1 - ) - assert isinstance(exc_info.value, SyllabusGeneratorError) - -# XLSX Tests -def test_executor_xlsx_url_valid(): - syllabus = executor( - **base_attributes, - file_url="https://filesamples.com/samples/document/xlsx/sample1.xlsx", - file_type="xlsx" + file_url="https://filesamples.com/samples/document/txt/sample1.txt", + file_type="txt", + unit_time="Day", + unit_time_value=5, + start_date="2025-04-01" ) assert isinstance(syllabus, dict) - -def test_executor_xlsx_url_invalid(): - with pytest.raises(SyllabusGeneratorError) as exc_info: - executor( - **base_attributes, - file_url="https://filesamples.com/samples/document/xlsx/sample1.xlsx", - file_type=1 - ) - assert isinstance(exc_info.value, SyllabusGeneratorError) - -# XML Tests -def test_executor_xml_url_invalid(): - with pytest.raises(SyllabusGeneratorError) as exc_info: - executor( - **base_attributes, - file_url="https://filesampleshub.com/download/code/xml/dummy.xml", - file_type=1 - ) - assert isinstance(exc_info.value, 
SyllabusGeneratorError) - -# GDocs Tests -def test_executor_gdocs_url_invalid(): - with pytest.raises(SyllabusGeneratorError) as exc_info: - executor( - **base_attributes, - file_url="https://docs.google.com/document/d/1OWQfO9LX6psGipJu9LabzNE22us1Ct/edit", - file_type=1 - ) - assert isinstance(exc_info.value, SyllabusGeneratorError) - -# GSheets Tests -def test_executor_gsheets_url_invalid(): - with pytest.raises(SyllabusGeneratorError) as exc_info: - executor( - **base_attributes, - file_url="https://docs.google.com/spreadsheets/d/16OPtLLSfU/edit", - file_type=1 - ) - assert isinstance(exc_info.value, SyllabusGeneratorError) - -# GSlides Tests -def test_executor_gslides_url_invalid(): - with pytest.raises(SyllabusGeneratorError) as exc_info: - executor( - **base_attributes, - file_url="https://docs.google.com/spreadsheets/d/16OPtLLSfU/edit", - file_type=1 - ) - assert isinstance(exc_info.value, SyllabusGeneratorError) - -# GPDFs Tests -def test_executor_gpdfs_url_valid(): + assert "course_schedule" in syllabus + # Validate custom values in course_schedule + schedule_str = str(syllabus["course_schedule"]).lower() + assert "day" in schedule_str, "Should reflect custom unit_time 'Day'" + assert "2025-04-01" in schedule_str, "Should reflect custom start_date '2025-04-01'" + # Check course_content reflects unit_time_value + content_str = str(syllabus["course_content"]).lower() + assert len(syllabus["course_content"]) <= 5, "Course content should respect unit_time_value of 5" + +# Test 4: Dependency check (objectives influencing learning_outcomes) +def test_executor_objectives_dependency(): syllabus = executor( **base_attributes, - file_url="https://drive.google.com/file/d/1fUj1uWIMh6QZsPkt0Vs7mEd2VEqz3O8l/view", - file_type="gpdf" + file_url="https://filesamples.com/samples/document/txt/sample1.txt", + file_type="txt", + objectives="Master addition; Understand fractions" ) + assert "learning_outcomes" in syllabus + outcomes_str = 
str(syllabus["learning_outcomes"]).lower() + assert "addition" in outcomes_str, "Learning outcomes should reflect objectives" + # Note: This assumes you enhanced learning_outcomes_prompt to use {objectives} + +# Test 5: Minimal input with defaults +def test_executor_minimal_input(): + minimal_attributes = { + "grade_level": "6th grade", + "subject": "Science", + "course_description": "Intro to biology", + "objectives": "Learn basics", + "required_materials": "Textbook", + "grading_policy": "50% tests", + "policies_expectations": "Attend class", + "course_outline": "Week 1: Cells", + "additional_notes": "", + "lang": "en", + "file_url": "https://filesamples.com/samples/document/txt/sample1.txt", + "file_type": "txt" + } + syllabus = executor(**minimal_attributes) assert isinstance(syllabus, dict) + assert all(section in syllabus for section in [ + "course_information", "course_schedule" + ]), "Should generate all sections with minimal input" -def test_executor_gpdfs_url_invalid(): - with pytest.raises(SyllabusGeneratorError) as exc_info: +# Test 6: Error handling for invalid file_type +def test_executor_invalid_file_type(): + with pytest.raises(SyllabusGeneratorError): executor( **base_attributes, - file_url="https://drive.google.com/file/d/1fUj1uWIMh6QZsPkt0Vs7mEd2VEqz3O8l/view", - file_type=1 - ) - assert isinstance(exc_info.value, SyllabusGeneratorError) - -# MP3 Tests -def test_executor_mp3_url_invalid(): - with pytest.raises(SyllabusGeneratorError) as exc_info: - executor( - **base_attributes, - file_url="https://raw.githubusercontent.com/asleem/uploaded_files/main/dummy.mp3", - file_type=1 - ) - assert isinstance(exc_info.value, SyllabusGeneratorError) \ No newline at end of file + file_url="https://filesamples.com/samples/document/pdf/sample1.pdf", + file_type="invalid_type" + ) \ No newline at end of file diff --git a/app/tools/syllabus_generator/tools.py b/app/tools/syllabus_generator/tools.py index 3757e099..b19375f2 100644 --- 
a/app/tools/syllabus_generator/tools.py +++ b/app/tools/syllabus_generator/tools.py @@ -5,6 +5,7 @@ from langchain_google_genai import GoogleGenerativeAI from langchain_core.prompts import PromptTemplate from langchain_core.runnables import RunnableParallel +from langchain.chains import SequentialChain from app.services.schemas import SyllabusGeneratorArgsModel from fastapi import HTTPException @@ -23,6 +24,9 @@ def __init__(self, syllabus_generator_args: SyllabusGeneratorArgsModel, summary: self._additional_notes = syllabus_generator_args.additional_notes self._lang = syllabus_generator_args.lang self._summary = summary + self._unit_time = syllabus_generator_args.unit_time + self._unit_time_value = syllabus_generator_args.unit_time_value + self._start_date = syllabus_generator_args.start_date def to_dict(self) -> dict: return { @@ -37,181 +41,145 @@ def to_dict(self) -> dict: "additional_notes": self._additional_notes, "lang": self._lang, "summary": self._summary, + "unit_time": self._unit_time, + "unit_time_value": self._unit_time_value, + "start_date": self._start_date } class SyllabusGeneratorPipeline: def __init__(self, verbose=False): self.verbose = verbose self.model = GoogleGenerativeAI(model="gemini-1.5-pro") - self.parsers = { - "course_information": JsonOutputParser(pydantic_object=CourseInformation), - "course_description_objectives": JsonOutputParser(pydantic_object=CourseDescriptionObjectives), - "course_content": JsonOutputParser(pydantic_object=CourseContentItem), - "policies_procedures": JsonOutputParser(pydantic_object=PoliciesProcedures), - "assessment_grading_criteria": JsonOutputParser(pydantic_object=AssessmentGradingCriteria), - "learning_resources": JsonOutputParser(pydantic_object=LearningResource), - "course_schedule": JsonOutputParser(pydantic_object=CourseScheduleItem), - } def compile(self): - try: - prompts = { - "course_information": PromptTemplate( - template=( - "Generate a detailed and structured course information in {lang} based 
on:\n\n" - "Grade Level: {grade_level}\n" - "Subject: {subject}\n" - "Course Description: {course_description}\n" - "Summary: {summary}\n\n" - "Ensure the response is professional and comprehensive.\n{format_instructions}" - ), - input_variables=["grade_level", "subject", "course_description", "lang", "summary"], - partial_variables={"format_instructions": self.parsers["course_information"].get_format_instructions()}, - ), - "course_description_objectives": PromptTemplate( - template=( - "Develop detailed course objectives and intended learning outcomes in {lang}:\n\n" - "Objectives: {objectives}\n" - "Summary: {summary}\n\n" - "Provide measurable goals and realistic expectations for students.\n{format_instructions}" - ), - input_variables=["objectives", "lang", "summary"], - partial_variables={"format_instructions": self.parsers["course_description_objectives"].get_format_instructions()}, - ), - "course_content": PromptTemplate( - template=( - "Create a detailed course content structure in {lang}:\n\n" - "Course Outline: {course_outline}\n" - "Summary: {summary}\n\n" - "Include topics, time frames, and key learning points.\n{format_instructions}" - ), - input_variables=["course_outline", "lang", "summary"], - partial_variables={"format_instructions": self.parsers["course_content"].get_format_instructions()}, - ), - "policies_procedures": PromptTemplate( - template=( - "Draft clear and professional course policies and procedures in {lang}:\n\n" - "Grading Policy: {grading_policy}\n" - "Class Policies and Expectations: {policies_expectations}\n" - "Summary: {summary}\n\n" - "Ensure all rules and expectations are outlined clearly.\n{format_instructions}" - ), - input_variables=["grading_policy", "policies_expectations", "lang", "summary"], - partial_variables={"format_instructions": self.parsers["policies_procedures"].get_format_instructions()}, - ), - "assessment_grading_criteria": PromptTemplate( - template=( - "Define assessment methods and grading criteria in 
{lang}:\n\n" - "Grading Policy: {grading_policy}\n" - "Summary: {summary}\n\n" - "Ensure that assessment methods and the grading scale are precise and easy to understand.\n{format_instructions}" - ), - input_variables=["grading_policy", "lang", "summary"], - partial_variables={"format_instructions": self.parsers["assessment_grading_criteria"].get_format_instructions()}, - ), - "learning_resources": PromptTemplate( - template=( - "Generate a comprehensive list of recommended learning resources in {lang}:\n\n" - "Required Materials: {required_materials}\n" - "Summary: {summary}\n\n" - "Include titles, authors, and publication years of the materials.\n{format_instructions}" - ), - input_variables=["required_materials", "lang", "summary"], - partial_variables={"format_instructions": self.parsers["learning_resources"].get_format_instructions()}, - ), - "course_schedule": PromptTemplate( - template=( - "Construct a detailed course schedule in {lang}:\n\n" - "Course Outline: {course_outline}\n" - "Summary: {summary}\n\n" - "Ensure the schedule includes dates, activities, and key topics.\n{format_instructions}" - ), - input_variables=["course_outline", "lang", "summary"], - partial_variables={"format_instructions": self.parsers["course_schedule"].get_format_instructions()}, - ), - } - - chains = { - key: prompt | self.model | self.parsers[key] - for key, prompt in prompts.items() - } - - parallel_pipeline = RunnableParallel(branches=chains) - - if self.verbose: - logger.info("Successfully compiled the parallel pipeline.") - - except Exception as e: - logger.error(f"Failed to compile LLM pipeline: {e}") - raise HTTPException(status_code=500, detail="Failed to compile LLM pipeline.") - - return parallel_pipeline + # Sequential prompts with enhanced templates + course_info_prompt = PromptTemplate( + template=( + "Generate a structured course overview for {grade_level} {subject} in {lang}.\n" + "Include:\n- A brief course description based on: {course_description}\n" + "- Core 
topics derived from: {course_outline}\n" + "- Target learners (e.g., age group, prior knowledge) for {grade_level}.\n" + "Respond as a JSON object." + ), + input_variables=["grade_level", "subject", "course_description", "course_outline", "lang"] + ) + + learning_outcomes_prompt = PromptTemplate( + template=( + "Based on this course information: {course_info}, create 5-7 key learning outcomes " + "in {lang} that students should achieve by the end of the course. Make them specific, " + "measurable, and aligned with the course description and core topics.\n" + "Respond as a JSON object." + ), + input_variables=["course_info", "lang"] + ) + + course_content_prompt = PromptTemplate( + template=( + "Using these learning outcomes: {learning_outcomes}, generate a structured course plan " + "in {lang} for {unit_time_value} {unit_time}s. Include topics, subtopics, and estimated " + "time allocation for each, ensuring alignment with the outcomes.\n" + "Respond as a JSON object." + ), + input_variables=["learning_outcomes", "unit_time", "unit_time_value", "lang"] + ) + + assessment_criteria_prompt = PromptTemplate( + template=( + "Given this course content: {course_content}, develop a fair and balanced grading policy " + "in {lang}. Include different assessment types (e.g., homework, quizzes, exams) and their " + "respective weightage, ensuring they align with the course structure.\n" + "Respond as a JSON object." + ), + input_variables=["course_content", "lang"] + ) + + course_schedule_prompt = PromptTemplate( + template=( + "Using this course content: {course_content} and assessment criteria: {assessment_criteria}, " + "create a detailed course schedule in {lang} starting on {start_date} for {unit_time_value} " + "{unit_time}s. Ensure assessments align with learning progression and include specific dates.\n" + "Respond as a JSON object." 
+ ), + input_variables=["course_content", "assessment_criteria", "start_date", "unit_time", "unit_time_value", "lang"] + ) + + # Parallel prompts (independent sections) + learning_resources_prompt = PromptTemplate( + template=( + "Suggest relevant learning resources (textbooks, websites, articles, etc.) in {lang} " + "for a {grade_level} {subject} course based on: {required_materials} and {summary}.\n" + "Respond as a JSON object." + ), + input_variables=["grade_level", "subject", "required_materials", "summary", "lang"] + ) + + policies_procedures_prompt = PromptTemplate( + template=( + "Outline course policies and procedures in {lang}, including attendance, late submissions, " + "academic integrity, and classroom conduct, based on: {policies_expectations} and {course_info}.\n" + "Respond as a JSON object." + ), + input_variables=["policies_expectations", "course_info", "lang"] + ) + + # Sequential chain + course_info_chain = course_info_prompt | self.model + learning_outcomes_chain = learning_outcomes_prompt | self.model + course_content_chain = course_content_prompt | self.model + assessment_criteria_chain = assessment_criteria_prompt | self.model + course_schedule_chain = course_schedule_prompt | self.model + + sequential_chain = SequentialChain( + chains=[ + course_info_chain, + learning_outcomes_chain, + course_content_chain, + assessment_criteria_chain, + course_schedule_chain + ], + input_variables=["grade_level", "subject", "course_description", "course_outline", "lang", + "unit_time", "unit_time_value", "start_date"], + output_variables=["course_info", "learning_outcomes", "course_content", + "assessment_criteria", "course_schedule"] + ) + + # Parallel chain + parallel_chain = RunnableParallel( + learning_resources=learning_resources_prompt | self.model, + policies_procedures=policies_procedures_prompt | self.model + ) + + hybrid_pipeline = { + "sequential": sequential_chain, + "parallel": parallel_chain + } + + if self.verbose: + logger.info("Compiled 
enhanced hybrid syllabus generation pipeline.") + return hybrid_pipeline def generate_syllabus(request_args: SyllabusRequestArgs, verbose=True): try: pipeline = SyllabusGeneratorPipeline(verbose=verbose) - chain = pipeline.compile() - outputs = chain.invoke(request_args.to_dict()) - model = SyllabusSchema( - course_information=outputs["branches"]["course_information"], - course_description_objectives=outputs["branches"]["course_description_objectives"], - course_content=outputs["branches"]["course_content"], - policies_procedures=outputs["branches"]["policies_procedures"], - assessment_grading_criteria=outputs["branches"]["assessment_grading_criteria"], - learning_resources=outputs["branches"]["learning_resources"], - course_schedule=outputs["branches"]["course_schedule"], - ) - return dict(model) + hybrid_chain = pipeline.compile() + + inputs = request_args.to_dict() + sequential_output = hybrid_chain["sequential"].invoke(inputs) + parallel_output = hybrid_chain["parallel"].invoke(inputs) + + syllabus = { + "course_information": sequential_output["course_info"], + "learning_outcomes": sequential_output["learning_outcomes"], + "course_content": sequential_output["course_content"], + "assessment_criteria": sequential_output["assessment_criteria"], + "course_schedule": sequential_output["course_schedule"], + "learning_resources": parallel_output["learning_resources"], + "policies_procedures": parallel_output["policies_procedures"] + } + + return syllabus except Exception as e: logger.error(f"Failed to generate syllabus: {e}") - raise HTTPException(status_code=500, detail="Failed to generate syllabus from LLM.") - - -class CourseInformation(BaseModel): - course_title: str = Field(description="The course title") - grade_level: str = Field(description="The grade level") - description: str = Field(description="The course description") - -class CourseDescriptionObjectives(BaseModel): - objectives: List[str] = Field(description="The course objectives") - 
intended_learning_outcomes: List[str] = Field(description="The intended learning outcomes of the course") - -class CourseContentItem(BaseModel): - unit_time: str = Field(description="The unit of time for the course content") - unit_time_value: int = Field(description="The unit of time value for the course content") - topic: str = Field(description="The topic per unit of time for the course content") - -class PoliciesProcedures(BaseModel): - attendance_policy: str = Field(description="The attendance policy of the class") - late_submission_policy: str = Field(description="The late submission policy of the class") - academic_honesty: str = Field(description="The academic honesty policy of the class") - -class AssessmentMethod(BaseModel): - type_assessment: str = Field(description="The type of assessment") - weight: int = Field(description="The weight of the assessment in the final grade") - -class AssessmentGradingCriteria(BaseModel): - assessment_methods: List[AssessmentMethod] = Field(description="The assessment methods") - grading_scale: dict = Field(description="The grading scale") - -class LearningResource(BaseModel): - title: str = Field(description="The book title of the learning resource") - author: str = Field(description="The book author of the learning resource") - year: int = Field(description="The year of creation of the book") - -class CourseScheduleItem(BaseModel): - unit_time: str = Field(description="The unit of time for the course schedule item") - unit_time_value: int = Field(description="The unit of time value for the course schedule item") - date: str = Field(description="The date for the course schedule item") - topic: str = Field(description="The topic for the learning resource") - activity_desc: str = Field(description="The descrition of the activity for the learning resource") - -class SyllabusSchema(BaseModel): - course_information: CourseInformation = Field(description="The course information") - course_description_objectives: 
CourseDescriptionObjectives = Field(description="The objectives of the course") - course_content: List[CourseContentItem] = Field(description="The content of the course") - policies_procedures: PoliciesProcedures = Field(description="The policies procedures of the course") - assessment_grading_criteria: AssessmentGradingCriteria = Field(description="The asssessment grading criteria of the course") - learning_resources: List[LearningResource] = Field(description="The learning resources of the course") - course_schedule: List[CourseScheduleItem] = Field(description="The course schedule") \ No newline at end of file + raise HTTPException(status_code=500, detail="Syllabus generation failed.") \ No newline at end of file From 9ca7f60dff221d319c92666834cdb12949ff4e87 Mon Sep 17 00:00:00 2001 From: melisa48 <60372223+melisa48@users.noreply.github.com> Date: Thu, 13 Mar 2025 15:16:07 -0700 Subject: [PATCH 2/2] Mission 1 presentation generator. --- app/api/router.py | 80 ++++++- app/main.py | 50 ----- app/services/schemas.py | 56 ++++- app/tools/presentation_generator/core.py | 99 ++++---- .../presentation_generator/metadata.json | 50 +---- .../prompt/presentation-generator-outline.txt | 17 ++ .../prompt/presentation-generator-slides.txt | 26 +++ .../presentation_generator/tests/test_core.py | 196 ++++------------ app/tools/presentation_generator/tools.py | 212 ++++++++++-------- .../presentation_generator/tools/__init__.py | 0 .../tools/outline_generator.py | 86 +++++++ .../tools/slides_generator.py | 123 ++++++++++ app/tools/utils/tool_utilities.py | 10 +- app/tools/utils/tools_config.json | 8 + 14 files changed, 606 insertions(+), 407 deletions(-) delete mode 100644 app/main.py create mode 100644 app/tools/presentation_generator/prompt/presentation-generator-outline.txt create mode 100644 app/tools/presentation_generator/prompt/presentation-generator-slides.txt create mode 100644 app/tools/presentation_generator/tools/__init__.py create mode 100644 
app/tools/presentation_generator/tools/outline_generator.py create mode 100644 app/tools/presentation_generator/tools/slides_generator.py diff --git a/app/api/router.py b/app/api/router.py index 4e4baa73..8255970b 100644 --- a/app/api/router.py +++ b/app/api/router.py @@ -9,16 +9,92 @@ from app.services.logger import setup_logger from app.api.error_utilities import InputValidationError, ErrorResponse from app.tools.utils.tool_utilities import load_tool_metadata, execute_tool, finalize_inputs -from fastapi.responses import FileResponse -from starlette.background import BackgroundTask +from app.tools.presentation_generator.tools.slides_generator import SlidesGenerator +import uuid +from fastapi import FastAPI logger = setup_logger(__name__) router = APIRouter() +app = FastAPI() + +# Initialize presentation contexts in app state if not exists +if not hasattr(app.state, "presentation_contexts"): + app.state.presentation_contexts = {} @router.get("/") def read_root(): return {"Hello": "World"} +# Handles two-step presentation generation: +# 1. Generate outline with initial inputs +# 2. 
Generate slides using stored outline and inputs +@router.post("/generate-outline", response_model=Union[ToolResponse, ErrorResponse]) +async def generate_outline(data: ToolRequest, _ = Depends(key_check)): + try: + # Execute outline generation and store context for slides + request_data = data.tool_data + requested_tool = load_tool_metadata(request_data.tool_id) + request_inputs_dict = finalize_inputs(request_data.inputs, requested_tool['inputs']) + result = execute_tool(request_data.tool_id, request_inputs_dict) + + # Store in app state, to use as context for slides generation + presentation_id = str(uuid.uuid4()) + app.state.presentation_contexts[presentation_id] = { + "outline": result, + "inputs": request_inputs_dict + } + + return ToolResponse(data={ + "outline": result, + "presentation_id": presentation_id + }) + + except InputValidationError as e: + logger.error(f"InputValidationError: {e}") + return JSONResponse( + status_code=400, + content=jsonable_encoder(ErrorResponse(status=400, message=e.message)) + ) + + except HTTPException as e: + logger.error(f"HTTPException: {e}") + return JSONResponse( + status_code=e.status_code, + content=jsonable_encoder(ErrorResponse(status=e.status_code, message=e.detail)) + ) + +@router.post("/generate-slides/{presentation_id}", response_model=Union[ToolResponse, ErrorResponse]) +async def generate_slides(presentation_id: str, _ = Depends(key_check)): + try: + # Retrieve stored context and generate slides + context = app.state.presentation_contexts.get(presentation_id) + if not context: + raise HTTPException( + status_code=404, + detail="Presentation context not found" + ) + + slides = SlidesGenerator( + outline=context["outline"], + inputs=context["inputs"] + ).compile() + + return ToolResponse(data=slides) + + except InputValidationError as e: + logger.error(f"InputValidationError: {e}") + return JSONResponse( + status_code=400, + content=jsonable_encoder(ErrorResponse(status=400, message=e.message)) + ) + + except 
HTTPException as e: + logger.error(f"HTTPException: {e}") + return JSONResponse( + status_code=e.status_code, + content=jsonable_encoder(ErrorResponse(status=e.status_code, message=e.detail)) + ) + @router.post("/submit-tool", response_model=Union[ToolResponse, ErrorResponse]) async def submit_tool( data: ToolRequest, _ = Depends(key_check)): try: diff --git a/app/main.py b/app/main.py deleted file mode 100644 index 4a5c44e7..00000000 --- a/app/main.py +++ /dev/null @@ -1,50 +0,0 @@ -from fastapi import FastAPI, Request, Depends -from fastapi.responses import JSONResponse -from fastapi.exceptions import RequestValidationError -from fastapi.middleware.cors import CORSMiddleware -from contextlib import asynccontextmanager -from app.api.router import router -from app.services.logger import setup_logger -from app.api.error_utilities import ErrorResponse - -import os -from dotenv import load_dotenv, find_dotenv - -load_dotenv(find_dotenv()) - -logger = setup_logger(__name__) - -@asynccontextmanager -async def lifespan(app: FastAPI): - logger.info(f"Initializing Application Startup") - logger.info(f"Successfully Completed Application Startup") - - yield - logger.info("Application shutdown") - -app = FastAPI(lifespan = lifespan) -app.add_middleware( - CORSMiddleware, - allow_origins=["*"], - allow_credentials=True, - allow_methods=["*"], - allow_headers=["*"], -) - -@app.exception_handler(RequestValidationError) -async def validation_exception_handler(request: Request, exc: RequestValidationError): - errors = [] - for error in exc.errors(): - field = " -> ".join(str(loc) for loc in error['loc']) - message = error['msg'] - error_detail = f"Error in field '{field}': {message}" - errors.append(error_detail) - logger.error(error_detail) # Log the error details - - error_response = ErrorResponse(status=422, message=errors) - return JSONResponse( - status_code=422, - content=error_response.dict() - ) - -app.include_router(router) \ No newline at end of file diff --git 
a/app/services/schemas.py b/app/services/schemas.py index dda47587..b1a280ae 100644 --- a/app/services/schemas.py +++ b/app/services/schemas.py @@ -1,5 +1,5 @@ from pydantic import BaseModel, Field -from typing import Optional, List, Any, Literal, Union +from typing import Optional, List, Any, Literal, Union, Dict from enum import Enum from app.services.assistant_registry import AssistantInputs from app.services.tool_registry import BaseTool @@ -114,16 +114,15 @@ class ConnectWithThemArgs(BaseModel): lang: str = Field(..., description="The language in which the subject is being taught.") class PresentationGeneratorInput(BaseModel): - grade_level: str - n_slides: int - topic: str - objectives: str - additional_comments: str - objectives_file_url: str - objectives_file_type: str - additional_comments_file_url: str - additional_comments_file_type: str - lang: Optional[str] = "en" + instructionalLevel: str # Renamed from grade_level + slideCount: int # Renamed from n_slides + text: str # Renamed from topic + objectives: str = "" # Optional, kept as-is + additional_comments: str = "" # Optional, kept as-is + objectives_file_url: str = "" + objectives_file_type: str = "" + additional_comments_file_url: str = "" + additional_comments_file_type: str = "" class RubricGeneratorArgs(BaseModel): grade_level: Literal["pre-k", "kindergarten", "elementary", "middle", "high", "university", "professional"] @@ -156,4 +155,37 @@ class WritingFeedbackGeneratorArgs(BaseModel): criteria_file_type: str writing_to_review_file_url: str writing_to_review_file_type: str - lang: Optional[str] = "en" \ No newline at end of file + lang: Optional[str] = "en" + +# New output schemas for Presentation Generator +class SlideContent(BaseModel): + topic: str = Field(description="The topic or title of the slide") + content: Dict[str, Any] = Field( # Free-form mapping of section name to slide content (see example) + description="Structured content of the slide", + example={ + "main_points": ["point 1", "point 2"], + "examples": ["example 1",
"example 2"], + "details": "Additional explanation", + "visual_notes": "Suggested visuals" + } + ) + speaker_notes: str = Field( + description="Detailed notes for the presenter to assist in delivering the slide content", + example="Begin by introducing the topic, elaborate on the main points with examples, and transition smoothly to the next slide." + ) + class Config: + json_schema_extra = { + "example": { + "topic": "Introduction to World War II", + "content": { + "main_points": ["Causes of the war", "Key events"], + "examples": ["Treaty of Versailles", "Pearl Harbor"], + "details": "The war began in 1939...", + "visual_notes": "Timeline of events" + }, + "speaker_notes": "Start with a brief hook about global tensions, explain the causes concisely, and preview the key events we’ll cover next." + } + } + +class PresentationOutput(BaseModel): + slides: List[SlideContent] = Field(description="List of slides with their content and speaker notes") \ No newline at end of file diff --git a/app/tools/presentation_generator/core.py b/app/tools/presentation_generator/core.py index d738466a..c9b9314e 100644 --- a/app/tools/presentation_generator/core.py +++ b/app/tools/presentation_generator/core.py @@ -1,68 +1,71 @@ from app.utils.document_loaders import get_docs -from app.tools.presentation_generator.tools import PresentationGenerator -from app.services.schemas import PresentationGeneratorInput +from app.tools.outline_generator import executor as outline_executor +from app.tools.slides_generator import executor as slides_executor +from app.services.schemas import PresentationGeneratorArgs from app.services.logger import setup_logger from app.api.error_utilities import LoaderError, ToolExecutorError logger = setup_logger() -def executor(grade_level: str, - n_slides: int, - topic: str, - objectives: str, - additional_comments: str, - objectives_file_url: str, - objectives_file_type: str, - additional_comments_file_url: str, - additional_comments_file_type: str, - lang: str, 
- verbose=False): +def executor(instructionalLevel: str, + slideCount: int, + text: str, + objectives: str = "", + additional_comments: str = "", + objectives_file_url: str = "", + objectives_file_type: str = "", + additional_comments_file_url: str = "", + additional_comments_file_type: str = "", + verbose: bool = False): + """ + Execute the presentation generation process (outline only for this context). - try: - if(objectives_file_type): - logger.info(f"Generating docs. from {objectives_file_type}") - if(additional_comments_file_type): - logger.info(f"Generating docs. from {additional_comments_file_type}") - - docs = None + Args: + instructionalLevel (str): The educational level (e.g., Elementary, High School, University). + slideCount (int): Number of slides to generate (5-20 per PRD). + text (str): The topic or context for the presentation. + objectives (str, optional): Learning objectives. + additional_comments (str, optional): Extra notes. + objectives_file_url (str, optional): URL to a file with objectives. + objectives_file_type (str, optional): Type of the objectives file (e.g., pdf, gdoc). + additional_comments_file_url (str, optional): URL to a file with comments. + additional_comments_file_type (str, optional): Type of the comments file. + verbose (bool): Enable detailed logging for debugging. - def fetch_docs(file_url, file_type): - return get_docs(file_url, file_type, True) if file_url and file_type else None + Returns: + dict: The generated outline in JSON format. - objectives_docs = fetch_docs(objectives_file_url, objectives_file_type) - additional_comments_docs = fetch_docs(additional_comments_file_url, additional_comments_file_type) - - docs = ( - objectives_docs + additional_comments_docs - if objectives_docs and additional_comments_docs - else objectives_docs or additional_comments_docs - ) + Raises: + ToolExecutorError: If generation fails. 
+ """ + try: + # Optional document loading; the loaded docs are not yet passed to the outline generator + docs = None + if objectives_file_url and objectives_file_type: + logger.info(f"Generating docs from {objectives_file_type}") + docs = get_docs(objectives_file_url, objectives_file_type, verbose) + if additional_comments_file_url and additional_comments_file_type: + logger.info(f"Generating docs from {additional_comments_file_type}") + additional_docs = get_docs(additional_comments_file_url, additional_comments_file_type, verbose) + docs = docs + additional_docs if docs and additional_docs else additional_docs or docs - presentation_generator_args = PresentationGeneratorInput( - grade_level=grade_level, - n_slides=n_slides, - topic=topic, + # Generate the outline only; this executor does not build the slides + output = outline_executor( + instructionalLevel=instructionalLevel, + slideCount=slideCount, + text=text, objectives=objectives, additional_comments=additional_comments, - objectives_file_url=objectives_file_url, - objectives_file_type=objectives_file_type, - additional_comments_file_url=additional_comments_file_url, - additional_comments_file_type=additional_comments_file_type, - lang=lang + verbose=verbose ) - - output = PresentationGenerator(args=presentation_generator_args, verbose=verbose).generate_presentation(docs) - - logger.info(f"Presentation generated successfully") + logger.info("Outline generated successfully") + return output except LoaderError as e: - error_message = e + error_message = str(e) logger.error(f"Error in Presentation Generator Pipeline -> {error_message}") raise ToolExecutorError(error_message) - except Exception as e: error_message = f"Error in executor: {e}" logger.error(error_message) - raise ValueError(error_message) - - return output \ No newline at end of file + raise ToolExecutorError(error_message) \ No newline at end of file diff --git a/app/tools/presentation_generator/metadata.json
b/app/tools/presentation_generator/metadata.json index 7df7145e..85b67cfa 100644 --- a/app/tools/presentation_generator/metadata.json +++ b/app/tools/presentation_generator/metadata.json @@ -1,53 +1,21 @@ { "inputs": [ { - "label": "Grade Level", - "name": "grade_level", - "type": "text" + "label": "Instructional Level", + "name": "instructionalLevel", + "type": "select", + "options": ["Elementary", "High School", "University"] }, { "label": "Number of Slides", - "name": "n_slides", - "type": "number" + "name": "slideCount", + "type": "number", + "min": 5, + "max": 20 }, { "label": "Topic", - "name": "topic", - "type": "text" - }, - { - "label": "Standards/Objectives", - "name": "objectives", - "type": "text" - }, - { - "label": "Additional Comments", - "name": "additional_comments", - "type": "text" - }, - { - "label": "Standards/Objectives File URL", - "name": "objectives_file_url", - "type": "text" - }, - { - "label": "Standards/Objectives File Type", - "name": "objectives_file_type", - "type": "text" - }, - { - "label": "Additional Comments File URL", - "name": "additional_comments_file_url", - "type": "text" - }, - { - "label": "Additional Comments File Type", - "name": "additional_comments_file_type", - "type": "text" - }, - { - "label": "Language", - "name": "lang", + "name": "text", "type": "text" } ] diff --git a/app/tools/presentation_generator/prompt/presentation-generator-outline.txt b/app/tools/presentation_generator/prompt/presentation-generator-outline.txt new file mode 100644 index 00000000..281a3d9b --- /dev/null +++ b/app/tools/presentation_generator/prompt/presentation-generator-outline.txt @@ -0,0 +1,17 @@ +Generate a presentation outline with {slideCount} slide titles based on: +Text: {text} +Instructional Level: {instructionalLevel} +Objectives (if provided): {objectives} +Additional Comments (if provided): {additional_comments} +Context (if available, e.g., from uploaded documents): {context} + +Ensure the titles are: +- Specific and 
detailed, tailored to the instructional level (e.g., simpler for Elementary, analytical for University). +- Logically structured (e.g., introduction, key points, conclusion). +- Ready for direct use in a presentation. +- Informed by objectives, additional comments, and context if provided to enhance relevance and depth. + +Example output: +["Introduction to World War II", "Causes of the War", "Major Battles", "The Aftermath", "Lessons Learned"] + +{format_instructions} \ No newline at end of file diff --git a/app/tools/presentation_generator/prompt/presentation-generator-slides.txt b/app/tools/presentation_generator/prompt/presentation-generator-slides.txt new file mode 100644 index 00000000..85364a96 --- /dev/null +++ b/app/tools/presentation_generator/prompt/presentation-generator-slides.txt @@ -0,0 +1,26 @@ +Generate a complete presentation based on this outline: +Outline: {outline} +Instructional Level: {instructionalLevel} +Objectives (if provided): {objectives} +Additional Comments (if provided): {additional_comments} +Context (if available, e.g., from uploaded documents): {context} + +Provide: +- A main title reflecting the topic, informed by the outline and context if available. +- Detailed content for each slide (e.g., bullet points or paragraphs) tailored to the instructional level, using objectives and comments to guide depth and focus if provided. +- Assign a template for each slide (titleBody, titleBullets, twoColumn) based on content structure: + - Use "titleBullets" for lists or short points (e.g., newline-separated items or <50 words). + - Use "twoColumn" for longer, comparative content (e.g., >100 words). + - Use "titleBody" for paragraphs or default cases. +- Avoid vague placeholders; content must be ready for direct use in a presentation. 
+ +Example output: +{{ + "main_title": "World War II Overview", + "list_slides": [ + {{"title": "Introduction to World War II", "content": "World War II (1939-1945) was a global conflict involving major powers...", "template": "titleBody"}}, + {{"title": "Causes of the War", "content": "- Treaty of Versailles\n- Rise of Fascism\n- Economic Instability", "template": "titleBullets"}} + ] +}} + +{format_instructions} \ No newline at end of file diff --git a/app/tools/presentation_generator/tests/test_core.py b/app/tools/presentation_generator/tests/test_core.py index 2b2ed71c..a02151f9 100644 --- a/app/tools/presentation_generator/tests/test_core.py +++ b/app/tools/presentation_generator/tests/test_core.py @@ -1,186 +1,68 @@ import pytest from app.tools.presentation_generator.core import executor +from app.api.error_utilities import ToolExecutorError # Added this import -# Base attributes reused across all tests +# Base attributes reused across all tests, aligned with PRD's core inputs base_attributes = { - "grade_level": "5th grade", - "n_slides": 10, - "topic": "Mathematics", - "objectives": "", - "additional_comments": "", - "additional_comments_file_url": "https://docs.google.com/document/d/1IsTPJSgWMdD20tXMm1sXJSCc0xz9Kxmn/edit?usp=sharing&ouid=107052763106493355624&rtpof=true&sd=true", - "additional_comments_file_type": "gdoc", - "lang": "en" + "instructionalLevel": "High School", + "slideCount": 5, + "text": "World War II Overview" } -# PDF Tests -def test_executor_pdf_objectives_url_valid(): - presentation = executor( - **base_attributes, - objectives_file_url="https://filesamples.com/samples/document/pdf/sample1.pdf", - objectives_file_type="pdf" - ) +# Core Functionality Tests +def test_executor_basic_valid(): + presentation = executor(**base_attributes) assert isinstance(presentation, dict) + assert "main_title" in presentation + assert "list_slides" in presentation + assert len(presentation["list_slides"]) == base_attributes["slideCount"] + assert all("template"
in slide for slide in presentation["list_slides"]) -def test_executor_pdf_objectives_url_invalid(): +def test_executor_invalid_slide_count_below_range(): with pytest.raises(ValueError) as exc_info: - executor( - **base_attributes, - objectives_file_url="https://filesamples.com/samples/document/pdf/sample1.pdf", - objectives_file_type=1 - ) - assert isinstance(exc_info.value, ValueError) + executor(instructionalLevel="High School", slideCount=4, text="World War II") + assert "Number of slides must be between 5 and 20" in str(exc_info.value) -# CSV Tests -def test_executor_csv_objectives_url_valid(): - presentation = executor( - **base_attributes, - objectives_file_url="https://filesamples.com/samples/document/csv/sample1.csv", - objectives_file_type="csv" - ) - assert isinstance(presentation, dict) - -def test_executor_csv_objectives_url_invalid(): +def test_executor_invalid_slide_count_above_range(): with pytest.raises(ValueError) as exc_info: - executor( - **base_attributes, - objectives_file_url="https://filesamples.com/samples/document/csv/sample1.csv", - objectives_file_type=1 - ) - assert isinstance(exc_info.value, ValueError) - -# TXT Tests -def test_executor_txt_objectives_url_valid(): - presentation = executor( - **base_attributes, - objectives_file_url="https://filesamples.com/samples/document/txt/sample1.txt", - objectives_file_type="txt" - ) - assert isinstance(presentation, dict) + executor(instructionalLevel="High School", slideCount=21, text="World War II") + assert "Number of slides must be between 5 and 20" in str(exc_info.value) -def test_executor_txt_objectives_url_invalid(): +def test_executor_missing_text(): with pytest.raises(ValueError) as exc_info: - executor( - **base_attributes, - objectives_file_url="https://filesamples.com/samples/document/txt/sample1.txt", - objectives_file_type=1 - ) - assert isinstance(exc_info.value, ValueError) - -# MD Tests -def test_executor_md_objectives_url_valid(): - presentation = executor( - **base_attributes, 
- objectives_file_url="https://github.com/radicalxdev/kai-ai-backend/blob/main/README.md", - objectives_file_type="md" - ) - assert isinstance(presentation, dict) + executor(instructionalLevel="High School", slideCount=5, text="") + assert "Topic must be provided" in str(exc_info.value) -def test_executor_md_objectives_url_invalid(): +def test_executor_missing_instructional_level(): with pytest.raises(ValueError) as exc_info: - executor( - **base_attributes, - objectives_file_url="https://github.com/radicalxdev/kai-ai-backend/blob/main/README.md", - objectives_file_type=1 - ) - assert isinstance(exc_info.value, ValueError) - -# PPTX Tests -def test_executor_pptx_objectives_url_valid(): - presentation = executor( - **base_attributes, - objectives_file_url="https://scholar.harvard.edu/files/torman_personal/files/samplepptx.pptx", - objectives_file_type="pptx" - ) - assert isinstance(presentation, dict) + executor(instructionalLevel="", slideCount=5, text="World War II") + assert "Instructional level must be provided" in str(exc_info.value) -def test_executor_pptx_objectives_url_invalid(): - with pytest.raises(ValueError) as exc_info: - executor( - **base_attributes, - objectives_file_url="https://scholar.harvard.edu/files/torman_personal/files/samplepptx.pptx", - objectives_file_type=1 - ) - assert isinstance(exc_info.value, ValueError) - -# DOCX Tests -def test_executor_docx_objectives_url_valid(): - presentation = executor( - **base_attributes, - objectives_file_url="https://filesamples.com/samples/document/docx/sample1.docx", - objectives_file_type="docx" - ) - assert isinstance(presentation, dict) - -def test_executor_docx_objectives_url_invalid(): - with pytest.raises(ValueError) as exc_info: - executor( - **base_attributes, - objectives_file_url="https://filesamples.com/samples/document/docx/sample1.docx", - objectives_file_type=1 - ) - assert isinstance(exc_info.value, ValueError) - -# XLS Tests -def test_executor_xls_objectives_url_valid(): - presentation = 
executor( - **base_attributes, - objectives_file_url="https://filesamples.com/samples/document/xls/sample1.xls", - objectives_file_type="xls" - ) - assert isinstance(presentation, dict) - -def test_executor_xls_objectives_url_invalid(): - with pytest.raises(ValueError) as exc_info: - executor( - **base_attributes, - objectives_file_url="https://filesamples.com/samples/document/xls/sample1.xls", - objectives_file_type=1 - ) - assert isinstance(exc_info.value, ValueError) - -# XLSX Tests -def test_executor_xlsx_objectives_url_valid(): +# Optional File-Based Tests (Reduced Set) +def test_executor_pdf_objectives_url_valid(): presentation = executor( **base_attributes, - objectives_file_url="https://filesamples.com/samples/document/xlsx/sample1.xlsx", - objectives_file_type="xlsx" + objectives_file_url="https://filesamples.com/samples/document/pdf/sample1.pdf", + objectives_file_type="pdf" ) assert isinstance(presentation, dict) + assert "main_title" in presentation + assert len(presentation["list_slides"]) == base_attributes["slideCount"] -def test_executor_xlsx_objectives_url_invalid(): - with pytest.raises(ValueError) as exc_info: +def test_executor_pdf_objectives_url_invalid_type(): + with pytest.raises(ToolExecutorError): # Now defined with the import executor( **base_attributes, - objectives_file_url="https://filesamples.com/samples/document/xlsx/sample1.xlsx", - objectives_file_type=1 + objectives_file_url="https://filesamples.com/samples/document/pdf/sample1.pdf", + objectives_file_type=1 # Invalid type ) - assert isinstance(exc_info.value, ValueError) -# GPDF Tests -def test_executor_gpdf_objectives_url_valid(): +def test_executor_gdoc_additional_comments_url_valid(): presentation = executor( **base_attributes, - objectives_file_url="https://drive.google.com/file/d/1fUj1uWIMh6QZsPkt0Vs7mEd2VEqz3O8l/view", - objectives_file_type="gpdf" + additional_comments_file_url="https://docs.google.com/document/d/1IsTPJSgWMdD20tXMm1sXJSCc0xz9Kxmn/edit?usp=sharing", + 
additional_comments_file_type="gdoc" ) assert isinstance(presentation, dict) - -def test_executor_gpdf_objectives_url_invalid(): - with pytest.raises(ValueError) as exc_info: - executor( - **base_attributes, - objectives_file_url="https://drive.google.com/file/d/1fUj1uWIMh6QZsPkt0Vs7mEd2VEqz3O8l/view", - objectives_file_type=1 - ) - assert isinstance(exc_info.value, ValueError) - -# MP3 Tests -def test_executor_mp3_objectives_url_invalid(): - with pytest.raises(ValueError) as exc_info: - executor( - **base_attributes, - objectives_file_url="https://raw.githubusercontent.com/asleem/uploaded_files/main/dummy.mp3", - objectives_file_type=1 - ) - assert isinstance(exc_info.value, ValueError) + assert "main_title" in presentation + assert len(presentation["list_slides"]) == base_attributes["slideCount"] \ No newline at end of file diff --git a/app/tools/presentation_generator/tools.py b/app/tools/presentation_generator/tools.py index 06e1bbdc..1a5042a7 100644 --- a/app/tools/presentation_generator/tools.py +++ b/app/tools/presentation_generator/tools.py @@ -4,131 +4,161 @@ from app.services.logger import setup_logger from langchain_chroma import Chroma from langchain_core.prompts import PromptTemplate -from langchain_core.runnables import RunnablePassthrough, RunnableParallel +from langchain_core.runnables import RunnableParallel, RunnablePassthrough from langchain_core.output_parsers import JsonOutputParser -from langchain_google_genai import GoogleGenerativeAI +from google.cloud import vertexai +import vertexai.language_models as lm from langchain_google_genai import GoogleGenerativeAIEmbeddings from langchain_core.documents import Document logger = setup_logger(__name__) def read_text_file(file_path): - # Get the directory containing the script file script_dir = os.path.dirname(os.path.abspath(__file__)) - - # Combine the script directory with the relative file path absolute_file_path = os.path.join(script_dir, file_path) - with open(absolute_file_path, 'r') as file: 
return file.read() - -class PresentationGenerator: - def __init__(self, args=None, vectorstore_class=Chroma, prompt=None, embedding_model=None, model=None, parser=None, verbose=False): - default_config = { - "model": GoogleGenerativeAI(model="gemini-1.5-flash"), - "embedding_model": GoogleGenerativeAIEmbeddings(model='models/embedding-001'), - "parser": JsonOutputParser(pydantic_object=FullPresentation), - "prompt": read_text_file("prompt/presentation-generator-prompt.txt"), - "prompt_without_context": read_text_file("prompt/presentation-generator-without-context-prompt.txt"), - "vectorstore_class": Chroma - } - self.prompt = prompt or default_config["prompt"] - self.prompt_without_context = default_config["prompt_without_context"] - self.model = model or default_config["model"] - self.parser = parser or default_config["parser"] - self.embedding_model = embedding_model or default_config["embedding_model"] +class Slide(BaseModel): + title: str = Field(..., description="The title of the slide") + content: str = Field(..., description="The actual content of the slide") + template: str = Field(default="titleBody", description="Slide template (e.g., titleBody, titleBullets, twoColumn)") + +class FullPresentation(BaseModel): + main_title: str = Field(..., description="The main title of the presentation") + list_slides: List[Slide] = Field(..., description="The full collection of slides") - self.vectorstore_class = vectorstore_class or default_config["vectorstore_class"] - self.vectorstore, self.retriever, self.runner = None, None, None +class PresentationGenerator: + def __init__(self, args, vectorstore_class=Chroma, embedding_model=None, verbose=False): + vertexai.init(project="marvelai-project", location="us-central1") + self.model = lm.TextGenerationModel.from_pretrained("gemini-1.5-pro") + self.parser = JsonOutputParser(pydantic_object=FullPresentation) + self.prompt_outline = read_text_file("prompt/presentation-generator-outline.txt") + self.prompt_slides = 
read_text_file("prompt/presentation-generator-slides.txt") + self.embedding_model = embedding_model or GoogleGenerativeAIEmbeddings(model='models/embedding-001') + self.vectorstore_class = vectorstore_class self.args = args self.verbose = verbose + self.vectorstore = None - if vectorstore_class is None: raise ValueError("Vectorstore must be provided") - if args.grade_level is None: raise ValueError("Grade Level must be provided") - if args.n_slides is None: raise ValueError("Number of Slides must be provided") - if int(args.n_slides) < 1 or int(args.n_slides) > 10: - raise ValueError("Number must be between 1 and 10.") - if args.topic is None: raise ValueError("Topic must be provided") - if args.objectives is None: raise ValueError("Objectives must be provided") - if args.lang is None: raise ValueError("Language must be provided") + # Validate PRD-required inputs + if not args.text: raise ValueError("Topic must be provided") + if not args.slideCount: raise ValueError("Number of slides must be provided") + if int(args.slideCount) < 5 or int(args.slideCount) > 20: + raise ValueError("Number of slides must be between 5 and 20") + if not args.instructionalLevel: raise ValueError("Instructional level must be provided") def compile_with_context(self, documents: List[Document]): - # Return the chain - prompt = PromptTemplate( - template=self.prompt, - input_variables=["attribute_collection"], + # Outline prompt with context + outline_prompt = PromptTemplate( + template=self.prompt_outline, + input_variables=["text", "slideCount", "instructionalLevel", "objectives", "additional_comments", "context"], + partial_variables={"format_instructions": JsonOutputParser(pydantic_object=list).get_format_instructions()} + ) + slides_prompt = PromptTemplate( + template=self.prompt_slides, + input_variables=["outline", "instructionalLevel", "objectives", "additional_comments", "context"], partial_variables={"format_instructions": self.parser.get_format_instructions()} ) - if self.runner 
is None: + # Create vectorstore and retriever + if not self.vectorstore: logger.info(f"Creating vectorstore from {len(documents)} documents") if self.verbose else None self.vectorstore = self.vectorstore_class.from_documents(documents, self.embedding_model) - logger.info(f"Vectorstore created") if self.verbose else None - - self.retriever = self.vectorstore.as_retriever() - logger.info(f"Retriever created successfully") if self.verbose else None - - self.runner = RunnableParallel( - {"context": self.retriever, - "attribute_collection": RunnablePassthrough() - } - ) + retriever = self.vectorstore.as_retriever() + + # Phase 1: Generate outline with context + outline_chain = ( + RunnableParallel({ + "text": RunnablePassthrough(lambda x: x["text"]), + "slideCount": RunnablePassthrough(lambda x: x["slideCount"]), + "instructionalLevel": RunnablePassthrough(lambda x: x["instructionalLevel"]), + "objectives": RunnablePassthrough(lambda x: x["objectives"]), + "additional_comments": RunnablePassthrough(lambda x: x["additional_comments"]), + "context": retriever + }) + | outline_prompt + | self.model + | JsonOutputParser(pydantic_object=list) + ) - chain = self.runner | prompt | self.model | self.parser + # Phase 2: Generate slides with context + slides_chain = ( + RunnableParallel({ + "outline": RunnablePassthrough(), + "instructionalLevel": lambda x: self.args.instructionalLevel, + "objectives": lambda x: self.args.objectives or "", + "additional_comments": lambda x: self.args.additional_comments or "", + "context": retriever + }) + | slides_prompt + | self.model + | self.parser + ) - logger.info(f"Chain compilation complete") + return outline_chain | slides_chain - return chain - def compile_without_context(self): - # Return the chain - prompt = PromptTemplate( - template=self.prompt_without_context, - input_variables=["attribute_collection"], + outline_prompt = PromptTemplate( + template=self.prompt_outline, + input_variables=["text", "slideCount", "instructionalLevel", 
"objectives", "additional_comments"], + partial_variables={"format_instructions": JsonOutputParser(pydantic_object=list).get_format_instructions()} + ) + slides_prompt = PromptTemplate( + template=self.prompt_slides, + input_variables=["outline", "instructionalLevel", "objectives", "additional_comments"], partial_variables={"format_instructions": self.parser.get_format_instructions()} ) - chain = prompt | self.model | self.parser - - logger.info(f"Chain compilation complete") - - return chain - - def generate_presentation(self, documents: Optional[List[Document]]): - logger.info(f"Creating the Presentation") - - if(documents): - chain = self.compile_with_context(documents) - else: - chain = self.compile_without_context() - - input_parameters = ( - f"Grade Level: {self.args.grade_level}, " - f"Number of Slides: {self.args.n_slides+1 if self.args.n_slides>9 else self.args.n_slides}, " - f"Topic: {self.args.topic}, " - f"Standard/Objectives: {self.args.objectives}, " - f"Additional Comments: {self.args.additional_comments}, " - f"Language (YOU MUST RESPOND IN THIS LANGUAGE): {self.args.lang}" + # Phase 1: Generate outline + outline_chain = outline_prompt | self.model | JsonOutputParser(pydantic_object=list) + + # Phase 2: Generate slides + slides_chain = ( + RunnableParallel({ + "outline": RunnablePassthrough(), + "instructionalLevel": lambda x: self.args.instructionalLevel, + "objectives": lambda x: self.args.objectives or "", + "additional_comments": lambda x: self.args.additional_comments or "" + }) + | slides_prompt + | self.model + | self.parser ) - logger.info(f"Input parameters: {input_parameters}") - response = chain.invoke(input_parameters) + return outline_chain | slides_chain + + def generate_presentation(self, documents: Optional[List[Document]] = None): + logger.info("Creating the presentation") + chain = self.compile_with_context(documents) if documents else self.compile_without_context() + input_dict = { + "text": self.args.text, + "slideCount": 
self.args.slideCount, + "instructionalLevel": self.args.instructionalLevel, + "objectives": self.args.objectives or "", + "additional_comments": self.args.additional_comments or "" + } + if documents: + input_dict["context"] = "\n".join(doc.page_content for doc in documents) + response = chain.invoke(input_dict) + + # Optional: Enforce template assignment consistency + for slide in response["list_slides"]: + slide["template"] = self.assign_template(slide["content"]) logger.info(f"Generated response: {response}") - - if(documents): - if self.verbose: print(f"Deleting vectorstore") + if documents and self.vectorstore: + if self.verbose: print("Deleting vectorstore") self.vectorstore.delete_collection() - return response -class Slide(BaseModel): - title: str = Field(..., description="The title of the Slide") - content: str = Field(..., description="The content of the Slide. It must be the actual context, not simple indications") - suggestions: str = Field(..., description="""Suggestions for visual elements (e.g., charts, images, layouts) - that enhance understanding and engagement (ONLY IF NEEDED).""") - -class FullPresentation(BaseModel): - main_title: str = Field(..., description="The main title of the Presentation") - list_slides: List[Slide] = Field(..., description="The full collection of slides about the Presentation") \ No newline at end of file + def assign_template(self, content: str) -> str: + if "\n" in content or len(content.split()) < 50: + return "titleBullets" + elif len(content.split()) > 100: + return "twoColumn" + return "titleBody" + + # Optional: Enforce template assignment consistency + # for slide in response["list_slides"]: + # slide["template"] = self.assign_template(slide["content"]) diff --git a/app/tools/presentation_generator/tools/__init__.py b/app/tools/presentation_generator/tools/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/app/tools/presentation_generator/tools/outline_generator.py 
# Patch framing preserved from the original text (header of this new file):
# b/app/tools/presentation_generator/tools/outline_generator.py new file mode 100644 index 00000000..ada95545 --- /dev/null +++ b/app/tools/presentation_generator/tools/outline_generator.py @@ -0,0 +1,86 @@
"""Outline generation step of the presentation generator.

Builds a prompt from the request arguments, sends it to a Vertex AI model and
parses the JSON answer into ``{"slides": [{"topic", "description",
"transition"}, ...]}``.
"""
import os
from typing import List, Optional

import vertexai
from fastapi import HTTPException
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnableLambda
from pydantic import BaseModel, Field
from vertexai.generative_models import GenerativeModel

from app.services.logger import setup_logger
from app.services.schemas import PresentationGeneratorArgs  # Updated schema

logger = setup_logger(__name__)

# Vertex AI settings used by this tool.
VERTEX_PROJECT = "marvelai-project"
VERTEX_LOCATION = "us-central1"
MODEL_NAME = "gemini-1.5-pro"

# Allowed range for the requested number of slides (aligned with PRD).
MIN_SLIDES = 5
MAX_SLIDES = 20

OUTLINE_TEMPLATE = (
    "Generate a coherent presentation outline for {instructionalLevel} students.\n\n"
    "Topic: {text}\n"
    "Number of slides needed: {slideCount}\n"
    "Learning objectives: {objectives}\n"
    "Additional comments: {additional_comments}\n\n"
    "Create an outline where:\n"
    "1. Each slide has a clear topic\n"
    "2. Include a brief description of the content\n"
    "3. Add transitions between slides for smooth flow\n"
    "4. Ensure content builds progressively\n"
    "5. Match the instructional level's comprehension\n"
    "6. Generate exactly {slideCount} slides\n\n"
    "{format_instructions}"
)


class OutlineSlide(BaseModel):
    """One entry of the generated outline."""

    topic: str = Field(description="The main topic or title of the slide")
    description: str = Field(description="Brief description of the slide content")
    transition: str = Field(description="How this slide connects to the next one for smooth flow")


class OutlineSchema(BaseModel):
    """Top-level JSON shape the model is asked to return."""

    slides: List[OutlineSlide] = Field(description="List of slides with their topics and descriptions")


class OutlineGenerator:
    """Generate a slide-by-slide outline for a presentation request."""

    def __init__(self, args: PresentationGeneratorArgs, verbose=False):
        """Validate ``args`` and prepare the model and output parser.

        Raises:
            ValueError: if the topic, slide count or instructional level is
                missing, or the slide count is outside 5..20.
        """
        self.args = args
        self.verbose = verbose

        # Validate before touching the SDK so bad requests fail fast and
        # without any network or credential dependency.
        self._validate_args()

        vertexai.init(project=VERTEX_PROJECT, location=VERTEX_LOCATION)
        # NOTE(review): the original used
        # ``vertexai.language_models.TextGenerationModel`` for a Gemini model
        # name; Gemini models are served through ``GenerativeModel`` in the
        # Vertex AI SDK. Confirm against the deployed SDK version.
        self.model = GenerativeModel(MODEL_NAME)
        self.parser = JsonOutputParser(pydantic_object=OutlineSchema)

    def _validate_args(self) -> None:
        """Raise ``ValueError`` when a required request field is missing or invalid."""
        if not self.args.text:
            raise ValueError("Topic must be provided")
        if not self.args.slideCount:
            raise ValueError("Number of slides must be provided")
        slide_count = int(self.args.slideCount)
        if not MIN_SLIDES <= slide_count <= MAX_SLIDES:
            raise ValueError(f"Number of slides must be between {MIN_SLIDES} and {MAX_SLIDES}")
        if not self.args.instructionalLevel:
            raise ValueError("Instructional level must be provided")

    def _generate_text(self, prompt_value) -> str:
        """Send the rendered prompt to the model and return its text answer."""
        return self.model.generate_content(prompt_value.to_string()).text

    def compile(self) -> dict:
        """Run the outline chain and return ``{"slides": [...]}``.

        Raises:
            HTTPException: status 500 wrapping any failure while building or
                running the chain (the original exception is chained).
        """
        try:
            prompt = PromptTemplate(
                template=OUTLINE_TEMPLATE,
                input_variables=["instructionalLevel", "text", "slideCount", "objectives", "additional_comments"],
                partial_variables={"format_instructions": self.parser.get_format_instructions()},
            )
            # The SDK model is not a LangChain Runnable, so it is adapted with
            # RunnableLambda instead of being piped into the chain directly.
            chain = prompt | RunnableLambda(self._generate_text) | self.parser
            result = chain.invoke({
                "instructionalLevel": self.args.instructionalLevel,
                "text": self.args.text,
                "slideCount": self.args.slideCount,
                "objectives": self.args.objectives or "",
                "additional_comments": self.args.additional_comments or "",
            })
            if self.verbose:
                logger.info("Generated outline successfully")
            return dict(result)  # Returns {"slides": [...]}
        except Exception as e:
            logger.error(f"Failed to generate outline: {str(e)}")
            raise HTTPException(status_code=500, detail=f"Failed to generate outline: {str(e)}") from e


def executor(instructionalLevel: str, slideCount: int, text: str, objectives: str = "", additional_comments: str = "", verbose: bool = False) -> dict:
    """Tool entry point: build the request model and return the generated outline."""
    args = PresentationGeneratorArgs(
        instructionalLevel=instructionalLevel,
        slideCount=slideCount,
        text=text,
        objectives=objectives,
        additional_comments=additional_comments,
        objectives_file_url="",
        objectives_file_type="",
        additional_comments_file_url="",
        additional_comments_file_type="",
    )
    generator = OutlineGenerator(args, verbose)
    return generator.compile()

# Patch framing preserved from the original text (the next file's diff header
# continues on the following line):
# \ No newline at end of file diff --git a/app/tools/presentation_generator/tools/slides_generator.py
# Patch framing preserved from the original text (header of this new file):
# b/app/tools/presentation_generator/tools/slides_generator.py new file mode 100644 index 00000000..76dc10d8 --- /dev/null +++ b/app/tools/presentation_generator/tools/slides_generator.py @@ -0,0 +1,123 @@
"""Slide generation step of the presentation generator.

Takes an outline (``{"slides": [{"topic", "description", "transition"}, ...]}``)
plus the request inputs and asks the model for the content of every slide,
running one chain per slide through ``RunnableParallel``.
"""
from typing import Any, Dict, List

import vertexai
from fastapi import HTTPException
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnableLambda, RunnableParallel
from pydantic import BaseModel, Field
from vertexai.generative_models import GenerativeModel

from app.services.logger import setup_logger

logger = setup_logger(__name__)

# Vertex AI settings used by this tool.
VERTEX_PROJECT = "marvelai-project"
VERTEX_LOCATION = "us-central1"
MODEL_NAME = "gemini-1.5-pro"

# All request/outline text is injected through template variables, never by
# string interpolation into the template itself: PromptTemplate parses braces
# in the template, so a "{" or "}" inside user text would otherwise be read
# as a template variable and break formatting.
SLIDE_TEMPLATE = (
    "Creating a presentation for:\n"
    "Instructional Level: {instructional_level}\n"
    "Topic: {text}\n"
    "Learning Objectives: {objectives}\n"
    "Additional Comments: {additional_comments}\n\n"
    "Generate content for Slide {slide_number}:\n"
    "Topic: {slide_topic}\n"
    "Description: {slide_description}\n"
    "Transition: {slide_transition}\n\n"
    "Create engaging slide content that:\n"
    "1. Is appropriate for the instructional level\n"
    "2. Uses clear and concise language\n"
    "3. Includes key points and examples\n"
    "4. Creates an appropriate segue as per the transition\n"
    "5. Supports learning objectives\n"
    "6. Includes detailed speaker notes to assist the presenter in delivering the content\n\n"
    "Return a JSON object with 'topic', 'content', and 'speaker_notes' fields.\n"
    "- 'content' should contain structured data (e.g., main_points, examples, details, visual_notes).\n"
    "- 'speaker_notes' should provide specific guidance for the presenter (e.g., what to say, how to explain).\n"
    "Ensure the JSON is valid and complete.\n\n"
    "{format_instructions}"
)


class SlideContent(BaseModel):
    """Content of a single generated slide."""

    topic: str = Field(description="The topic or title of the slide")
    content: Dict[str, Any] = Field(
        description="Structured content of the slide",
        example={
            "main_points": ["point 1", "point 2"],
            "examples": ["example 1", "example 2"],
            "details": "Additional explanation",
            "visual_notes": "Suggested visuals"
        }
    )
    speaker_notes: str = Field(  # New field added
        description="Detailed notes for the presenter to assist in delivering the slide content",
        example="Begin by introducing the topic, elaborate on the main points with examples, and smoothly transition to the next slide by previewing its focus."
    )

    class Config:
        json_schema_extra = {
            "example": {
                "topic": "Vectors and Vector Operations",
                "content": {
                    "main_points": [
                        "Definition of vectors",
                        "Vector addition and multiplication",
                        "Geometric interpretation"
                    ],
                    "examples": [
                        "Feature vectors in ML",
                        "Velocity vectors in physics"
                    ],
                    "details": "A vector is an ordered list of numbers...",
                    "visual_notes": "Draw 2D vector addition diagram"
                },
                "speaker_notes": "Start with a simple definition of vectors, use the diagram to explain addition, and connect to real-world examples like ML and physics before moving to the next topic."
            }
        }


class PresentationSchema(BaseModel):
    """Full presentation: the ordered list of generated slides."""

    slides: List[SlideContent] = Field(description="List of slides with their content and speaker notes")


class SlidesGenerator:
    """Generate the content of every slide described by an outline."""

    def __init__(self, outline: dict, inputs: dict, verbose=False):
        """Store the outline and request inputs and prepare model and parser.

        ``outline`` is read as ``outline["slides"]``, a list of mappings with
        ``topic``, ``description`` and ``transition`` keys. ``inputs`` is read
        for ``instructionalLevel``, ``text``, ``objectives`` and
        ``additional_comments``.
        """
        vertexai.init(project=VERTEX_PROJECT, location=VERTEX_LOCATION)
        # NOTE(review): the original used
        # ``vertexai.language_models.TextGenerationModel`` for a Gemini model
        # name; Gemini models are served through ``GenerativeModel`` in the
        # Vertex AI SDK. Confirm against the deployed SDK version.
        self.model = GenerativeModel(MODEL_NAME)
        self.parser = JsonOutputParser(pydantic_object=SlideContent)
        self.outline = outline
        self.inputs = inputs
        self.verbose = verbose

    def _generate_text(self, prompt_value) -> str:
        """Send the rendered prompt to the model and return its text answer."""
        return self.model.generate_content(prompt_value.to_string()).text

    def _build_slide_chain(self, number: int, slide: dict):
        """Return the prompt -> model -> parser chain for one outline entry."""
        prompt = PromptTemplate(
            template=SLIDE_TEMPLATE,
            input_variables=[],
            # Everything is pre-bound so each chain can be invoked with {}.
            partial_variables={
                "instructional_level": self.inputs["instructionalLevel"],
                "text": self.inputs["text"],
                "objectives": self.inputs["objectives"],
                "additional_comments": self.inputs["additional_comments"],
                "slide_number": number,
                "slide_topic": slide["topic"],
                "slide_description": slide["description"],
                "slide_transition": slide["transition"],
                "format_instructions": self.parser.get_format_instructions(),
            },
        )
        # The SDK model is not a LangChain Runnable, so it is adapted with
        # RunnableLambda instead of being piped into the chain directly.
        return prompt | RunnableLambda(self._generate_text) | self.parser

    def compile(self) -> dict:
        """Generate all slides and return ``{"slides": [...]}`` as plain dicts.

        Slides keep the order of ``outline["slides"]``.

        Raises:
            HTTPException: status 500 wrapping any failure while building or
                running the chains (the original exception is chained).
        """
        try:
            # dict preserves insertion order, so iterating ``chains`` later
            # yields the slides in outline order.
            chains = {
                f"slide_{number}": self._build_slide_chain(number, slide)
                for number, slide in enumerate(self.outline["slides"], start=1)
            }
            results = RunnableParallel(**chains).invoke({})
            if self.verbose:
                logger.info(f"Generated {len(results)} slides with speaker notes successfully")

            presentation = PresentationSchema(
                slides=[results[key] for key in chains]
            )
            # model_dump() converts nested SlideContent models too; dict() on
            # the model would leave model instances inside the returned dict.
            return presentation.model_dump()
        except Exception as e:
            logger.error(f"Failed to generate slides: {str(e)}")
            raise HTTPException(status_code=500, detail=f"Failed to generate slides: {str(e)}") from e


def executor(outline: Dict[str, List[Dict[str, str]]], instructionalLevel: str, text: str, objectives: str = "", additional_comments: str = "", verbose: bool = False) -> dict:
    """Tool entry point: generate slide content for ``outline``."""
    inputs = {
        "instructionalLevel": instructionalLevel,
        "text": text,
        "objectives": objectives,
        "additional_comments": additional_comments,
    }
    generator = SlidesGenerator(outline, inputs, verbose)
    return generator.compile()

# Patch framing preserved from the original text (the next file's diff header
# and first hunk context continue on the following line):
# \ No newline at end of file diff --git a/app/tools/utils/tool_utilities.py b/app/tools/utils/tool_utilities.py index 6eb06867..1a664fd3 100644 --- a/app/tools/utils/tool_utilities.py +++ b/app/tools/utils/tool_utilities.py @@ -32,13 +32,11 @@ def load_tool_metadata(tool_id): logger.error(f"No tool configuration found for tool_id: {tool_id}") raise
HTTPException(status_code=404, detail="Tool configuration not found") - # Ensure the base path is relative to the current file's directory base_dir = os.path.dirname(os.path.abspath(__file__)) logger.debug(f"Base directory: {base_dir}") - # Construct the directory path - module_dir_path = os.path.join(base_dir, '../..', *tool_config['path'].split('.')[:-1]) # Go one level up and then to the path - module_dir_path = os.path.abspath(module_dir_path) # Get absolute path + module_dir_path = os.path.join(base_dir, '../..', *tool_config['path'].split('.')[:-1]) + module_dir_path = os.path.abspath(module_dir_path) logger.debug(f"Module directory path: {module_dir_path}") file_path = os.path.join(module_dir_path, tool_config['metadata_file']) @@ -82,7 +80,7 @@ def validate_file_input(input_name: str, input_value: Any): logger.error(error_message) raise InputValidationError(error_message) try: - ToolFile.model_validate(file_obj, from_attributes=True) # This will raise a validation error if the structure is incorrect + ToolFile.model_validate(file_obj, from_attributes=True) except ValidationError: error_message = f"Each item in the input `{input_name}` must be a valid ToolFile where a URL is provided" logger.error(error_message) @@ -131,7 +129,7 @@ def execute_tool(tool_id, request_inputs_dict): raise HTTPException(status_code=404, detail="Tool executable not found") execute_function = get_executor_by_name(tool_config['path']) - request_inputs_dict['verbose'] = True + #request_inputs_dict['verbose'] = True return execute_function(**request_inputs_dict) diff --git a/app/tools/utils/tools_config.json b/app/tools/utils/tools_config.json index 08c2e241..8f69cd96 100644 --- a/app/tools/utils/tools_config.json +++ b/app/tools/utils/tools_config.json @@ -38,5 +38,13 @@ "writing-feedback-generator": { "path": "tools.writing_feedback_generator.core", "metadata_file": "metadata.json" + }, + "outline_generator": { + "path": "tools.outline_generator", + "metadata_file": "metadata.json" 
+ }, + "slides_generator": { + "path": "tools.slides_generator", + "metadata_file": "metadata.json" } }