Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 65 additions & 0 deletions app/tools/text_rewriter/core.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
from typing import Any, Dict, Optional
from app.api.error_utilities import FileHandlerError
from app.utils.document_loaders import get_docs
from app.tools.text_rewriter.tools import TextRewriterPipeline
from app.services.logger import setup_logger

logger = setup_logger(__name__)

def executor(
    rewrite_instruction: str,
    text: Optional[str] = None,
    text_file_url: Optional[str] = None,
    text_file_type: Optional[str] = None,
    lang: str = "en"
) -> Dict[str, str]:
    """
    Executor function for rewriting text using a generative pipeline based on user instructions.

    All failures are caught and reported through the returned dictionary, so
    this function does not raise to its caller.

    Args:
        rewrite_instruction (str): Instructions for how to rewrite the text (e.g. 'Simplify', 'Paraphrase', 'Translate to French'). Must be a non-empty string.
        text (Optional[str]): Plain text input.
        text_file_url (Optional[str]): URL of the file to load (CSV, PDF, DOCX, PPT, TXT, etc.).
        text_file_type (Optional[str]): Type of the document ('pdf', 'docx', 'txt', etc.). Required whenever 'text_file_url' is given.
        lang (str): Language code for output.

    Returns:
        Dict[str, str]: Contains 'status' and either 'rewritten_text' (on success) or 'message' (on error).
    """
    try:
        # Validate every input before doing any I/O or model work.
        # The instruction is the leading words of the prompt; without it the
        # model would receive a request that starts with " the following ...".
        if not isinstance(rewrite_instruction, str) or not rewrite_instruction.strip():
            raise ValueError("'rewrite_instruction' must be a non-empty string.")
        if not text_file_url and not text:
            raise ValueError("Either 'text_file_url' or 'text' must be provided.")
        if text_file_url and not text_file_type:
            raise ValueError("If 'text_file_url' is provided, 'text_file_type' must also be provided.")
        if text_file_url and not isinstance(text_file_type, str):
            raise ValueError("Unsupported text_file_type: must be a string.")

        # Load documents if URL provided (text_file_type is a non-empty str here)
        docs = []
        if text_file_url:
            logger.info("Text rewriting started with document loading...")
            docs = get_docs(text_file_url, text_file_type, verbose=True)

        # Initialize pipeline and generate rewrite
        pipeline = TextRewriterPipeline(
            rewrite_instruction=rewrite_instruction,
            text=text or "",
            text_file_url=text_file_url or "",
            text_file_type=text_file_type or "",
            lang=lang
        )
        rewritten = pipeline.rewrite(docs)

        logger.info("Text rewriting completed successfully.")
        return {"status": "success", "rewritten_text": rewritten}

    except ValueError as ve:
        # Validation problems are user-correctable, so the message is passed through.
        logger.error("ValueError during text rewriting: %s", ve)
        return {"status": "error", "message": str(ve)}
    except FileNotFoundError as fnf:
        logger.error("FileNotFoundError during text rewriting: %s", fnf)
        return {"status": "error", "message": "File not found. Please check the text_file_url or file path."}
    except Exception as e:
        # Top-level boundary: keep the traceback in the logs, but return only
        # a generic message so internal details are not exposed to the caller.
        logger.exception("Unexpected error during text rewriting: %s", e)
        return {"status": "error", "message": "An unexpected error occurred. Please try again."}
34 changes: 34 additions & 0 deletions app/tools/text_rewriter/metadata.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
{
"inputs": [
{
"label": "Rewrite Instruction (e.g., 'Simplify', 'Paraphrase', 'Translate to French')",
"name": "rewrite_instruction",
"type": "text",
"required": true
},
{
"label": "Direct Text Input",
"name": "text",
"type": "text",
"required": false
},
{
"label": "URL of the File (CSV, PDF, DOCX, PPT, TXT, etc.)",
"name": "text_file_url",
"type": "text",
"required": false
},
{
"label": "Type of the File",
"name": "text_file_type",
"type": "text",
"required": false
},
{
"label": "Language",
"name": "lang",
"type": "text",
"required": false
}
]
}
65 changes: 65 additions & 0 deletions app/tools/text_rewriter/tests/tests_core.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
from typing import Any, Dict, Optional
from app.api.error_utilities import FileHandlerError
from app.utils.document_loaders import get_docs
from app.tools.text_rewriter.tools import TextRewriterPipeline
from app.services.logger import setup_logger

logger = setup_logger(__name__)

def executor(
    rewrite_instruction: str,
    text: Optional[str] = None,
    text_file_url: Optional[str] = None,
    text_file_type: Optional[Any] = None,
    lang: str = "en"
) -> Dict[str, str]:
    """
    Run the text-rewriting pipeline on direct text or on a file fetched by URL.

    Any exception raised while validating, loading or rewriting is converted
    into an error dictionary instead of propagating to the caller.

    Args:
        rewrite_instruction (str): How the content should be rewritten (e.g. 'Simplify', 'Paraphrase', 'Translate to French').
        text (Optional[str]): Plain text input.
        text_file_url (Optional[str]): URL of the file to load (CSV, PDF, DOCX, PPT, TXT, etc.).
        text_file_type (Optional[Any]): Type of the document ('pdf', 'docx', 'txt', etc.); rejected unless it is a string when a URL is given.
        lang (str): Language code for output.

    Returns:
        Dict[str, str]: 'status' plus 'rewritten_text' on success, or 'status' plus 'message' on error.
    """
    try:
        url_given = bool(text_file_url)

        # Input validation
        if not (url_given or text):
            raise ValueError("Either 'text_file_url' or 'text' must be provided.")
        if url_given and not text_file_type:
            raise ValueError("If 'text_file_url' is provided, 'text_file_type' must also be provided.")

        # Document loading: past the checks above, a URL implies a truthy file type
        loaded_docs = []
        if url_given:
            if not isinstance(text_file_type, str):
                raise ValueError("Unsupported text_file_type: must be a string.")
            logger.info("Text rewriting started with document loading...")
            loaded_docs = get_docs(text_file_url, str(text_file_type), verbose=True)

        # Pipeline construction and rewrite
        pipeline_kwargs = {
            "rewrite_instruction": rewrite_instruction,
            "text": text or "",
            "text_file_url": text_file_url or "",
            "text_file_type": "" if text_file_type is None else str(text_file_type),
            "lang": lang,
        }
        result_text = TextRewriterPipeline(**pipeline_kwargs).rewrite(loaded_docs)

        logger.info("Text rewriting completed successfully.")
        return {"status": "success", "rewritten_text": result_text}

    except ValueError as value_err:
        reason = str(value_err)
        logger.error(f"ValueError during text rewriting: {reason}")
        return {"status": "error", "message": reason}
    except FileNotFoundError as missing_err:
        reason = str(missing_err)
        logger.error(f"FileNotFoundError during text rewriting: {reason}")
        return {"status": "error", "message": "File not found. Please check the text_file_url or file path."}
    except Exception as unexpected_err:
        reason = str(unexpected_err)
        logger.error(f"Unexpected error during text rewriting: {reason}")
        return {"status": "error", "message": "An unexpected error occurred. Please try again."}
67 changes: 67 additions & 0 deletions app/tools/text_rewriter/tools.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
from typing import List, Optional
from langchain_core.documents import Document
from langchain_google_genai import ChatGoogleGenerativeAI
from app.services.logger import setup_logger

logger = setup_logger(__name__)

class TextRewriterPipeline:
    """
    A pipeline for rewriting text or documents according to user-provided instructions.

    The prompt sent to the model has the form
    "<rewrite_instruction> the following <subject> in language '<lang>':" followed
    by the content to rewrite.
    """

    def __init__(
        self,
        rewrite_instruction: str,
        text: Optional[str] = None,
        text_file_url: Optional[str] = None,
        text_file_type: Optional[str] = None,
        lang: str = "en",
    ):
        """
        Store the rewrite request and create the chat model.

        Args:
            rewrite_instruction (str): Leading words of the prompt (e.g. 'Simplify').
            text (Optional[str]): Raw text to rewrite when no documents are supplied.
            text_file_url (Optional[str]): File URL if provided (stored only; not read here).
            text_file_type (Optional[str]): File type if provided (stored only; not read here).
            lang (str): Language code inserted into the prompt.
        """
        self.rewrite_instruction = rewrite_instruction
        self.text = text  # Raw text input
        self.text_file_url = text_file_url  # File URL if provided
        self.text_file_type = text_file_type
        self.lang = lang

        # Initialize the LLM (Google Gemini)
        self.model = ChatGoogleGenerativeAI(model="gemini-1.5-pro")

    def rewrite(self, docs: List[Document]) -> str:
        """
        Main entry point for rewriting content.
        If docs are provided, rewrite from those documents; otherwise, rewrite raw text.

        Returns:
            str: The rewritten text, or an empty string when there is neither
            a document nor raw text to rewrite.
        """
        if docs:
            logger.info("Rewriting content from documents.")
            return self._rewrite_docs(docs)

        if self.text:
            logger.info("Rewriting direct text input.")
            return self._rewrite_text(self.text)

        logger.warning("No documents or text provided for rewriting.")
        return ""

    def _rewrite_docs(self, docs: List[Document]) -> str:
        """
        Combine document contents and invoke the LLM with rewrite instructions.
        """
        content = "\n\n".join(doc.page_content for doc in docs)
        return self._generate("document content", content)

    def _rewrite_text(self, text: str) -> str:
        """
        Invoke the LLM with rewrite instructions for raw text input.
        """
        return self._generate("text", text)

    def _generate(self, subject: str, content: str) -> str:
        """
        Build the rewrite prompt for `content`, invoke the model and return its text.

        Args:
            subject (str): Noun phrase describing the content in the prompt
                ('text' or 'document content').
            content (str): The material to rewrite.
        """
        prompt = (
            f"{self.rewrite_instruction} the following {subject} "
            f"in language '{self.lang}':\n\n{content}"
        )
        response = self.model.invoke([prompt])
        return self._extract_text(response)

    @staticmethod
    def _extract_text(response: Any) -> str:
        """
        Return only the generated text from a model response.

        Calling str() on a chat-model message object yields the message's
        string representation (content plus metadata), not the rewritten
        text, so the `content` attribute is read instead. Content may be a
        plain string or a list of parts (strings, or dicts carrying a 'text'
        entry); list parts are concatenated. Objects without a `content`
        attribute fall back to str().
        """
        content = getattr(response, "content", response)
        if isinstance(content, str):
            return content
        if isinstance(content, list):
            pieces = []
            for part in content:
                if isinstance(part, str):
                    pieces.append(part)
                elif isinstance(part, dict) and isinstance(part.get("text"), str):
                    pieces.append(part["text"])
            return "".join(pieces)
        return str(content)