Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 1 addition & 6 deletions .github/workflows/automation.yml
Original file line number Diff line number Diff line change
Expand Up @@ -64,12 +64,7 @@ jobs:
run: npm ci

- name: Run WDIO Tests
env:
WP_URL: ${{ secrets.WP_URL }}
WP_USERNAME: ${{ secrets.WP_USERNAME }}
WP_PASSWORD: ${{ secrets.WP_PASSWORD }}
OBOT_URL: ${{ secrets.OBOT_URL }}
GITLAB_TOKEN: ${{ secrets.GITLAB_TOKEN }}
env: ${{ secrets }}
run: |
npm run wdio:byScenario
npm run eval
Expand Down
4 changes: 0 additions & 4 deletions auto_eval.ts
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@ interface GradeInfo {

interface ToolData {
responses?: string[];
errors?: string[];
task_done?: boolean | null;
failure_reason?: string[];
status?: string;
Expand Down Expand Up @@ -118,10 +117,7 @@ async function enhanceReportWithEval(
// Merge reasons
const reasons: string[] = [];
if (gradeInfo.reason) reasons.push(gradeInfo.reason);
if (toolData.errors?.length) reasons.push(...toolData.errors);

toolData.failure_reason = reasons;
delete toolData.errors;

// Set status based on grading
if (gradeInfo.result === "FAILURE") toolData.status = "Failure";
Expand Down
32 changes: 6 additions & 26 deletions src/core/mcpFunc.ts
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ export async function sendPromptValidateAndCollect(promptText: string, toolList:

// Send and wait for reply
const reply = await sendPromptAndWaitForReply(promptText);
await browser.pause(2000);
await browser.pause(10000);

// Wait until a new message-content div appears
await browser.waitUntil(async () => {
Expand Down Expand Up @@ -98,28 +98,12 @@ export async function sendPromptValidateAndCollect(promptText: string, toolList:
const currReply = promptReplies[await promptReplies.length - 1];
if (!currReply) throw new Error(`No reply container found even after waiting for prompt: "${promptText}"`);

// Validation regex
const successRegex = /(success|completed|connected|created|retrieved|posted|updated|closed|deleted|functioning|valid|available|ready to use)/i;
const failureRegex = /(not valid|failed|error|cannot access|do not have|insufficient|not available|required|troubleshooting)/i;

const hasSuccess = successRegex.test(reply);
const hasFailure = failureRegex.test(reply);

let errorMessage = '';
if (!hasSuccess && !hasFailure) {
errorMessage = `No success or actionable failure detected in prompt #${index + 1} response.`;
}

console.log(`Prompt #${index + 1}: Tools used: ${toolsTexts.length ? toolsTexts.join(', ') : 'None'} | Status: ${hasSuccess ? 'Success' : (hasFailure ? 'Failure' : 'Unknown')}`);

// Return data for reporting
return {
prompt: promptText,
reply,
replyElement: currReply,
tools: toolsTexts,
status: hasSuccess ? 'Success' : (hasFailure ? 'Failure' : 'Unknown'),
error: errorMessage || null,
};
}

Expand All @@ -131,13 +115,13 @@ function maxStatus(s1: string, s2: string): string {
export function aggregateToolResponses(promptResults: any[]) {
const report: Record<string, {
promptText: string,
tools: Record<string, { responses: string[]; status: string; errors: string[] }>
tools: Record<string, { responses: string[] }>
}> = {};

for (let i = 0; i < promptResults.length; i++) {
const result = promptResults[i];
const { prompt, tools, reply, status, error } = result;
if (!reply && !error) continue;
const { prompt, tools, reply } = result;
if (!reply) continue;

const promptKey = `Prompt #${i + 1}`;

Expand All @@ -153,14 +137,10 @@ export function aggregateToolResponses(promptResults: any[]) {

for (const tool of toolsToUse) {
if (!report[promptKey].tools[tool]) {
report[promptKey].tools[tool] = { responses: [], status: 'Unknown', errors: [] };
report[promptKey].tools[tool] = { responses: []};
}

if (reply) report[promptKey].tools[tool].responses.push(reply);
if (error) report[promptKey].tools[tool].errors.push(error);

report[promptKey].tools[tool].status =
maxStatus(status, report[promptKey].tools[tool].status);
}
}

Expand All @@ -170,7 +150,7 @@ export function aggregateToolResponses(promptResults: any[]) {
export function saveMCPReport(mcpName: string, reportJson: any) {
const folderName = `MCP Server Reports`;
const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
const fileName = `${mcpName}_MCP_Report_${timestamp}.json`;
const fileName = `${mcpName.toLowerCase().replace(/\s+/g, '_')}_MCP_Report_${timestamp}.json`;
const dirPath = path.join(process.cwd(), folderName);
const filePath = path.join(dirPath, fileName);

Expand Down
69 changes: 59 additions & 10 deletions src/core/selectors.ts
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,11 @@ const Selectors = {
tableInput:'//pre[contains(@class, "whitespace-pre-wrap")]//span[contains(@class, "text-gray-500")]',
showDetails2: '(//button[text()="Show Details"])[2]',
showDetails3: '(//button[text()="Show Details"])[3]',
connectionsList: `//div[@class="flex flex-col"]`,
currentProjectButton: `//span[text()="Project"]/ancestor::button`,
createNewProjectButton: `//button[text()=" Create New Project"]`,
inputNewProjectName: '//input[@id="project-name"]',
saveButton: '//button[text()="Save"]',

admin:{
oktaLogin:'//button[normalize-space(.//div) = "Sign in with Okta"]',
Expand Down Expand Up @@ -210,22 +215,66 @@ const Selectors = {
clickChatObot: '//button[normalize-space(text())="Chat"]',
connectorbtn: '//p[normalize-space(text())="Connectors"]/following-sibling::button',
mcpSearchInput: '//input[normalize-space(@placeholder)="Search by name..."]',
// selectMCPServer: '//p[normalize-space(text())="WordPress1"]',
selectMCPServer: (option: string) => `//p[normalize-space(text())="${option}"]/ancestor::div[contains(@class, 'flex')]/button`,
wpSiteURL: '//input[normalize-space(@id)="WORDPRESS_SITE"]',
wpUsername: '//input[normalize-space(@id)="WORDPRESS_USERNAME"]',
wpPassword: '//input[normalize-space(@id)="WordPress App Password"]',
btnClick: (option: string) => `//button[normalize-space(text())="${option}"]`,
promptInput: '//div[@class="plaintext-editor text-md relative w-full flex-1 grow resize-none p-2 leading-8 outline-none"]',
// submitPrompt: '//button[@type="submit"]',
// obotInput: '//div[@class="ProseMirror editor"]',
gitlabToken: '//input[@name="GitLab Personal Access Token"]',
// messageContainer: "//div[contains(@class, 'flex-1') and contains(@class, 'flex-col') and contains(@class, 'justify-start') and contains(@class, 'gap-8')]",
obotInput: "//div[contains(@class,'ProseMirror') and @contenteditable='true']",
submitPrompt: '//button[@type="submit"]',
lastBotReply: '//div[@class="message-content"]',
messageContainer: "//div[contains(@class, 'flex-1') and contains(@class, 'flex-col') and contains(@class, 'justify-start') and contains(@class, 'gap-8')]"

messageContainer: "//div[contains(@class, 'flex-1') and contains(@class, 'flex-col') and contains(@class, 'justify-start') and contains(@class, 'gap-8')]",
wordpressMCP:{
wpSiteURL: '//input[normalize-space(@id)="WORDPRESS_SITE"]',
wpUsername: '//input[normalize-space(@id)="WORDPRESS_USERNAME"]',
wpPassword: '//input[normalize-space(@id)="WordPress App Password"]',
},
gitlabMCP:{
gitlabToken: '//input[@name="GitLab Personal Access Token"]',
},
bigQuery:{
googleCloudProjectID: '//input[@id="GOOGLE_CLOUD_PROJECT"]',
googleCloudCredentials: '//input[@name="Google Application Credentials"]//following-sibling::div[1]',
},
datadog:{
datadogAPIKey: `//input[@id="Datadog API Key"]`,
datadogAPPKey: `//input[@id="Datadog App Key"]`,
},
databricks:{
utility:{
workspaceHostname: `//input[@id="DATABRICKS_WORKSPACE_URL"]`,
functionCatalog: `//input[@id="DATABRICKS_FUNCTIONS_CATALOG"]`,
functionalSchema: `//input[@id="DATABRICKS_FUNCTIONS_SCHEMA"]`,
PAT: `//input[@id="Personal Access Token"]`,
},
vector:{
vectorCatalog: `//input[@id="DATABRICKS_VECTOR_SEARCH_CATALOG"]`,
vectorSchema: `//input[@id="DATABRICKS_VECTOR_SEARCH_SCHEMA"]`,
},
genie: {
genieSpaceID: `//input[@id="DATABRICKS_GENIE_SPACE_ID"]`
}
},
brave: {
braveAPIKey: `//input[@id="Brave API Key"]`
},
// XPath locators for the Chroma Cloud configuration inputs, matched by element id.
chromaCloud: {
tenentID: `//input[@id="CHROMA_TENANT"]`,
// Needs the leading `//` like its siblings: without it the string is not an
// XPath expression, and `[@id=...]` is not valid CSS either, so the lookup fails.
DBName: `//input[@id="CHROMA_DATABASE"]`,
APIKey: `//input[@id="Chroma Cloud API Key"]`
},
fireCrawl: {
API_key: `//input[@id="Firecrawl API Key"]`
},
gitMCP: {
urlLink: `//input[@id="url-manifest-url"]`
},
redis: {
urlLink: `//input[@id="REDIS_URI"]`
},
postman: {
hostURL: `//input[@id="HOST_URL"]`,
toolCOnfig: `//input[@id="TOOL_CONFIGURATION"]`,
postmanAPIKey: `//input[@id="Postman API Key"]`,
}
}
}
export default Selectors;
20 changes: 20 additions & 0 deletions src/data/bigquery_toolbox.MCP.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
"prompts": [
"Is BigQuery Toolbox connected?",
"Retrieve all columns from the table company_data.Employees where Salary is greater than 60000.",
"List the first 10 employees' first and last names from company_data.Employees.",
"Count the total number of employees in company_data.Employees.",
"Find the average salary of all employees in company_data.Employees.",
"Find all employees from company_data.Employees who were hired after January 1, 2023.",
"Get the total salary amount paid to all employees in company_data.Employees.",
"List employees from company_data.Employees with their email addresses ordered by HireDate descending.",
"Use service account credentials to query the company_data.Employees table.",
"Run SELECT * FROM company_data.Employees LIMIT 5.",
"Find the employee with the highest salary in company_data.Employees.",
"Find the employee with the lowest salary in company_data.Employees.",
"Find the number of employees hired per year from company_data.Employees."
],
"tools": [
"query"
]
}
25 changes: 25 additions & 0 deletions src/data/brave_search.MCP.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
{
"prompts": [
"Is Brave Search MCP server connected?",
"Search the web for 'best productivity apps 2025' and return the top 5 results.",
"Find web pages about 'remote work trends' published in the last 7 days.",
"Search for the latest news about 'AI regulation' and return the 5 most recent articles.",
"Find breaking news on 'electric vehicles' from the last 24 hours.",
"Search for images of 'modern home office setups' and return the top 5 results.",
"Find safe-for-work images of 'healthy breakfast ideas'.",
"Search for videos about 'machine learning tutorials' and return the top 3 results.",
"Find recent videos on '2025 tech conferences' published in the last month.",
"Find coffee shops near 'Denver, CO' with ratings and reviews.",
"Search for top-rated vegan restaurants in 'San Francisco'.",
"Summarize the top web results for 'benefits of meditation' with inline references.",
"Generate a concise summary of news articles about 'renewable energy investments'."
],
"tools": [
"brave_web_search",
"brave_news_search",
"brave_image_search",
"brave_video_search",
"brave_local_search",
"brave_summarizer"
]
}
32 changes: 32 additions & 0 deletions src/data/chroma_cloud.MCP.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
{
"prompts": [
"Create a new collection called 'test_collection' using the default embedding function.",
"Add the following documents to a collection named 'test_collection': 'This is the first test document.' (ID: doc1), 'This is the second test document.' (ID: doc2).",
"Retrieve all documents from the collection 'test_collection'.",
"Get information about the collection 'test_collection'.",
"Get the number of documents in the collection 'test_collection'.",
"Peek at the first 3 documents in the collection 'test_collection'.",
"Query the collection 'test_collection' for documents similar to 'test document' and return the top 2 results.",
"Update the document with ID 'doc2' in 'test_collection' to have the content: 'This is the updated second test document.'",
"Delete the document with ID 'doc1' from the collection 'test_collection'.",
"Fork the collection 'test_collection' into a new collection called 'test_collection_fork'.",
"Rename the collection 'test_collection' to 'test_collection_renamed'.",
"List all collections in Chroma Cloud.",
"Delete the collection named 'test_collection_renamed'."
],
"tools": [
"chroma_create_collection",
"chroma_add_documents",
"chroma_get_documents",
"chroma_get_collection_info",
"chroma_get_collection_count",
"chroma_peek_collection",
"chroma_query_documents",
"chroma_update_documents",
"chroma_delete_documents",
"chroma_fork_collection",
"chroma_modify_collection",
"chroma_list_collections",
"chroma_delete_collection"
]
}
20 changes: 20 additions & 0 deletions src/data/databrick_genie.MCP.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
"prompts": [
"Connect to the Databricks Genie Spaces MCP server.",
"List all available tools under Databricks Genie Spaces.",
"Attempt to fetch data without specifying any parameters.",
"What is the total sales for Q1 2024?",
"Give me insights on the trend of product sales over the last year.",
"How many new customers did we acquire in the last 3 months?",
"Which products had the highest sales last week in region A?",
"Run a serverless SQL query to fetch the top 10 customers by revenue.",
"Use serverless SQL to aggregate sales data by product category.",
"Run a complex serverless SQL query on a large dataset.",
"Show me the trend of website traffic for the last 6 months.",
"Give me the top 5 sales-performing regions over the past quarter.",
"Request data insights across multiple datasets concurrently.",
"Run multiple BI queries about different products and regions at the same time.",
"Search for product sales data across multiple years and regions."
],
"tools": []
}
17 changes: 17 additions & 0 deletions src/data/databrick_unity.MCP.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{
"prompts": [
"Is Databricks Unity Catalog Functions connected?",
"Execute the function get_user_activity from analytics_catalog.user_schema catalog with the following parameters: start_date='2025-10-01', end_date='2025-10-10'.",
"Execute the function current_timestamp from system.default catalog with no parameters.",
"Execute the function date_add from system.default catalog with the following parameters: start_date='2025-10-01', days=5.",
"Run the function get_total_sales from sales_catalog.monthly_reports with the parameter month='September'.",
"Execute the function get_user_activity from analytics_catalog.user_schema with the parameters start_date='invalid_date' and end_date='2025-10-10' to test error handling.",
"Run the function calculate_sales with an invalid parameter region_code='XYZ' that does not exist in the valid list.",
"Execute the function concat from system.default catalog with the following parameters: str1='Hello', str2='World'.",
"Execute the function non_existent_function from default.test_schema catalog to test function not found error.",
"Execute the function current_date from system.default catalog with invalid parameter test_param='xyz' to test parameter validation.",
"SELECT analytics_catalog.user_schema.get_user_activity('2025-10-01', '2025-10-10');",
"Run the SQL query: SELECT analytics_catalog.user_schema.get_user_activity('2025-10-01', '2025-10-10');"
],
"tools": []
}
25 changes: 25 additions & 0 deletions src/data/databrick_vector.MCP.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
{
"prompts": [
"Connect to the Databricks Vector Search MCP server.",
"List all available tools under Databricks Vector Search.",
"Fetch data from Databricks using the vector model without specifying parameters.",
"Search for documents related to 'machine learning models'.",
"Perform semantic search using the vector embedding for the phrase 'neural networks in deep learning'.",
"Find similar items to the embedding for the phrase 'cloud computing infrastructure'.",
"Search for articles about 'distributed systems' and return results that are more relevant to 'data replication in databases'.",
"Find similar items to a non-existent or irrelevant embedding.",
"List all available vector search indexes in the system.",
"Create a new vector search index for the dataset 'customer reviews'.",
"Modify the index 'product_reviews' by adding a new field for 'review_rating'.",
"Delete the 'outdated_documents' index from the search database.",
"Create an index without specifying a dataset.",
"Use serverless compute to run a semantic search over the 'tech_education' dataset.",
"Check if the serverless compute is being utilized for this search operation.",
"Perform a search on a very large dataset that may result in a timeout.",
"Run the same semantic search query twice: 'data analytics tools for 2024'.",
"Search for content related to 'AWS EC2' from documents published after 2020.",
"Fetch results for 'cloud services' and include 'cloud_security' tag.",
"Perform a semantic search on the entire AWS documentation with a high query volume."
],
"tools": []
}
Loading