Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,418 changes: 1,418 additions & 0 deletions benchmarks/FireScale/databricks/benchmark.sql

Large diffs are not rendered by default.

52 changes: 52 additions & 0 deletions benchmarks/FireScale/databricks/queries.json

Large diffs are not rendered by default.

96 changes: 96 additions & 0 deletions benchmarks/FireScale/databricks/setup.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
-- Benchmark setup script: drops and recreates the five benchmark tables
-- (uservisits, rankings, ipaddresses, agents, searchwords), bulk-loads each
-- one from Parquet files in S3, and then runs VACUUM on every table.
--
-- NOTE(review): this file lives under databricks/, but the statements below
-- (DROP AGGREGATING INDEX, PRIMARY INDEX, the pgdate type, COPY INTO ... WITH
-- CREDENTIALS) look like Firebolt dialect -- confirm the script actually runs
-- on a Databricks SQL warehouse before relying on it.

-- Remove any objects left over from a previous run so the script is re-runnable.
DROP AGGREGATING INDEX IF EXISTS idx_by_day;
DROP TABLE IF EXISTS uservisits;
DROP TABLE IF EXISTS rankings;
DROP TABLE IF EXISTS ipaddresses;
DROP TABLE IF EXISTS agents;
DROP TABLE IF EXISTS searchwords;

-- Fact table: one row per user visit.
CREATE TABLE "uservisits" ("sourceip" text NOT NULL,
"destinationurl" text NOT NULL,
"visitdate" pgdate NOT NULL,
"adrevenue" REAL NOT NULL,
"useragent" text NOT NULL,
"countrycode" text NOT NULL,
"languagecode" text NOT NULL,
"searchword" text NOT NULL,
"duration" integer NOT NULL)
PRIMARY INDEX "visitdate", "destinationurl", "sourceip";

-- Dimension table keyed by IP address.
CREATE TABLE "ipaddresses" ("ip" text NOT NULL,
"autonomoussystem" integer NOT NULL,
"asname" text NOT NULL)
PRIMARY INDEX "ip";

-- Page rankings; "pagerank" is the only nullable column in this script.
CREATE TABLE "rankings" ("pageurl" text NOT NULL,
"pagerank" integer NULL,
"avgduration" integer NOT NULL)
PRIMARY INDEX "pageurl";

-- User-agent dimension (declared without a PRIMARY INDEX clause).
CREATE TABLE "agents" ("id" integer NOT NULL,
"agentname" text NOT NULL,
"operatingsystem" text NOT NULL,
"devicearch" text NOT NULL,
"browser" text NOT NULL);

-- Search-word dimension (declared without a PRIMARY INDEX clause).
CREATE TABLE "searchwords" ("word" text NOT NULL,
"word_hash" bigint NOT NULL,
"word_id" bigint NOT NULL,
"firstseen" pgdate NOT NULL,
"is_topic" boolean NOT NULL);

-- Bulk-load every table from the 1tb dataset prefix in S3. All five loads
-- read Parquet and authenticate with the same AWS role ARN.
-- NOTE(review): the bucket name suggests requester-pays access -- confirm
-- who is billed for these reads.
COPY
INTO
uservisits
FROM
's3://firebolt-benchmarks-requester-pays-us-east-1/firenewt/1tb/uservisits/gz-parquet/'
WITH
CREDENTIALS = (AWS_ROLE_ARN = 'arn:aws:iam::442042532160:role/FireboltS3DatasetsAccess')
TYPE = parquet;

COPY
INTO
rankings
FROM
's3://firebolt-benchmarks-requester-pays-us-east-1/firenewt/1tb/rankings/'
WITH
CREDENTIALS = (AWS_ROLE_ARN = 'arn:aws:iam::442042532160:role/FireboltS3DatasetsAccess')
TYPE = parquet;

COPY
INTO
ipaddresses
FROM
's3://firebolt-benchmarks-requester-pays-us-east-1/firenewt/1tb/dimensions/ipaddresses/'
WITH
CREDENTIALS = (AWS_ROLE_ARN = 'arn:aws:iam::442042532160:role/FireboltS3DatasetsAccess')
TYPE = parquet;

COPY
INTO
agents
FROM
's3://firebolt-benchmarks-requester-pays-us-east-1/firenewt/1tb/dimensions/agents/'
WITH
CREDENTIALS = (AWS_ROLE_ARN = 'arn:aws:iam::442042532160:role/FireboltS3DatasetsAccess')
TYPE = parquet;

COPY
INTO
searchwords
FROM
's3://firebolt-benchmarks-requester-pays-us-east-1/firenewt/1tb/dimensions/searchwords/'
WITH
CREDENTIALS = (AWS_ROLE_ARN = 'arn:aws:iam::442042532160:role/FireboltS3DatasetsAccess')
TYPE = parquet;

-- Post-load maintenance pass over every table.
-- NOTE(review): uservisits is vacuumed twice in a row below -- confirm
-- whether the second pass is intentional or a copy-paste duplicate.
-- NOTE(review): VACUUM semantics differ between engines; verify this is the
-- intended maintenance command for the target platform.
VACUUM uservisits;

VACUUM uservisits;

VACUUM rankings;

VACUUM searchwords;

VACUUM agents;

VACUUM ipaddresses;
Comment on lines +86 to +96
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you VACUUM in Databricks / have you validated that this setup file works for Databricks?

3 changes: 3 additions & 0 deletions clients/python/src/connectors/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,14 @@
from .firebolt import FireboltConnector
from .redshift import RedshiftConnector
from .snowflake import SnowflakeConnector
from .databricks import DatabricksConnector

__all__ = [
"FireboltConnector",
"SnowflakeConnector",
"BigQueryConnector",
"RedshiftConnector",
"DatabricksConnector"
]


Expand All @@ -18,6 +20,7 @@ def get_connector_class(vendor: str):
"firebolt": FireboltConnector,
"bigquery": BigQueryConnector,
"redshift": RedshiftConnector,
"databricks": DatabricksConnector
}
if vendor not in connector_map:
raise ValueError(f"Unsupported vendor: {vendor}")
Expand Down
67 changes: 67 additions & 0 deletions clients/python/src/connectors/databricks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
from databricks import sql
from typing import Any, Dict, Optional, List

class DatabricksConnector:
    """Small wrapper around a Databricks SQL connection and a single cursor.

    The connection is opened lazily: either explicitly through ``connect()``
    or implicitly by the first ``execute_query()`` call.
    """

    # Keys that must be present in the configuration dictionary.
    _REQUIRED_PARAMS = (
        'server_hostname',
        'http_path',
        'access_token',
        'catalog',
        'schema',
    )

    def __init__(self, config: Dict[str, str]):
        """
        Initialize Databricks connector with configuration parameters.

        Args:
            config (Dict[str, str]): Configuration dictionary containing:
                "server_hostname": "your sql warehouse hostname",
                "http_path": "http path for warehouse",
                "access_token": "your databricks personal access token",
                "catalog": "Databricks catalog name",
                "schema": "Databricks schema name"

        Raises:
            ValueError: If any required key is missing from ``config``.
        """
        self.config = config
        self._validate_config()
        self._conn = None
        self.cursor = None

    def _validate_config(self) -> None:
        """Validate that required configuration parameters are present.

        Only key presence is checked; values are not inspected.

        Raises:
            ValueError: Listing every required key absent from the config.
        """
        missing_params = [
            param for param in self._REQUIRED_PARAMS
            if param not in self.config
        ]
        if missing_params:
            raise ValueError(f"Missing required configuration parameters: {missing_params}")

    def connect(self) -> None:
        """Connect to Databricks using stored configuration.

        Does nothing when a connection is already held. Otherwise opens a
        connection, creates a cursor and issues
        ``SET use_cached_result = false`` on it (presumably so benchmark
        timings are not served from a result cache).

        The connection and cursor are published on ``self`` only after all
        three steps succeed. If cursor creation or the ``SET`` statement
        fails, the new connection is closed and the error propagates, so the
        connector never ends up half-initialised.
        """
        if self._conn:
            return

        conn = sql.connect(
            server_hostname=self.config['server_hostname'],
            http_path=self.config['http_path'],
            access_token=self.config['access_token'],
            catalog=self.config['catalog'],
            schema=self.config['schema'],
        )
        try:
            cursor = conn.cursor()
            cursor.execute("SET use_cached_result = false;")
        except Exception:
            # Do not leak the session or keep a connection whose cache
            # setting was never applied.
            conn.close()
            raise

        self._conn = conn
        self.cursor = cursor

    def execute_query(self, query: str, params: Optional[Dict[str, Any]] = None) -> List[Any]:
        """
        Execute a SQL query and return all fetched rows.

        Connects first if no connection/cursor is available yet.

        Args:
            query (str): SQL query to execute
            params (Optional[Dict[str, Any]]): Query parameters for
                parameterized queries; an empty dict is sent when omitted.

        Returns:
            The rows exactly as returned by ``cursor.fetchall()``.
            NOTE(review): the driver's row objects are returned unchanged and
            are not converted to plain dictionaries -- confirm callers do not
            rely on dict access.

        Raises:
            Exception: Wrapping any failure from the driver; the original
                error is attached as ``__cause__``.
        """
        if not self._conn or not self.cursor:
            self.connect()

        try:
            self.cursor.execute(query, params or {})
            return self.cursor.fetchall()
        except Exception as e:
            # Chain explicitly so the driver's own error stays visible as the cause.
            raise Exception(f"Error executing query: {str(e)}") from e

    def close(self) -> None:
        """Close the Databricks connection if it exists.

        The stored connection and cursor references are cleared even when the
        driver's ``close()`` raises, so a later call reconnects from scratch.
        """
        if self._conn:
            try:
                self._conn.close()
            finally:
                self._conn = None
                self.cursor = None
7 changes: 7 additions & 0 deletions config/credentials/sample_credentials.json
Original file line number Diff line number Diff line change
Expand Up @@ -27,5 +27,12 @@
"project_id": "your_project_id",
"dataset": "your_dataset",
"key": "your json key generated from google cloud"
},
"databricks": {
"server_hostname": "your Databricks SQL warehouse server hostname",
"http_path": "your Databricks SQL warehouse http path",
"access_token": "your Databricks personal access token",
"catalog": "your Databricks catalog",
"schema": "your Databricks schema"
}
}