Commit 183e9eb0 authored by harshavardhan.c's avatar harshavardhan.c

feat: Converted app to fastapi app.

parent 7845d0b6
__version__ = "v1.0.0"
......@@ -20,7 +20,7 @@ broker = KafkaBroker(
async def consume_stream_for_processing_dependencies(message: dict):
try:
await ModelCreatorAgent.model_creator_agent(
message=ModelCreatorSchema(meta=message)
message=ModelCreatorSchema(**message)
)
return True
except Exception as e:
......
# app.py
import asyncio
import logging as logger
import sys
import gc
from dotenv import load_dotenv
gc.collect()
load_dotenv()
from faststream import FastStream
from ut_dev_utils import configure_logger
import argparse
from agent_subscribers import broker
ap = argparse.ArgumentParser()
configure_logger()
if __name__ == "__main__":
from dotenv import load_dotenv
# Create FastStream app
app = FastStream(broker)
load_dotenv()
from ut_dev_utils import configure_logger
async def run_app():
    """Run the FastStream application until it stops or is interrupted.

    Logs lifecycle events; re-raises unexpected errors so the process
    exits non-zero, but swallows KeyboardInterrupt as a normal shutdown.
    """
    try:
        logger.info("Starting FastStream application...")
        # Blocks until the broker stops or an error occurs.
        await app.run()
    except KeyboardInterrupt:
        logger.info("Application interrupted by user")
    except Exception as e:
        logger.error(f"Application error: {e}")
        raise  # propagate so the caller can exit with a failure status
    finally:
        logger.info("Application shutdown complete")
configure_logger()
import asyncio
import logging as logger
import sys
# Main execution
if __name__ == "__main__":
    try:
        # For better performance on Linux/Mac, use uvloop if available
        if sys.platform != "win32":
            try:
                import uvloop

                asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
                logger.info("Using uvloop for better performance")
            except ImportError:
                # uvloop is optional; fall back to the stdlib event loop.
                logger.info("uvloop not available, using default event loop")
        # Run the application
        asyncio.run(run_app())
    except KeyboardInterrupt:
        # Ctrl-C: exit quietly with status 0.
        print("\nApplication stopped by user")
    except Exception as e:
        logger.error(f"Failed to start application: {e}")
        sys.exit(1)
from scripts.config import Services

# CLI flags override the configured host/port defaults.
ap.add_argument(
    "--port",
    "-p",
    required=False,
    default=Services.PORT,
    help="Port to start the application.",
)
ap.add_argument(
    "--bind",
    "-b",
    required=False,
    default=Services.HOST,
    help="IP to start the application.",
)
arguments = vars(ap.parse_args())
logger.info(f"App Starting at {arguments['bind']}:{arguments['port']}")
if sys.platform == "win32":
    # Windows: granian is not used; serve the ASGI app with uvicorn and the
    # selector event loop policy (required by some libraries on Windows).
    import uvicorn

    asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
    uvicorn.run(
        "main:app",
        host=arguments["bind"],
        port=int(arguments["port"]),
        root_path="",
    )
else:
    # Linux/Mac: serve with granian's ASGI interface.
    from granian import Granian
    from granian.constants import Interfaces

    Granian(
        "main:app",
        address=arguments["bind"],
        port=int(arguments["port"]),
        interface=Interfaces.ASGI,
        log_access=True,
        log_enabled=True,
        respawn_failed_workers=True,
        threads=10,
        threading_mode="runtime",
    ).serve()
import sys

from ut_dev_utils import FastAPIConfig, generate_fastapi_app
from ut_dev_utils.errors.exception_handlers import ExceptionHandlers

from __version__ import __version__
from scripts.config import PROJECT_NAME
from scripts.core.services import router

description = """
Databricks Platform Automation microservice for FTDMPC.
"""
tags_metadata = []

# FastAPI application configuration. On Windows (local dev) the app is served
# at the root; elsewhere it sits behind a proxy under /dbx_mgmt.
app_config = FastAPIConfig(
    title="Databricks Platform Automation APP",
    description=description,
    version=__version__,
    root_path="" if sys.platform == "win32" else "/dbx_mgmt",
    tags_metadata=tags_metadata,
    exception_handlers={
        # Catch-all handler so unexpected errors return a uniform payload.
        Exception: ExceptionHandlers.generic_exception_handler,
    },
)
# Module-level ASGI app referenced by the servers as "main:app".
app = generate_fastapi_app(
    app_config,
    routers=[router],
    project_name=PROJECT_NAME,
    enable_default_openapi=True,
)
......@@ -61,27 +61,18 @@ class _KafkaConfig(BaseSettings):
# NOTE(review): this block is a diff render that interleaves removed and added
# lines — DATABRICKS_URI/DATABRICKS_HTTP_PATH and the prepare_databricks_uri
# validator appear to be the removed side, the DATABRICKS_CLUSTER_* fields the
# added side. Confirm against the repository before relying on this copy.
class _DatabricksConfig(BaseSettings):
    # Connection settings (host/token have no defaults; must come from env).
    DATABRICKS_HOST: str
    DATABRICKS_PORT: int = Field(default=443)
    DATABRICKS_URI: str
    DATABRICKS_HTTP_PATH: str
    DATABRICKS_ACCESS_TOKEN: str
    DATABRICKS_DEFAULT_PORT: int = Field(default=443)
    # Unity Catalog layout.
    DATABRICKS_CATALOG_NAME: str = Field(default="unified_model")
    DATABRICKS_PUBLIC_SCHEMA_NAME: str = Field(default="public")
    DATABRICKS_ANALYTICAL_SCHEMA_NAME: str = Field(default="analytical")
    DATABRICKS_STORAGE_FORMAT: str = Field(default="PARQUET")
    DATABRICKS_STORAGE_PATH: str = Field(
        default="abfss://unity-catalog-storage@dbstoragenzxfhpgsipt5a.dfs.core.windows.net/416418955412087"
    )

    @model_validator(mode="before")
    def prepare_databricks_uri(cls, values):
        # Builds a sqlalchemy-databricks URI from the raw settings before
        # field validation runs.
        values["DATABRICKS_URI"] = (
            f"databricks://token:{values['DATABRICKS_ACCESS_TOKEN']}@{values['DATABRICKS_HOST']}:{values['DATABRICKS_PORT']}"
            f"?http_path={values['DATABRICKS_HTTP_PATH']}"
        )
        return values

    # Streaming cluster provisioning defaults.
    DATABRICKS_CLUSTER_NAME: str = Field(default="UT-Steaming-Cluster")
    DATABRICKS_CLUSTER_DISK_SIZE: int = Field(default=150)
    DATABRICKS_CLUSTER_MIN_WORKERS: int = Field(default=1)
    DATABRICKS_CLUSTER_SPARK_VERSION: str = Field(default="15.4.x-scala2.12")
    DATABRICKS_CLUSTER_RUNTIME_VERSION: str = Field(default="9.1")
    DATABRICKS_CLUSTER_NODE_TYPE_ID: str = Field(default="Standard_DS3_v2")
    DATABRICKS_CLUSTER_DRIVER_NODE_TYPE_ID: str = Field(default="Standard_DS3_v2")
Services = _Services()
......@@ -98,4 +89,5 @@ __all__ = [
"PathToStorage",
"KafkaConfig",
"DatabricksConfig",
"PROJECT_NAME",
]
class DatabricksConstants:
    """Well-known names of Databricks jobs, notebooks, and volumes used by this service."""

    # Job names
    METADATA_INGESTION_JOB_NAME = "metadata_ingestion_job"
    METADATA_DELETION_JOB_NAME = "metadata_deletion_job"
    TIMESERIES_INGESTION_JOB_NAME = "timeseries_ingestion_job"
    # Notebook names backing the jobs above
    METADATA_INGESTION_NOTEBOOK_NAME = "metadata_ingestion_notebook"
    METADATA_DELETION_NOTEBOOK_NAME = "metadata_deletion_notebook"
    TIMESERIES_INGESTION_NOTEBOOK_NAME = "timeseries_ingestion_notebook"
    # Unity Catalog volume name
    VOLUME_NAME = "unity_catalog_storage"
class NotebookConstants:
......
from fastapi import APIRouter

# Root API router; aggregates all versioned sub-routers.
router = APIRouter()

# Imported after router creation — NOTE(review): likely to avoid a circular
# import; confirm before moving to the top of the file.
from .v1 import v1_router

router.include_router(v1_router)
from fastapi import APIRouter

# All v1 endpoints are mounted under this prefix.
v1_router = APIRouter(prefix="/api/v1")

__all__ = ["v1_router"]

# Imported after v1_router exists — NOTE(review): presumably to avoid a
# circular import; the sub-module may import v1_router back.
from .model_creator_services import model_creator_router

v1_router.include_router(model_creator_router)
import logging
from typing import Annotated
from fastapi import BackgroundTasks
from fastapi.params import Depends, Query
from faststream.confluent.fastapi import KafkaRouter
from ut_dev_utils.responses import DefaultResponseSchema
from ut_security_util import MetaInfoSchema
from ut_sql_utils.asyncio.declarative_utils import DeclarativeUtils
from scripts.config import KafkaConfig
from scripts.core.handlers.model_creator_handler import ModelCreatorHandler
from scripts.db.psql import get_declarative_utils
from scripts.decorators.databricks_validator import get_databricks_config
from scripts.schemas import ModelCreatorSchema
model_creator_router = KafkaRouter(KafkaConfig.KAFKA_URI)
@model_creator_router.get("/model_creator")
async def add_to_stream(
    meta: MetaInfoSchema,
    bg_task: BackgroundTasks,
    payload: Annotated[ModelCreatorSchema, Depends(get_databricks_config)],
    declarative_utils: DeclarativeUtils = Depends(get_declarative_utils),
    analytical: bool = Query(default=False),
):
    """Queue Unity Catalog model creation as a FastAPI background task.

    The Databricks configuration is resolved by the get_databricks_config
    dependency; the actual work runs after the response is returned.
    """
    handler = ModelCreatorHandler(
        declarative_utils=declarative_utils, meta=meta, message=payload
    )
    logging.info("Adding background task for model creation...")
    bg_task.add_task(
        handler.create_models_in_unity_catalog, analytical=analytical
    )
    return DefaultResponseSchema(message="Model creation task added to stream")
from typing import Dict, List
from sqlalchemy import (
BigInteger,
Column,
Date,
DateTime,
Integer,
MetaData,
String,
Table,
)
from scripts.utils.databricks_utils import DatabricksSQLUtility
from scripts.utils.model_convertor_utils import TypeMapper
# NOTE(review): this block is two files interleaved by a bad diff render — a
# DatabricksManager.__init__ body is spliced into DataBricksSQLLayer (see the
# stray "class DatabricksManager:" text inside the first docstring and the
# self.host / self.headers fragments below). A clean copy of this class exists
# later in the file; this copy is not syntactically valid as shown.
class DataBricksSQLLayer(DatabricksSQLUtility):
    def __init__(self, catalog_name: str, project_id: str, schema: str):
        super().__init__(catalog_name, project_id)
        self.schema = schema

    def create_external_table_from_structure(
        self,
        table: Table,
        external_location: str,
        file_format: str = "PARQUET",
        table_properties: Dict[str, str] = None,
        partition_columns: list = None,
    ) -> str:
        """
        Create an external table from a model class.
        Args:
            table: The model class to create the external table from.
            external_location: The external location path.
            file_format: The file format of the data files.
            table_properties: Additional table properties.
            partition_columns: List of columns to partition the table by.
        Returns:
            External Location - Returns the external location
        class DatabricksManager:
            def __init__(self, databricks_host: str, access_token: str):
        """
        schema_table = f"{table.schema}.{table.name}" if table.schema else table.name
        columns_sql = TypeMapper().extract_columns_without_constraints(table)
        external_location = (
            f"{external_location}/{self.catalog_name}/{file_format}/{schema_table}"
        )
        sql_parts = [
            f"CREATE TABLE IF NOT EXISTS {schema_table}",
            f"({columns_sql})",
            f"USING {file_format}",
            f"LOCATION '{external_location}'",
        ]
        if partition_columns:
            partition_clause = ", ".join(partition_columns)
            sql_parts.append(f"PARTITIONED BY ({partition_clause})")
        if table_properties:
            props = [f"'{k}' = '{v}'" for k, v in table_properties.items()]
            props_sql = ",\n ".join(props)
            sql_parts.append(f"TBLPROPERTIES (\n {props_sql}\n)")
        create_sql = "\n".join(sql_parts)
        self.execute_sql_statement(create_sql)
        return external_location

    # NOTE(review): stray DatabricksManager.__init__ docstring line follows.
    Initialize Databricks Manager

    def create_timeseries_table(self, columns: List[str], external_location: str):
        """
        Create a timeseries table model and all columns will be of type String
        Args:
            columns: List of columns in the table
            external_location: The external location path
        Example:
            columns = [l1,l2,enterprise]
        Returns:
            Timeseries Table model
        databricks_host: Your Databricks workspace URL
        access_token: Personal access token or service principal token
        """
        table_columns = [
            Column("timestamp", BigInteger, nullable=False),
            Column("dt_timestamp", DateTime, nullable=False),
            Column("dt_date", Date, nullable=False),
            Column("dt_hour", Integer, nullable=False),
            Column("value", String, nullable=False),
            Column("value_type", String, nullable=False, default="float"),
            Column("c3", String, nullable=False),
        ]
        default_columns = ["c1", "c5", "Q", "T", "D", "P", "A", "B", *columns]
        table_columns.extend(
            [Column(col_name, String, nullable=True) for col_name in default_columns]
        # NOTE(review): DatabricksManager.__init__ fragment spliced in here;
        # the extend(...) call above is left unclosed by the bad merge.
        self.host = (
            databricks_host
            if "https://" in databricks_host
            else f"https://{databricks_host}"
        )
        partition_columns = ["dt_date", "dt_hour", "c3"]
        table_properties = {
            "parquet.compression": "snappy",  # Fast decompression for frequent queries
            "parquet.page.size": "524288",  # 512KB - better time-range filtering
            "parquet.block.size": "268435456",  # 256MB - efficient sequential reads
            "serialization.format": "1",  # Support for arrays/complex types
        self.headers = {
            "Authorization": f"Bearer {access_token}",
            "Content-Type": "application/json",
        }
        table_obj = Table(
            "timeseries_data", MetaData(), *table_columns, schema=self.schema
        )
        self.create_external_table_from_structure(
            table=table_obj,
            external_location=external_location,
            partition_columns=partition_columns,
            table_properties=table_properties,
        )
        return external_location
import logging
import time
from typing import Union
from scripts.config import DatabricksConfig
from scripts.db.databricks import DatabricksManager
from scripts.utils.httpx_util import HTTPXRequestUtil
class DatabricksClusterManager(DatabricksManager):
    """Manage Databricks clusters (create/start/inspect) via the Clusters 2.1 REST API."""

    def __init__(self, databricks_host: str, access_token: str):
        """
        Initialize Databricks cluster manager
        databricks_host: Your Databricks workspace URL
        access_token: Personal access token or service principal token
        """
        super().__init__(databricks_host, access_token)
        self.base_url = f"{self.host}/api/2.1/clusters"

    def create_cluster(self, cluster_config: dict):
        """
        Create a new cluster in Databricks
        Args:
            cluster_config: Dictionary containing cluster configuration
        Returns:
            str: Cluster ID if successful, None if failed
        """
        url = f"{self.base_url}/create"
        response = HTTPXRequestUtil(url).post(headers=self.headers, json=cluster_config)
        if response.status_code != 200:
            logging.error(f"Failed to create cluster: {response.text}")
            return None
        cluster_id = response.json().get("cluster_id")
        if not cluster_id:
            logging.error("No cluster_id returned from create request")
            return None
        logging.info(f"Cluster created with ID: {cluster_id}")
        # Wait for the cluster to be ready; the id is returned either way so
        # callers can decide how to handle a slow start.
        if self.wait_for_cluster_ready(cluster_id):
            logging.info(f"Cluster {cluster_id} is ready for use!")
        else:
            logging.error(f"Cluster {cluster_id} failed to start within timeout")
        return cluster_id

    def fetch_cluster_stats(self, cluster_id) -> dict:
        """
        Fetch the status of a cluster
        Args:
            cluster_id: The ID of the cluster
        Returns:
            dict: Cluster info from the API, or {} on error
        """
        url = f"{self.base_url}/get"
        params = {"cluster_id": cluster_id}
        response = HTTPXRequestUtil(url).get(headers=self.headers, params=params)
        if response.status_code == 200:
            return response.json()
        logging.error(f"Error checking cluster: {response.text}")
        return {}

    def start_cluster(self, cluster_id: str) -> bool:
        """
        Start a terminated cluster
        Args:
            cluster_id: ID of the cluster to start
        Returns:
            bool: True if the cluster started and became ready, False otherwise
        """
        url = f"{self.base_url}/start"
        payload = {"cluster_id": cluster_id}
        response = HTTPXRequestUtil(url).post(headers=self.headers, json=payload)
        if response.status_code != 200:
            # BUG FIX: the message previously said "Failed to create cluster".
            logging.error(f"Failed to start cluster {cluster_id}: {response.text}")
            return False
        # BUG FIX: the /clusters/start endpoint returns an empty body; the old
        # code looked for a cluster_id in the response and therefore always
        # logged an error and returned False even on success. We already have
        # the id — just wait for the cluster to become ready.
        if self.wait_for_cluster_ready(cluster_id):
            logging.info(f"Cluster {cluster_id} is ready for use!")
            return True
        logging.error(f"Cluster {cluster_id} failed to start within timeout")
        return False

    def get_existing_cluster_by_name(self, cluster_name: str) -> Union[None, dict]:
        """
        Check if a cluster with the given name already exists
        Args:
            cluster_name: Name of the cluster to search for
        Returns:
            dict: Cluster info if found, None if not found
        """
        url = f"{self.base_url}/list"
        response = HTTPXRequestUtil(url).get(headers=self.headers)
        if response.status_code == 200:
            clusters = response.json().get("clusters", [])
            for cluster in clusters:
                if cluster.get("cluster_name") == cluster_name:
                    return cluster
        else:
            logging.warning(f"Warning: Could not list clusters: {response.text}")
        return None

    def get_streaming_cluster_config(
        self, cluster_name: str = "UT-Steaming-Cluster"
    ) -> dict:
        """
        Get configuration for a continuous streaming cluster optimized for Event Hub processing
        Args:
            cluster_name: Name for the cluster (default: "UT-Steaming-Cluster")
        Returns:
            dict: Complete cluster configuration
        """
        return {
            "cluster_name": cluster_name,
            "spark_version": DatabricksConfig.DATABRICKS_CLUSTER_SPARK_VERSION,
            "node_type_id": DatabricksConfig.DATABRICKS_CLUSTER_NODE_TYPE_ID,  # 8 cores, 16GB RAM
            "driver_node_type_id": DatabricksConfig.DATABRICKS_CLUSTER_DRIVER_NODE_TYPE_ID,
            # CRITICAL: Never auto-terminate
            "auto_termination_minutes": 0,  # 0 = NEVER terminate
            # Auto-scaling for variable loads
            "autoscale": {
                "min_workers": DatabricksConfig.DATABRICKS_CLUSTER_MIN_WORKERS,  # Minimum cost
                "max_workers": 8,  # Scale up for high Event Hub volume
            },
            # "is_single_node": True,
            # Streaming optimizations
            "spark_conf": self.get_spark_config(),
            # Reliability settings
            "azure_attributes": {
                "availability": "ON_DEMAND_AZURE",  # Most reliable
                "first_on_demand": 1,
            },
            # Storage for checkpoints and logs
            "enable_elastic_disk": True,
            "disk_spec": {
                "disk_type": {"azure_disk_volume_type": "PREMIUM_LRS"},
                "disk_size": DatabricksConfig.DATABRICKS_CLUSTER_DISK_SIZE,
            },
            # Monitoring tags
            "custom_tags": {
                "purpose": "continuous_streaming",
                "workload": "eventhub_processing",
                "criticality": "high",
                "auto_terminate": "never",
            },
            # Unity Catalog
            "data_security_mode": "SINGLE_USER",
        }

    @staticmethod
    def get_spark_config() -> dict:
        """Spark settings tuned for the streaming cluster."""
        return {
            "spark.executor.memory": "6g",
            "spark.driver.memory": "5g",
            "spark.executor.cores": "3",  # Reduced from 4 to 3 (leave 1 core for OS)
            "spark.serializer": "org.apache.spark.serializer.KryoSerializer",
            "spark.executor.instances": "4",
            "spark.sql.shuffle.partitions": "32",
            "spark.executor.extraJavaOptions": "-XX:+UseG1GC -XX:MaxGCPauseMillis=200",
            "spark.driver.extraJavaOptions": "-XX:+UseG1GC -XX:MaxGCPauseMillis=200",
        }

    def wait_for_cluster_ready(
        self, cluster_id: str, timeout_minutes: int = 10
    ) -> bool:
        """
        Wait for cluster with exponential backoff for more efficient polling
        Args:
            cluster_id: The ID of the cluster to poll
            timeout_minutes: How long to wait before giving up
        Returns:
            bool: True when the cluster reaches RUNNING, False on timeout or
            a terminal state (TERMINATED/TERMINATING/ERROR)
        """
        timeout_seconds = timeout_minutes * 60
        start_time = time.time()
        check_interval = 10  # Start with 10 seconds
        max_interval = 90  # Max 90 seconds between checks
        while time.time() - start_time < timeout_seconds:
            cluster_stats = self.fetch_cluster_stats(cluster_id)
            if cluster_stats:
                state = cluster_stats.get("state", "UNKNOWN")
                if state == "RUNNING":
                    return True
                elif state in ["TERMINATED", "TERMINATING", "ERROR"]:
                    return False
                elif state in ["PENDING", "RESTARTING", "RESIZING"]:
                    # These are transitional states - keep waiting
                    logging.info(
                        f"Cluster {cluster_id} is starting... Current state: {state}"
                    )
                else:
                    logging.warning(f"Unknown cluster state: {state}")
            # Exponential backoff
            logging.info(
                f"Cluster {cluster_id} not ready yet. Waiting {check_interval} seconds..."
            )
            time.sleep(check_interval)
            check_interval = min(check_interval * 1.5, max_interval)
        return False

    def get_http_path_details_by_cluster_id(self, cluster_id: str, workspace_url: str):
        """Build the SQL http_path for a cluster from the workspace org id."""
        return f"/sql/protocolv1/o/{self.extract_org_id(workspace_url)}/{cluster_id}"

    @staticmethod
    def extract_org_id(workspace_url: str):
        """Extract organization ID from Azure Databricks URL"""
        # From URL like: https://adb-416418955412087.7.azuredatabricks.net
        # Extract: 416418955412087
        import re

        match = re.search(r"adb-(\d+)", workspace_url.replace("https://", ""))
        return match.group(1) if match else None
from typing import Dict, List
from sqlalchemy import (
BigInteger,
Column,
Date,
DateTime,
Integer,
MetaData,
String,
Table,
)
from scripts.utils.databricks_utils import DatabricksSQLUtility
from scripts.utils.model_convertor_utils import TypeMapper
class DataBricksSQLLayer(DatabricksSQLUtility):
    """SQL DDL helper for a project's Unity Catalog: external and timeseries tables."""

    def __init__(self, catalog_name: str, project_id: str, schema: str):
        # The base class resolves the project-scoped catalog name; the target
        # schema is kept for table placement.
        super().__init__(catalog_name=catalog_name, project_id=project_id)
        self.schema = schema

    def create_external_table_from_structure(
        self,
        table: Table,
        external_location: str,
        # NOTE(review): these two defaults should be typed Optional[...];
        # Optional is not imported in this module, so left as-is here.
        table_properties: Dict[str, str] = None,
        partition_columns: list = None,
    ) -> str:
        """
        Create an external table from a model class.
        Args:
            table: The model class to create the external table from.
            external_location: The external location path.
            file_format: The file format of the data files.
            table_properties: Additional table properties.
            partition_columns: List of columns to partition the table by.
        Returns:
            External Location - Returns the external location
        """
        schema_table = f"{table.schema}.{table.name}" if table.schema else table.name
        columns_sql = TypeMapper().extract_columns_without_constraints(table)
        # Final data path: <location>/<catalog>/<format>/<schema.table>
        external_location = (
            f"{external_location}/{self.catalog_name}/{file_format}/{schema_table}"
        )
        # Assemble the DDL piece by piece; optional clauses appended below.
        sql_parts = [
            f"CREATE TABLE IF NOT EXISTS {schema_table}",
            f"({columns_sql})",
            f"USING {file_format}",
            f"LOCATION '{external_location}'",
        ]
        if partition_columns:
            partition_clause = ", ".join(partition_columns)
            sql_parts.append(f"PARTITIONED BY ({partition_clause})")
        if table_properties:
            props = [f"'{k}' = '{v}'" for k, v in table_properties.items()]
            props_sql = ",\n ".join(props)
            sql_parts.append(f"TBLPROPERTIES (\n {props_sql}\n)")
        create_sql = "\n".join(sql_parts)
        self.execute_sql_statement(create_sql)
        return external_location

    def create_timeseries_table(self, columns: List[str], external_location: str):
        """
        Create a timeseries table model and all columns will be of type String
        Args:
            columns: List of columns in the table
            external_location: The external location path
        Example:
            columns = [l1,l2,enterprise]
        Returns:
            Timeseries Table model
        """
        # Fixed columns every timeseries table carries.
        table_columns = [
            Column("timestamp", BigInteger, nullable=False),
            Column("dt_timestamp", DateTime, nullable=False),
            Column("dt_date", Date, nullable=False),
            Column("dt_hour", Integer, nullable=False),
            Column("value", String, nullable=False),
            Column("value_type", String, nullable=False, default="float"),
            Column("c3", String, nullable=False),
        ]
        # Standard tag columns plus the caller-supplied hierarchy columns,
        # all stored as nullable strings.
        default_columns = ["c1", "c5", "Q", "T", "D", "P", "A", "B", *columns]
        table_columns.extend(
            [Column(col_name, String, nullable=True) for col_name in default_columns]
        )
        partition_columns = ["dt_date", "dt_hour", "c3"]
        table_properties = {
            "parquet.compression": "snappy",  # Fast decompression for frequent queries
            "parquet.page.size": "524288",  # 512KB - better time-range filtering
            "parquet.block.size": "268435456",  # 256MB - efficient sequential reads
            "serialization.format": "1",  # Support for arrays/complex types
        }
        table_obj = Table(
            "timeseries_data", MetaData(), *table_columns, schema=self.schema
        )
        self.create_external_table_from_structure(
            table=table_obj,
            external_location=external_location,
            partition_columns=partition_columns,
            table_properties=table_properties,
        )
        return external_location
import logging
from typing import Dict, List
from ut_security_util.security_tools.auth_util import HTTPXRequestHandler
from scripts.db.databricks import DatabricksManager
from scripts.utils.httpx_util import HTTPXRequestUtil
class DatabricksJobManager:
class DatabricksJobManager(DatabricksManager):
def __init__(self, databricks_host: str, access_token: str):
"""
Initialize Databricks job manager
......@@ -14,15 +14,8 @@ class DatabricksJobManager:
databricks_host: Your Databricks workspace URL
access_token: Personal access token or service principal token
"""
self.host = (
databricks_host
if "https://" in databricks_host
else f"https://{databricks_host}"
)
self.headers = {
"Authorization": f"Bearer {access_token}",
"Content-Type": "application/json",
}
super().__init__(databricks_host, access_token)
self.base_url = f"{self.host}/api/2.1/jobs"
def create_job(self, job_config: dict):
"""
......@@ -31,7 +24,7 @@ class DatabricksJobManager:
Args:
job_config: Dictionary containing job configuration
"""
url = f"{self.host}/api/2.1/jobs/create"
url = f"{self.base_url}/create"
response = HTTPXRequestUtil(url).post(headers=self.headers, json=job_config)
......@@ -53,7 +46,7 @@ class DatabricksJobManager:
job_id: The ID of the job to run
parameters: Dictionary of parameters to pass to the job
"""
url = f"{self.host}/api/2.1/jobs/run-now"
url = f"{self.base_url}/run-now"
payload = {"job_id": job_id}
......@@ -78,12 +71,10 @@ class DatabricksJobManager:
Args:
run_id: The ID of the job run
"""
url = f"{self.host}/api/2.1/jobs/runs/get"
url = f"{self.base_url}/runs/get"
params = {"run_id": run_id}
response = HTTPXRequestHandler(url).get(
url, headers=self.headers, params=params
)
response = HTTPXRequestUtil(url).get(url, headers=self.headers, params=params)
if response.status_code == 200:
return response.json()
......@@ -93,14 +84,57 @@ class DatabricksJobManager:
)
return None
    def get_job_runs(
        self, job_id: int, active_only: bool = False, limit: int = 20
    ) -> List[Dict]:
        """
        List runs of a job via the Jobs runs/list endpoint.
        Args:
            job_id: The ID of the job
            active_only: When True, ask the API to return only active runs
            limit: Maximum number of runs to return
        Returns:
            The "runs" array from the response ([] when absent)
        Raises:
            An HTTP-status error from raise_for_status() on non-2xx responses
            # NOTE(review): exact exception type depends on HTTPXRequestUtil — confirm
        """
        url = f"{self.base_url}/runs/list"
        params = {
            "job_id": job_id,
            "limit": limit,
            # API expects string booleans in query params.
            "active_only": "true" if active_only else "false",
        }
        response = HTTPXRequestUtil(url).get(headers=self.headers, params=params)
        response.raise_for_status()
        return response.json().get("runs", [])
def is_job_running(self, job_id) -> Dict:
"""
Check if a job has any active runs
Returns:
Dict with 'is_running' boolean and 'active_runs' list
"""
try:
active_runs = self.get_job_runs(job_id, active_only=True)
running_states = ["PENDING", "RUNNING", "TERMINATING"]
active_running_runs = [
run
for run in active_runs
if run.get("state", {}).get("life_cycle_state") in running_states
]
return {
"is_running": len(active_running_runs) > 0,
"active_runs": active_running_runs,
"total_active_runs": len(active_running_runs),
}
except Exception as e:
logging.error(f"Error checking job status: {e}")
return {"is_running": False, "active_runs": [], "total_active_runs": 0}
@staticmethod
def create_job_config_for_serverless(notebook_path: str, job_name: str):
def create_job_config_for_serverless(
notebook_path: str, job_name: str, job_parameters: dict, tags: dict
):
"""
Create job configuration for a parameterized notebook
Args:
notebook_path: Path to the notebook in Databricks workspace
job_name: Name of the job
job_parameters: Dictionary of parameters to pass to the notebook
tags: Dictionary of tags to apply to the job
"""
return {
......@@ -110,18 +144,44 @@ class DatabricksJobManager:
"task_key": "table_update_task",
"notebook_task": {
"notebook_path": notebook_path,
"base_parameters": {"input_message": "default_value"},
"base_parameters": job_parameters,
},
"timeout_seconds": 3600,
}
],
"max_concurrent_runs": 10,
"tags": {
"purpose": (
"metadata_ingestion"
if "ingestion" in job_name
else "metadata_deletion"
),
"compute_type": "serverless",
"tags": tags,
}
@staticmethod
def create_job_config_for_server(
notebook_path: str,
job_name: str,
job_parameters: dict,
tags: dict,
cluster_config: dict,
):
"""
Create job configuration for a parameterized notebook
Args:
notebook_path: Path to the notebook in Databricks workspace
job_name: Name of the job
job_parameters: Dictionary of parameters to pass to the notebook
tags: Dictionary of tags to apply to the job
cluster_config: Dictionary of cluster configuration ({"existing_cluster_id": cluster_id})
"""
return {
"name": job_name,
**cluster_config,
"notebook_task": {
"notebook_path": notebook_path,
"base_parameters": job_parameters,
},
"timeout_seconds": 0, # No timeout - run indefinitely
"max_concurrent_runs": 1,
"max_retries": -1, # Infinite retries
"retry_on_timeout": True,
"tags": tags,
}
import logging
from typing import List, Union
from scripts.db.databricks import DatabricksManager
from scripts.utils.httpx_util import HTTPXRequestUtil
class DatabricksLibraryManager(DatabricksManager):
    """Install libraries on Databricks clusters via the Libraries 2.0 REST API."""

    def __init__(self, databricks_host: str, access_token: str):
        """
        Initialize Databricks library manager
        databricks_host: Your Databricks workspace URL
        access_token: Personal access token or service principal token
        """
        super().__init__(databricks_host, access_token)
        self.base_url = f"{self.host}/api/2.0/libraries"

    def install_libraries(self, libraries: Union[str, list], cluster_id: str):
        """
        Install libraries in Databricks cluster
        libraries: List of library specs, or a single spec
        cluster_id: ID of the cluster to install libraries in
        """
        # BUG FIX: the install endpoint expects a "libraries" array; a bare
        # (non-list) spec was previously sent through unchanged, so the
        # advertised Union[str, list] signature did not actually work.
        if not isinstance(libraries, list):
            libraries = [libraries]
        url = f"{self.base_url}/install"
        payload = {"cluster_id": cluster_id, "libraries": libraries}
        response = HTTPXRequestUtil(url).post(headers=self.headers, json=payload)
        if response.status_code == 200:
            logging.info("Libraries installed successfully")
        else:
            logging.error(f"Failed to install libraries: {response.text}")

    @staticmethod
    def default_libraries() -> List[dict]:
        """Library specs required for Event Hub streaming workloads."""
        return [
            {"pypi": {"package": "azure-eventhub"}},
            {
                "maven": {
                    "coordinates": "com.microsoft.azure:azure-eventhubs-spark_2.12:2.3.22"
                }
            },
        ]
import base64
import logging
from scripts.db.databricks import DatabricksManager
from scripts.utils.httpx_util import HTTPXRequestUtil
class NotebookManager:
class DatabricksNotebookManager(DatabricksManager):
def __init__(self, databricks_host, access_token):
"""
Initialize Databricks connection
......@@ -13,15 +14,7 @@ class NotebookManager:
databricks_host: Your Databricks workspace URL (e.g., 'https://your-workspace.cloud.databricks.com')
access_token: Personal access token or service principal token
"""
self.host = (
databricks_host
if "https://" in databricks_host
else f"https://{databricks_host}"
)
self.headers = {
"Authorization": f"Bearer {access_token}",
"Content-Type": "application/json",
}
super().__init__(databricks_host, access_token)
def create_notebook(
self, notebook_path, notebook_code: str, language="PYTHON", overwrite=True
......
from ut_sql_utils.asyncio import SQLSessionManager
from ut_sql_utils.asyncio import DeclarativeBaseClassFactory, SQLSessionManager
from ut_sql_utils.asyncio.declarative_utils import DeclarativeUtilsFactory
from scripts.db.redis.project_details import project_details_db
# Shared SQL session plumbing for the "unified_model" database.
sql_database = "unified_model"
# Declarative base bound to the database name.
Base = DeclarativeBaseClassFactory(sql_database)
session_manager = SQLSessionManager(project_details_db)
# FastAPI dependency that yields a DB session.
get_db = session_manager.get_db_factory(database=sql_database)
# FastAPI dependency that yields DeclarativeUtils for this database.
get_declarative_utils = DeclarativeUtilsFactory.get_declarative_utils_factory(
    sql_database, session_manager
)
import logging
from typing import Annotated, Optional
from fastapi import Cookie, Depends, Header, HTTPException, Request
from ut_dev_utils import ILensErrors
from scripts.config import DatabricksConfig
from scripts.db.redis.project_details import fetch_level_details
from scripts.schemas import ModelCreatorSchema
async def get_project_id_advanced(
    request: Request,
    # Cookie parameter
    project_id_cookie: Annotated[Optional[str], Cookie(alias="projectId")] = None,
    # Header parameter
    project_id_header: Annotated[Optional[str], Header(alias="projectId")] = None,
) -> str:
    """Extract project_id with priority: Cookie > Header > Query > Body.

    (BUG FIX: the old docstring claimed Body > Query, but the code checks the
    query string before falling back to the request body.)

    Raises:
        HTTPException: 400 when no project id is found in any source.
    """
    project_id = (
        project_id_cookie
        or project_id_header
        or request.query_params.get("project_id")
        or request.query_params.get("projectId")
    )
    # Best-effort fallback to the JSON body for mutating requests.
    if not project_id and request.method in ["POST", "PUT", "PATCH"]:
        try:
            body = await request.json()
            # Accept both spellings, consistent with the query-param lookup.
            project_id = body.get("project_id") or body.get("projectId")
        except Exception as e:
            # Unparseable/absent body is not fatal; we fail below if nothing found.
            logging.exception(f"Error getting project_id from request body: {e}")
    if not project_id:
        raise HTTPException(
            status_code=400,
            detail={
                "error": "project_id not found",
                "sources_checked": [
                    "cookies",
                    "headers",
                    "query_params",
                    "request_body",
                ],
                "example": "Add project_id in cookie, header, query param, or request body",
            },
        )
    return project_id
async def get_databricks_config(project_id: str = Depends(get_project_id_advanced)):
    """Resolve the Databricks configuration for the extracted project id."""
    try:
        config = get_databricks_details_from_redis(project_id)
    except (ValueError, ILensErrors) as e:
        # Normalize lookup/validation failures into a single error type.
        raise ILensErrors(message=f"Configuration Error: {str(e)}")
    return config
def get_databricks_details_from_redis(project_id: str) -> ModelCreatorSchema:
    """Build a ModelCreatorSchema from the project's Databricks details in Redis.

    Raises ILensErrors when the details are absent or required keys are empty.
    """
    project_details = fetch_level_details(project_id, raw=True)
    if not project_details or "databricks_details" not in project_details:
        raise ILensErrors(message=f"No Databricks config for project {project_id}")
    db_config = project_details["databricks_details"]
    required_keys = [
        "databricks_host",
        "databricks_access_token",
        "databricks_storage_path",
        "eventhub_connection_string",
    ]
    missing = [key for key in required_keys if not db_config.get(key)]
    if missing:
        raise ILensErrors(
            message=f'Missing: {", ".join(missing)} for project {project_id}'
        )
    schema_kwargs = {key: db_config[key] for key in required_keys}
    port = db_config.get("databricks_port", DatabricksConfig.DATABRICKS_DEFAULT_PORT)
    return ModelCreatorSchema(databricks_port=port, **schema_kwargs)
......@@ -13,7 +13,7 @@ class ModelCreatorAgent:
async def model_creator_agent(message: ModelCreatorSchema):
declarative_utils = await DeclarativeUtilsFactory.get_declarative_utils(
raw_database="unified_model",
project_id=message.meta.project_id,
project_id=message.project_id,
session_manager=session_manager,
schema=message.schema,
)
......
from ut_dev_utils.errors import ILensErrors
class ExternalServiceError(ILensErrors):
    """Raised when external service calls fail"""

    def __init__(self, message: str, status_code: int = 200):
        # NOTE(review): status_code defaults to 200 for an error response —
        # confirm this is an intentional project convention.
        super().__init__(message=message, status_code=status_code)
class ResourceNotFoundError(ILensErrors):
    """Raised when a requested resource is not found"""

    def __init__(self, message: str, status_code: int = 200):
        # NOTE(review): status_code defaults to 200 for an error response —
        # confirm this is an intentional project convention.
        super().__init__(message=message, status_code=status_code)
class GenericErrors(ILensErrors):
    """Raised for generic, otherwise-uncategorized application errors"""
    # (Docstring fixed: it was a copy-paste of ExternalServiceError's.)

    def __init__(self, message: str, status_code: int = 200):
        super().__init__(message=message, status_code=status_code)
from typing import Any, Dict, List, Optional, Union
from pydantic import BaseModel, Field, model_validator
from ut_security_util import MetaInfoSchema
from pydantic import BaseModel, Field, computed_field, model_validator
from scripts.config import DatabricksConfig
class ModelCreatorSchema(BaseModel):
    # NOTE(review): this block merges pre- and post-diff field lists, so several
    # fields are declared twice; in a Python class body the later declaration
    # wins. Restore the intended (post-diff) field set from the repository.
    meta: MetaInfoSchema
    # NOTE(review): "schema" shadows BaseModel.schema — verify this is intended.
    schema: Optional[str] = DatabricksConfig.DATABRICKS_PUBLIC_SCHEMA_NAME
    databricks_host: str = DatabricksConfig.DATABRICKS_HOST
    databricks_port: int = DatabricksConfig.DATABRICKS_PORT
    databricks_access_token: str = DatabricksConfig.DATABRICKS_ACCESS_TOKEN
    databricks_http_path: str = DatabricksConfig.DATABRICKS_HTTP_PATH
    databricks_host: str
    databricks_port: int
    databricks_access_token: str
    databricks_user_email: str = "aniket.dhale@ilenscloud.onmicrosoft.com"
    databricks_storage_path: str = DatabricksConfig.DATABRICKS_STORAGE_PATH
    databricks_storage_path: str
    databricks_http_path: Optional[str] = None
    eventhub_connection_string: str

    @computed_field
    @property
    def databricks_uri(self) -> Optional[str]:
        """Automatically computed databricks URI that updates when databricks_http_path changes"""
        if self.databricks_http_path:
            return (
                f"databricks://token:{self.databricks_access_token}@{self.databricks_host}:{self.databricks_port}"
                f"?http_path={self.databricks_http_path}"
            )
        return None
class ModelInstanceSchema(BaseModel):
......@@ -25,15 +35,23 @@ class ModelInstanceSchema(BaseModel):
sql_schema: Optional[str] = Field(
default=DatabricksConfig.DATABRICKS_PUBLIC_SCHEMA_NAME, alias="schema"
)
databricks_host: str = DatabricksConfig.DATABRICKS_HOST
databricks_port: int = DatabricksConfig.DATABRICKS_PORT
databricks_access_token: str = DatabricksConfig.DATABRICKS_ACCESS_TOKEN
databricks_http_path: str = DatabricksConfig.DATABRICKS_HTTP_PATH
databricks_user_email: str = "aniket.dhale@ilenscloud.onmicrosoft.com"
databricks_storage_path: str = DatabricksConfig.DATABRICKS_STORAGE_PATH
databricks_host: str
databricks_port: int
databricks_access_token: str
databricks_http_path: str
databricks_user_email: str
databricks_storage_path: str
@model_validator(mode="before")
def validate_data(cls, values: Dict[str, Any]) -> Dict[str, Any]:
if "data" in values and isinstance(values["data"], dict):
values["data"] = [values["data"]]
return values
@model_validator(mode="before")
def prepare_databricks_uri(cls, values):
values["databricks_uri"] = (
f"databricks://token:{values['databricks_access_token']}@{values['databricks_host']}:{values['databricks_port']}"
f"?http_path={values['databricks_http_path']}"
)
return values
......@@ -4,8 +4,6 @@ from typing import Optional
from sqlalchemy import create_engine, text
from ut_dev_utils import get_db_name
from scripts.config import DatabricksConfig
class DatabricksSQLUtility:
def __init__(self, catalog_name: str, project_id: str):
......@@ -18,7 +16,7 @@ class DatabricksSQLUtility:
self.catalog_name = get_db_name(project_id=project_id, database=catalog_name)
self.engine = None
def connect_to_databricks(self):
def connect_to_databricks(self, databricks_uri: str):
"""
Connect to Databricks using sqlalchemy-databricks
"""
......@@ -26,7 +24,7 @@ class DatabricksSQLUtility:
# Build connection string for sqlalchemy-databricks
self.engine = create_engine(
DatabricksConfig.DATABRICKS_URI,
databricks_uri,
pool_pre_ping=True,
pool_recycle=3600,
echo=False,
......@@ -160,6 +158,25 @@ class DatabricksSQLUtility:
)
raise
def create_volume(self, volume_name: str, location_name: str = None) -> str:
"""
Create a volume in Unity Catalog
volume_name: Name for the volume(<catalog>.<schema>.<external-volume-name>)
location_name: Name of the external location
"""
if location_name:
ddl = f"CREATE EXTERNAL VOLUME IF NOT EXISTS {volume_name}"
ddl += f"\nLOCATION '{location_name}'"
else:
ddl = f"CREATE VOLUME IF NOT EXISTS `{volume_name}`"
try:
self.execute_sql_statement(ddl)
logger.info(f"Volume '{volume_name}' created successfully")
return volume_name
except Exception as e:
logger.error(f"Failed to create volume '{volume_name}': {str(e)}")
raise
def execute_sql_statement(self, query: str):
try:
with self.engine.connect() as conn:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment