import os
import sys
import warnings

import pandas as pd
import pandas.errors
from sqlalchemy import exc as sa_exc
from sqlalchemy import create_engine
from sqlalchemy import Table, Column, Integer, String, DateTime, MetaData
from sqlalchemy.dialects import postgresql  # or whatever dialect you're using

from core.yaml_reader import YamlConfigLoader
from utility.logging import set_logger
from utility.slack import log_collect_and_notify


# Python Script File Name (used to tag log lines and Slack notifications)
current_python_file_name = os.path.basename(sys.argv[0])

# Set logger, named after this script
logger = set_logger(current_python_file_name)

# Create Object from yaml
# NOTE(review): DB credentials are loaded at import time from
# prod_replication_secrets.yml — presumably resolved relative to the
# working directory; confirm deployment layout.
yaml_loader = YamlConfigLoader('prod_replication_secrets.yml')

# Set Params to read resources from YAML file
params = yaml_loader.get_resources()


# Safe Query
# Safe Query
def safe_query_byz_schema(sql_query, schema):
    """Run ``sql_query`` against ``schema`` and return the result DataFrame.

    Parameters
    ----------
    sql_query : str
        SQL statement to execute.
    schema : SQLAlchemy engine/connection or DBAPI connection
        Connectable handed straight to ``pd.read_sql``.

    Returns
    -------
    pandas.DataFrame
        The (non-empty) query result.

    Raises
    ------
    pandas.errors.EmptyDataError
        If the query returns zero rows.
    Exception
        Any read failure is logged + Slack-notified, then re-raised.
    """
    try:
        # Run CTE SQL query and fetch data
        df = pd.read_sql(sql_query, schema)
        if df.empty:
            raise pandas.errors.EmptyDataError(
                "DataFrame is Empty, Check Error logs for Details")
    except Exception as e:
        # Original code had a broad `except BaseException` FIRST, which made
        # the dedicated EmptyDataError handler below it unreachable and also
        # swallowed KeyboardInterrupt/SystemExit.  A single `Exception`
        # handler covers EmptyDataError (its subclass) and everything else
        # we actually want to report.
        log_collect_and_notify(str(e), logger, current_python_file_name)
        raise
    return df


# CONNECTIONS
# MySQL engines for the source (byzfunder_mca1) and destination (core_data)
# databases.  pool_pre_ping guards against stale pooled connections and
# pool_recycle rotates them hourly.
_byz = params['byzfunder_mca1']
byzfunder_mca1 = create_engine(
    f"mysql+mysqlconnector://{_byz['username']}:{_byz['password']}"
    f"@{_byz['host']}:{_byz['port']}/{_byz['schema']}",
    pool_pre_ping=True,
    pool_recycle=3600,
)

_core = params['core_data']
core_data = create_engine(
    f"mysql+mysqlconnector://{_core['username']}:{_core['password']}"
    f"@{_core['host']}:{_core['port']}/{_core['schema']}",
    pool_pre_ping=True,
    pool_recycle=3600,
)

# SQL Queries
# Pulls raw rows from the source `leads` table; unix-epoch columns
# (submission_date, timestamp) are rendered as 'YYYY-MM-DD' strings in
# SQL so pandas receives plain date strings.
leads_query = """select
                    id AS leadid,
                    userid as customerId,
                    date_format(from_unixtime(submission_date), '%Y-%m-%d') AS dateSubmitted,
                    entity_start_date AS entityStartDate,
                    entity_name AS entityName,
                    entity_dba AS entityDba,
                    entity_federal_id AS fein,
                    prev_id AS prev_id,
		    deal_type as deal_type,
                    entity_type AS entityType,
                    entity_address AS entityAddress,
                    entity_city AS entityCity,
                    entity_zip AS entityZip,
                    entity_telephone AS entityTelephone,
                    entity_state AS entityState,
                    entity_county AS entityCounty,
                    entity_billing_address_1 AS entityBillingAddress1,
                    entity_billing_address_2 AS entityBillingAddress2,
                    iso_id AS isoCode,
                    inactive_iso_submission AS inactiveIsoSubmission,
                    entity_email AS entityEmail,
                    date_format(from_unixtime(timestamp), '%Y-%m-%d') AS timestamp
                   from leads  ;"""

# Define the date range that pandas can handle.
# pd.Timestamp is backed by int64 nanoseconds, so the representable
# window is roughly 1677-09-21 .. 2262-04-11.
min_date = pd.Timestamp.min
max_date = pd.Timestamp.max


def safe_to_datetime(date_str):
    try:
        date = pd.to_datetime(date_str, errors='raise')
        if date < min_date or date > max_date:
            return pd.NaT
        return date
    except:
        return pd.NaT


def main():
    """ETL entry point: extract `leads` from byzfunder_mca1, clean the
    start-date column, and load the result into the core_data database.

    Any load failure is logged, pushed to Slack, and re-raised so the
    scheduler sees a non-zero exit.
    """
    # Extract: run the leads query against the source schema.
    df = safe_query_byz_schema(leads_query, byzfunder_mca1)

    # Transform: coerce unparseable / out-of-range dates to NaT so the
    # destination database does not reject the rows.
    df['entityStartDate'] = df['entityStartDate'].apply(safe_to_datetime)

    # Now df['entityStartDate'] should be in a valid datetime format or NaT
    print(df['entityStartDate'])

    # Load into core_data
    try:
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=sa_exc.SAWarning)

            # Explicit destination column types, passed to to_sql via
            # `dtype`.  The original built a sqlalchemy Table and called
            # metadata.create_all(), but `if_exists='replace'` drops and
            # recreates the table anyway, discarding that schema; it then
            # crashed with a NameError on the undefined `column_name` /
            # `type_info` in the dtype dict, and on `DateTime`, which was
            # never imported.  MySQL requires a length for VARCHAR, hence
            # String(255).
            leads_dtypes = {
                'leadid': Integer,
                'customerId': String(255),
                'dateSubmitted': DateTime,
                'entityStartDate': DateTime,
                'entityName': String(255),
                'entityDba': String(255),
                'fein': String(255),
                'prev_id': String(255),
                'deal_type': String(255),
                'entityType': String(255),
                'entityAddress': String(255),
                'entityCity': String(255),
                'entityZip': String(255),
                'entityTelephone': String(255),
                'entityState': String(255),
                'entityCounty': String(255),
                'entityBillingAddress1': String(255),
                'entityBillingAddress2': String(255),
                'isoCode': String(255),
                'inactiveIsoSubmission': String(255),
                'entityEmail': String(255),
                'timestamp': DateTime,
            }

            # Insert the data from the DataFrame, replacing any previous load.
            df.to_sql(con=core_data, name='leads', if_exists='replace',
                      index=False, dtype=leads_dtypes)
    except Exception as e:
        # Exception (not BaseException) so Ctrl-C / SystemExit propagate
        # without triggering a Slack alert.
        log_collect_and_notify(str(e), logger, current_python_file_name)
        raise


# Script entry point: run the ETL only when executed directly,
# not when imported.
if __name__ == "__main__":
    main()
