
import os
import pandas as pd


#--------------------------------------------------------------------

#Filter each CSV by city-center id, e.g. FSD2201.
#The output columns are built per class.
#The output rows are years, one per annual CSV.

#Use the LST class stats for the per-class LST values,
#then use HOTSPOT_CLASS_STATS_ANNUAL for the per-class hotspot counts.

# Statistic name used to pick the annual CSVs (author's options: MEANLST, MAXLST)
# and the IATA code that selects the city folder.
lst_stat = "MAXLST"
iata = "FSD"

# Every input folder has the shape <prefix><FOLDER_NAME>\ByCSV, so the shared
# pieces are spelled out once and reused below.
_output_folder_prefix = (
    "Y:\\UHI\\data\\Phase2HPCTesting\\MSO_FSD_2022_12_22\\"
    + iata
    + "\\output_folder\\"
)
_by_csv_suffix = "\\ByCSV"

HOTSPOT_CLASS_STATS_ANNUAL = _output_folder_prefix + "HOTSPOT_CLASS_STATS_ANNUAL" + _by_csv_suffix
INTENSITY_CITYCEN_STATS_ANNUAL = _output_folder_prefix + "INTENSITY_CITYCEN_STATS_ANNUAL" + _by_csv_suffix
INTENSITY_CITYCEN_STATS_TOTAL = _output_folder_prefix + "INTENSITY_CITYCEN_STATS_TOTAL" + _by_csv_suffix
LST_CITYCEN_STATS_ANNUAL = _output_folder_prefix + "LST_CITYCEN_STATS_ANNUAL" + _by_csv_suffix
LST_CLASS_STATS_ANNUAL = _output_folder_prefix + "LST_CLASS_STATS_ANNUAL" + _by_csv_suffix
LST_CLASS_STATS_TOTAL = _output_folder_prefix + "LST_CLASS_STATS_TOTAL" + _by_csv_suffix
NUGROWTH_CITYCEN_STATS_STACK = _output_folder_prefix + "NUGROWTH_CITYCEN_STATS_STACK" + _by_csv_suffix
PERSISTENT_CITYCEN_STATS_STACK = _output_folder_prefix + "PERSISTENT_CITYCEN_STATS_STACK" + _by_csv_suffix

# Destination of the combined table: <iata>_<lst_stat>.csv under D:\RezaTemp\.
output_csv_path = os.path.join("D:\\RezaTemp\\", "_".join([iata, lst_stat]) + ".csv")

#Plan:
#1. Get the file list for LST_CLASS_STATS_ANNUAL and sort it.
#2. Parse the year from each filename.
#3. Load each file into a dataframe and store it in a dict keyed by year.
#4. Subset each per-year dataframe to the correct id, e.g. FSD2201 (MSO2201?).

#5. From the LST dataframe, make new column names by concatenating
#   the class and the stat name.
#6. Make a new dataframe with those columns.
#7. Go through each year's dataframe, build the row for the new columns,
#   then insert it.

#8. Write out the dataframe.

def _load_annual_frames_by_year(folder, stat_name):
    """Load the annual CSVs in ``folder`` for one statistic, keyed by year.

    Files are visited in sorted name order and only those whose name contains
    ``stat_name`` are read. The dict key is the fourth ``_``-separated piece
    of the filename, which this script uses as the year. Each frame is reduced
    to the rows whose ``id`` ends in "01" (e.g. FSD2201).

    Args:
        folder: Directory holding the per-year CSV files.
        stat_name: Substring a filename must contain to be loaded.

    Returns:
        dict mapping the parsed year string to the filtered DataFrame.

    Raises:
        ValueError: If a matching filename has fewer than four
            ``_``-separated pieces, so no year can be parsed from it.
    """
    frames_by_year = {}
    for filename in sorted(os.listdir(folder)):
        if stat_name not in filename:
            continue
        filename_pieces = filename.split("_")
        if len(filename_pieces) < 4:
            raise ValueError(
                "Cannot parse a year from filename '" + filename + "' in " + folder
            )
        frame = pd.read_csv(os.path.join(folder, filename))
        # Keep only ids ENDING in "01". The previous str.contains("01") also
        # kept ids with "01" anywhere in them, which contradicts the intent.
        # astype(str) makes missing ids compare as "nan" and drop out.
        ends_in_01 = frame["id"].astype(str).str.endswith("01")
        frames_by_year[filename_pieces[3]] = frame[ends_in_01]
    return frames_by_year


# One dict of per-year frames per input folder, all selected by lst_stat.
lst_class_stats_annual_df_by_year = _load_annual_frames_by_year(
    LST_CLASS_STATS_ANNUAL, lst_stat
)
hotspot_class_stats_annual_df_by_year = _load_annual_frames_by_year(
    HOTSPOT_CLASS_STATS_ANNUAL, lst_stat
)
intensity_citycen_stats_annual_df_by_year = _load_annual_frames_by_year(
    INTENSITY_CITYCEN_STATS_ANNUAL, lst_stat
)

#--------------------------------------------------------------------

# Read the three trend tables (p-value, r-squared, slope) from the
# INTENSITY_CITYCEN_STATS_TOTAL folder, in that order.
_trend_csv_names = ("pValue_ZMEAN.csv", "r-squared_ZMEAN.csv", "Slope_ZMEAN.csv")
p_value_df, r_squared_df, slope_df = [
    pd.read_csv(os.path.join(INTENSITY_CITYCEN_STATS_TOTAL, trend_csv_name))
    for trend_csv_name in _trend_csv_names
]

#--------------------------------------------------------------------


# Sanity check before building the output: take the first loaded frame from the
# hotspot and LST sources and require that they have the same number of rows.
# An empty source is reported explicitly instead of failing with a bare
# IndexError on the first-element lookup.
if not hotspot_class_stats_annual_df_by_year:
    raise ValueError(
        "No '" + lst_stat + "' CSVs were loaded from HOTSPOT_CLASS_STATS_ANNUAL"
    )
if not lst_class_stats_annual_df_by_year:
    raise ValueError(
        "No '" + lst_stat + "' CSVs were loaded from LST_CLASS_STATS_ANNUAL"
    )

a_hotspot_class_stats_annual_df_by_year_df = next(iter(hotspot_class_stats_annual_df_by_year.values()))
file_count_check_1 = len(a_hotspot_class_stats_annual_df_by_year_df)

a_lst_class_stats_annual_df_by_year_df = next(iter(lst_class_stats_annual_df_by_year.values()))
file_count_check_2 = len(a_lst_class_stats_annual_df_by_year_df)
#columns noted by the author: id class min max mean median sd sum count nulst_mean

# Raised explicitly (rather than via `assert`) so the check still runs under
# `python -O`. The two values are row counts of the first frames, not file
# counts, and the message says so.
if file_count_check_1 != file_count_check_2:
    raise AssertionError(
        "Row count of the first HOTSPOT_CLASS_STATS_ANNUAL frame ("
        + str(file_count_check_1)
        + ") does not match row count of the first LST_CLASS_STATS_ANNUAL frame ("
        + str(file_count_check_2)
        + ")"
    )



# Column layout of the output table:
#   city_center, year,
#   for every LST class: class_<c>_hotspot_count, then class_<c>_<stat>
#   for each stat in combine_columns,
#   the four intensity_* columns,
#   and finally the three "Intensity Trends" columns.
output_columns = ["city_center", "year"]

unique_hotspot_classes = a_hotspot_class_stats_annual_df_by_year_df["class"].unique()
unique_classes = a_lst_class_stats_annual_df_by_year_df["class"].unique()
combine_columns = ["min", "max", "mean", "median", "sd"]
for class_value in unique_classes:
    class_prefix = "class_" + str(class_value) + "_"
    output_columns.append(class_prefix + "hotspot_count")
    output_columns.extend(class_prefix + stat_name for stat_name in combine_columns)

output_columns += [
    "intensity_min",
    "intensity_max",
    "intensity_mean",
    "intensity_sd",
]

#output_columns.append("hotspot_count")

output_columns += [
    "p_value Intensity Trends [1985-2020]",
    "r_squared Intensity Trends [1985-2020]",
    "slope Intensity Trends [1985-2020]",
]

# Start from an empty frame that already carries the final column order.
output_df = pd.DataFrame(columns=output_columns)

def _first_mean_for_id(trend_df, city_center_id, label):
    """Return the ``mean`` value of the first ``trend_df`` row matching the id.

    The lookup works on a filtered copy, so ``trend_df`` itself is never
    narrowed and can be queried again for a different id.

    Args:
        trend_df: Trend table with at least ``id`` and ``mean`` columns.
        city_center_id: Value to match against the ``id`` column.
        label: Short name of the table, used only in the error message.

    Raises:
        IndexError: If no row of ``trend_df`` has the requested id.
    """
    matching_rows = trend_df.loc[trend_df["id"] == city_center_id]
    if matching_rows.empty:
        raise IndexError(
            "No row with id '" + str(city_center_id) + "' in the " + label + " table"
        )
    return matching_rows["mean"].values[0]


# Build one output row per year. Every LST class row of that year contributes
# its class_<c>_<stat> values to the same row dict; the id-level values (trend,
# intensity, city_center, year) are simply rewritten on each pass.
output_rows = []
for year_key, a_lst_class_stats_annual_df in lst_class_stats_annual_df_by_year.items():
    a_hotspot_class_stats_annual_df = hotspot_class_stats_annual_df_by_year[year_key]
    a_intensity_citycen_stats_annual_df = intensity_citycen_stats_annual_df_by_year[year_key]

    new_row = {}
    for lst_record in a_lst_class_stats_annual_df.to_dict("records"):
        a_id = lst_record["id"]

        # Trend values are looked up without reassigning the trend tables.
        # Previously each table was overwritten by its own filtered subset, so
        # a second distinct id would find an empty table and crash.
        new_row["p_value Intensity Trends [1985-2020]"] = _first_mean_for_id(p_value_df, a_id, "p_value")
        new_row["r_squared Intensity Trends [1985-2020]"] = _first_mean_for_id(r_squared_df, a_id, "r_squared")
        new_row["slope Intensity Trends [1985-2020]"] = _first_mean_for_id(slope_df, a_id, "slope")

        intensity_rows_for_id = a_intensity_citycen_stats_annual_df[
            a_intensity_citycen_stats_annual_df["id"] == a_id
        ]

        # Per-class LST statistics for this record's class.
        class_prefix = "class_" + str(lst_record["class"]) + "_"
        for stat_name in ("min", "max", "mean", "median", "sd"):
            new_row[class_prefix + stat_name] = lst_record[stat_name]

        # Hotspot counts for every class listed under the same id.
        hotspot_rows_for_id = a_hotspot_class_stats_annual_df[
            a_hotspot_class_stats_annual_df["id"] == a_id
        ]
        for hotspot_record in hotspot_rows_for_id.to_dict("records"):
            new_row["class_" + str(hotspot_record["class"]) + "_hotspot_count"] = hotspot_record["count"]

        # Intensity statistics come from the first row matching the id.
        for stat_name in ("min", "max", "mean", "sd"):
            new_row["intensity_" + stat_name] = intensity_rows_for_id[stat_name].values[0]

        # Deliberately rewritten for every class row of the year.
        new_row["city_center"] = a_id
        new_row["year"] = year_key

    output_rows.append(new_row)

# DataFrame.append was removed in pandas 2.0, so the rows are collected first
# and the frame is built once. output_df is the empty frame created earlier;
# its column order is kept and any column seen only in the rows goes last,
# which is what the repeated append produced.
_rows_df = pd.DataFrame(output_rows)
_extra_columns = [name for name in _rows_df.columns if name not in output_df.columns]
output_df = _rows_df.reindex(columns=list(output_df.columns) + _extra_columns)



#Author's note on a possible follow-up: reorder the dataframe columns like this
#https://stackoverflow.com/questions/13148429/how-to-change-the-order-of-dataframe-columns
#df = df[cols]
#Print the assembled table to stdout, then write it to output_csv_path
#without the index column.
print(output_df)
output_df.to_csv(output_csv_path,index=False)


#ABC is assigned here but not read anywhere in the visible file.
#NOTE(review): looks like a leftover placeholder (e.g. a debugger breakpoint
#anchor) - confirm before removing.
ABC = None










#Same placeholder assignment as above; also not read in the visible file.
ABC = None






















