import requests
import json
import time
import os
from joblib import Parallel, delayed

import OpenSSL
from socket import timeout, error as SocketError
import urllib3

import config

#-------------
#https://earthexplorer.usgs.gov/inventory/documentation/json-api
#-------------

# Root url of the EarthExplorer inventory JSON API (version 1.4.0).
# Endpoint names such as "login" or "search" are appended to it.
base_url = 'https://earthexplorer.usgs.gov/inventory/json/v/1.4.0/'

# Login credentials, read from the local config module.
username = config.USERNAME
password = config.PASSWORD

"""
https://earthexplorer.usgs.gov/inventory/json/v/1.4.0/datasets
{
    "bounds": {
        "north": 51.5332,
        "east": -64.4405,
        "south": 23.0284,
        "west": -128.5965
    },
    "datasetName": "ARD_TILE",
    "datasetFullName": "U.S. Landsat 4-8 Analysis Ready Data (ARD) Level-2 Tiles (Albers projection)",
    "datasetShortName": "None",
    "idnEntryId": "",
    "endDate": "2020-06-19",
    "startDate": "1982-11-11",
    "lastModifiedDate": "2020-07-08",
    "supportDownload": true,
    "supportBulkDownload": true,
    "bulkDownloadOrderLimit": 500,
    "supportCloudCover": true,
    "supportDeletionSearch": "None",
    "supportOrder": true,
    "orderLimit": 100,
    "totalScenes": 1587571
},
"""


#a_dataset_name = "ARD_TILE"

#a_start_year_inclusive = 1988
#a_end_year_inclusive = 1990
#a_tile_hv_names_to_match = ["H23V13", "H23V14", "H24V13", "H24V14"]
#a_workpath = "C:\\Users\\rhussain\\Desktop\\LSRD\\data\\atlanta"




def _parse_tile_hv_name(tile_hv_name):
    """Return the (h, v) integer pair encoded in a tile name such as "H23V13".

    The digits are located by the "H" and "V" markers rather than by fixed
    string positions, so "H3V10" and "H023V013" are accepted as well.

    Raises:
        ValueError: if the name does not look like H<digits>V<digits>.
    """
    label = tile_hv_name.strip().upper()
    h_digits, _, v_digits = label[1:].partition("V")
    if label[:1] != "H" or not (h_digits.isdigit() and v_digits.isdigit()):
        raise ValueError("tile name must look like H23V13, got " + repr(tile_hv_name))
    return int(h_digits), int(v_digits)


def _tile_hv_from_product_name(name):
    """Return the (h, v) integer pair embedded in a product name or download url.

    The m2m names are inconsistent, eg the download url has
    LT05_CU_023013_19850114_C01_V01 but the entity ids can look like
    LT04_CU_006013_19871025_20190601_C01_V01_ST.  In both, the third
    "_"-separated field is the tile code: first three digits h, the rest v.
    """
    tile_hv = name.split("_")[2]
    return int(tile_hv[0:3]), int(tile_hv[3:])


def download_requested_data(dataset_name,start_year_inclusive,end_year_inclusive,tile_hv_names_to_match,workpath):
    """Download the surface temperature ("ST") tars of the requested tiles.

    Logs in to the EarthExplorer JSON API, searches the dataset half a year
    at a time, keeps the scenes whose tile matches one of the requested
    names, asks for their download options and saves every "ST" product to
    <workpath>/<tile name>/<product id>.tar.

    Args:
        dataset_name: dataset to search, eg "ARD_TILE".
        start_year_inclusive: first calendar year to fetch (int).
        end_year_inclusive: last calendar year to fetch (int).
        tile_hv_names_to_match: tile names such as "H23V13".
        workpath: base folder; one sub folder per tile name is created in it.

    Raises:
        ValueError: if a tile name cannot be parsed.
        RuntimeError: if the API answers a request without a "data" payload.
    """
    # In the future 1.5 version of the EE api the json no longer has to be
    # wrapped in the jsonRequest parameter, it can be posted directly.

    # Parse (and thereby validate) the requested tiles once, before any
    # network traffic; duplicates are dropped but the order is kept.
    tile_names = list(dict.fromkeys(tile_hv_names_to_match))
    wanted_tiles = {_parse_tile_hv_name(a_tile_name) for a_tile_name in tile_names}

    def api_data(response, endpoint):
        """Return the "data" member of an API answer, or raise if it is missing."""
        payload = response.json()
        data = payload.get("data")
        if data is None:
            raise RuntimeError("m2m " + endpoint + " request failed: "
                               + str(payload.get("errorCode")) + " " + str(payload.get("error")))
        return data

    # Step 1 login
    post_data = {"jsonRequest": json.dumps({'username': username, 'password': password, 'catalogId': "EE"})}
    login_response = requests.post(base_url + "login", data=post_data,
                                   headers={'Content-type': 'application/x-www-form-urlencoded'})
    apikey = api_data(login_response, "login")

    def get_download_urls_between_calendar_dates(start_yyyymmdd,end_yyyymmdd):
        """Return the "ST" download urls of the wanted tiles acquired in the interval."""
        max_results = 20000
        search_parameters = {"apiKey": apikey, "datasetName": dataset_name,
                             "temporalFilter": {"startDate": start_yyyymmdd, "endDate": end_yyyymmdd},
                             "maxResults": max_results}
        while True:
            try:
                search_response = requests.get(base_url + "search",
                                               params={"jsonRequest": json.dumps(search_parameters)})
                break
            except requests.exceptions.ConnectionError as ce:
                print(ce)
                print("connection error to m2m server, retrying in 5 minutes "
                      + start_yyyymmdd + " - " + end_yyyymmdd)
                time.sleep(60 * 5)

        search_results = api_data(search_response, "search")["results"]
        if len(search_results) >= max_results:
            # the server caps the answer at maxResults, so scenes may be missing
            print("warning: search for " + start_yyyymmdd + " - " + end_yyyymmdd
                  + " returned " + str(len(search_results)) + " results, the list may be truncated")

        # entity ids look like LT04_CU_006013_19871025_20190601_C01_V01_ST
        entity_ids = [entity["entityId"] for entity in search_results
                      if _tile_hv_from_product_name(entity["entityId"]) in wanted_tiles]
        if not entity_ids:
            # nothing matched, so there is nothing to ask download options for
            return []

        download_options_parameters = {"apiKey": apikey, "datasetName": dataset_name, "entityIds": entity_ids}
        options_response = requests.get(base_url + "downloadoptions",
                                        params={"jsonRequest": json.dumps(download_options_parameters)})
        tiles = api_data(options_response, "downloadoptions")

        # ST for surface temperature
        return [a_choice["url"]
                for a_tile in tiles
                for a_choice in a_tile["downloadOptions"]
                if a_choice["downloadCode"] == "ST"]

    # The service times out when all years are requested at once, and even a
    # whole year is too much for it, so every year is fetched in two halves.
    # Both halves include 06/30 so that no scene is lost whichever way the
    # server treats the end of an interval; the duplicates this produces are
    # removed right after the loop.
    half_year_intervals = (("/01/01", "/06/30"), ("/06/30", "/12/31"))
    download_urls_we_want = []
    for year in range(start_year_inclusive, end_year_inclusive + 1):
        print("fetching download urls for "+str(year))
        for interval_start, interval_end in half_year_intervals:
            download_urls_we_want.extend(
                get_download_urls_between_calendar_dates(str(year) + interval_start, str(year) + interval_end))
    download_urls_we_want = list(dict.fromkeys(download_urls_we_want))

    # now we setup the folders: the base folder gets one folder per hv tilename
    # and every url is paired with the file it has to be saved to
    urls_with_tile = [(source_url, _tile_hv_from_product_name(source_url)) for source_url in download_urls_we_want]
    download_url_filepath_tuples = []
    for a_tile_name in tile_names:
        hv_tile_name_path = os.path.join(workpath, a_tile_name)
        print(hv_tile_name_path)
        os.makedirs(hv_tile_name_path, exist_ok=True)
        wanted_tile = _parse_tile_hv_name(a_tile_name)
        for source_url, url_tile in urls_with_tile:
            if url_tile == wanted_tile:
                # eg 'https://earthexplorer.usgs.gov/download/5e83a38babc33e90/LT05_CU_023013_19850114_C01_V01/M2M/'
                filename_to_use = source_url.rstrip("/").split("/")[-2] + ".tar"
                download_url_filepath_tuples.append((source_url, os.path.join(hv_tile_name_path, filename_to_use)))

    # errors that are treated as a transient network problem and retried
    retryable_errors = (OpenSSL.SSL.SysCallError, SocketError, urllib3.exceptions.ProtocolError,
                        requests.exceptions.RequestException)

    def download_url_filepath_tuple(url_and_path):
        """Stream one url to its file, retrying network errors until it succeeds."""
        source_url, file_path_to_use = url_and_path
        # the data goes to a side file first, so the final name only ever
        # holds a completely downloaded tar
        partial_path = file_path_to_use + ".part"
        while True:
            print("downloading "+file_path_to_use)
            try:
                with requests.get(source_url, stream=True, timeout=(30, 300)) as response:
                    status = response.status_code
                    if 400 <= status < 500 and status != 429:
                        # a client error will not go away by retrying and its
                        # body is an error page, not a tar
                        print("skipping " + source_url + ", server answered HTTP " + str(status))
                        return
                    # remaining error codes (5xx, 429) raise and are retried below
                    response.raise_for_status()
                    with open(partial_path, 'wb') as partial_file:
                        for chunk in response.iter_content(chunk_size=1024 * 1024):
                            partial_file.write(chunk)
            except retryable_errors as network_error:
                print("network error, retrying")
                time.sleep(10)
                print(str(network_error))
            else:
                os.replace(partial_path, file_path_to_use)
                return

    # single threaded; joblib's Parallel could be used here to fetch several files at once
    for url_and_path in download_url_filepath_tuples:
        download_url_filepath_tuple(url_and_path)


#a_dataset_name = "ARD_TILE"
#a_start_year_inclusive = 1988
#a_end_year_inclusive = 1990
#a_tile_hv_names_to_match = ["H23V13", "H23V14", "H24V13", "H24V14"]
#a_workpath = "C:\\Users\\rhussain\\Desktop\\LSRD\\data\\atlanta"
#def download_requested_data(dataset_name,start_year_inclusive,end_year_inclusive,tile_hv_names_to_match,workpath):

# Settings of the run that starts as soon as this file is executed.
a_dataset_name = "ARD_TILE"
a_start_year_inclusive, a_end_year_inclusive = 1988, 1989
# alternative tile list: ["H23V13", "H23V14", "H24V13", "H24V14"]
a_tile_hv_names_to_match = ["H23V13"]
a_workpath = "/caldera/projects/usgs/eros/urban_heat_islands/atlanta"

download_requested_data(
    dataset_name=a_dataset_name,
    start_year_inclusive=a_start_year_inclusive,
    end_year_inclusive=a_end_year_inclusive,
    tile_hv_names_to_match=a_tile_hv_names_to_match,
    workpath=a_workpath,
)
