""" Map from the 30m SSURGO/STATSGO mukeyint mosaic, map a single property (properties), write .img. gNC20.t561.results_ptxt.mu_co_soc.results_spatial.map.Basic.01.py 1/21/2021 N. Bliss Delete extraneous comments (lots). Replace binary functions with ascii functions. Backward compatibility not maintained. Go to an old source (e.g., .t521) for prior methods. ssurgo.t521.SaS.mapunit_to_map.OCpct.NatWetland.02.py 8/7/2019 N. Bliss ssurgo.t521.SaS.mapunit_to_map.OCpct.NatWetland.01.py 8/7/2019 N. Bliss add in creating statistics and pyramids Run for 010_011 (and more) ssurgo.t521.SaS.mapunit_to_map.OCpct.NatWetland.01.py 8/6/2019 N. Bliss for National Wetlands (Camille Stagg, Lisamarie Wyndham-Myers, Birget Uhran) read directly from the mapunit file, select one attribute No data conversion beforehand (e.g., like in the single_dictionaries form) This version for the 1 cm increment source (e.g., 000_001) If Lisamarie requests a depth zone (e.g., 0-30cm), a different run will be needed. prior history deleted... (see .t521 or before) """ ##import sys,os,traceback,time,glob,shutil,zipfile, struct import sys,os,traceback,time, struct import arcpy # for statistics and pyramids import numpy as np from osgeo import gdal # Geospatial Data Abstraction Library (GDAL) def float_equal( num1, num2, epsilon=0.00001): # test if two floating point numbers are equal within a finite tolerance diff = num1 - num2 if abs( diff ) < epsilon: return True else: return False def value_f_mukeyint(mukeyint): # assumes value_d_mukeyint has been defined before this function is called 'Assume dictionary has been conditioned so all keys are in the dictionary.' 
return value_d_mukeyint[mukeyint] def value_f_mukeyint_robust(mukeyint): # assumes value_d_mukeyint has been defined before this function is called global mukeyint_spatial_missing_attribute_set # defined before this function is called 'Robust version, tolerates if not the case that the dictionary has been conditioned so all keys are in the dictionary.' if mukeyint in value_d_mukeyint: return value_d_mukeyint[mukeyint] else: mukeyint_spatial_missing_attribute_set.add( mukeyint ) return out_NoData_value # should be of the correct output data type ##### 2/7/2015 def bool_is_NoData_from_value_data_code_f( value, data_code ): # 12/ 8/2016 change calls from bool_is_NoData_f_value_data_code to bool_is_NoData_from_value_data_code_f, avoid using struct # 3/ 6/2014 is_NoData_value_f_data_code_value change the name to reflect order of the arguments and stress boolean result name_this_function = 'bool_is_NoData_from_value_data_code_f' # 8/2/2010 secure test for equality to a NoData code bool_return_value = False if 'string' in data_code: if value is None or len(value) == 0: bool_return_value = True elif 'float' in data_code: # need an approximate test for floating point because if written to and read from a file, a system-specific approximation is used. if value < ( formats_d_data_code[data_code][4] / 10. 
): # e.g., -3.4e+38 even approximately is less than -3.4e+37 ## if struct.pack(' 200: print ' step %10.3f minutes, %10.3f hours' % ( step/60, step/3600 ) print ' elapsed %10.3f minutes, %10.3f hours' % ( elapsed/60, elapsed/3600 ) previoustime = current return previoustime def timer_final_f(message, starttime, previoustime): # updated 3/5/2010 current = time.clock() elapsed = current - starttime print 'Timer: %s: elapsed %15.5f' % ( message, elapsed) print ' elapsed %10.3f minutes, %10.3f hours' % ( elapsed/60, elapsed/3600 ) previoustime = current return previoustime def timestamp_f(): return '%4d%02d%02d%02d%02d%02d' % time.localtime()[:6] # string with year, month, day, hour, minute second: '20090204124750' def print_structure_tt_2_f( structure_tt, ): # no return value, prints output # 12/ 2/2016 change name from print_structure_binary_tt_2_f to print_structure_tt_2_f (works for binary or ascii) # 6/ 9/2016 change name from print_f_structure_binary_tt to print_structure_binary_tt_2_f (version 2, suffix _f indicates function) # 2/28/2014 print 'NoData' for any data type # 2/11/2014 print_f_structure_binary_tt print 'NoData' if min and max have not been reset... name_this_function = 'print_structure_tt_2_f' # 1/ 9/2013 N. Bliss allow for long variable names # 1/ 4/2013 N. Bliss neater printing with minimum and maximum based on format codes (may still be odd if all NoData) # 7/30/2010 N. 
Bliss # structure_tt a tuple of tuples representing the information in a header file # print option: choice of 'FULL', 'RAW', 'NEAT' where 'FULL' prints both the 'RAW' and 'NEAT' versions element_names = ['number_of_records', 'number_of_variables','name_t', 'key_type_t', 'data_code_t', 'minimum_t', 'maximum_t', 'count_NoData_t'] print 'printing an instance of structure_tt' print '%-28s %10d' % ( element_names[0], structure_tt[0] ) # number_of_records print '%-28s %10d' % ( element_names[1], structure_tt[1] ) # number_of_variables print name_t = structure_tt[2] len_name_max = 0 for name in name_t: len_name_max = max( len_name_max, len( name ) ) len_name_format = len_name_max + 2 # allow a bit of space len_tuple = len(structure_tt[2]) # all tuples should be the same length format_string = '%-' + '%d' % len_name_format + 's %-11s %-13s %15s %15s %15s' # HARDCODE for 6 elements print format_string % tuple( [ element_names[index] for index in range(2,8)] ) print '-' * ( 74 + len_name_format ) # HARDCODE with sum of format_string formats and spaces for index2 in range(len_tuple): name = structure_tt[2][index2] key_type = structure_tt[3][index2] data_code = structure_tt[4][index2] # 1/4/2013 update for neater printing count_NoData = int( structure_tt[7][index2] ) if 'string' in data_code or 'text' in data_code or 'int' in data_code: minimum = int( structure_tt[5][index2] ) maximum = int( structure_tt[6][index2] ) format_string = '%-' + '%d' % len_name_format + 's %-11s %-13s %15d %15d %15d' # HARDCODE for 6 elements elif 'float' in data_code: minimum = float( structure_tt[5][index2] ) maximum = float( structure_tt[6][index2] ) format_string = '%-' + '%d' % len_name_format + 's %-11s %-13s %15.5f %15.5f %15d' # HARDCODE for 6 elements else: print 'ERROR: unrecognized data_code in print_structure_tt_2_f, stopping...' 
print 'ERROR: data_code is %s' % data_code raise Exception if maximum == formats_d_data_code[data_code][2] and minimum == formats_d_data_code[data_code][3]: # The "minimum" was initialized to maximum for the data type and "maximum" was initialized to the minimum for the data type # if they are both unchanged, then no data were encountered format_string = '%-' + '%d' % len_name_format + 's %-11s %-13s %15s %15s %15d' # HARDCODE for 6 elements minimum = 'NoData' maximum = 'NoData' print format_string % tuple( [name, key_type, data_code, minimum, maximum, count_NoData] ) # no return from this function (it prints to the standard output (e.g., Python Shell) ##### # write a record to an ascii file def record_to_ascii_f( unit_ascii_out_open, data_code_list, output_t ): # 11/15/2016 # unit_ascii_out_open is an open file ("w") for writing ascii data # data_code_list: a list of the data codes (e.g., "string", "uint8" etc., with one for each value in the output tuple # output_t: a tuple with the output values to be written to the ascii file value_list = [] for index in range( len( data_code_list )): data_code = data_code_list[index] value = output_t[index] if value == '': ### USING EMPTY STRING as the NoData value, #### this may change ### string_out = value elif bool_is_NoData_from_value_data_code_f( value, data_code ): string_out = '' ### USING EMPTY STRING as the NoData value, #### this may change ### elif data_code == 'string': string_out = value elif data_code == 'uint8': string_out = str( value ) elif data_code == 'uint16': string_out = str( value ) elif data_code == 'uint32': string_out = str( value ) elif data_code == 'uint64': string_out = str( value ) elif data_code == 'float32_0': string_out = '%.0f' % value # could use formats_d_data_code elif data_code == 'float32_1': string_out = '%.1f' % value elif data_code == 'float32_2': string_out = '%.2f' % value elif data_code == 'float32_3': string_out = '%.3f' % value elif data_code == 'float32_4': string_out = '%.4f' % 
value elif data_code == 'float32_5': string_out = '%.5f' % value else: # for other codes, see "formats_d_data_code" above print 'ERROR: unrecognized data_code in record_to_ascii_f, stopping...' print 'ERROR: data_code', data_code raise Exception # write the ascii string value_list.append( string_out ) write_string = '|'.join( value_list ) unit_ascii_out_open.write( write_string + '\n' ) # no return value (i.e., return value is None), purpose is side-effect of writing a "record" to the ascii file at unit_ascii_out_open ##### # write a record to an ascii file def record_to_ascii_2_f( unit_ascii_out_open, data_code_list, output_t, delimiter='|', header_t=tuple() ): # 1/21/2021 # 1/21/2021 New version to allow choice of delimiter and optionally writing a header record # If a header is called for, it is written automatically on the first call to the function, no separate statement is needed. # default delimiter is the pipe ('|') but can be overwritten by specifying a different delimiter such as comma ',' or tab '\t' # The default header is an empty tuple (which means no header will be written), but a header can be specified with a tuple of strings of # the column names. If the tuple is not empty, then the header will be written. 
# unit_ascii_out_open is an open file ("w") for writing ascii data # data_code_list: a list of the data codes (e.g., "string", "uint8" etc., with one for each value in the output tuple # output_t: a tuple with the output values to be written to the ascii file # If the output file is empty (e.g., nothing yet written, file pointer at position zero) and header_t is not empty, then write a header point_ascii_out_open_tell = unit_ascii_out_open.tell() if point_ascii_out_open_tell == 0 and len( header_t ) != 0: write_string = delimiter.join( header_t ) # header as a string unit_ascii_out_open.write( write_string + '\n' ) # write the header, continue to the data record value_list = [] for index in range( len( data_code_list )): data_code = data_code_list[index] value = output_t[index] if value == '': ### USING EMPTY STRING as the NoData value, #### this may change ### string_out = value elif bool_is_NoData_from_value_data_code_f( value, data_code ): string_out = '' ### USING EMPTY STRING as the NoData value, #### this may change ### elif data_code == 'string': string_out = value elif data_code == 'uint8': string_out = str( value ) elif data_code == 'uint16': string_out = str( value ) elif data_code == 'uint32': string_out = str( value ) elif data_code == 'uint64': string_out = str( value ) elif data_code == 'float32_0': string_out = '%.0f' % value # could use formats_d_data_code elif data_code == 'float32_1': string_out = '%.1f' % value elif data_code == 'float32_2': string_out = '%.2f' % value elif data_code == 'float32_3': string_out = '%.3f' % value elif data_code == 'float32_4': string_out = '%.4f' % value elif data_code == 'float32_5': string_out = '%.5f' % value else: # for other codes, see "formats_d_data_code" above print 'ERROR: unrecognized data_code in record_to_ascii_f, stopping...' 
print 'ERROR: data_code', data_code raise Exception # write the ascii string value_list.append( string_out ) write_string = delimiter.join( value_list ) # 1/21/2021 make the delimiter an argument of the function unit_ascii_out_open.write( write_string + '\n' ) # no return value (i.e., return value is None), purpose is side-effect of writing a "record" to the ascii file at unit_ascii_out_open ##### # read a record from a ascii file def record_from_ascii_f(unit_ascii_in_open, point_seek, data_code_list): ## name_this_function = 'record_from_ascii_f' # 12/ 5/2016 fix to update point_seek, handle NoData (empty string) on input to the NoData value for analysis # 11/15/2016 adapt for reading ascii (rather than binary) # unit_ascii_in_open: an open file unit, e.g., opened with something like: unit_ascii_in_open = open( filename_ascii, 'r' ) # point_seek: the seek point in the file, initilize as zero, then use output from the previous call # data_code_list: a list such as data_code_list = ['uint32', 'string'] # NOTE: the data_code_list can also be passed in as a tuple (e.g., data_code_t) and the function will behave correctly. unit_ascii_in_open.seek( point_seek ) point_ascii_in_open_tell = unit_ascii_in_open.tell() if point_ascii_in_open_tell != point_seek: print 'ERROR: file position error, stopping...' 
print 'ERROR: unit_ascii_in_open', unit_ascii_in_open print 'ERROR: point_seek', point_seek print 'ERROR: point_ascii_in_open_tell', point_ascii_in_open_tell raise Exception line_in = unit_ascii_in_open.readline() # from point to and including the newline line_in_strip = line_in.strip() value_list = line_in_strip.split( '|' ) # HARDCODE the pipe delimiter for index in range(len(data_code_list)): value_in = value_list[index] data_code = data_code_list[index] if data_code == 'string': # value is already a string, no action needed, even if NoData pass elif 'int' in data_code: if value_in == '': value_list[index] = formats_d_data_code[data_code][4] # NoData value else: value_list[index] = int( value_list[index] ) # replace string with integer elif 'float' in data_code: if value_in == '': value_list[index] = formats_d_data_code[data_code][4] # NoData value else: value_list[index] = float( value_list[index] ) # replace string with float else: print 'ERROR: unexpected data code, stopping...' print 'ERROR: data_code', data_code raise Exception point_seek = point_seek + len( line_in ) + 1 # add 1 because next seek will be beyond current ending point value_t = tuple( value_list ) return ( point_seek, value_t ) ##### 12/ 5/2016 # read a record from a ascii file in csv (comma separated values) format def record_from_csv_f(unit_ascii_in_open, point_seek, data_code_list): ## name_this_function = 'record_from_csv_f' # 1/20/2021 read comma delimited (other option, add delimiter to the ascii function) # 12/ 5/2016 fix to update point_seek, handle NoData (empty string) on input to the NoData value for analysis # 11/15/2016 adapt for reading ascii (rather than binary) # unit_ascii_in_open: an open file unit, e.g., opened with something like: unit_ascii_in_open = open( filename_ascii, 'r' ) # point_seek: the seek point in the file, initilize as zero, then use output from the previous call # data_code_list: a list such as data_code_list = ['uint32', 'string'] # NOTE: the data_code_list 
can also be passed in as a tuple (e.g., data_code_t) and the function will behave correctly. unit_ascii_in_open.seek( point_seek ) point_ascii_in_open_tell = unit_ascii_in_open.tell() if point_ascii_in_open_tell != point_seek: print 'ERROR: file position error, stopping...' print 'ERROR: unit_ascii_in_open', unit_ascii_in_open print 'ERROR: point_seek', point_seek print 'ERROR: point_ascii_in_open_tell', point_ascii_in_open_tell raise Exception line_in = unit_ascii_in_open.readline() # from point to and including the newline line_in_strip = line_in.strip() value_list = line_in_strip.split( ',' ) # HARDCODE the comma delimiter for index in range(len(data_code_list)): value_in = value_list[index] data_code = data_code_list[index] if data_code == 'string': # value is already a string, no action needed, even if NoData pass elif 'int' in data_code: if value_in == '': value_list[index] = formats_d_data_code[data_code][4] # NoData value else: value_list[index] = int( value_list[index] ) # replace string with integer elif 'float' in data_code: if value_in == '': value_list[index] = formats_d_data_code[data_code][4] # NoData value else: value_list[index] = float( value_list[index] ) # replace string with float else: print 'ERROR: unexpected data code, stopping...' 
#####
# function for interactive use, not used in this program
# NOTE: these print_* helpers rely on the Python 2 `exec` statement assigning into the
# function's local namespace (e.g. setting bool_dict); this idiom does not work in Python 3.
def print_dictionaries( dir_list ):
    # syntax: >>> print_dictionaries( dir() )   # to report all dictionaries in memory
    count_dict = 0
    for element in dir_list:
        exec_string = 'bool_dict = type(%s) == type( {} )' % element  # True if a dictionary
        exec( exec_string )
        if bool_dict:
            exec_string = 'len_dict = len(%s)' % element
            exec( exec_string )
            print '%10d %s' % ( len_dict, element )
            count_dict += 1
    print '%d dictionaries among %d items in dir_list' % ( count_dict, len( dir_list ) )

#####
# function for interactive use, not used in this program
def print_lists( dir_list ):
    # syntax: >>> print_lists( dir() )   # to report all lists in memory
    count_list = 0
    for element in dir_list:
        exec_string = 'bool_dict = type(%s) == type( [] )' % element  # True if a list
        exec( exec_string )
        if bool_dict:
            exec_string = 'len_list = len(%s)' % element
            exec( exec_string )
            print '%10d %s' % ( len_list, element )
            count_list += 1
    print '%d lists among %d items in dir_list' % ( count_list, len( dir_list ) )

#####
# function for interactive use, not used in this program
def print_sets( dir_list ):
    # syntax: >>> print_sets( dir() )   # to report all sets in memory
    count_sets = 0
    for element in dir_list:
        exec_string = 'bool_sets = type(%s) == type( set() )' % element  # True if a set
        exec( exec_string )
        if bool_sets:
            exec_string = 'len_set = len(%s)' % element
            exec( exec_string )
            print '%10d %s' % ( len_set, element )
            count_sets += 1
    print '%d sets among %d items in dir_list' % ( count_sets, len( dir_list ) )

#####
# function for interactive use, not used in this program
def print_functions( dir_list ):
    # syntax: >>> print_functions( dir() )   # to report all functions in memory
    count_functions = 0
    for element in dir_list:
        exec_string = 'bool_functions = type(%s) == type( print_functions )' % element  # True if a function (uses name of this function)
        exec( exec_string )
        if bool_functions:
            print element
            count_functions += 1
    print '%d functions among %d items in dir_list' % ( count_functions, len( dir_list ) )

#####
# print_ar only works if the program includes the statement: import numpy as np
# function for interactive use, not used in this program
def print_ar( dir_list ):
    # 7/25/2015
    # syntax: >>> print_ar( dir() )   # to report all numpy arrays (and their shapes) in memory
    for element in dir_list:
        exec_string = "bool_ar = type( %s ) == type ( np.array([0]) )" % element
        exec( exec_string )  # defines variable bool_ar
        if bool_ar:
            exec_string = "shape_string = %s.shape" % element
            exec( exec_string )  # defines variable shape_string
            print '%-20s %s' % ( shape_string, element )
    # prints, nothing returned

#####
def print_tuples( dir_list ):
    # syntax: >>> print_tuples( dir() )   # to report all tuples in memory
    count_tuples = 0
    for element in dir_list:
        exec_string = 'bool_tuples = type(%s) == type( tuple() )' % element  # True if a tuple
        exec( exec_string )
        if bool_tuples:
            exec_string = 'len_tuple = len(%s)' % element
            exec( exec_string )
            print '%10d %s' % ( len_tuple, element )
            count_tuples += 1
    print '%d tuples among %d items in dir_list' % ( count_tuples, len( dir_list ) )

#####
# function for interactive use, not used in this program
def print_units( dir_list ):
    # 12/31/2020
    # syntax: >>> print_units( dir() )   # to report all open/closed file objects in memory
    # NOTE: uses the Python-2-only builtin type `file`
    count_units = 0
    for element in dir_list:
        if file.__instancecheck__( eval( element ) ) :  # a method on an object of type "file"
            count_units += 1
            print element
            print repr( eval(element) )
    print '%d unit_ among %d items in dir_list' % ( count_units, len( dir_list ) )

#####
# function for interactive use, not used in this program
def print_stuff( dir_list ):
    # 8/18/2012
    # Convenience wrapper: runs all the print_* reports above against dir_list.
    print
    print 'Dictionaries'
    print_dictionaries( dir_list )
    print
    print 'Lists'
    print_lists( dir_list )
    print
    print 'Sets'
    print_sets( dir_list )
    print
    print 'Functions'
    print_functions( dir_list )
    print
    try:
        # 2/21/2017
        print 'Arrays (if numpy as np is active)'
        print_ar( dir_list )
    except:
        print 'Skipping "print_ar" because numpy not imported'
    print
    print 'Tuples'
    print_tuples( dir_list )
    print
    print 'Units (open or closed files)'
    print_units( dir_list )
try: # 2/21/2017 print 'Arrays (if numpy as np is active)' print_ar( dir_list ) except: print 'Skipping "print_ar" because numpy not imported' print print 'Tuples' print_tuples( dir_list ) print print 'Units (open or closed files)' print_units( dir_list ) ##### def print_stuff( dir_list ): # 1/16/2021 # 8/18/2012 # ================================================================================================ like main try: print 'Start ',time.ctime(), ' ' * 5, os.path.abspath(sys.argv[0]) # E.g., 'Start Sat Jun 27 11:09:37 2009 D:\Wylie.Owyhee\PythonScripts\ssurgo.extract.06.py' starttime = time.clock() previoustime = starttime message = 'Initial' previoustime = timer_f(message, starttime, previoustime) timestamp = timestamp_f() formats_d_data_code = { # value is a tuple of: ( .csv format, struct format, minimum, maximum, default_NoData_value, num_bytes ) 'uint8' : ( '%d', 'B', 0 , 255 , 255 , 1), # unsigned integer, 1 byte maximum 2**8 - 1 'int8' : ( '%d', 'b', -128 , 127 , -128 , 1), # signed integer, 1 byte minimum -1 * 2**7, maximum 2**7 - 1 'uint16' : ( '%d', 'H', 0 , 65535 , 65535 , 2), # unsigned integer, 2 bytes maximum 2**16 - 1 'int16' : ( '%d', 'h', -32768 , 32767 , -32768 , 2), # signed integer, 2 bytes minimum -1 * 2**15, maximum 2**15 - 1 'uint32' : ( '%d', 'I', 0 , 4294967295L, 4294967295L, 4), # unsigned integer, 4 bytes maximum 2**32 - 1 'int32' : ( '%d', 'i', -2147483648L, 2147483647L, -2147483648L, 4), # signed integer, 4 bytes minimum -1 * 2**31, maximum 2**31 - 1 'uint64' : ( '%d', 'Q', 0 , 18446744073709551615L, 18446744073709551615L, 8), # unsigned integer, 8 bytes maximum 2**64 - 1 # could depend on python implementation 'int64' : ( '%d', 'q', -9223372036854775808L, 9223372036854775807L, -9223372036854775808L, 8), # signed integer, 8 bytes minimum -1 * 2**63, maximum 2**63 - 1 # could depend on python implementation 'float32' : ( '%f', 'f', -3.4e+038, 3.4e+038, -3.4e+038 , 4), # float, 4 bytes (min and max sizes are not necessarily the 
exact limits) 'float32_0' : ( '%.0f', 'f', -3.4e+038, 3.4e+038, -3.4e+038 , 4), # float, 4 bytes (min and max sizes are not necessarily the exact limits) 'float32_1' : ( '%.1f', 'f', -3.4e+038, 3.4e+038, -3.4e+038 , 4), # float, 4 bytes (min and max sizes are not necessarily the exact limits) 'float32_2' : ( '%.2f', 'f', -3.4e+038, 3.4e+038, -3.4e+038 , 4), # float, 4 bytes (min and max sizes are not necessarily the exact limits) 'float32_3' : ( '%.3f', 'f', -3.4e+038, 3.4e+038, -3.4e+038 , 4), # float, 4 bytes (min and max sizes are not necessarily the exact limits) 'float32_4' : ( '%.4f', 'f', -3.4e+038, 3.4e+038, -3.4e+038 , 4), # float, 4 bytes (min and max sizes are not necessarily the exact limits) 'float32_5' : ( '%.5f', 'f', -3.4e+038, 3.4e+038, -3.4e+038 , 4), # float, 4 bytes (min and max sizes are not necessarily the exact limits) 'float64' : ( '%f', 'd', -3.4e+307, 3.4e+307, -3.4e+307 , 8), # double, 8 bytes (min and max sizes are not necessarily the exact limits) 'string' : ( '%s', 's', 0, 4294967295L, '' , 'variable' ), # 12/13/2014 NoData code is the empty string... 'binstring124' : ( '%s', 's', 0, 4294967295L, '' , 'variable' ), # 12/13/2014 NoData code is the empty string... 
} # formats_d_data_code float_NoData_value_test_limit = -3.4e37 # Ten times the NoData value, if value is less than this, it is a float32 NoData value ################################ CONTINUE HERE ############################### ## VERBOSE_OUTPUT = True VERBOSE_OUTPUT = False VERBOSE_OUTPUT_2 = True # allow printing of percentage progress within an image (all on one line) ## VERBOSE_OUTPUT_2 = False OVERWRITE_OUTPUT = True ## OVERWRITE_OUTPUT = False # 3/10/2017 pathname_mu_co_in = r'D:\gNATSGO.201120\gNATSGO_CONUS\gNC20_results_ptxt' basename_mu_co_in = 'mapunit_gNC20_SOC_mu_co_d_mukeyint.ptxt' filename_mu_co_in = pathname_mu_co_in + os.sep + basename_mu_co_in pathname_spatial_input = r'D:\gNATSGO.201120\gNATSGO_CONUS\gNC20_spatial_img' basename_spatial_input = 'mukeyint_30m_gN20.img' filename_spatial_input = pathname_spatial_input + os.sep + basename_spatial_input pathname_spatial_results = r'D:\gNATSGO.201120\gNATSGO_CONUS\gNC20_spatial_results' raster_extension_out = '.img' # Erdas Imagine raster format, also determines "driver" below # output filenames are created below based on attributes selected for mapping if not os.path.exists( pathname_spatial_results ): print 'Creating directory %s' % pathname_spatial_results os.makedirs( pathname_spatial_results ) # the "tiles" are internal to this program, and thus arbitrary # 3/1/2017 change variable names: num_row_tile to num_pixels_per_tile_row (likewise for col) # 8/6/2019 use 1500x1500 because the bool_use_tile_ar_1500x1500.txt file exists ## num_pixels_per_tile_row = 1500 # HARDCODE 3/1/2017 see if smaller tiles might be more efficient (less unnecessary I/O) ## num_pixels_per_tile_col = 1500 # HARDCODE 3/1/2017 see if smaller tiles might be more efficient (less unnecessary I/O) num_pixels_per_tile_row = 3000 # HARDCODE (compromise between big is efficient and too big causes Memory Error) num_pixels_per_tile_col = 3000 # HARDCODE (compromise between big is efficient and too big causes Memory Error) ## 
num_pixels_per_tile_row = 5000 # HARDCODE (compromise between big is efficient and too big causes Memory Error) ## num_pixels_per_tile_col = 5000 # HARDCODE (compromise between big is efficient and too big causes Memory Error) print 'numbers of columns and rows per internal virtual tile:' print ' num_pixels_per_tile_col %10d' % num_pixels_per_tile_col # number of columns per internal virtual tile print ' num_pixels_per_tile_row %10d' % num_pixels_per_tile_row # INPUT image # basename_in defined above (next to pathnames) ## print 'filename_spatial_input %s' % filename_spatial_input, ' Exists?', os.path.exists( filename_spatial_input ) print 'filename_spatial_input %s' % filename_spatial_input # open the input image, but don't read yet in_ds = gdal.Open( filename_spatial_input ) in_projection = in_ds.GetProjection() in_geotransform = in_ds.GetGeoTransform() in_rb = in_ds.GetRasterBand(1) in_nodatavalue = in_rb.GetNoDataValue() in_data_type = in_rb.DataType # 3 = signed integer 16, 4 = unsigned integer 32, 6 = float32 # 3/1/2017 change variable names: cols_pixel_input to num_pixels_col_input (likewise for row) num_pixels_col_input = int( in_rb.XSize ) # make integer so it can be used as index in "for" loops num_pixels_row_input = int( in_rb.YSize ) # compute the numbers of virtual tiles in each direction num_tiles_row = ( ( num_pixels_row_input - 1 ) // num_pixels_per_tile_row ) + 1 # ensure integer divison (truncation) with // num_tiles_col = ( ( num_pixels_col_input - 1 ) // num_pixels_per_tile_col ) + 1 # ensure integer divison (truncation) with // print print 'Input image parameters:' print 'in_projection ' if in_projection: print in_projection else: print 'WARNING: the input projection (in_projection) is not defined.' 
print 'in_geotransform ', in_geotransform if in_nodatavalue is not None: print 'in_nodatavalue ', in_nodatavalue in_nodatavalue_int = int( in_nodatavalue ) in_nodatavalue_ArcGIS = 2147483647 # also set for input nodata value else: # 3/3/2017 switch from WARNING to ERROR: program depends on this being defined print 'ERROR: the input NoData value (in_nodatavalue) is not defined, stopping...' raise Exception print 'in_data_type %10d' % in_data_type print print 'num_pixels_row_input %10d' % num_pixels_row_input print 'num_pixels_col_input %10d' % num_pixels_col_input print 'num_tiles_row %10d' % num_tiles_row print 'num_tiles_col %10d' % num_tiles_col print xmin_input = in_geotransform[0] ymax_input = in_geotransform[3] # 1/22/2021 If the boolean array does not exist, create it "on the fly" on the first iteration of the data, # rather than as a separate step # Write the array to ascii following the first map creation, but continue to use the array in memory for control # make a boolean array of the tiles: True indicates the tile has data, False indicates there is no mukeyint data for the tile (all NoData) # if the array has already been saved as an ASCII file, use it # if the array has not been created, do so and save as an ASCII file (about 15 minutes) basename_txt = 'bool_use_tile_ar_%dx%d.txt' % ( num_pixels_per_tile_row, num_pixels_per_tile_col ) filename_txt = pathname_spatial_input + os.sep + basename_txt bool_use_tile_ar = np.zeros( (num_tiles_row, num_tiles_col), dtype=np.bool_) # initialize to zero, representing False if os.path.exists( filename_txt ): print 'Reading bool_use_tile_ar from %s' % filename_txt unit_txt = open( filename_txt, 'r') for index_tile_row in range( num_tiles_row ): line_string = unit_txt.readline() for index_tile_col in range( num_tiles_col ): # line_string[index_tile_col] picks a single character digit out of the string, newline at the end of the string will be ignored bool_use_tile_ar[index_tile_row, index_tile_col] = int( 
line_string[index_tile_col] ) # numpy automatically converts integer to boolean (0 is False, 1 is True) unit_txt.close() bool_using_bool_use_tile_array = True else: # the array does not exist make it during the first loop count_tiles_data = 0 count_tiles_empty = 0 count_tiles_touched = 0 bool_using_bool_use_tile_array = False ## for index_tile_row in range( num_tiles_row ): ## for index_tile_col in range( num_tiles_col ): ## # find out if there are mukeyint data for this tile (not all NoData) ## row_pixel_in_start = index_tile_row * num_pixels_per_tile_row ## row_pixel_in_end_1 = row_pixel_in_start + num_pixels_per_tile_row ## row_pixel_in_end = min( row_pixel_in_end_1, num_pixels_row_input ) ## rows_in = row_pixel_in_end - row_pixel_in_start ## ## col_pixel_in_start = index_tile_col * num_pixels_per_tile_col ## col_pixel_in_end_1 = col_pixel_in_start + num_pixels_per_tile_col ## col_pixel_in_end = min( col_pixel_in_end_1, num_pixels_col_input ) ## cols_in = col_pixel_in_end - col_pixel_in_start ## ## # translate to ReadAsArray notation ## xoff = col_pixel_in_start ## yoff = row_pixel_in_start ## win_xsize = cols_in ## win_ysize = rows_in ## cols_out = cols_in ## rows_out = rows_in ## in_ar = in_rb.ReadAsArray( xoff, yoff, win_xsize, win_ysize ) ## # recode all NoData to zeros ## in_ar = np.where( (in_ar == in_nodatavalue_int) | (in_ar == in_nodatavalue_ArcGIS ), 0, in_ar ) # 2/9/2015 also the ArcGIS case ## if np.any( in_ar ): ## bool_use_tile_ar[index_tile_row, index_tile_col] = True ## count_tiles_data += 1 ## else: ## count_tiles_empty += 1 ## count_tiles_touched += 1 # for use in the following PROGRAMMING AID: 4/14/2014 BASED on run of 4/13/2012 (notes 140413) # tuple of tuples # fieldname # filename_root # data_type # PROGRAMMING AID: MAY need to modify the "elif" tests to handle specific requirements for "data_code_out" and "name_scale" # POTENTIAL ENHANCEMET: strip off the 'mu_' from the output name in the PROGRAMMING AID... 
fieldname_to_map_tt = ( # tuple of tuples ## >>> if True: ## # 3/10/2017 ## # PROGRAM AID run on the workstation after the MAPUNIT_31_T_D_MUKEYINT_SINGLE_DICTIONARIES_BINARY_2 step ## # name_mu_co_t was available in memory after D:\SSURGO\PythonScripts\ssurgo.t462.GSM_34LRH.generated.printing_legend_to_chorizon_plus.CDIaGSM.41.py ## for index in range( len( name_mu_co_t )): ## name = name_mu_co_t[index] ## data_code = data_code_mu_co_t[index] ## blanks_1 = ' ' * ( 40 - len( name ) ) ## blanks_2 = ' ' * ( 12 - len( data_code ) ) ## blanks_3 = ' ' * ( 4 - len( '' ) ) ## data_code_out = 'float32' ## if 'mu_soc_r_pa_g_cm2' in name: ## print "('%s',%s '%s',%s 'xe4',%s '%s',%s '%s',%s )," % ( name, blanks_1, data_code, blanks_2, blanks_3, name, blanks_1, data_code_out, blanks_2, ) ## ## print "('%s',%s '%s',%s 'name_fill_attribute', 'float32', 0, '', '%s',%s '%s',%s )," % ( name, blanks_1, data_code, blanks_2, name, blanks_1, data_code, blanks_2, ) # convert from g cm-2 to g m-2 by multiplying by 10,000 cm2 m-2 (square centimeters per square meter), 'xe4' references 10**4 = 10,000 # move 000p00_015p24 above others... 
('mu_soc_r_pa_g_cm2_000p00_015p24', 'float32_5', 'xe4', 'mu_soc_r_pa_g_m2_000p00_015p24', 'float32', ), ## ('mu_soc_r_pa_g_cm2_000_005', 'float32_5', 'xe4', 'mu_soc_r_pa_g_m2_000_005', 'float32', ), ## ('mu_soc_r_pa_g_cm2_005_010', 'float32_5', 'xe4', 'mu_soc_r_pa_g_m2_005_010', 'float32', ), ## ('mu_soc_r_pa_g_cm2_010_020', 'float32_5', 'xe4', 'mu_soc_r_pa_g_m2_010_020', 'float32', ), ## ('mu_soc_r_pa_g_cm2_020_030', 'float32_5', 'xe4', 'mu_soc_r_pa_g_m2_020_030', 'float32', ), ## ('mu_soc_r_pa_g_cm2_030_040', 'float32_5', 'xe4', 'mu_soc_r_pa_g_m2_030_040', 'float32', ), ## ('mu_soc_r_pa_g_cm2_040_050', 'float32_5', 'xe4', 'mu_soc_r_pa_g_m2_040_050', 'float32', ), ## ('mu_soc_r_pa_g_cm2_050_060', 'float32_5', 'xe4', 'mu_soc_r_pa_g_m2_050_060', 'float32', ), ## ('mu_soc_r_pa_g_cm2_060_070', 'float32_5', 'xe4', 'mu_soc_r_pa_g_m2_060_070', 'float32', ), ## ('mu_soc_r_pa_g_cm2_070_080', 'float32_5', 'xe4', 'mu_soc_r_pa_g_m2_070_080', 'float32', ), ## ('mu_soc_r_pa_g_cm2_080_090', 'float32_5', 'xe4', 'mu_soc_r_pa_g_m2_080_090', 'float32', ), ## ('mu_soc_r_pa_g_cm2_090_100', 'float32_5', 'xe4', 'mu_soc_r_pa_g_m2_090_100', 'float32', ), ## ('mu_soc_r_pa_g_cm2_000_010', 'float32_5', 'xe4', 'mu_soc_r_pa_g_m2_000_010', 'float32', ), ('mu_soc_r_pa_g_cm2_000_030', 'float32_5', 'xe4', 'mu_soc_r_pa_g_m2_000_030', 'float32', ), ## ('mu_soc_r_pa_g_cm2_030_060', 'float32_5', 'xe4', 'mu_soc_r_pa_g_m2_030_060', 'float32', ), ## ('mu_soc_r_pa_g_cm2_060_100', 'float32_5', 'xe4', 'mu_soc_r_pa_g_m2_060_100', 'float32', ), ## ('mu_soc_r_pa_g_cm2_000_100', 'float32_5', 'xe4', 'mu_soc_r_pa_g_m2_000_100', 'float32', ), ## ('mu_soc_r_pa_g_cm2_000_999', 'float32_5', 'xe4', 'mu_soc_r_pa_g_m2_000_999', 'float32', ), #### ('mu_soc_r_pa_g_cm2_000p00_015p24', 'float32_5', 'xe4', 'mu_soc_r_pa_g_m2_000p00_015p24', 'float32', ), ## ('mu_soc_r_pa_g_cm2_015p24_030p48', 'float32_5', 'xe4', 'mu_soc_r_pa_g_m2_015p24_030p48', 'float32', ), ) # fieldname_to_map_tt # read header file for the mu_co_in data 
    # --- read the header file describing the whole-mapunit (mu_co) attribute file ---
    mu_co_in_structure_binary_tt = structure_tt_from_header2_csv_f( filename_mu_co_in )
    print_structure_tt_2_f( mu_co_in_structure_binary_tt )
    mu_co_in_name_t = mu_co_in_structure_binary_tt[2]  # tuple of field names
    # key type in mu_co_in_structure_binary_tt[3]
    data_code_mu_co_in_list = list( mu_co_in_structure_binary_tt[4] )  # one data code per field
    count_map = -1 # initialize so that this becomes zero in the first loop
    # loop output variables: one pass (one output raster) per entry in fieldname_to_map_tt
    for fieldname_to_map_t in fieldname_to_map_tt:
        count_map += 1
        basename_in = fieldname_to_map_t[0]
        data_code_in = fieldname_to_map_t[1]
        name_scale = fieldname_to_map_t[2]
        basename_out = fieldname_to_map_t[3]
        data_code_out = fieldname_to_map_t[4]
        # column positions of the key and of the attribute being mapped
        index_mu_co_in_name_t_mukeyint = mu_co_in_name_t.index( 'mukeyint' )
        index_mu_co_in_name_t_basename_in = mu_co_in_name_t.index( basename_in )
        # resolve the output data code to the matching GDAL type, numpy dtype,
        # and output NoData value (taken from field [4] of formats_d_data_code)
        if data_code_out == 'uint8':
            data_code_gdal_out = gdal.GDT_Byte # gdal.GDT_Byte is the integer 1
            data_code_np_out = np.uint8
            out_NoData_value = formats_d_data_code[data_code_out][4]
        elif data_code_out == 'uint16':
            data_code_gdal_out = gdal.GDT_UInt16 # gdal.GDT_UInt16 is the integer 2
            data_code_np_out = np.uint16
            out_NoData_value = formats_d_data_code[data_code_out][4]
        elif data_code_out == 'int16':
            data_code_gdal_out = gdal.GDT_Int16 # gdal.GDT_Int16 is the integer 3
            data_code_np_out = np.int16
            out_NoData_value = formats_d_data_code[data_code_out][4]
        elif data_code_out == 'uint32':
            data_code_gdal_out = gdal.GDT_UInt32 # gdal.GDT_UInt32 is the integer 4
            data_code_np_out = np.uint32
            out_NoData_value = formats_d_data_code[data_code_out][4]
        elif data_code_out == 'float32':
            data_code_gdal_out = gdal.GDT_Float32 # gdal.GDT_Float32 is the integer 6
            data_code_np_out = np.float32
            out_NoData_value = formats_d_data_code[data_code_out][4]
        else:
            print 'ERROR: data_code_out not correctly specified...'
            raise Exception
        # translate the scale code into the multiplicative factor applied to each value
        if name_scale == '': # no scaling
            factor_scale = 1.0
        elif name_scale == 'x10':
            factor_scale = 10.0
        elif name_scale == 'x100':
            factor_scale = 100.0
        elif name_scale == 'xe3':
            factor_scale = 1000.0
        elif name_scale == 'xe4':
            factor_scale = 10000.0 # e.g., scale per cm2 to per m2
        elif name_scale == 'd10':
            factor_scale = 0.1
        elif name_scale == 'd100':
            factor_scale = 0.01
        elif name_scale == 'de3':
            factor_scale = 0.001
        else:
            print 'ERROR: unexpected name_scale %s, stopping...' % name_scale
            raise Exception
        # name_scale_2 is computed but no longer used in the output filename (see below)
        if name_scale: # is not the empty string
            name_scale_2 = '_' + name_scale
        else:
            name_scale_2 = ''
        # OUTPUT image
##        filename_out = pathname_spatial_results + os.sep + basename_out + name_scale_2 + raster_extension_out
        # 3/24/2015 Remove editing the output filename with "name_scale_2"
        # This is the USER RESPONSIBILITY when defining in the fieldname_to_map_tt
        filename_out = pathname_spatial_results + os.sep + basename_out + raster_extension_out
        if os.path.exists( filename_out ) and not OVERWRITE_OUTPUT:
            print 'WARNING: Not overwriting...'
            print 'WARNING: filename exists: ', filename_out
            print 'WARNING: OVERWRITE_OUTPUT', OVERWRITE_OUTPUT
            continue # skips the rest of this loop (for fieldname_to_map_t in fieldname_to_map_tt:)
        # open the output image
        # parameters for output image: projection and extent are copied from the input mosaic
        if in_projection: # is not the empty string
            out_projection = in_projection
        else:
            print 'ERROR: input projection is not defined, stopping...'
            raise Exception
        out_data_type = data_code_gdal_out
        num_pixels_col_output = num_pixels_col_input # int( in_rb.XSize )
        num_pixels_row_output = num_pixels_row_input # int( in_rb.YSize )
        print 'filename_out ', filename_out, ' (%d, %d)' % ( num_pixels_row_output, num_pixels_col_output )
        print 'data_code_out ', data_code_out
        if not (( num_pixels_row_output > 0 ) and ( num_pixels_col_output > 0 )): # output image would be empty
            print 'Output image would be empty, do nothing...'
        else:
            # setup output raster using GDAL (Geospatial Data Abstraction Language)
            # PROGRAMMER NOTE: If changing "driver", also change "raster_extension_out" above
            if raster_extension_out == '.img':
                driver = gdal.GetDriverByName('HFA') # HFA is the name for the driver for Erdas Imagine raster format
            elif raster_extension_out == '.tif':
                driver = gdal.GetDriverByName('GTiff') # GTiff is the name for the driver for the GeoTIFF raster format
            else:
                print 'ERROR: unidentified raster_extension_out %s, stopping...' % raster_extension_out
                raise Exception
            # use "input" characteristics: one band of the chosen GDAL data type
            out_ds = driver.Create( filename_out, num_pixels_col_output, num_pixels_row_output, 1, out_data_type )
            # set the projection and geotransformation
            out_ds.SetProjection(out_projection)
            # these use "input" characteristics because they become inputs to testing the sampling methods.
            # out_geotransform = ( xmin_input, dx_pixel_out, 0.0, ymax_input, 0.0, -1 * dy_pixel_out )
            # the 0.0 values reflect no rotation, the 6th term is negative because an
            # image processing convention is used (read from top downward)
            out_geotransform = in_geotransform
            out_ds.SetGeoTransform(out_geotransform)
            out_rb = out_ds.GetRasterBand(1)
            out_rb.SetNoDataValue(out_NoData_value)
            if VERBOSE_OUTPUT:
                print 'Output image parameters:'
                print 'out_projection'
                print out_projection
                print 'out_geotransform ', out_geotransform
                if out_NoData_value is not None:
                    print 'out_NoData_value ', out_NoData_value
                else:
                    print 'WARNING: out_NoData_value is not defined.'
                print 'out_data_type %10d' % out_data_type
                print 'num_pixels_row_output %10d' % num_pixels_row_output
                print 'num_pixels_col_output %10d' % num_pixels_col_output
            # --- input file _mu_co_in: read the attribute table into a dictionary ---
            print_interval = 100000 # 100,000 records between progress messages
            # 8/6/2019 read whole mapunit file, not single dictionaries
            filename_mu_co_in = filename_mu_co_in # no-op self-assignment, leftover from the single-dictionaries version
            print
            print 'Reading from %s' % filename_mu_co_in
##            (prior per-field name/data-code setup deleted -- the header file now supplies both)
            unit_mu_co_in = open( filename_mu_co_in, 'r' )
            count_record_mu_co_in = 0
            temp_d_mukeyint = {}
            # find the length of the input file, then reset the pointer for reading from the beginning
            unit_mu_co_in.seek( 0, 2 ) # seek offset of zero from "whence = 2" where 2 denotes the end of the file.
            unit_mu_co_in_EOF = unit_mu_co_in.tell() # store the length of the file in bytes
            unit_mu_co_in.seek( 0 ) # return to start of file for reading data
            point_seek_unit_mu_co_in = 0
            while point_seek_unit_mu_co_in < unit_mu_co_in_EOF:
                if count_record_mu_co_in % print_interval == 0:
                    message = 'Row unit_mu_co_in %10d' % count_record_mu_co_in
                    previoustime = timer_f(message, starttime, previoustime)
                # read a "record"; record_from_ascii_f advances the seek pointer
                ( point_seek_unit_mu_co_in, input_mu_co_in_t ) = record_from_ascii_f( unit_mu_co_in, point_seek_unit_mu_co_in, data_code_mu_co_in_list )
                count_record_mu_co_in += 1
                mukeyint = input_mu_co_in_t[index_mu_co_in_name_t_mukeyint]
                value = input_mu_co_in_t[index_mu_co_in_name_t_basename_in]
                # store in a dictionary: mukeyint is a key and guaranteed to be unique,
                # do not really need to test if it is in the dictionary
                if mukeyint not in temp_d_mukeyint:
                    temp_d_mukeyint[mukeyint] = value
                else:
                    print 'ERROR: duplicate mukeyint encountered in input_mu_co_in_t, stopping...'
                    print 'ERROR: mukeyint', mukeyint
                    raise Exception
            unit_mu_co_in.close()
            print 'Read %8d records from %s.' % ( count_record_mu_co_in, filename_mu_co_in )
            print 'len( temp_d_mukeyint ) ', len( temp_d_mukeyint )
            # 2/7/2015 problems with spatial data mukeyint not in the attributes:
            # collect them here (filled by value_f_mukeyint_robust during tiling)
            mukeyint_spatial_missing_attribute_set = set()
            # Create a generic dictionary with the proper conversion to output data type
            # and all mukeyint (even if not on the chorizon table)
            value_d_mukeyint = {}
            # set value for NoData for mukeyint (zero is the input NoData value for the mukeyint spatial dataset)
            value_d_mukeyint[0] = out_NoData_value
            # also set for input nodata value
            in_nodatavalue_int = int( in_nodatavalue )
            value_d_mukeyint[in_nodatavalue_int] = out_NoData_value
            # also set for the strange NoData value introduced into uint32 images by ArcGIS
            # (in my way of thinking, they should use 4294967295)
            in_nodatavalue_ArcGIS = 2147483647
            value_d_mukeyint[in_nodatavalue_ArcGIS] = out_NoData_value
            # convert data and output NoData values as specified in the mapping structure
            for mukeyint in temp_d_mukeyint:
                # assign the value, converting per the (data_code_in, data_code_out) pair
                if data_code_in == 'float32' and data_code_out == 'uint8':
                    value = temp_d_mukeyint[mukeyint] # 2/7/2015 read using unit_mu_co_in method, not in a tuple
                    if value < float_NoData_value_test_limit:
                        value_d_mukeyint[mukeyint] = out_NoData_value
                    else:
                        value_int = int( round( value * factor_scale ) )
                        # NOTE(review): 255 is excluded -- presumably reserved as the
                        # uint8 NoData value; confirm against formats_d_data_code
                        if value_int < 0 or value_int >= 255:
                            print 'value out of range for %s, stopping...' % data_code_out
                            print 'mukeyint ', mukeyint
                            print 'temp_d_mukeyint[mukeyint]', temp_d_mukeyint[mukeyint]
                            print 'value_int ', value_int
                            raise Exception
                        else:
                            value_d_mukeyint[mukeyint] = value_int
                elif data_code_in == 'float32' and data_code_out == 'uint16':
                    value = temp_d_mukeyint[mukeyint]
                    if value < float_NoData_value_test_limit:
                        value_d_mukeyint[mukeyint] = out_NoData_value
                    else:
                        value_int = int( round( value * factor_scale ) )
                        # NOTE(review): 65535 excluded, same presumed NoData reservation as uint8 above
                        if value_int < 0 or value_int >= 65535:
                            print 'value out of range for %s, stopping...' % data_code_out
                            print 'mukeyint ', mukeyint
                            print 'temp_d_mukeyint[mukeyint]', temp_d_mukeyint[mukeyint]
                            print 'value_int ', value_int
                            raise Exception
                        else:
                            value_d_mukeyint[mukeyint] = value_int
                elif 'float32' in data_code_in and 'float32' in data_code_out:
                    # no conversion needed, both are float (allows for ascii float versions, such as 'float32_5')
                    value = temp_d_mukeyint[mukeyint]
                    value_d_mukeyint[mukeyint] = value
                elif data_code_in == data_code_out:
                    value = temp_d_mukeyint[mukeyint]
                    value_d_mukeyint[mukeyint] = value
                else:
                    print 'PROGRAM ERROR: case for data_code_in %s and data_code_out %s not yet programmed, stopping...' % ( data_code_in, data_code_out )
                    raise Exception
            del( temp_d_mukeyint ) # replaced by value_d_mukeyint
            # test trade-off between storing the NoData array in memory and regenerating it each time. 3/6/2017 14:01
            out_NoData_ar = np.zeros( (num_pixels_per_tile_row, num_pixels_per_tile_col), dtype=data_code_np_out ) + out_NoData_value
            # --- loop the "internal virtual tiles" of the mosaic ---
            if VERBOSE_OUTPUT_2 and not VERBOSE_OUTPUT:
                print 'percent complete: ', # initiate printing percentage complete on the same line (comma after print statement)
            count_tiles_touched = 0
            count_tiles_data = 0
            count_tiles_empty = 0
            row_start = 0
######            row_start = 51000 # DEBUG TEST
            num_row_iterations = len( range( row_start, num_pixels_row_input, num_pixels_per_tile_row ) )
            col_start = 0
######            col_start = 108000 # DEBUG TEST
            num_col_iterations = len( range( col_start, num_pixels_col_input, num_pixels_per_tile_col ) )
            num_total_iterations = num_row_iterations * num_col_iterations
######            for row_pixel_in_start in range( row_start, num_pixels_row_input, num_pixels_per_tile_row ):
            for index_tile_row in range( num_tiles_row ):
                row_pixel_in_start = index_tile_row * num_pixels_per_tile_row
                row_pixel_in_end_1 = row_pixel_in_start + num_pixels_per_tile_row
                row_pixel_in_end = min( row_pixel_in_end_1, num_pixels_row_input ) # clip the last partial tile
                rows_in = row_pixel_in_end - row_pixel_in_start
                # translate to ReadAsArray notation
                yoff = row_pixel_in_start
                win_ysize = rows_in
                rows_out = rows_in
                for index_tile_col in range( num_tiles_col ):
                    col_pixel_in_start = index_tile_col * num_pixels_per_tile_col
                    col_pixel_in_end_1 = col_pixel_in_start + num_pixels_per_tile_col
                    col_pixel_in_end = min( col_pixel_in_end_1, num_pixels_col_input )
                    cols_in = col_pixel_in_end - col_pixel_in_start
                    # translate to ReadAsArray notation
                    xoff = col_pixel_in_start
                    win_xsize = cols_in
                    cols_out = cols_in
                    if bool_using_bool_use_tile_array:
                        # the bool_use_tile_ar is already complete
                        bool_use_tile = bool_use_tile_ar[index_tile_row, index_tile_col]
                    else:
                        # the bool_use_tile_ar is not yet complete, process the tile and find out if it is
                        # needed in the future (e.g., it has data) or not needed in the future (all NoData)
                        bool_use_tile = True
                    if bool_use_tile:
                        percent_complete = 100. * count_tiles_touched / num_total_iterations
                        if VERBOSE_OUTPUT:
                            # print statement to give a "progress report"
                            print 'row_pixel_in_start %10d row_pixel_in_end %10d col_pixel_in_start %10d col_pixel_in_end %10d percent complete %5.1f' % ( row_pixel_in_start, row_pixel_in_end, col_pixel_in_start, col_pixel_in_end, percent_complete )
                        if VERBOSE_OUTPUT_2 and not VERBOSE_OUTPUT:
                            print '%5.1f' % ( percent_complete ), # comma at the end: print on the same line
                        in_ar = in_rb.ReadAsArray( xoff, yoff, win_xsize, win_ysize )
                        # substitute the input nodata values with zero (zero maps to out_NoData_value)
                        in_ar = np.where( (in_ar == in_nodatavalue_int) | (in_ar == in_nodatavalue_ArcGIS ), 0, in_ar ) # 2/9/2015 also the ArcGIS case
                        # make the output array: translate every mukeyint pixel through the lookup dictionary
                        in_ar_flat = in_ar.flatten()
                        in_flat_list = in_ar_flat.tolist()
                        # use robust version, tolerates spatial mukeyint not in the attribute data
                        out_flat_list = map( value_f_mukeyint_robust, in_flat_list) # map(func, seq) see Alex Martelli, Python in a Nutshell, 2nd Ed., p. 164
                        out_ar = np.array( out_flat_list, dtype=data_code_np_out ).reshape(rows_out, cols_out)
                        count_tiles_data += 1
                        # If setting the bool_use_tile_array in this loop (e.g., first map processed in the first run)
                        if not bool_using_bool_use_tile_array:
                            # Set the value of the bool_use_tile_array according to whether there are data in the output
                            # the array will get a True if there are data to process in the tile
                            # the array will get a False, if the whole tile is NoData
                            if ( out_ar == out_NoData_value ).all():
                                # every pixel is NoData in output, don't need to process the tile in future iterations
                                bool_use_tile_ar[index_tile_row, index_tile_col] = False
                            else:
                                bool_use_tile_ar[index_tile_row, index_tile_col] = True # some pixels in output have data
                    else:
                        # the input array is all "NoData", set the output to NoData (much faster)
##                        out_ar = np.zeros( (rows_out, cols_out), dtype=data_code_np_out ) + out_NoData_value
                        # test trade-off between storing the NoData array in memory and regenerating it each time. 3/6/2017 14:01
                        out_ar = out_NoData_ar[:rows_out, :cols_out].copy()
                        count_tiles_empty += 1
                    # write to the output image (WriteArray takes xoff = column, yoff = row)
                    row_offset_tile_output = row_pixel_in_start
                    col_offset_tile_output = col_pixel_in_start
                    out_rb.WriteArray(out_ar, col_offset_tile_output, row_offset_tile_output)
                    count_tiles_touched += 1 # increment after writing
            if VERBOSE_OUTPUT_2 and not VERBOSE_OUTPUT:
                print # close out printing percentage complete on the same line when done with all rows
            # finish writing (flush the buffers) and close the files by deleting the "rb" raster band and "ds" data set osgeo pointers
            del( out_rb )
            del( out_ds )
            percent_complete = 100. * count_tiles_touched / num_total_iterations
            if VERBOSE_OUTPUT or VERBOSE_OUTPUT_2:
                print 'percent complete: %5.1f' % ( percent_complete )
            print 'Created %s' % filename_out
            print
            print 'Counts of internal (virtual) tiles of nominal size %d x %d or less:' % ( num_pixels_per_tile_row, num_pixels_per_tile_col )
            print 'count_tiles_data %10d' % ( count_tiles_data, )
            print 'count_tiles_empty %10d' % ( count_tiles_empty, )
            print 'count_tiles_touched %10d' % ( count_tiles_touched, )
            # Compute statistics and pyramids for the finished raster (ArcGIS tools)
            print
            print 'Calculating statistics for %s' % filename_out
            arcpy.CalculateStatistics_management( filename_out, "", "", "" )
            print arcpy.GetMessages()
            print
            print 'Building pyramids for %s' % filename_out
            arcpy.BuildPyramids_management( filename_out )
            print arcpy.GetMessages()
            print
            # set of mukeyint in the spatial data but which are missing attribute data
            name_set = 'mukeyint_spatial_missing_attribute_' + basename_out + '_set'
            name_field = 'mukeyint_spatial_missing_attribute_' + basename_out # NOTE(review): defined but not used below
            # 8/7/2019 write the mukeyint_spatial_missing_attribute_set as an ASCII file
            basename_set_csv = name_set + '.csv'
            filename_set_csv = pathname_spatial_results + os.sep + basename_set_csv
            unit_set_csv = open( filename_set_csv, 'w' )
            header_set_csv = 'mukeyint'
            unit_set_csv.write( header_set_csv + '\n' ) # write the header string
            for mukeyint in sorted( mukeyint_spatial_missing_attribute_set ):
                unit_set_csv.write( str( mukeyint ) + '\n' )
            unit_set_csv.close()
            print 'Wrote %d records to %s' % ( len( mukeyint_spatial_missing_attribute_set ), filename_set_csv )
            print
            message = 'Completed %s' % basename_in
            previoustime = timer_f(message, starttime, previoustime)
            print
            # if this is the first map and the bool_use_tile_ar was just created, write it to a text file
            if not bool_using_bool_use_tile_array:
                print
                print 'This is the first map and the bool_use_tile_ar was just created, write it to a text file'
                print 'Counts of internal (virtual) tiles of nominal size %d x %d or less:' % ( num_pixels_per_tile_row, num_pixels_per_tile_col )
                print 'count_tiles_data %10d' % ( count_tiles_data, )
                print 'count_tiles_empty %10d' % ( count_tiles_empty, )
                print 'count_tiles_touched %10d' % ( count_tiles_touched, )
                message = 'Done populating "bool_use_tile_ar"'
                previoustime = timer_f(message, starttime, previoustime)
                # one '0'/'1' character per tile, one text line per tile row
                unit_txt = open( filename_txt, 'w')
                for index_tile_row in range( num_tiles_row ):
                    for index_tile_col in range( num_tiles_col ):
                        write_int = bool_use_tile_ar[index_tile_row, index_tile_col].astype(np.uint8)
                        write_string = '%d' % write_int
                        unit_txt.write( write_string )
                    unit_txt.write( '\n' ) # newline
                unit_txt.close()
                print
                # set the control variable so that the array will be used for future maps in this run
                bool_using_bool_use_tile_array = True
    del( in_rb ) # close the input raster band and data set
    del( in_ds )
    # print PROGRAM AID for the next step (e.g., t261)
    # PROGRAM AID makes use of structure from ssurgo.t260.EPA_20141123.map_mosaics_810m.01.py or similar
    print
    print 'PROGRAM AID to help with the next program'
    print 'PROGRAM AID only gives the basic structure, at this point, need to edit every such case.'
    # --- PROGRAM AID: emit a skeleton driver table for the next program in the chain ---
    EPA_code = 4
    MM_SD_string = 'MM_SD' # indicates to make Minimum-maximum scaling, Standard-Deviation scaling, or both
    # NOTE(review): this unpack expects 8-element entries, but the active
    # fieldname_to_map_tt entries defined above are 5-tuples -- as written this
    # loop raises ValueError.  It matches the alternate 8-element tuple form
    # kept in comments near the table; confirm before relying on this aid.
    for fieldname_to_map_t in fieldname_to_map_tt:
        ( # fieldname_to_map_t
            fieldname_in,
            data_code_in,
            name_fill_attribute,      # defined but not edited or used
            data_code_fill_attribute, # defined but not edited or used
            value_fill_attribute,     # defined but not edited or used
            name_scale,               # called "name_scale" in this program, called code_multiplier in a prior program
            fieldname_out,
            data_code_out,
            ) = fieldname_to_map_t
        print "('%s', '%s', '%s'," % (fieldname_in, data_code_in, name_scale, )
        print " '%s', '%s'," % (fieldname_out, data_code_out, )
        print " 'Title',"
        print " 'Red is zero percent, blue is 100 percent.'," % ()
        print " 'Data', '%s', %d)," % (MM_SD_string, EPA_code) # Data 4/3/2016 default
        print
    print
    message = 'Finish'
    previoustime = timer_final_f(message, starttime, previoustime)
    print 'Finish ',time.ctime() , ' ' * 5, os.path.abspath(sys.argv[0]) # E.g., 'Finish Sat Jun 27 11:09:37 2009 D:\Wylie.Owyhee\PythonScripts\ssurgo.extract.06.py'
except Exception, msg:
    # top-level handler for the try: wrapping the whole script body:
    # report the traceback, then emit the timing/footer lines
    print
    traceback.print_exc()
    print msg
    message = 'ERROR Finish'
    previoustime = timer_final_f(message, starttime, previoustime )
    print 'ERROR Finish ',time.ctime() , ' ' * 5, os.path.abspath(sys.argv[0]) # E.g., 'Finish Sat Jun 27 11:09:37 2009 D:\Wylie.Owyhee\PythonScripts\ssurgo.extract.06.py'