# soil_chi_square
#
# Reads a per-polygon soil presence table and a per-polygon weed severity
# table (both CSV), then writes three CSV cross-tabulations of weed
# occurrence by soil: any occurrence (severity > 0), minimal occurrence
# (severity == 1) and serious occurrence (severity > 1).
#
# All print calls take a single argument so the script behaves the same
# under Python 2 (where they parse as print statements) and Python 3.

# Import system modules
import os
import string
import sys

# Default directory holding the input CSV files and receiving the results.
DEFAULT_DATA_DIRECTORY = "C:\\DataFiles\\OSUSeedCert\\SpatialStats\\Soils_summary\\"

# Header text of the column that ends the list of soil columns.
SOIL_END_COLUMN = "All_soils"

# Header text of the last weed column to be processed.
WEED_END_COLUMN = "wPOATR"

# Zero-based field number of the first weed column in the weed file; the
# fields before it are FID_, OBJECTID, SHAPE_Leng and SHAPE_Area.
FIRST_WEED_FIELD = 4

# Soil grouping names selectable through pick_group().
SOIL_GROUPS = ("gt21percent", "gt10percent", "gt5percent", "any_amount", "majority")

# Soil group processed when the file is run as a script.  Other values that
# have been used: the entries of SOIL_GROUPS (see pick_group, index 0..4).
soil_group = "gt21percent_rev"

# Shared tables filled by get_soil_data() / get_weed_data() and read by
# save_soil_by_weed_data().
soil_name = {}  # index is 1-based rather than 0-based
weed_name = {}  # index is 1-based rather than 0-based
OID = {}  # record counter (1-based) -> polygon ID text from the soil file
verify_OID = {}  # record counter (1-based) -> OBJECTID text from the weed file
soil_data = {}  # (polygon ID, soil index) -> int value from the soil file
weed_data = {}  # (polygon ID, weed index) -> int severity from the weed file
soil_occurrence_totals = {}  # index is 1-based rather than 0-based
weed_occurrence_totals = {}  # index is 1-based rather than 0-based
weed_occurrence_minimal_totals = {}  # index is 1-based rather than 0-based
weed_occurrence_serious_totals = {}  # index is 1-based rather than 0-based
save_line = "initial condition"


def _split_csv_line(line):
    """Return the comma-separated fields of *line* without its line ending."""
    return line.rstrip("\r\n").split(",")


def _is_end_of_data(line):
    """Return True at end of file (empty string) or on a blank line."""
    return line.strip() == ""


def _resolve_directory(data_directory):
    """Return *data_directory*, or the default directory when it is None."""
    if data_directory is None:
        return DEFAULT_DATA_DIRECTORY
    return data_directory


def get_soil_data(which_group, data_directory=None):
    """Load soil names, polygon IDs and soil values for one soil group.

    Reads ``soils_<which_group>_data.csv`` and refills the module-level
    tables ``soil_name``, ``OID``, ``soil_data`` and
    ``soil_occurrence_totals`` (they are emptied first so repeated calls do
    not accumulate).  The first header cell must be ``IDpossible``; every
    following header cell is stored as a soil name.  Only the columns before
    the ``All_soils`` column are read as data.  Reading stops at the first
    blank line or at end of file.

    Args:
        which_group: soil group name used to build the file name.
        data_directory: directory containing the file; defaults to
            DEFAULT_DATA_DIRECTORY.

    Returns:
        ``(soil_loop_max, highest_number)`` where ``soil_loop_max`` is the
        1-based index of the ``All_soils`` column (0 if absent; usable as the
        stop value of ``range(1, soil_loop_max)``) and ``highest_number`` is
        the ``(record counter, polygon ID)`` pair of the last record.

    Raises:
        ValueError: if the header does not start with ``IDpossible``, the
            file has no data records, or a data cell is not an integer.
    """
    soil_data_filename = os.path.join(
        _resolve_directory(data_directory), "soils_" + which_group + "_data.csv")

    for table in (soil_name, OID, soil_data, soil_occurrence_totals):
        table.clear()

    soil_loop_max = 0
    with open(soil_data_filename, "r") as soil_data_file:
        # process first line of file to get all the soil names
        header = _split_csv_line(soil_data_file.readline())
        if header[0] != "IDpossible":
            raise ValueError(
                "possible error in datafile. Contents of cell A1 should be \"IDpossible\"")
        print("got to here - line 19")
        for soil_count in range(1, len(header)):
            soil_name[soil_count] = header[soil_count]
            if header[soil_count] == SOIL_END_COLUMN:
                soil_loop_max = soil_count
        if soil_loop_max == 0:
            print("warning, no \"" + SOIL_END_COLUMN + "\" column found; no soil data will be read")

        # next step is to load in all the soil occurrence by OID number data
        OID_counting = 0
        process_line = soil_data_file.readline()
        while not _is_end_of_data(process_line):
            OID_counting += 1  # OID is a 1-based rather than 0-based index
            fields = _split_csv_line(process_line)
            OID[OID_counting] = fields[0]
            for soil_count in range(1, soil_loop_max):
                index = (fields[0], soil_count)
                soil_data[index] = int(fields[soil_count])
                soil_occurrence_totals[soil_count] = (
                    soil_occurrence_totals.get(soil_count, 0) + soil_data[index])
            process_line = soil_data_file.readline()

    if not OID:
        raise ValueError("no data records found in " + soil_data_filename)
    highest_number = max(OID.items())
    return soil_loop_max, highest_number


def get_weed_data(data_directory=None):
    """Load weed names and per-polygon weed severities.

    Reads ``weed_rev_data.csv`` and refills the module-level tables
    ``weed_name``, ``verify_OID``, ``weed_data`` and the three weed
    occurrence total tables (they are emptied first).  The first header cell
    must be ``FID_``; the second is expected to be ``OBJECTID`` (a warning is
    printed otherwise).  Weed columns start at field FIRST_WEED_FIELD and end
    with the ``wPOATR`` column.  Each OBJECTID is checked against the polygon
    IDs already loaded by get_soil_data(); unknown IDs only produce a
    warning.  Reading stops at the first blank line or at end of file.

    Args:
        data_directory: directory containing the file; defaults to
            DEFAULT_DATA_DIRECTORY.

    Returns:
        ``(weed_loop_max, highest_number)`` where ``weed_loop_max`` is one
        more than the number of weed columns (usable as the stop value of
        ``range(1, weed_loop_max)``) and ``highest_number`` is the
        ``(record counter, OBJECTID)`` pair of the last record.

    Raises:
        ValueError: if the header does not start with ``FID_``, the
            ``wPOATR`` column is missing (previously an endless loop), the
            file has no data records, or a severity cell is not an integer.
    """
    weed_data_filename = os.path.join(
        _resolve_directory(data_directory), "weed_rev_data.csv")

    for table in (weed_name, verify_OID, weed_data, weed_occurrence_totals,
                  weed_occurrence_minimal_totals, weed_occurrence_serious_totals):
        table.clear()

    # Built once so the per-record membership test is a set lookup.
    known_OIDs = set(OID.values())

    weed_loop_max = 0
    with open(weed_data_filename, "r") as weed_data_file:
        file_line = weed_data_file.readline()
        header = _split_csv_line(file_line)
        if header[0] != "FID_":
            raise ValueError(
                "possible error in datafile. Contents of cell A1 should be \"FID_\"")
        print("got to here OK - line 69")

        # test for OBJECTID in the second column
        if len(header) < 2 or header[1] != "OBJECTID":
            print("warning, something is rotten in Denmark!")
            print(file_line)

        # columns 0-3 are ignored; weed names follow (weed_name is 1-based)
        for field_number in range(FIRST_WEED_FIELD, len(header)):
            weed_count = field_number - FIRST_WEED_FIELD + 1
            weed_name[weed_count] = header[field_number]
            if header[field_number] == WEED_END_COLUMN:
                weed_loop_max = weed_count + 1
                break
        if weed_loop_max == 0:
            raise ValueError(
                "possible error in datafile. No \"" + WEED_END_COLUMN + "\" column in header")

        # next step is to load in all the weed severity by OID number data
        OID_counting = 0
        process_line = weed_data_file.readline()
        while not _is_end_of_data(process_line):
            OID_counting += 1  # 1-based rather than 0-based index
            fields = _split_csv_line(process_line)
            record_OID = fields[1]
            verify_OID[OID_counting] = record_OID
            if record_OID not in known_OIDs:
                print("warning, something is rotten in Denmark!")
                print("weed_data_OID = " + record_OID
                      + " while possible OID values from soils data are :")
                print(sorted(known_OIDs))
            for weed_count in range(1, weed_loop_max):
                severity = int(fields[weed_count + FIRST_WEED_FIELD - 1])
                weed_data[(record_OID, weed_count)] = severity
                weed_occurrence_totals[weed_count] = (
                    weed_occurrence_totals.get(weed_count, 0) + int(severity > 0))
                weed_occurrence_minimal_totals[weed_count] = (
                    weed_occurrence_minimal_totals.get(weed_count, 0) + int(severity == 1))
                weed_occurrence_serious_totals[weed_count] = (
                    weed_occurrence_serious_totals.get(weed_count, 0) + int(severity > 1))
            process_line = weed_data_file.readline()

    if not verify_OID:
        raise ValueError("no data records found in " + weed_data_filename)
    highest_number = max(verify_OID.items())
    return weed_loop_max, highest_number


def save_soil_by_weed_data(weeds_to_do, soils_to_do, group, data_directory=None):
    """Write the weed-by-soil occurrence tables as three CSV files.

    For every soil index in ``range(1, soils_to_do)`` and weed index in
    ``range(1, weeds_to_do)`` the soil value of each polygon in
    ``verify_OID`` is summed where the weed severity is > 0, == 1 and > 1
    respectively.  Each file gets a header row, one row per soil ending with
    that soil's total from ``soil_occurrence_totals``, and a final row of
    per-weed totals.  All rows are computed before any file is opened, so a
    failure does not leave truncated result files behind.

    Files written (in *data_directory*):
        weed_occurrence_by_soil_<group>_results.csv
        weed_occurrence_minimal_by_soil_<group>_results.csv
        weed_occurrence_serious_by_soil_<group>_results.csv

    Args:
        weeds_to_do: stop value for the 1-based weed index range.
        soils_to_do: stop value for the 1-based soil index range.
        group: soil group name used in the output file names.
        data_directory: output directory; defaults to DEFAULT_DATA_DIRECTORY.

    Raises:
        KeyError: if a polygon ID from the weed file has no soil data, or a
            requested soil/weed index was never loaded.
    """
    data_directory = _resolve_directory(data_directory)
    OID_list = list(verify_OID.values())
    weed_numbers = range(1, weeds_to_do)

    # Parallel sequences: one entry per output file.
    prefixes = ("weed_occurrence", "weed_occurrence_minimal", "weed_occurrence_serious")
    totals_tables = (weed_occurrence_totals, weed_occurrence_minimal_totals,
                     weed_occurrence_serious_totals)

    header_line = ", ".join(
        ["soil_name"] + [weed_name[w] for w in weed_numbers] + ["soil_counts"]) + "\n"
    reports = [[header_line] for _ in prefixes]

    for soil_number in range(1, soils_to_do):
        rows = [[soil_name[soil_number]] for _ in prefixes]
        for weed_number in weed_numbers:
            present = minimal = serious = 0
            for OID_case in OID_list:
                this_severity = weed_data[(OID_case, weed_number)]
                soil_value = soil_data[(OID_case, soil_number)]
                present += soil_value * (this_severity > 0)
                minimal += soil_value * (this_severity == 1)
                serious += soil_value * (this_severity > 1)
            for row, count in zip(rows, (present, minimal, serious)):
                row.append(str(count))
        soil_total = str(soil_occurrence_totals[soil_number])
        for report, row in zip(reports, rows):
            report.append(", ".join(row + [soil_total]) + "\n")

    # Final row: blank soil name, per-weed totals, blank soil count.
    for report, totals in zip(reports, totals_tables):
        report.append(
            " , " + "".join(str(totals[w]) + ", " for w in weed_numbers) + " , \n")

    for prefix, report in zip(prefixes, reports):
        filename = os.path.join(
            data_directory, prefix + "_by_soil_" + group + "_results.csv")
        with open(filename, "w") as result_file:
            result_file.writelines(report)
    return


def pick_group(i):
    """Return the soil group name at position *i* (0..4) of SOIL_GROUPS."""
    return SOIL_GROUPS[i]


def main():
    """Run the analysis for the module-level ``soil_group``."""
    print("\nstarting main program")
    soils_count, record_count = get_soil_data(soil_group)
    weed_count, weed_record_count = get_weed_data()
    print(soils_count)
    print(record_count)
    print(weed_count)
    print(weed_record_count)
    save_soil_by_weed_data(weed_count, soils_count, soil_group)
    print("all done now")


# main program section
if __name__ == "__main__":
    main()