# soil_chi_square
# Import system modules
import sys, string, os

def get_soil_data(which_group):
    """Load the soil presence/absence table for one soil grouping.

    Reads ``soils_<which_group>_data.csv`` from the hard-coded Soils_summary
    directory and fills these module-level dictionaries:

    * ``soil_name[n]``  -- header text of the n-th column after the ID column
      (1-based; every header column is stored, including "All_soils").
    * ``OID[k]``        -- text of the first cell of the k-th data row (1-based).
    * ``soil_data[(oid, n)]`` -- integer cell value for that row and soil
      column, loaded only for the columns that precede "All_soils".
    * ``soil_occurrence_totals[n]`` -- running column sum of those values.

    Data rows are read until end of file or the first blank line.

    which_group -- text inserted into the file name, e.g. "gt21percent".

    Returns ``(soil_loop_max, highest_number)``.  ``soil_loop_max`` is the
    1-based position of the "All_soils" column (0 if the header has none) and
    is meant to be used as the exclusive stop value of ``range`` loops.
    ``highest_number`` is ``max(OID.items())``, i.e. the ``(index, id)`` pair
    with the largest index, or ``None`` when ``OID`` is empty (for example
    when the header check fails).

    Raises ValueError when a data row has fewer columns than the header
    requires or when a cell is not an integer.
    """
    global soil_name, OID, soil_data, soil_occurrence_totals # OID_counting
    soil_directory = "C:\\DataFiles\\OSUSeedCert\\SpatialStats\\Soils_summary\\soils_"
    soil_data_filename = soil_directory+which_group+"_data.csv"
    soil_loop_max = 0
    OID_counting = 0
    soil_data_file = open(soil_data_filename,"r")
    try:
        # try/finally (rather than "with") keeps this usable on old Python 2
        header = soil_data_file.readline().rstrip("\r\n").split(",")
        if header[0] == "IDpossible": # process first line of file to get all the soil names
            print("got to here - line 19")
            for soil_count in range(1, len(header)): # soil_name is 1 based rather than 0 based
                soil_name[soil_count] = header[soil_count]
                if header[soil_count] == "All_soils":
                    soil_loop_max = soil_count
            # next step is to load in all the soil occurrence by OID number data
            while True:
                process_line = soil_data_file.readline()
                # readline() gives "" at end of file and "\n" for a blank line;
                # either one ends the data section
                if process_line.strip() == "":
                    break
                fields = process_line.rstrip("\r\n").split(",")
                if len(fields) < soil_loop_max:
                    raise ValueError("soil data row has too few columns: " + repr(process_line))
                OID_counting += 1 # this puts OID into a 1 based rather than 0 based index
                OID[OID_counting] = fields[0]
                for soil_count in range(1, soil_loop_max): # every soil column before "All_soils"
                    index = (fields[0], soil_count)
                    soil_data[index] = int(fields[soil_count])
                    soil_occurrence_totals[soil_count] = soil_occurrence_totals.get(soil_count, 0) + soil_data[index]
        else:
            print("possible error in datafile. Contents of cell A1 should be \"IDpossible\"")
    finally:
        soil_data_file.close()
    highest_number = None
    if OID: # max() of an empty sequence would raise ValueError
        highest_number = max(OID.items())
    return soil_loop_max, highest_number

def get_weed_data():
    """Load the weed severity table and tally occurrences per weed.

    Reads ``weed_rev_data.csv`` from the hard-coded Soils_summary directory.
    The first header cell must be "FID_"; the second is expected to be
    "OBJECTID" (a warning is printed otherwise).  The third and fourth
    columns are skipped (the original notes call them SHAPE_Leng and
    SHAPE_Area).  Weed columns start at the fifth column and run up to and
    including the column named "wPOATR".

    Fills these module-level dictionaries (weed numbers are 1-based):

    * ``weed_name[n]``   -- header text of the n-th weed column.
    * ``verify_OID[k]``  -- second cell of the k-th data row (1-based).
    * ``weed_data[(oid, n)]`` -- integer cell value for that row and weed.
    * ``weed_occurrence_totals[n]``         -- rows with value > 0.
    * ``weed_occurrence_minimal_totals[n]`` -- rows with value == 1.
    * ``weed_occurrence_serious_totals[n]`` -- rows with value > 1.

    Uses ``OID`` (already filled by get_soil_data) only to warn about rows
    whose id is not among the soil ids.  Data rows are read until end of file
    or the first blank line.

    Returns ``(weed_loop_max, highest_number)``.  ``weed_loop_max`` is the
    number of weed columns plus one, so it can be used as the exclusive stop
    value of ``range`` loops.  ``highest_number`` is
    ``max(verify_OID.items())``, or ``None`` when ``verify_OID`` is empty
    (for example when the header check fails).

    Raises ValueError when the header has no "wPOATR" weed column, when a
    data row has too few columns, or when a cell is not an integer.
    """
    global weed_name, OID, verify_OID, weed_data, weed_occurrence_totals, weed_occurrence_minimal_totals, weed_occurrence_serious_totals # OID_counting
    soil_directory = "C:\\DataFiles\\OSUSeedCert\\SpatialStats\\Soils_summary\\"
    weed_data_filename = soil_directory+"weed_rev_data.csv"
    first_weed_column = 4 # 0-based position of the first weed column
    weed_loop_max = 0
    OID_counting = 0
    weed_data_file = open(weed_data_filename,"r")
    try:
        # try/finally (rather than "with") keeps this usable on old Python 2
        file_line = weed_data_file.readline()
        header = file_line.rstrip("\r\n").split(",")
        if header[0] == "FID_":
            print("got to here OK - line 69")
            if len(header) < 2 or header[1] != "OBJECTID":
                print("warning, something is rotten in Denmark!")
                print(file_line)
            for column in range(first_weed_column, len(header)):
                weed_count = column - first_weed_column + 1 # weed_name is 1 based
                weed_name[weed_count] = header[column]
                if header[column] == "wPOATR":
                    weed_loop_max = weed_count + 1 # stop value for Python range loops
                    break
            if weed_loop_max == 0:
                # scanning a finite field list means a missing end marker is
                # reported instead of searching the header line forever
                raise ValueError("weed data header has no \"wPOATR\" column after the first four columns")
            columns_needed = weed_loop_max + first_weed_column - 1
            known_OIDs = set(OID.values()) # built once; OID is not changed in this function
            # next step is to load in all the weed severity by OID number data
            while True: # looping thru all the records in file
                process_line = weed_data_file.readline()
                # readline() gives "" at end of file and "\n" for a blank line;
                # either one ends the data section
                if process_line.strip() == "":
                    break
                fields = process_line.rstrip("\r\n").split(",")
                if len(fields) < columns_needed:
                    raise ValueError("weed data row has too few columns: " + repr(process_line))
                OID_counting += 1 # this puts verify_OID into a 1 based rather than 0 based index
                verify_OID[OID_counting] = fields[1]
                if fields[1] not in known_OIDs:
                    print("warning, something is rotten in Denmark!")
                    print("weed_data_OID = "+verify_OID[OID_counting]+" while possible OID values from soils data are :")
                    # show the ids themselves; the keys of OID are only row counters
                    print(list(OID.values()))
                for weed_count in range(1, weed_loop_max):
                    index = (fields[1], weed_count)
                    severity = int(fields[weed_count + first_weed_column - 1])
                    weed_data[index] = severity
                    # a comparison is True/False, which adds as 1/0
                    weed_occurrence_totals[weed_count] = weed_occurrence_totals.get(weed_count, 0) + (severity > 0)
                    weed_occurrence_minimal_totals[weed_count] = weed_occurrence_minimal_totals.get(weed_count, 0) + (severity == 1)
                    weed_occurrence_serious_totals[weed_count] = weed_occurrence_serious_totals.get(weed_count, 0) + (severity > 1)
        else:
            print("possible error in datafile. Contents of cell A1 should be \"FID_\"")
    finally:
        weed_data_file.close()
    highest_number = None
    if verify_OID: # max() of an empty sequence would raise ValueError
        highest_number = max(verify_OID.items())
    return weed_loop_max, highest_number # OID_counting

def save_soil_by_weed_data(weeds_to_do,soils_to_do,group):
    """Write the soil-by-weed cross tabulations to three CSV files.

    For every soil ``s`` in ``range(1, soils_to_do)`` and every weed ``w`` in
    ``range(1, weeds_to_do)`` the value written is the sum, over all ids in
    ``verify_OID``, of ``soil_data[(id, s)]`` for the records where the weed
    severity ``weed_data[(id, w)]`` satisfies the file's test:

    * ``weed_occurrence_by_soil_<group>_results.csv``          severity > 0
    * ``weed_occurrence_minimal_by_soil_<group>_results.csv``  severity == 1
    * ``weed_occurrence_serious_by_soil_<group>_results.csv``  severity > 1

    Each file has a header row (``soil_name``, the weed names,
    ``soil_counts``), one row per soil ending with that soil's entry from
    ``soil_occurrence_totals``, and a last row holding the per-weed totals
    from the matching ``weed_occurrence*_totals`` dictionary.  Cells are
    separated by ", ".

    weeds_to_do -- exclusive stop value for the 1-based weed numbers.
    soils_to_do -- exclusive stop value for the 1-based soil numbers.
    group       -- text inserted into the three output file names.

    Returns None.  Raises KeyError when a needed entry is missing from one of
    the module-level dictionaries (for example an id present in the weed data
    but absent from the soil data).  The output files are closed even then.
    """
    global soil_name, weed_name, verify_OID, soil_data, weed_data, soil_occurrence_totals, weed_occurrence_totals, weed_occurrence_minimal_totals, weed_occurrence_serious_totals
    data_directory = "C:\\DataFiles\\OSUSeedCert\\SpatialStats\\Soils_summary\\"
    OID_list = list(verify_OID.values())
    weed_range = range(1, weeds_to_do)
    # One entry per output file: (file name stem, per-weed totals for the last row).
    # Position 0 / 1 / 2 = any occurrence / minimal / serious throughout this function.
    outputs = (
        ("weed_occurrence_by_soil_", weed_occurrence_totals),
        ("weed_occurrence_minimal_by_soil_", weed_occurrence_minimal_totals),
        ("weed_occurrence_serious_by_soil_", weed_occurrence_serious_totals),
    )
    open_files = []
    try:
        # try/finally (rather than "with") keeps this usable on old Python 2
        for stem, unused_totals in outputs:
            open_files.append(open(data_directory+stem+group+"_results.csv","w"))
        params = "soil_name, " + "".join([weed_name[weed_numbers]+", " for weed_numbers in weed_range]) + "soil_counts\n"
        for out_file in open_files:
            out_file.write(params)
        for soil_numbers in range(1,soils_to_do):
            row_start = soil_name[soil_numbers]+", "
            cells = ([], [], []) # text cells of this soil's row, one list per output file
            for weed_numbers in weed_range:
                counts = [0, 0, 0]
                for OID_cases in OID_list:
                    this_severity = weed_data[(OID_cases,weed_numbers)]
                    soil_present = soil_data[(OID_cases,soil_numbers)]
                    # a comparison is True/False, which multiplies as 1/0
                    counts[0] += soil_present * (this_severity > 0)
                    counts[1] += soil_present * (this_severity == 1)
                    counts[2] += soil_present * (this_severity > 1)
                for which in range(3):
                    cells[which].append(str(counts[which])+", ")
            row_end = str(soil_occurrence_totals[soil_numbers])+"\n"
            for which in range(3):
                open_files[which].write(row_start + "".join(cells[which]) + row_end)
        # last row: blank soil-name cell, per-weed totals, blank soil-count cell
        for which in range(3):
            totals = outputs[which][1]
            footer = " , " + "".join([str(totals[weed_numbers])+", " for weed_numbers in weed_range]) + " , \n"
            open_files[which].write(footer)
    finally:
        for out_file in open_files:
            out_file.close()
    return

def pick_group(i):
    """Return the standard soil-group label at position ``i``.

    The labels, in order, are "gt21percent", "gt10percent", "gt5percent",
    "any_amount" and "majority".  ``i`` is used directly as a tuple index, so
    an integer outside the tuple's bounds raises IndexError.
    """
    labels = (
        "gt21percent",
        "gt10percent",
        "gt5percent",
        "any_amount",
        "majority",
    )
    chosen = labels[i]
    return chosen

# main program section
# The soil grouping is set by hand.  Assign the label directly, as done here,
# or take one of the five standard labels from pick_group(0) .. pick_group(4):
# "gt21percent", "gt10percent", "gt5percent", "any_amount", "majority".
soil_group = "gt21percent_rev"

# Tables shared by the functions above.  They must exist at module level
# before get_soil_data / get_weed_data are called.
soil_name = {} # column names, index is 1-based rather than 0-based
weed_name = {} # column names, index is 1-based rather than 0-based
OID = {}        # ids read from the soils file, keyed by 1-based row counter
verify_OID = {} # ids read from the weed file, keyed by 1-based row counter
soil_data = {}  # keyed by (id, soil number)
weed_data = {}  # keyed by (id, weed number)
soil_occurrence_totals = {} # index is 1-based rather than 0-based
weed_occurrence_totals = {} # index is 1-based rather than 0-based
weed_occurrence_minimal_totals = {} # index is 1-based rather than 0-based
weed_occurrence_serious_totals = {} # index is 1-based rather than 0-based

save_line = "initial condition"
print("\nstarting main program")

# load both input tables, report what was found, then write the result files
soils_count,record_count = get_soil_data(soil_group)
weed_count,weed_record_count = get_weed_data()
print(soils_count)
print(record_count)
print(weed_count)
print(weed_record_count)
save_soil_by_weed_data(weed_count,soils_count,soil_group)
print("all done now")

#print soil_data