#Evaluate predictive performance of predicter.py

#Percentage of correct predictions for each type
import string, math, os

#path = raw_input('State working directorey: ')
#path = 'D:\DTU-studier\Master speciale\myShare\test\Ny mappe (2)'

# Class labels in reporting order; every per-type list below is indexed
# in this same order.
typelist = ['Hyp_', 'Ther_', 'Meso_', 'Psy_']


def parse_predictions(lines):
    """Return a list of (actual, predicted) label pairs.

    Each line is expected to hold the actual label in its first
    whitespace-separated field and the predicted label in its second.
    Double quotes are stripped so quoted and unquoted files are treated
    alike, and blank lines are skipped.

    Raises:
        IndexError: if a non-blank line has fewer than two fields.
    """
    pairs = []
    for line in lines:
        fields = line.replace('"', '').split()
        if not fields:
            # Tolerate empty lines (e.g. a trailing newline at end of file).
            continue
        pairs.append((fields[0], fields[1]))
    return pairs


def count_per_type(pairs):
    """Return, per type, how many entries have an actual label of that type.

    An entry belongs to a type when its actual label starts with the type
    name without the trailing underscore ('Hyp', 'Ther', 'Meso', 'Psy').
    Every counter starts at zero so the totals equal the number of
    matching entries.
    """
    totals = [0] * len(typelist)
    for actual, _predicted in pairs:
        for position, typ in enumerate(typelist):
            if actual.startswith(typ.rstrip('_')):
                totals[position] += 1
                break
    return totals


def count_correct(pairs):
    """Return, per type, how many entries were predicted correctly.

    A prediction is counted as correct when the predicted label occurs
    inside the actual label.

    Raises:
        ValueError: if such a predicted label is not one of ``typelist``.
    """
    correct = [0] * len(typelist)
    for actual, predicted in pairs:
        if predicted in actual:
            correct[typelist.index(predicted)] += 1
    return correct


def fraction_correct(hits, total):
    """Return hits/total as a float, or NaN when the type has no entries."""
    if total == 0:
        # Accuracy is undefined without entries; report NaN instead of
        # aborting the whole evaluation with ZeroDivisionError.
        return float('nan')
    return float(hits) / total


def confusion_counts(pairs):
    """Return one-vs-rest confusion counts as (TP, FP, TN, FN) lists.

    For each type, an entry is "positive" when the label starts with the
    type name; the actual label decides the truth and the predicted
    label decides the prediction.
    """
    size = len(typelist)
    tp, fp, tn, fn = [0] * size, [0] * size, [0] * size, [0] * size
    for actual, predicted in pairs:
        for position, typ in enumerate(typelist):
            is_actual = actual.startswith(typ)
            is_predicted = predicted.startswith(typ)
            if is_actual and is_predicted:
                tp[position] += 1
            elif is_predicted:
                fp[position] += 1
            elif is_actual:
                fn[position] += 1
            else:
                tn[position] += 1
    return tp, fp, tn, fn


def matthews_cc(tp, fp, tn, fn):
    """Return the Matthews correlation coefficient for one type.

    When any marginal sum is zero the coefficient is undefined; a notice
    is printed and 0.0 is returned (the numerator is necessarily zero in
    that case, so this matches dividing by one).
    """
    denominator = (tp + fp) * (tp + fn) * (tn + fp) * (tn + fn)
    if denominator == 0:
        print('ZERO OVERHERE!')
        return 0.0
    return (tp * tn - fp * fn) / math.sqrt(denominator)


def main():
    """Evaluate predictions.txt and write predictive_evaluations.txt.

    Prints the per-type correct counts, totals, accuracies and Matthews
    correlation coefficients, and writes the accuracies (one per line),
    a blank line, then one "type<TAB>coefficient" line per type.
    """
    with open('predictions.txt') as source:
        pairs = parse_predictions(source)

    correct_list = count_correct(pairs)
    N_list = count_per_type(pairs)
    print(correct_list)
    print(N_list)

    report = []
    print('\n')
    for hits, total in zip(correct_list, N_list):
        accuracy = fraction_correct(hits, total)
        print(accuracy)
        report.append(str(accuracy))
    report.append('')  # blank separator line between the two sections
    print('\n\n')

    # Matthews correlation coefficient, one-vs-rest for each type.
    tp, fp, tn, fn = confusion_counts(pairs)
    MCC_list = [matthews_cc(*counts) for counts in zip(tp, fp, tn, fn)]

    print('Mathews correlation coefficients')
    for typ, mcc in zip(typelist, MCC_list):
        print('%s %s' % (typ, mcc))
        report.append('%s\t%s' % (typ, mcc))

    # Write and close the report before handing it to a viewer, so the
    # viewer never sees a partially flushed file.
    with open('predictive_evaluations.txt', 'w') as output:
        output.write('\n'.join(report) + '\n')

    # os.startfile only exists on Windows; skip it elsewhere.
    if hasattr(os, 'startfile'):
        os.startfile('predictive_evaluations.txt')


if __name__ == '__main__':
    main()
