# Use NVIDIA DIGITS to classify the images in the test data set

# Copyright (c) 2017, zhangys@zjgsu.edu.cn

# Requires NVIDIA DIGITS to be installed

# Maps a model name to the id of the DIGITS job that trained it.
# Ids of earlier training runs are kept in the trailing comments.
DICT = {
    "VGG-16_transfer_learning": "20170719-171742-5c31",
    "VGG-16_from_scratch": '20170722-143332-f077',
    "GoogLeNet_from_scratch": '20170722-113606-c59c',
    "GoogLeNet_transfer_learning": '20170722-190454-1af1',  # '20170719-185942-bf65'
    "AlexNet_transfer_learning": '20170803-121318-b153',  # '20170802-164043-96f9' # '20170722-205930-3ac4' # '20170720-094939-b665'
    "AlexNet_from_scratch": '20170802-163356-7f3d',  # '20170802-162531-3531' # '20170722-152531-da8d'
}

def GetDictKeys():
    """Return the model names that are registered as keys of DICT."""
    model_names = DICT.keys()
    return model_names

# Text file listing the test images, one path per line (each line is stripped and loaded as an image).
FILE_LIST="/home/zys/Desktop/test2/data/FileList.txt"
# Text file with the ground-truth labels: a line containing '0' is read as a positive sample,
# a line containing '1' as a negative one.
LABEL_LIST="/home/zys/Desktop/test2/data/LabelList.txt"

# Directory that contains the DIGITS job folders (model and dataset jobs are looked up in it by id).
JOBS_DIR = "/home/zys/DIGITS-master/digits/jobs"  # "/var/lib/digits/jobs/"


import os
import sys

# Root of the Caffe installation, taken from the CAFFE_ROOT environment variable
# (e.g. '/home/zys/nv-caffe-0.15'). When running under sudo the environment may
# differ from the one of the current terminal.
caffe_root = os.environ['CAFFE_ROOT']
# os.path.join supplies the separator, so CAFFE_ROOT works with or without a
# trailing slash (plain string concatenation only worked with one).
sys.path.insert(0, os.path.join(caffe_root, 'python'))

import numpy
import pandas
from sklearn.metrics import roc_curve, auc, precision_recall_curve
from pandas_ml import ConfusionMatrix
import matplotlib.pyplot as plt
import shutil
import inspect

# Add path for DIGITS package
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
import digits.config
from digits.inference.errors import InferenceError
from digits.job import Job
from digits import utils

# To solve error "Check failed: error == cudaSuccess (10 vs. 0)  invalid device ordinal"
import caffe
print (caffe.__file__)
caffe.set_device(0)

"""
Perform inference on a list of images using the specified model
"""
def nv_digits_infer(input_list,
          output_dir,
          jobs_dir,
          model_id,
          epoch,
          batch_size,
          layers,
          gpu):
    """
    Perform inference on a list of images using the specified model.

    Args:
        input_list: path of a text file with one image path per line.
        output_dir: not used by this function; kept so existing calls still work.
        jobs_dir: directory holding the DIGITS jobs; the string 'none' selects
            the 'jobs_dir' value of the DIGITS configuration.
        model_id: id of the model job (name of its folder inside jobs_dir).
        epoch: epoch of the snapshot to use; -1 selects the last snapshot.
        batch_size: not used by this function; kept so existing calls still work.
        layers: must be the string 'none'; anything else is rejected.
        gpu: forwarded unchanged to infer_many().

    Returns:
        The "softmax" entry of the outputs returned by infer_many().

    Raises:
        AssertionError: if the model or dataset job directory does not exist.
        InferenceError: if no snapshot matches `epoch`, or if `layers` is not 'none'.

    Note:
        Images that cannot be loaded are reported on stdout and skipped, so the
        returned rows then no longer line up one-to-one with the lines of
        `input_list`.
    """
    # job directory defaults to that defined in DIGITS config
    if jobs_dir == 'none':
        jobs_dir = digits.config.config_value('jobs_dir')

    # load model job
    model_dir = os.path.join(jobs_dir, model_id)
    assert os.path.isdir(model_dir), "Model dir %s does not exist" % model_dir
    model = Job.load(model_dir)

    # load dataset job
    dataset_dir = os.path.join(jobs_dir, model.dataset_id)
    assert os.path.isdir(dataset_dir), "Dataset dir %s does not exist" % dataset_dir
    dataset = Job.load(dataset_dir)
    for task in model.tasks:
        task.dataset = dataset

    # retrieve snapshot file
    task = model.train_task()
    snapshot_filename = None
    epoch = float(epoch)
    if epoch == -1 and len(task.snapshots):
        # use last epoch
        snapshot_filename, epoch = task.snapshots[-1][0], task.snapshots[-1][1]
    else:
        for f, e in task.snapshots:
            if e == epoch:
                snapshot_filename = f
                break
    if not snapshot_filename:
        raise InferenceError("Unable to find snapshot for epoch=%s" % repr(epoch))

    # Reject unsupported requests before spending time on loading every image.
    if layers != 'none':
        raise InferenceError("Layer visualization is not supported for multiple inference")

    # retrieve image dimensions and resize mode
    image_dims = dataset.get_feature_dims()
    height = image_dims[0]
    width = image_dims[1]
    channels = image_dims[2]
    resize_mode = dataset.resize_mode if hasattr(dataset, 'resize_mode') else 'squash'

    # load paths from file
    with open(input_list) as infile:
        paths = infile.readlines()

    # load and resize images; unreadable images are reported and skipped
    input_data = []
    for path in paths:
        path = path.strip()
        try:
            image = utils.image.load_image(path)
            image = utils.image.resize_image(
                image,
                height,
                width,
                channels=channels,
                resize_mode=resize_mode)
            input_data.append(image)
        except utils.errors.LoadImageError as e:
            print(e)

    # perform inference
    outputs = model.train_task().infer_many(
        input_data,
        snapshot_epoch=epoch,
        gpu=gpu,
        resize=True)

    return outputs["softmax"]

def _safe_ratio(numerator, denominator):
    """Return numerator / denominator as a float, or 0.0 when the denominator is 0."""
    if not denominator:
        return 0.0
    return 1.0 * numerator / denominator


def nv_digits_infer_one_round(threshold, softmax1, softmax2):
    """
    Evaluate one decision threshold on two sets of softmax outputs.

    A sample is predicted positive when its first probability (probs[0]) is
    greater than `threshold`. Every sample of `softmax1` is counted as an
    actual positive and every sample of `softmax2` as an actual negative.

    Args:
        threshold: cutoff compared against probs[0].
        softmax1: iterable of probability rows of the actual positives.
        softmax2: iterable of probability rows of the actual negatives.

    Returns:
        (tpr, fpr): true-positive rate and false-positive rate at `threshold`.
        A rate whose denominator is zero (e.g. an empty input set) is 0.0.

    Side effects:
        Prints one space-separated line with: P, N, threshold, accuracy on the
        positives, accuracy on the negatives, overall accuracy, tp, tn, fp, fn,
        precision, recall, f1, sensitivity, specificity, fpr.
    """
    predicts1 = [bool(probs[0] > threshold) for probs in softmax1]
    predicts2 = [bool(probs[0] > threshold) for probs in softmax2]

    P = len(predicts1)
    N = len(predicts2)

    tp = predicts1.count(True)
    fn = predicts1.count(False)
    tn = predicts2.count(False)
    fp = predicts2.count(True)

    # Ratios fall back to 0.0 instead of raising ZeroDivisionError, which
    # happens e.g. when the threshold is so high that nothing is predicted
    # positive (tp + fp == 0).
    precision = _safe_ratio(tp, tp + fp)  # ppv
    sensitivity = _safe_ratio(tp, tp + fn)  # tpr, recall
    specificity = _safe_ratio(tn, fp + tn)  # tnr
    recall = sensitivity
    f1 = _safe_ratio(2.0 * precision * recall, precision + recall)

    # Per-set accuracy: positives are correct when predicted True, negatives
    # when predicted False.
    accuracy1 = _safe_ratio(tp, P)
    accuracy2 = _safe_ratio(tn, N)
    accuracy = _safe_ratio(tp + tn, P + N)

    tpr = sensitivity
    fpr = 1 - specificity if N else 0.0

    values = (P, N, threshold, accuracy1, accuracy2, accuracy, tp, tn, fp, fn,
              precision, recall, f1, sensitivity, specificity, fpr)
    print(' '.join(str(v) for v in values))
    return tpr, fpr

def nv_digits_infer_test_model(model):
    """
    Run the DIGITS model registered under `model` on the test set and analyze it.

    Steps:
      1. Read the ground-truth labels from LABEL_LIST: a line containing '0'
         becomes True, a line containing '1' becomes False, any other line is
         reported on stdout and skipped.
      2. Run nv_digits_infer() on FILE_LIST with the job id DICT[model],
         using the last snapshot.
      3. Write the softmax probabilities to '<model>_softmax_probs.txt', one
         image per line, the class probabilities separated by spaces.
      4. Pass the probability of the first class together with the labels to
         nv_digits_infer_test_model_analyze_result().

    Args:
        model: a key of DICT.

    Raises:
        KeyError: if `model` is not a key of DICT.
    """
    print ('####### ' + model + ' #########')

    labels = []
    with open(LABEL_LIST) as f:
        for line in f:
            if '0' in line:
                labels.append(True)
            elif '1' in line:
                labels.append(False)
            else:
                print("Unrecognized label!")

    digits_job_id = DICT[model]
    softmax = nv_digits_infer(FILE_LIST, "/home/zys/data/C3R/test/tmp/",  JOBS_DIR,  digits_job_id,  -1,  1,  'none',  0)

    probs = []
    lines = []
    for p in softmax:
        probs.append(p[0])
        # Write every class probability of the row rather than exactly three,
        # so models with a different number of classes work as well.
        lines.append(" ".join(str(v) for v in p))

    with open(model+'_softmax_probs.txt', 'w') as the_file:
        for line in lines:
            the_file.write(line+'\n')

    nv_digits_infer_test_model_analyze_result(probs, labels, model)
    

def nv_digits_infer_test_model2(fname1, fname2, model):
    """
    Analyze classification results that were saved to text files.

    Args:
        fname1: results file. Only lines with exactly 8 whitespace-separated
            fields are used; the field following "Disease" is read as a
            percentage (a trailing/leading '%' is stripped) and divided by 100.
        fname2: label file. A line containing '0' becomes True, a line
            containing '1' becomes False, any other line is reported on stdout
            and skipped.
        model: model name, forwarded to
            nv_digits_infer_test_model_analyze_result().
    """
    probs = []
    with open(fname1) as results_file:
        for row in results_file:
            tokens = row.split()
            if len(tokens) != 8:
                continue
            disease_pos = tokens.index("Disease")
            percent_text = tokens[disease_pos + 1].strip('%')
            probs.append(float(percent_text) / 100)

    labels = []
    with open(fname2) as labels_file:
        for row in labels_file:
            if '0' in row:
                labels.append(True)
            elif '1' in row:
                labels.append(False)
            else:
                print("Unrecognized label!")

    nv_digits_infer_test_model_analyze_result(probs, labels, model)
    

def _recreate_dir(path):
    """Delete `path` if it exists, then create it again as an empty directory."""
    if os.path.exists(path):
        shutil.rmtree(path)
    os.mkdir(path)


def nv_digits_infer_test_model_analyze_result(probs, labels, model):
    """
    Evaluate predicted probabilities against ground-truth labels.

    Args:
        probs: predicted probability of the positive class, one value per sample.
        labels: ground-truth labels (True = positive), in the same order as `probs`.
        model: model name; used as prefix of every output file and in plot titles.

    Outputs:
        '<model>.txt'           - optimal ROC cutoff (Youden index), confusion
                                  matrix statistics, ROC AUC and the cutoff
                                  maximizing F1 on the precision-recall curve.
        '<model>_FP', '<model>_FN' - directories next to this script (recreated
                                  on every call) receiving copies of the
                                  false-positive / false-negative images listed
                                  in FILE_LIST; each copy is prefixed with its
                                  predicted probability.
        '<model>_roc_curve.png', '<model>_pr_curve.png' - the plotted curves.
    """
    y_actu = labels
    y_pred = probs

    #### ROC Curve #####
    fpr, tpr, thresholds = roc_curve(y_actu, y_pred, pos_label=True)

    # The Youden index J (Youden, 1950) is defined as:
    # J = max { sensitivity_c + specificity_c - 1 }
    # where c ranges over all possible criterion values.
    # Graphically, J is the maximum vertical distance between the ROC curve and the diagonal line.
    # The criterion value corresponding with the Youden index J is the optimal criterion value only
    # when disease prevalence is 50%, equal weight is given to sensitivity and specificity, and
    # costs of various decisions are ignored.
    youden = tpr - fpr  # = tpr + (1 - fpr) - 1 = sensitivity + specificity - 1
    best_ind = youden.argmax()
    best_threshold = thresholds[best_ind]

    # cm.print_stats() prints to stdout, so stdout is redirected into the
    # report file. The try/finally guarantees that stdout is restored and the
    # report is closed even if something below raises.
    orig_stdout = sys.stdout
    report = open(model + '.txt', 'w')
    sys.stdout = report
    try:
        print(' '.join(['ROC curve optimal cutoff ', model, ' = ', str(best_threshold)]))

        # Apply the cutoff. roc_curve computes each operating point by treating
        # "score >= threshold" as positive, so the same comparison is used here
        # to reproduce the selected point.
        y_pred_labels = pandas.Series(y_pred) >= best_threshold
        cm = ConfusionMatrix(y_actu, y_pred_labels)
        cm.print_stats()

        # Collect the FP and FN samples in directories next to this script
        cwd = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
        directoryFP = os.path.join(cwd, model + '_FP')
        _recreate_dir(directoryFP)
        directoryFN = os.path.join(cwd, model + '_FN')
        _recreate_dir(directoryFN)

        with open(FILE_LIST) as file_list:
            for i, line in enumerate(file_list):
                line = line.strip()
                pred_label = y_pred_labels.iloc[i]
                actu_label = y_actu[i]
                new_file_name = "{0:.3f}".format(y_pred[i]) + "_" + os.path.basename(line)
                if actu_label and not pred_label:
                    shutil.copyfile(line, os.path.join(directoryFN, new_file_name))
                if not actu_label and pred_label:
                    shutil.copyfile(line, os.path.join(directoryFP, new_file_name))

        roc_auc = auc(fpr, tpr)
        print(' '.join(['ROC AUC = ', str(roc_auc)]))

        # Use precision recall curve for skewed / unbalanced data
        precision, recall, pr_thresholds = precision_recall_curve(y_actu, y_pred, pos_label=True)
        # Where precision + recall == 0 the F1 score is 0/0; treat it as 0 so
        # that argmax does not select a NaN entry.
        with numpy.errstate(divide='ignore', invalid='ignore'):
            f1 = 2.0 * precision * recall / (precision + recall)
        f1 = numpy.nan_to_num(f1)
        # precision/recall have one more entry than pr_thresholds (the final
        # point has no threshold), so keep the index inside pr_thresholds.
        pr_best_ind = min(int(f1.argmax()), len(pr_thresholds) - 1)
        pr_best_threshold = pr_thresholds[pr_best_ind]
        print(' '.join(['precision_recall_curve optimal cutoff ', model, ' = ', str(pr_best_threshold)]))
    finally:
        sys.stdout = orig_stdout
        report.close()

    # ROC curve with the optimal cutoff marked
    plt.figure()
    plt.plot(fpr, tpr, lw=1, label='ROC (area = %0.3f)' % (roc_auc))
    plt.plot([0, 1], [0, 1], '--', color=(0.6, 0.6, 0.6), label='Luck')
    plt.plot(fpr[best_ind], tpr[best_ind], 'ro', label='optimal cutoff')
    plt.xlim([-0.05, 1.05])
    plt.ylim([-0.05, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC Curve (' + model + ') ' )
    plt.legend(loc="lower right")
    plt.savefig(model + "_roc_curve.png")
    plt.close()

    # Precision-recall curve
    plt.figure()
    plt.plot(recall, precision, lw=1, label='P-R Curve')
    plt.xlim([-0.05, 1.05])
    plt.ylim([-0.05, 1.05])
    plt.ylabel('Precision')
    plt.xlabel('Recall')
    plt.title('Precision-Recall Curve (' + model + ') ' )
    plt.legend(loc="lower right")
    plt.savefig(model + "_pr_curve.png")
    plt.close()
       

def nv_digits_infer_test_all_models():
    """
    Test every model registered in DICT.

    For the n-th model (counting from 0) figure number n is opened with
    matplotlib before nv_digits_infer_test_model() is called for it.
    """
    for figure_number, model_name in enumerate(DICT):
        plt.figure(figure_number)
        nv_digits_infer_test_model(model_name)
            
# nv_digits_infer_test_all_models()

# special treatment for GoogLeNet_transfer_learning. It requires nv-caffe-0.15
# nv_digits_infer_test_model2("/home/zys/Desktop/scripts/GoogLeNet_ResultsFromDigits.txt", LABEL_LIST, "GoogLeNet_Transfer_Learning")

# nv_digits_infer_test_model2("AlexNet_T1.txt","AlexNet_T2.txt", "AlexNet")

# nv_digits_infer_test_model(DICT.keys()[int(sys.argv[1])])

# nv_digits_infer_test_model2("/home/zys/Desktop/scripts/GoogLeNet_ResultsFromDigits.txt", "/home/zys/Desktop/test2/data/LabelList.txt", "GoogLeNet_Transfer_Learning")