# Using digits to classify images in the test data set # Copyright (c) 2017, zhangys@zjgsu.edu.cn # requires NVidia Digits to be installed DICT = {} DICT["VGG-16_transfer_learning"] = "20170719-171742-5c31" DICT["VGG-16_from_scratch"] = '20170722-143332-f077' DICT["GoogLeNet_from_scratch"] = '20170722-113606-c59c' DICT["GoogLeNet_transfer_learning"] = '20170722-190454-1af1' # '20170719-185942-bf65' DICT["AlexNet_transfer_learning"] = '20170803-121318-b153'#'20170802-164043-96f9'# '20170722-205930-3ac4' # '20170720-094939-b665' DICT["AlexNet_from_scratch"]= '20170802-163356-7f3d'#'20170802-162531-3531' #'20170722-152531-da8d' def GetDictKeys(): return DICT.keys() FILE_LIST="/home/zys/Desktop/test2/data/FileList.txt" LABEL_LIST="/home/zys/Desktop/test2/data/LabelList.txt" JOBS_DIR = "/home/zys/DIGITS-master/digits/jobs" # "/var/lib/digits/jobs/" import os import sys caffe_root = os.environ['CAFFE_ROOT'] # '/home/zys/nv-caffe-0.15' # # When use sudo, the environ var may be different than the current terminal sys.path.insert(0,caffe_root+'python') import numpy import pandas from sklearn.metrics import roc_curve, auc, precision_recall_curve from pandas_ml import ConfusionMatrix import matplotlib.pyplot as plt import shutil import inspect # Add path for DIGITS package sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) import digits.config from digits.inference.errors import InferenceError from digits.job import Job from digits import utils # To solve error "Check failed: error == cudaSuccess (10 vs. 0) invalid device ordinal" import caffe print (caffe.__file__) caffe.set_device(0) """ Perform inference on a list of images using the specified model """ def nv_digits_infer(input_list, output_dir, jobs_dir, model_id, epoch, batch_size, layers, gpu): """ Perform inference on a list of images using the specified model """ # job directory defaults to that defined in DIGITS config if jobs_dir == 'none': jobs_dir = digits.config.config_value('jobs_dir') # load model job model_dir = os.path.join(jobs_dir, model_id) assert os.path.isdir(model_dir), "Model dir %s does not exist" % model_dir model = Job.load(model_dir) # load dataset job dataset_dir = os.path.join(jobs_dir, model.dataset_id) assert os.path.isdir(dataset_dir), "Dataset dir %s does not exist" % dataset_dir dataset = Job.load(dataset_dir) for task in model.tasks: task.dataset = dataset # retrieve snapshot file task = model.train_task() snapshot_filename = None epoch = float(epoch) if epoch == -1 and len(task.snapshots): # use last epoch epoch = task.snapshots[-1][1] snapshot_filename = task.snapshots[-1][0] else: for f, e in task.snapshots: if e == epoch: snapshot_filename = f break if not snapshot_filename: raise InferenceError("Unable to find snapshot for epoch=%s" % repr(epoch)) # retrieve image dimensions and resize mode image_dims = dataset.get_feature_dims() height = image_dims[0] width = image_dims[1] channels = image_dims[2] resize_mode = dataset.resize_mode if hasattr(dataset, 'resize_mode') else 'squash' n_input_samples = 0 # number of samples we were able to load input_ids = [] # indices of samples within file list input_data = [] # sample data # load paths from file paths = None with open(input_list) as infile: paths = infile.readlines() # load and resize images for idx, path in enumerate(paths): path = path.strip() try: image = utils.image.load_image(path.strip()) image = utils.image.resize_image( image, height, width, channels=channels, resize_mode=resize_mode) input_ids.append(idx) input_data.append(image) n_input_samples = n_input_samples + 1 except utils.errors.LoadImageError as e: print e # perform inference if layers != 'none': raise InferenceError("Layer visualization is not supported for multiple inference") outputs = model.train_task().infer_many( input_data, snapshot_epoch=epoch, gpu=gpu, resize=True) return outputs["softmax"] def nv_digits_infer_one_round(threshold, softmax1, softmax2): predicts1 = [] for idx, probs in enumerate(softmax1): # print paths[idx], probs[0], probs[1], probs[2] prediction = False if(probs[0]>threshold): prediction = True # predictions.append(probs.argmax()) predicts1.append(prediction) predicts2 = [] for idx, probs in enumerate(softmax2): # print paths[idx], probs[0], probs[1], probs[2] prediction = False if(probs[0]>threshold): prediction = True # predictions.append(probs.argmax()) predicts2.append(prediction) P= len(predicts1) N= len(predicts2) tp = predicts1.count(True) fn = predicts1.count(False) tn = predicts2.count(False) fp = predicts2.count(True) precision = 1.0*tp/(tp+fp) #ppv recall = 1.0*tp/(tp+fn) f1 = 2.0*precision*recall/(precision+recall) accuracy1 = (predicts1.count(0) * 1.0 / P) accuracy2 = (predicts2.count(1) * 1.0 / N) accuracy = (tp + tn)*1.0/(P+N) sensativity = 1.0*tp/(tp+fn) #tpr, recall specificity = 1.0*tn/(fp+tn) #tnr fpr = 1.0*fp/N # fp/(fp+tn) = 1.0-specificity fpr = 1.0*fp/(fp+tn) fpr = 1-specificity tpr = sensativity recall = sensativity print P, N, threshold, accuracy1, accuracy2, accuracy, tp, tn, fp, fn, precision, recall, f1, sensativity, specificity, fpr return tpr, fpr def nv_digits_infer_test_model(model): print ('####### ' + model + ' #########') labels = [] with open(LABEL_LIST) as f: for line in f: if('0' in line): labels.append(True) elif('1' in line): labels.append(False) else: print "Unrecognized label!" DIGITS_JOB_ID = DICT[model] softmax = nv_digits_infer(FILE_LIST, "/home/zys/data/C3R/test/tmp/", JOBS_DIR, DIGITS_JOB_ID, -1, 1, 'none', 0) lines=[] probs = [] for idx, p in enumerate(softmax): probs.append(p[0]) lines.append(str(p[0])+" "+str(p[1])+" "+str(p[2])) with open(model+'_softmax_probs.txt', 'w') as the_file: for line in lines: the_file.write(line+'\n') nv_digits_infer_test_model_analyze_result(probs, labels, model) def nv_digits_infer_test_model2(fname1, fname2, model): probs=[] with open(fname1) as f: for line in f: fields = line.split() if(len(fields) == 8): ind = fields.index("Disease") v = float(fields[ind+1].strip('%'))/100 probs.append(v) labels = [] with open(fname2) as f: for line in f: if('0' in line): labels.append(True) elif('1' in line): labels.append(False) else: print "Unrecognized label!" nv_digits_infer_test_model_analyze_result(probs, labels, model) def nv_digits_infer_test_model_analyze_result(probs, labels, model): # fprs = [] # tprs = [] # #for threshold in range(1,1000): # tpr, fpr = nv_digits_infer_one_round(threshold*0.001, softmax1, softmax2) # fprs.append(fpr) # tprs.append(tpr) y_actu = labels y_pred = probs #### CONFUSINO MATRIX #### #cm = ConfusionMatrix(y_actu, y_pred) #print cm #cm.print_stats() #### ROC Curve ##### fpr, tpr, thresholds = roc_curve(y_actu, y_pred, pos_label=True) # The Youden index J (Youden, 1950) is defined as: # J = max { sensitivityc + specificityc - 1 } # where c ranges over all possible criterion values. # Graphically, J is the maximum vertical distance between the ROC curve and the diagonal line. # The criterion value corresponding with the Youden index J is the optimal criterion value only when disease prevalence is 50%, equal weight is given to sensitivity and specificity, and costs of various decisions are ignored. youden = tpr - fpr # = tpr + (1-fpr) -1 = tpr - fpr # Sensitivity + Specificity - 1 best_ind = youden.argmax() best_threshold = thresholds[best_ind] orig_stdout = sys.stdout f = open(model + '.txt', 'w') sys.stdout = f print 'ROC curve optimal cutoff ', model, ' = ', best_threshold # Find prediction to the dataframe applying threshold y_pred_labels = pandas.Series( y_pred).map(lambda x: True if x > best_threshold else False) cm = ConfusionMatrix(y_actu, y_pred_labels) # print cm cm.print_stats() # Get FP and FN samples cwd = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) directoryFP = cwd + '/' + model+'_FP' if os.path.exists(directoryFP): shutil.rmtree(directoryFP) os.mkdir(directoryFP) directoryFN = cwd + '/' + model+'_FN' if os.path.exists(directoryFN): shutil.rmtree(directoryFN) os.mkdir(directoryFN) i=0 with open(FILE_LIST) as f: for line in f: line = line.strip() pred_label = y_pred_labels[i] actu_label = y_actu[i] basename = os.path.basename(line) new_file_name="{0:.3f}".format(y_pred[i]) + "_" +basename if(actu_label == True and pred_label == False): shutil.copyfile(line, directoryFN+"/"+ new_file_name) if(actu_label == False and pred_label == True): shutil.copyfile(line, directoryFP+"/"+ new_file_name) i+=1 roc_auc = auc(fpr, tpr) print 'ROC AUC = ', roc_auc # Use precision recall curve for skewed / unbalanced data precision, recall, pr_thresholds = precision_recall_curve(y_actu, y_pred, pos_label=True) f1 = 2.0*precision*recall/(precision+recall) pr_best_ind = f1.argmax() pr_best_threshold = pr_thresholds[pr_best_ind] print 'precision_recall_curve optimal cutoff ', model, ' = ', pr_best_threshold sys.stdout = orig_stdout f.close() plt.figure() plt.plot(fpr, tpr, lw=1, label='ROC (area = %0.3f)' % (roc_auc)) plt.plot([0, 1], [0, 1], '--', color=(0.6, 0.6, 0.6), label='Luck') plt.plot(fpr[best_ind], tpr[best_ind], 'ro', label='optimal cutoff') # (probability = %0.3f)' % (best_threshold)) plt.xlim([-0.05, 1.05]) plt.ylim([-0.05, 1.05]) plt.xlabel('False Positive Rate') plt.ylabel('True Positive Rate') plt.title('ROC Curve (' + model + ') ' ) plt.legend(loc="lower right") plt.savefig(model + "_roc_curve.png") #plt.show() plt.close() # pr_auc=auc(precision, recall) plt.figure() plt.plot(recall, precision, lw=1, label='P-R Curve') # plt.plot([0, 1], [0, 1], '--', color=(0.6, 0.6, 0.6), label='Luck') # plt.plot(recall[pr_best_ind], precision[pr_best_ind], 'ro') #, label='best threshold (%0.3f)' % (best_threshold)) plt.xlim([-0.05, 1.05]) plt.ylim([-0.05, 1.05]) plt.ylabel('Precision') plt.xlabel('Recall') plt.title('Precision-Recall Curve (' + model + ') ' ) plt.legend(loc="lower right") plt.savefig(model + "_pr_curve.png") #plt.show() plt.close() def nv_digits_infer_test_all_models(): i =0 for key in DICT: plt.figure(i) i += 1 nv_digits_infer_test_model(key) # nv_digits_infer_test_all_models() # special treatment for GoogLeNet_transfer_learning. It requires nv-caffe-0.15 # nv_digits_infer_test_model2("/home/zys/Desktop/scripts/GoogLeNet_ResultsFromDigits.txt", LABEL_LIST, "GoogLeNet_Transfer_Learning") # nv_digits_infer_test_model2("AlexNet_T1.txt","AlexNet_T2.txt", "AlexNet") # nv_digits_infer_test_model(DICT.keys()[int(sys.argv[1])]) # nv_digits_infer_test_model2("/home/zys/Desktop/scripts/GoogLeNet_ResultsFromDigits.txt", "/home/zys/Desktop/test2/data/LabelList.txt", "GoogLeNet_Transfer_Learning")