Supplement
An ROP (Retinopathy of Prematurity) Screening Classifier for Retinal Fundus Photographs Based on Transfer Learning
DNN Model Test Results
- AlexNet_from_scratch.txt . . . . . . 612 B . . . . . . 5/5/2022 12:04:28 PM
- AlexNet_from_scratch_pr_curve.png . . . . . . 31 KB . . . . . . 5/5/2022 12:04:28 PM
- AlexNet_from_scratch_roc_curve.png . . . . . . 40 KB . . . . . . 5/5/2022 12:04:28 PM
- AlexNet_from_scratch_softmax_probs.txt . . . . . . 48 KB . . . . . . 5/5/2022 12:04:28 PM
- AlexNet_transfer_learning.txt . . . . . . 635 B . . . . . . 5/5/2022 12:04:28 PM
- AlexNet_transfer_learning_pr_curve.png . . . . . . 32 KB . . . . . . 5/5/2022 12:04:28 PM
- AlexNet_transfer_learning_roc_curve.png . . . . . . 41 KB . . . . . . 5/5/2022 12:04:28 PM
- AlexNet_transfer_learning_softmax_probs.txt . . . . . . 49 KB . . . . . . 5/5/2022 12:04:28 PM
- VGG-16_from_scratch.txt . . . . . . 587 B . . . . . . 5/5/2022 12:04:28 PM
- VGG-16_from_scratch_pr_curve.png . . . . . . 33 KB . . . . . . 5/5/2022 12:04:28 PM
- VGG-16_from_scratch_roc_curve.png . . . . . . 40 KB . . . . . . 5/5/2022 12:04:28 PM
- VGG-16_from_scratch_softmax_probs.txt . . . . . . 49 KB . . . . . . 5/5/2022 12:04:28 PM
- VGG-16_transfer_learning.txt . . . . . . 628 B . . . . . . 5/5/2022 12:04:28 PM
- VGG-16_transfer_learning_pr_curve.png . . . . . . 25 KB . . . . . . 5/5/2022 12:04:28 PM
- VGG-16_transfer_learning_roc_curve.png . . . . . . 39 KB . . . . . . 5/5/2022 12:04:28 PM
- VGG-16_transfer_learning_softmax_probs.txt . . . . . . 53 KB . . . . . . 5/5/2022 12:04:28 PM
- GoogLeNet_from_cratch.txt . . . . . . 601 B . . . . . . 5/5/2022 12:04:28 PM
- GoogLeNet_from_cratch_softmax_probs.txt . . . . . . 49 KB . . . . . . 5/5/2022 12:04:28 PM
- GoogLeNet_from_scratch.txt . . . . . . 603 B . . . . . . 5/5/2022 12:04:28 PM
- GoogLeNet_from_scratch_pr_curve.png . . . . . . 33 KB . . . . . . 5/5/2022 12:04:28 PM
- GoogLeNet_from_scratch_roc_curve.png . . . . . . 41 KB . . . . . . 5/5/2022 12:04:28 PM
- GoogLeNet_from_scratch_softmax_probs.txt . . . . . . 49 KB . . . . . . 5/5/2022 12:04:28 PM
- GoogLeNet_Transfer_Learning.txt . . . . . . 610 B . . . . . . 5/5/2022 12:04:28 PM
- GoogLeNet_Transfer_Learning_pr_curve.png . . . . . . 26 KB . . . . . . 5/5/2022 12:04:28 PM
- GoogLeNet_Transfer_Learning_roc_curve.png . . . . . . 39 KB . . . . . . 5/5/2022 12:04:28 PM
- GoogLeNet_transfer_learning_softmax_probs.txt . . . . . . 233 KB . . . . . . 5/5/2022 12:04:28 PM
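Each *_softmax_probs.txt file above holds one line per test image with three space-separated class probabilities, the disease class first, as written by the scripts in the section below. A minimal sketch for recomputing a model's ROC AUC from such a file (the bare LabelList.txt path is an assumption; the scripts below use /home/zys/Desktop/test2/data/LabelList.txt, where '0' marks disease):
from sklearn.metrics import roc_curve, auc

probs = []
with open("VGG-16_transfer_learning_softmax_probs.txt") as f:
    for line in f:
        probs.append(float(line.split()[0]))  # disease-class probability
labels = []
with open("LabelList.txt") as f:
    for line in f:
        labels.append(line.strip() == '0')    # '0' = disease = positive
fpr, tpr, _ = roc_curve(labels, probs, pos_label=True)
print("ROC AUC = %.3f" % auc(fpr, tpr))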
Human Expert Performance Data
- CHEN_DESKTOP-C7O8G5I20170802183814.txt . . . . . . 68 KB . . . . . . 5/5/2022 12:04:28 PM
- TIAN_8L-HZB-220170803195744.txt . . . . . . 67 KB . . . . . . 5/5/2022 12:04:28 PM
- WU_8L-HZB-220170802173250.txt . . . . . . 67 KB . . . . . . 5/5/2022 12:04:28 PM
- ZENG_8L-HZB-220170803141821.txt . . . . . . 67 KB . . . . . . 5/5/2022 12:04:28 PM
- ZHANG_RETCAM320170803174040.txt . . . . . . 67 KB . . . . . . 5/5/2022 12:04:28 PM
- DNN_measures.txt . . . . . . 3 KB . . . . . . 5/5/2022 12:04:28 PM
- DNN_pr_curve.txt . . . . . . 8 KB . . . . . . 5/5/2022 12:04:28 PM
- DNN_softmax_probs.txt . . . . . . 53 KB . . . . . . 5/5/2022 12:04:28 PM
- DNN_vs_Human_pr_curve.png . . . . . . 26 KB . . . . . . 5/5/2022 12:04:28 PM
- DNN_vs_Human_roc_curve.png . . . . . . 37 KB . . . . . . 5/5/2022 12:04:28 PM
- Human_Performance_Summary.txt . . . . . . 7 KB . . . . . . 5/5/2022 12:04:28 PM
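Each expert file above is a comma-separated log of image_filename,label pairs, where label '1' is a disease call and repeated filenames are ignored, as parsed by digits_predict_folder_with_human_experts.py below. A minimal sketch for deriving one expert's sensitivity and specificity (the function and the truth dict are illustrative names; ground truth comes from FileList.txt/LabelList.txt as in the scripts, with '0' = disease = True):
def expert_sensitivity_specificity(expert_file, truth):
    # truth: {image basename: True if disease}
    tp = fp = tn = fn = 0
    seen = set()
    with open(expert_file) as f:
        for line in f:
            name, label = line.strip().split(',')[:2]
            if name in seen:
                continue  # ignore duplicated readings, as the scripts do
            seen.add(name)
            pred = (label == '1')
            if truth[name]:
                tp += pred
                fn += not pred
            else:
                fp += pred
                tn += not pred
    return 1.0 * tp / (tp + fn), 1.0 * tn / (tn + fp)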
Source Code and Scripts for DNN Batch Inference
binaryproto2npy.py . . . . . . 395 B . . . . . . 5/5/2022 12:04:28 PM
# convert a mean file from binaryproto to npy
import caffe
import numpy as np
import sys

if len(sys.argv) != 3:
    print "Usage: python convert_protomean.py proto.mean out.npy"
    sys.exit()

blob = caffe.proto.caffe_pb2.BlobProto()
data = open(sys.argv[1], 'rb').read()
blob.ParseFromString(data)
arr = np.array(caffe.io.blobproto_to_array(blob))
out = arr[0]
np.save(sys.argv[2], out)
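Usage, mirroring the script's own usage string: python binaryproto2npy.py mean.binaryproto mean.npy (the file names are placeholders; note the usage string refers to the script by its older name, convert_protomean.py).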
digits_classify_folder.py . . . . . . 5 KB . . . . . . 5/5/2022 12:04:28 PM
# Using digits to classify images under a specific folder
# Copyright (c) 2017, zhangys@zjgsu.edu.cn
# requires NVidia Digits to be installed
SOURCE_FOLDER = "/home/zys/seagate/Extra_20170723/"
SOURCE_FILE_LIST = "/home/zys/seagate/Extra_20170723.txt"
REF_FOLDER = '' #"/home/zys/seagate/ROP_Classified_20170603_Combined/images"+"/" # the files with the same name in this folder will not be processed
TARGET_FOLDER = '/home/zys/seagate/Extra_Selected_20170723/' #"/home/zys/seagate/ROP_201707/A"+"/"
JOBS_DIR = "/home/zys/DIGITS-master/digits/jobs" # "/var/lib/digits/jobs/"
import os
import sys
import shutil
caffe_root = os.environ['CAFFE_ROOT']
sys.path.insert(0, os.path.join(caffe_root, 'python'))
# Add path for DIGITS package
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
import digits.config
from digits.inference.errors import InferenceError
from digits.job import Job
from digits import utils
# To solve error "Check failed: error == cudaSuccess (10 vs. 0) invalid device ordinal"
import caffe
caffe.set_device(0)
"""
Perform inference on a list of images using the specified model
"""
def nv_digits_infer(input_list,
output_dir,
jobs_dir,
model_id,
epoch,
batch_size,
layers,
gpu):
"""
Perform inference on a list of images using the specified model
"""
# job directory defaults to that defined in DIGITS config
if jobs_dir == 'none':
jobs_dir = digits.config.config_value('jobs_dir')
# load model job
model_dir = os.path.join(jobs_dir, model_id)
assert os.path.isdir(model_dir), "Model dir %s does not exist" % model_dir
model = Job.load(model_dir)
# load dataset job
dataset_dir = os.path.join(jobs_dir, model.dataset_id)
assert os.path.isdir(dataset_dir), "Dataset dir %s does not exist" % dataset_dir
dataset = Job.load(dataset_dir)
for task in model.tasks:
task.dataset = dataset
# retrieve snapshot file
task = model.train_task()
snapshot_filename = None
epoch = float(epoch)
if epoch == -1 and len(task.snapshots):
# use last epoch
epoch = task.snapshots[-1][1]
snapshot_filename = task.snapshots[-1][0]
else:
for f, e in task.snapshots:
if e == epoch:
snapshot_filename = f
break
if not snapshot_filename:
raise InferenceError("Unable to find snapshot for epoch=%s" % repr(epoch))
# retrieve image dimensions and resize mode
image_dims = dataset.get_feature_dims()
height = image_dims[0]
width = image_dims[1]
channels = image_dims[2]
resize_mode = dataset.resize_mode if hasattr(dataset, 'resize_mode') else 'squash'
n_input_samples = 0 # number of samples we were able to load
input_ids = [] # indices of samples within file list
input_data = [] # sample data
# load paths from file
paths = None
with open(input_list) as infile:
paths = infile.readlines()
# load and resize images
for idx, path in enumerate(paths):
path = path.strip()
try:
image = utils.image.load_image(path.strip())
image = utils.image.resize_image(
image,
height,
width,
channels=channels,
resize_mode=resize_mode)
input_ids.append(idx)
input_data.append(image)
n_input_samples = n_input_samples + 1
except utils.errors.LoadImageError as e:
print e
# perform inference
if layers != 'none':
raise InferenceError("Layer visualization is not supported for multiple inference")
outputs = model.train_task().infer_many(
input_data,
snapshot_epoch=epoch,
gpu=gpu,
resize=True)
return outputs["softmax"]
# NOTE: DICT (model name -> DIGITS job ID) is defined in digits_predict_folder.py,
# not in this file; when running this script standalone, define it here, e.g.
# DICT = {"VGG-16": "20170719-171742-5c31"}  (job ID assumed from the VGG-16
# transfer-learning entry in digits_predict_folder.py)
def nv_digits_classify(filelist, model):
    DIGITS_JOB_ID = DICT[model]
    softmax = nv_digits_infer(filelist, "/home/zys/data/C3R/test/tmp/",
                              JOBS_DIR,
                              DIGITS_JOB_ID,
                              -1,
                              1,
                              'none',
                              0)
    cls = []
    for idx, p in enumerate(softmax):
        cls.append(p.argmax())
    i = 0
    with open(filelist) as f:
        for line in f:
            line = line.strip()
            c = str(cls[i])
            i += 1
            directory = TARGET_FOLDER + "/" + c
            if not os.path.exists(directory):
                os.makedirs(directory)
            shutil.copyfile(line, directory + "/" + os.path.basename(line))
def split_filelist(filelist, MAX_LINE_PER_FILE):
    i = 0
    fidx = 0
    files = []
    currentfile = None
    with open(filelist) as f:
        for line in f:
            if i % MAX_LINE_PER_FILE == 0:
                if currentfile is not None:
                    currentfile.close()
                currentfile = open(filelist + str(fidx), 'w')
                files.append(filelist + str(fidx))
                fidx += 1
            line = line.strip()
            currentfile.write("%s\n" % line)
            i += 1
    if currentfile is not None:
        currentfile.close()  # flush the last chunk before it is read back
    return files
# nv_digits_classify(SOURCE_FILE_LIST, "VGG-16")
files = split_filelist(SOURCE_FILE_LIST, 500)
i = 0
import time
caffe.set_mode_gpu()
# Every 3 rounds an error is thrown, so the script takes a start index in
# sys.argv[1] and is relaunched from a shell for-loop, e.g.:
# for((i=30;i<=132;i=i+3));do sudo python ...py $i;done
print ("------", sys.argv[1], "------")
for f in files:
    if i >= int(sys.argv[1]):
        nv_digits_classify(f, "VGG-16")
        print(i, "*************", f, " Finished *************")
    i += 1
    # if(i%5==0): # in GPU mode, the CUDA handle is not released immediately
    #     caffe.set_mode_gpu()
    # else:
    #     caffe.set_mode_cpu()
    time.sleep(0)
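The restart pattern in the comment above can also be driven from Python instead of bash; a hypothetical wrapper (assuming the script is saved under its listed name, digits_classify_folder.py, with the index range taken from the comment):
import subprocess

# relaunch the classifier every 3 batches, resuming at the next start index
for start in range(30, 133, 3):
    subprocess.call(["sudo", "python", "digits_classify_folder.py", str(start)])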
digits_predict_folder.py . . . . . . 12 KB . . . . . . 5/5/2022 12:04:28 PM
# Using digits to classify images in the test data set
# Copyright (c) 2017, zhangys@zjgsu.edu.cn
# requires NVidia Digits to be installed
DICT = {}
DICT["VGG-16_transfer_learning"] = "20170719-171742-5c31"
DICT["VGG-16_from_scratch"] = '20170722-143332-f077'
DICT["GoogLeNet_from_scratch"] = '20170722-113606-c59c'
DICT["GoogLeNet_transfer_learning"] = '20170722-190454-1af1' # '20170719-185942-bf65'
DICT["AlexNet_transfer_learning"] = '20170803-121318-b153'#'20170802-164043-96f9'# '20170722-205930-3ac4' # '20170720-094939-b665'
DICT["AlexNet_from_scratch"]= '20170802-163356-7f3d'#'20170802-162531-3531' #'20170722-152531-da8d'
def GetDictKeys():
return DICT.keys()
FILE_LIST="/home/zys/Desktop/test2/data/FileList.txt"
LABEL_LIST="/home/zys/Desktop/test2/data/LabelList.txt"
JOBS_DIR = "/home/zys/DIGITS-master/digits/jobs" # "/var/lib/digits/jobs/"
import os
import sys
caffe_root = os.environ['CAFFE_ROOT']  # e.g. '/home/zys/nv-caffe-0.15'; when run with sudo, the environment may differ from the current terminal's
sys.path.insert(0, os.path.join(caffe_root, 'python'))
import numpy
import pandas
from sklearn.metrics import roc_curve, auc, precision_recall_curve
from pandas_ml import ConfusionMatrix
import matplotlib.pyplot as plt
import shutil
import inspect
# Add path for DIGITS package
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
import digits.config
from digits.inference.errors import InferenceError
from digits.job import Job
from digits import utils
# To solve error "Check failed: error == cudaSuccess (10 vs. 0) invalid device ordinal"
import caffe
print (caffe.__file__)
caffe.set_device(0)
"""
Perform inference on a list of images using the specified model
"""
def nv_digits_infer(input_list,
output_dir,
jobs_dir,
model_id,
epoch,
batch_size,
layers,
gpu):
"""
Perform inference on a list of images using the specified model
"""
# job directory defaults to that defined in DIGITS config
if jobs_dir == 'none':
jobs_dir = digits.config.config_value('jobs_dir')
# load model job
model_dir = os.path.join(jobs_dir, model_id)
assert os.path.isdir(model_dir), "Model dir %s does not exist" % model_dir
model = Job.load(model_dir)
# load dataset job
dataset_dir = os.path.join(jobs_dir, model.dataset_id)
assert os.path.isdir(dataset_dir), "Dataset dir %s does not exist" % dataset_dir
dataset = Job.load(dataset_dir)
for task in model.tasks:
task.dataset = dataset
# retrieve snapshot file
task = model.train_task()
snapshot_filename = None
epoch = float(epoch)
if epoch == -1 and len(task.snapshots):
# use last epoch
epoch = task.snapshots[-1][1]
snapshot_filename = task.snapshots[-1][0]
else:
for f, e in task.snapshots:
if e == epoch:
snapshot_filename = f
break
if not snapshot_filename:
raise InferenceError("Unable to find snapshot for epoch=%s" % repr(epoch))
# retrieve image dimensions and resize mode
image_dims = dataset.get_feature_dims()
height = image_dims[0]
width = image_dims[1]
channels = image_dims[2]
resize_mode = dataset.resize_mode if hasattr(dataset, 'resize_mode') else 'squash'
n_input_samples = 0 # number of samples we were able to load
input_ids = [] # indices of samples within file list
input_data = [] # sample data
# load paths from file
paths = None
with open(input_list) as infile:
paths = infile.readlines()
# load and resize images
for idx, path in enumerate(paths):
path = path.strip()
try:
image = utils.image.load_image(path.strip())
image = utils.image.resize_image(
image,
height,
width,
channels=channels,
resize_mode=resize_mode)
input_ids.append(idx)
input_data.append(image)
n_input_samples = n_input_samples + 1
except utils.errors.LoadImageError as e:
print e
# perform inference
if layers != 'none':
raise InferenceError("Layer visualization is not supported for multiple inference")
outputs = model.train_task().infer_many(
input_data,
snapshot_epoch=epoch,
gpu=gpu,
resize=True)
return outputs["softmax"]
def nv_digits_infer_one_round(threshold, softmax1, softmax2):
    # softmax1 / softmax2: class probabilities for the positive (disease)
    # and negative (normal) test sets, respectively
    predicts1 = []
    for idx, probs in enumerate(softmax1):
        # predictions.append(probs.argmax())
        predicts1.append(probs[0] > threshold)
    predicts2 = []
    for idx, probs in enumerate(softmax2):
        predicts2.append(probs[0] > threshold)
    P = len(predicts1)
    N = len(predicts2)
    tp = predicts1.count(True)
    fn = predicts1.count(False)
    tn = predicts2.count(False)
    fp = predicts2.count(True)
    precision = 1.0 * tp / (tp + fp)  # PPV
    recall = 1.0 * tp / (tp + fn)
    f1 = 2.0 * precision * recall / (precision + recall)
    accuracy1 = 1.0 * tp / P  # per-class accuracy on the positive set
    accuracy2 = 1.0 * tn / N  # per-class accuracy on the negative set
    accuracy = (tp + tn) * 1.0 / (P + N)
    sensitivity = 1.0 * tp / (tp + fn)   # TPR, recall
    specificity = 1.0 * tn / (fp + tn)   # TNR
    fpr = 1.0 * fp / (fp + tn)           # = fp/N = 1 - specificity
    tpr = sensitivity
    print P, N, threshold, accuracy1, accuracy2, accuracy, tp, tn, fp, fn, precision, recall, f1, sensitivity, specificity, fpr
    return tpr, fpr
def nv_digits_infer_test_model(model):
    print ('####### ' + model + ' #########')
    labels = []
    with open(LABEL_LIST) as f:
        for line in f:
            if '0' in line:
                labels.append(True)
            elif '1' in line:
                labels.append(False)
            else:
                print "Unrecognized label!"
    DIGITS_JOB_ID = DICT[model]
    softmax = nv_digits_infer(FILE_LIST, "/home/zys/data/C3R/test/tmp/", JOBS_DIR, DIGITS_JOB_ID, -1, 1, 'none', 0)
    lines = []
    probs = []
    for idx, p in enumerate(softmax):
        probs.append(p[0])
        lines.append(str(p[0]) + " " + str(p[1]) + " " + str(p[2]))
    with open(model + '_softmax_probs.txt', 'w') as the_file:
        for line in lines:
            the_file.write(line + '\n')
    nv_digits_infer_test_model_analyze_result(probs, labels, model)
def nv_digits_infer_test_model2(fname1, fname2, model):
    probs = []
    with open(fname1) as f:
        for line in f:
            fields = line.split()
            if len(fields) == 8:
                ind = fields.index("Disease")
                v = float(fields[ind + 1].strip('%')) / 100
                probs.append(v)
    labels = []
    with open(fname2) as f:
        for line in f:
            if '0' in line:
                labels.append(True)
            elif '1' in line:
                labels.append(False)
            else:
                print "Unrecognized label!"
    nv_digits_infer_test_model_analyze_result(probs, labels, model)
def nv_digits_infer_test_model_analyze_result(probs, labels, model):
    # fprs = []
    # tprs = []
    # for threshold in range(1, 1000):
    #     tpr, fpr = nv_digits_infer_one_round(threshold * 0.001, softmax1, softmax2)
    #     fprs.append(fpr)
    #     tprs.append(tpr)
    y_actu = labels
    y_pred = probs
    #### CONFUSION MATRIX ####
    # cm = ConfusionMatrix(y_actu, y_pred)
    # print cm
    # cm.print_stats()
    #### ROC Curve ####
    fpr, tpr, thresholds = roc_curve(y_actu, y_pred, pos_label=True)
    # The Youden index J (Youden, 1950) is defined as
    #     J = max{ sensitivity_c + specificity_c - 1 },
    # where c ranges over all possible criterion values. Graphically, J is the
    # maximum vertical distance between the ROC curve and the diagonal line.
    # The criterion value corresponding to J is the optimal cutoff only when
    # disease prevalence is 50%, sensitivity and specificity are weighted
    # equally, and the costs of the various decisions are ignored.
    youden = tpr - fpr  # = tpr + (1 - fpr) - 1 = sensitivity + specificity - 1
    best_ind = youden.argmax()
    best_threshold = thresholds[best_ind]
    orig_stdout = sys.stdout
    f = open(model + '.txt', 'w')
    sys.stdout = f
    print 'ROC curve optimal cutoff ', model, ' = ', best_threshold
    # apply the cutoff to obtain hard predictions
    y_pred_labels = pandas.Series(y_pred).map(lambda x: True if x > best_threshold else False)
    cm = ConfusionMatrix(y_actu, y_pred_labels)
    # print cm
    cm.print_stats()
    # collect the FP and FN samples
    cwd = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
    directoryFP = cwd + '/' + model + '_FP'
    if os.path.exists(directoryFP):
        shutil.rmtree(directoryFP)
    os.mkdir(directoryFP)
    directoryFN = cwd + '/' + model + '_FN'
    if os.path.exists(directoryFN):
        shutil.rmtree(directoryFN)
    os.mkdir(directoryFN)
    i = 0
    with open(FILE_LIST) as flist:  # renamed from f to avoid shadowing the log file handle
        for line in flist:
            line = line.strip()
            pred_label = y_pred_labels[i]
            actu_label = y_actu[i]
            basename = os.path.basename(line)
            new_file_name = "{0:.3f}".format(y_pred[i]) + "_" + basename
            if actu_label == True and pred_label == False:
                shutil.copyfile(line, directoryFN + "/" + new_file_name)
            if actu_label == False and pred_label == True:
                shutil.copyfile(line, directoryFP + "/" + new_file_name)
            i += 1
    roc_auc = auc(fpr, tpr)
    print 'ROC AUC = ', roc_auc
    # use the precision-recall curve for skewed / unbalanced data
    precision, recall, pr_thresholds = precision_recall_curve(y_actu, y_pred, pos_label=True)
    f1 = 2.0 * precision * recall / (precision + recall)
    pr_best_ind = f1.argmax()
    pr_best_threshold = pr_thresholds[pr_best_ind]
    print 'precision_recall_curve optimal cutoff ', model, ' = ', pr_best_threshold
    sys.stdout = orig_stdout
    f.close()
    plt.figure()
    plt.plot(fpr, tpr, lw=1, label='ROC (area = %0.3f)' % (roc_auc))
    plt.plot([0, 1], [0, 1], '--', color=(0.6, 0.6, 0.6), label='Luck')
    plt.plot(fpr[best_ind], tpr[best_ind], 'ro', label='optimal cutoff')
    plt.xlim([-0.05, 1.05])
    plt.ylim([-0.05, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC Curve (' + model + ')')
    plt.legend(loc="lower right")
    plt.savefig(model + "_roc_curve.png")
    # plt.show()
    plt.close()
    # pr_auc = auc(precision, recall)
    plt.figure()
    plt.plot(recall, precision, lw=1, label='P-R Curve')
    # plt.plot(recall[pr_best_ind], precision[pr_best_ind], 'ro')
    plt.xlim([-0.05, 1.05])
    plt.ylim([-0.05, 1.05])
    plt.ylabel('Precision')
    plt.xlabel('Recall')
    plt.title('Precision-Recall Curve (' + model + ')')
    plt.legend(loc="lower right")
    plt.savefig(model + "_pr_curve.png")
    # plt.show()
    plt.close()
def nv_digits_infer_test_all_models():
    i = 0
    for key in DICT:
        plt.figure(i)
        i += 1
        nv_digits_infer_test_model(key)

# nv_digits_infer_test_all_models()
# Special treatment for GoogLeNet_transfer_learning: it requires nv-caffe-0.15.
# nv_digits_infer_test_model2("/home/zys/Desktop/scripts/GoogLeNet_ResultsFromDigits.txt", LABEL_LIST, "GoogLeNet_Transfer_Learning")
# nv_digits_infer_test_model2("AlexNet_T1.txt", "AlexNet_T2.txt", "AlexNet")
# nv_digits_infer_test_model(DICT.keys()[int(sys.argv[1])])
# nv_digits_infer_test_model2("/home/zys/Desktop/scripts/GoogLeNet_ResultsFromDigits.txt", "/home/zys/Desktop/test2/data/LabelList.txt", "GoogLeNet_Transfer_Learning")
digits_predict_folder.sh . . . . . . 1 KB . . . . . . 5/5/2022 12:04:28 PM
# This is a Linux shell script that calls digits_predict_folder.py to batch-process multiple DNN models.
# sudo python -c 'import digits_predict_folder; digits_predict_folder.nv_digits_infer_test_model("AlexNet_from_scratch")'
# sudo python -c 'import digits_predict_folder; digits_predict_folder.nv_digits_infer_test_model("AlexNet_transfer_learning")'
sudo python -c 'import digits_predict_folder; keys=digits_predict_folder.GetDictKeys(); digits_predict_folder.nv_digits_infer_test_model(keys[0])'
sudo python -c 'import digits_predict_folder; keys=digits_predict_folder.GetDictKeys(); digits_predict_folder.nv_digits_infer_test_model(keys[1])'
sudo python -c 'import digits_predict_folder; keys=digits_predict_folder.GetDictKeys(); digits_predict_folder.nv_digits_infer_test_model(keys[2])'
sudo python -c 'import digits_predict_folder; keys=digits_predict_folder.GetDictKeys(); digits_predict_folder.nv_digits_infer_test_model(keys[4])'
sudo python -c 'import digits_predict_folder; keys=digits_predict_folder.GetDictKeys(); digits_predict_folder.nv_digits_infer_test_model(keys[5])'
# Special treatment for GoogLeNet_transfer_learning: it requires nv-caffe-0.15.
python -c 'import digits_predict_folder; digits_predict_folder.nv_digits_infer_test_model2("/home/zys/Desktop/scripts/GoogLeNet_ResultsFromDigits.txt", "/home/zys/Desktop/test2/data/LabelList.txt", "GoogLeNet_transfer_learning")'
sudo chown $USER -R .
digits_predict_folder_with_human_experts.py . . . . . . 16 KB . . . . . . 5/5/2022 12:04:28 PM
# Analyze DNN performance with human experts
# Copyright (c) 2017, zhangys@zjgsu.edu.cn
# requires NVidia Digits to be installed
DICT = {}
DICT["DNN_vs_Human"] = "20170719-171742-5c31"
DICT["VGG-16_transfer_learning"] = "20170719-171742-5c31"
DICT["VGG-16_from_scratch"] = '20170722-143332-f077'
DICT["GoogLeNet_from_scratch"] = '20170722-113606-c59c'
DICT["GoogLeNet_transfer_learning"] = '20170722-190454-1af1' # '20170719-185942-bf65'
DICT["AlexNet_transfer_learning"] = '20170803-121318-b153'#'20170802-164043-96f9'# '20170722-205930-3ac4' # '20170720-094939-b665'
DICT["AlexNet_from_scratch"]= '20170802-163356-7f3d'#'20170802-162531-3531' #'20170722-152531-da8d'
def GetDictKeys():
return DICT.keys()
FILE_LIST="/home/zys/Desktop/test2/data/FileList.txt"
LABEL_LIST="/home/zys/Desktop/test2/data/LabelList.txt"
JOBS_DIR = "/home/zys/DIGITS-master/digits/jobs" # "/var/lib/digits/jobs/"
import os
import sys
caffe_root = os.environ['CAFFE_ROOT']  # e.g. '/home/zys/nv-caffe-0.15'; when run with sudo, the environment may differ from the current terminal's
sys.path.insert(0, os.path.join(caffe_root, 'python'))
import numpy
import pandas
from sklearn.metrics import roc_curve, auc, precision_recall_curve
from pandas_ml import ConfusionMatrix
import matplotlib.pyplot as plt
import shutil
import inspect
# Add path for DIGITS package
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
import digits.config
from digits.inference.errors import InferenceError
from digits.job import Job
from digits import utils
# To solve error "Check failed: error == cudaSuccess (10 vs. 0) invalid device ordinal"
import caffe
print (caffe.__file__)
caffe.set_device(0)
"""
Perform inference on a list of images using the specified model
"""
def nv_digits_infer(input_list,
output_dir,
jobs_dir,
model_id,
epoch,
batch_size,
layers,
gpu):
"""
Perform inference on a list of images using the specified model
"""
# job directory defaults to that defined in DIGITS config
if jobs_dir == 'none':
jobs_dir = digits.config.config_value('jobs_dir')
# load model job
model_dir = os.path.join(jobs_dir, model_id)
assert os.path.isdir(model_dir), "Model dir %s does not exist" % model_dir
model = Job.load(model_dir)
# load dataset job
dataset_dir = os.path.join(jobs_dir, model.dataset_id)
assert os.path.isdir(dataset_dir), "Dataset dir %s does not exist" % dataset_dir
dataset = Job.load(dataset_dir)
for task in model.tasks:
task.dataset = dataset
# retrieve snapshot file
task = model.train_task()
snapshot_filename = None
epoch = float(epoch)
if epoch == -1 and len(task.snapshots):
# use last epoch
epoch = task.snapshots[-1][1]
snapshot_filename = task.snapshots[-1][0]
else:
for f, e in task.snapshots:
if e == epoch:
snapshot_filename = f
break
if not snapshot_filename:
raise InferenceError("Unable to find snapshot for epoch=%s" % repr(epoch))
# retrieve image dimensions and resize mode
image_dims = dataset.get_feature_dims()
height = image_dims[0]
width = image_dims[1]
channels = image_dims[2]
resize_mode = dataset.resize_mode if hasattr(dataset, 'resize_mode') else 'squash'
n_input_samples = 0 # number of samples we were able to load
input_ids = [] # indices of samples within file list
input_data = [] # sample data
# load paths from file
paths = None
with open(input_list) as infile:
paths = infile.readlines()
# load and resize images
for idx, path in enumerate(paths):
path = path.strip()
try:
image = utils.image.load_image(path.strip())
image = utils.image.resize_image(
image,
height,
width,
channels=channels,
resize_mode=resize_mode)
input_ids.append(idx)
input_data.append(image)
n_input_samples = n_input_samples + 1
except utils.errors.LoadImageError as e:
print e
# perform inference
if layers != 'none':
raise InferenceError("Layer visualization is not supported for multiple inference")
outputs = model.train_task().infer_many(
input_data,
snapshot_epoch=epoch,
gpu=gpu,
resize=True)
return outputs["softmax"]
def nv_digits_infer_test_model(model, human_results_dir):
    print ('####### ' + model + ' #########')
    labels = []
    with open(LABEL_LIST) as f:
        for line in f:
            if '0' in line:
                labels.append(True)
            elif '1' in line:
                labels.append(False)
            else:
                print "Unrecognized label!"
    DIGITS_JOB_ID = DICT[model]
    softmax = nv_digits_infer(FILE_LIST, "/home/zys/data/C3R/test/tmp/", JOBS_DIR, DIGITS_JOB_ID, -1, 1, 'none', 0)
    lines = []
    probs = []
    for idx, p in enumerate(softmax):
        probs.append(p[0])
        lines.append(str(p[0]) + " " + str(p[1]) + " " + str(p[2]))
    with open(model + '_softmax_probs.txt', 'w') as the_file:
        for line in lines:
            the_file.write(line + '\n')
    files = []
    with open(FILE_LIST) as f:
        for line in f:
            files.append(line.strip())
    dic = {}
    i = 0
    for f in files:
        dic[os.path.basename(f)] = labels[i]
        i += 1
    humanTitles = []
    humanTPR = []
    humanFPR = []
    humanPrecision = []
    humanRecall = []
    for fn in os.listdir(human_results_dir):
        ks = []
        ls = []
        print "\n\n******", fn, "******"
        humanTitles.append(fn)
        with open(os.path.join(human_results_dir, fn)) as f:
            for line in f:
                line = line.strip()
                k = line.split(',')[0]
                l = line.split(',')[1]
                if k not in ks:
                    ks.append(k)
                    if l == '1':
                        ls.append(True)
                    else:
                        ls.append(False)
        rs = []
        for k in ks:
            rs.append(dic[k])
        cm = ConfusionMatrix(rs, ls)
        # print cm
        cm.print_stats()
        s = cm.stats()
        humanTPR.append(s['TPR'])
        humanFPR.append(s['FPR'])  # = 1 - TNR
        humanPrecision.append(s['PPV'])
        humanRecall.append(s['TPR'])
    nv_digits_infer_test_model_analyze_result(probs, labels, model, humanTPR, humanFPR, humanPrecision, humanRecall)
def nv_digits_infer_test_model3(fname1, fname2, model, human_results_dir):
    probs = []
    with open(fname1) as f:
        for line in f:
            fields = line.split()
            if len(fields) == 8:
                ind = fields.index("Disease")
                v = float(fields[ind + 1].strip('%')) / 100
                probs.append(v)
    labels = []
    with open(fname2) as f:
        for line in f:
            if '0' in line:
                labels.append(True)
            elif '1' in line:
                labels.append(False)
            else:
                print "Unrecognized label!"
    files = []
    with open(FILE_LIST) as f:
        for line in f:
            files.append(line.strip())
    dic = {}
    i = 0
    for f in files:
        dic[os.path.basename(f)] = labels[i]
        i += 1
    orig_stdout = sys.stdout
    log = open(model + '_humans.txt', 'w')
    sys.stdout = log
    humanTitles = []
    humanTPR = []
    humanFPR = []
    humanPrecision = []
    humanRecall = []
    humanTNR = []
    humanF1 = []
    humanMCC = []
    humanInformedness = []
    humanPPV = []
    for fn in os.listdir(human_results_dir):
        ks = []
        ls = []
        rs = []
        log.write("\n\n******" + fn + "******\n")
        log.write("\n---- Misclassified Files (filename, truth, label) ----\n")
        humanTitles.append(fn)
        with open(os.path.join(human_results_dir, fn)) as f:
            for line in f:
                line = line.strip()
                k = line.split(',')[0]
                l = line.split(',')[1]
                if k not in ks:
                    ks.append(k)
                    b = (l == '1')
                    ls.append(b)
                    rs.append(dic[k])
                    if b != dic[k]:
                        log.write(k + '\t' + str(dic[k]) + '\t' + str(b) + '\n')
        cm = ConfusionMatrix(rs, ls)
        # print cm
        cm.print_stats()
        s = cm.stats()
        humanTPR.append(s['TPR'])
        humanFPR.append(s['FPR'])  # = 1 - TNR
        humanPrecision.append(s['PPV'])
        humanRecall.append(s['TPR'])
        humanTNR.append(s['TNR'])
        humanF1.append(s['F1_score'])
        humanMCC.append(s['MCC'])
        humanInformedness.append(s['informedness'])
        humanPPV.append(s['PPV'])
    log.write('\n\n ******** Summary *******\n')
    log.write("\nExpert\tTPR\tFPR\tPPV\tTNR\tF1_score\tMCC\tinformedness\n")
    i = 0
    for human in humanTitles:
        print human, '\t', humanTPR[i], '\t', humanFPR[i], '\t', humanPPV[i], '\t', humanTNR[i], '\t', humanF1[i], '\t', humanMCC[i], '\t', humanInformedness[i], '\n\n'
        i += 1
    sys.stdout = orig_stdout
    log.close()
    nv_digits_infer_test_model_analyze_result(probs, labels, model, humanTPR, humanFPR, humanPrecision, humanRecall)
def nv_digits_infer_test_model_analyze_result(probs, labels, model, humanTPR, humanFPR, humanPrecision, humanRecall):
    y_actu = labels
    y_pred = probs
    #### CONFUSION MATRIX ####
    # cm = ConfusionMatrix(y_actu, y_pred)
    # print cm
    # cm.print_stats()
    #### ROC Curve ####
    fpr, tpr, thresholds = roc_curve(y_actu, y_pred, pos_label=True)
    # Youden index J = max{ sensitivity_c + specificity_c - 1 };
    # see the comments in digits_predict_folder.py for details.
    youden = tpr - fpr
    best_ind = youden.argmax()
    best_threshold = thresholds[best_ind]
    orig_stdout = sys.stdout
    f = open(model + '.txt', 'w')
    sys.stdout = f
    # report confusion-matrix statistics at several P-R curve cutoffs;
    # 0.519 is the argmax(F1 score) point for VGG
    for thresh in [0.164, 0.174, 0.379, 0.519, 0.535]:
        print '\nP-R curve cutoff = ', thresh
        y_pred_labels = pandas.Series(y_pred).map(lambda x: True if x > thresh else False)
        cm = ConfusionMatrix(y_actu, y_pred_labels)
        cm.print_stats()
    print '\nROC curve optimal cutoff = ', best_threshold
    y_pred_labels = pandas.Series(y_pred).map(lambda x: True if x > best_threshold else False)
    cm = ConfusionMatrix(y_actu, y_pred_labels)
    cm.print_stats()
    # collect the FP and FN samples at the F1-optimal cutoff
    thresh = 0.519
    y_pred_labels = pandas.Series(y_pred).map(lambda x: True if x > thresh else False)
    cwd = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
    directoryFP = cwd + '/' + model + '_FP'
    if os.path.exists(directoryFP):
        shutil.rmtree(directoryFP)
    os.mkdir(directoryFP)
    directoryFN = cwd + '/' + model + '_FN'
    if os.path.exists(directoryFN):
        shutil.rmtree(directoryFN)
    os.mkdir(directoryFN)
    i = 0
    with open(FILE_LIST) as flist:  # renamed from f to avoid shadowing the log file handle
        for line in flist:
            line = line.strip()
            pred_label = y_pred_labels[i]
            actu_label = y_actu[i]
            basename = os.path.basename(line)
            new_file_name = "{0:.3f}".format(y_pred[i]) + "_" + basename
            if actu_label == True and pred_label == False:
                shutil.copyfile(line, directoryFN + "/" + new_file_name)
            if actu_label == False and pred_label == True:
                shutil.copyfile(line, directoryFP + "/" + new_file_name)
            i += 1
    roc_auc = auc(fpr, tpr)
    print 'ROC AUC = ', roc_auc
    # use the precision-recall curve for skewed / unbalanced data
    precision, recall, pr_thresholds = precision_recall_curve(y_actu, y_pred, pos_label=True)
    f1 = 2.0 * precision * recall / (precision + recall)
    pr_best_ind = f1.argmax()
    pr_best_threshold = pr_thresholds[pr_best_ind]
    print 'precision_recall_curve optimal cutoff ', model, ' = ', pr_best_threshold
    sys.stdout = orig_stdout
    f.close()
    plt.figure()
    plt.plot(fpr, tpr, lw=1, label='ROC (area = %0.3f)' % (roc_auc))
    plt.plot([0, 1], [0, 1], '--', color=(0.6, 0.6, 0.6), label='Luck')
    # plt.plot(fpr[best_ind], tpr[best_ind], 'ro', label='optimal cutoff')
    plt.plot(humanFPR, humanTPR, 'bo', label='human experts')
    plt.xlim([-0.05, 1.05])
    plt.ylim([-0.05, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC Curve (' + model + ')')
    plt.legend(loc="lower right")
    plt.savefig(model + "_roc_curve.png")
    # plt.show()
    plt.close()
    # pr_auc = auc(precision, recall)
    plt.figure()
    plt.plot(recall, precision, lw=1, label='P-R Curve')
    plt.plot(humanRecall, humanPrecision, 'bo', label='human experts')
    # plt.plot(recall[pr_best_ind], precision[pr_best_ind], 'ro')
    plt.xlim([-0.05, 1.05])
    plt.ylim([-0.05, 1.05])
    plt.ylabel('Precision')
    plt.xlabel('Recall')
    plt.title('Precision-Recall Curve (' + model + ')')
    plt.legend(loc="lower right")
    plt.savefig(model + "_pr_curve.png")
    # plt.show()
    plt.close()
    # dump the P-R curve points; precision and recall have one more entry
    # than pr_thresholds, so the rows are aligned with the thresholds
    with open(model + "_pr_curve.txt", "w") as myfile:  # closed automatically
        myfile.write('threshold\trecall\tprecision\n\n')
        i = 0
        for th in pr_thresholds:
            myfile.write(str(th) + '\t' + str(recall[i]) + '\t' + str(precision[i]) + '\n')
            i += 1
nv_digits_infer_test_model('DNN_vs_Human', "/home/zys/Desktop/scripts/human")  # batch inference
# Special treatment for GoogLeNet_transfer_learning: it requires nv-caffe-0.15, so use the provided probs file instead:
# nv_digits_infer_test_model3("/home/zys/Desktop/scripts/GoogLeNet_ResultsFromDigits.txt", "/home/zys/Desktop/test2/data/LabelList.txt", "GoogLeNet vs Human", "/home/zys/Desktop/scripts/human")
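For completeness, a minimal sketch for reading back the P-R curve dump written by the function above (tab-separated threshold/recall/precision columns after a header line and a blank line; the filename is assumed to match the listed DNN_pr_curve.txt):
rows = []
with open("DNN_pr_curve.txt") as f:
    next(f)  # skip the header line
    for line in f:
        parts = line.split()
        if len(parts) == 3:  # skip the blank separator line
            rows.append((float(parts[0]), float(parts[1]), float(parts[2])))
# rows[i] = (threshold, recall, precision)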