DenseNet121_Chexpert_CWBCE_E5_B32_C0_N12_D256_Masked_NoFinding - TobiasSchmidtDE/DeepL-MedicalImaging GitHub Wiki
Version: 1
Trained DenseNet121 architecture using the 'Chexpert_CWBCE_E5_B32_C0_N12_D256_Masked_NoFinding' benchmark. The benchmark was initialized for the chexpert_full dataset with a batch size of 32, shuffle set to True, and images rescaled to dimension (256, 256). The training was done for 5 epochs using the Adam optimizer and the weighted_binary_crossentropy loss. A total of 12 labels/pathologies were included in the training and encoded using the 'uzeroes' method. The training set included 142320 samples, the validation set 36162, and the test set 44932.
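The loss named above is a per-class weighted binary cross-entropy, where each pathology's positive and negative terms are scaled by the weights listed in the benchmark section further down. The following is only a minimal sketch of such a loss, assuming TensorFlow/Keras-style (batch, n_labels) tensors; the repository's actual weighted_binary_crossentropy implementation may differ in detail.

# Hedged sketch of a per-class weighted binary cross-entropy (not the repository's code).
# pos_weights / neg_weights are assumed to hold one value per label, as in the
# benchmark's positive_weights / negative_weights lists further down this page.
import tensorflow as tf

def weighted_binary_crossentropy(pos_weights, neg_weights, epsilon=1e-7):
    pos_w = tf.constant(pos_weights, dtype=tf.float32)
    neg_w = tf.constant(neg_weights, dtype=tf.float32)

    def loss(y_true, y_pred):
        y_true = tf.cast(y_true, tf.float32)
        y_pred = tf.clip_by_value(y_pred, epsilon, 1.0 - epsilon)
        # weight the positive and negative parts of the usual BCE separately per class
        bce = -(pos_w * y_true * tf.math.log(y_pred)
                + neg_w * (1.0 - y_true) * tf.math.log(1.0 - y_pred))
        return tf.reduce_mean(bce, axis=-1)

    return loss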
from pathlib import Path
from dotenv import load_dotenv, find_dotenv
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
import json
import os
import re
import pprint

basepath = Path(os.getcwd())
# make sure the notebook runs from the repository root before importing project modules
if basepath.name != "idp-radio-1":
    os.chdir(basepath.parent.parent)
    print(os.getcwd())
load_dotenv(find_dotenv())

from src.preprocessing.split.train_test_split import train_test_split
/srv/idp-radio-1
data = json.loads(os.environ['EXP_DATA'])
history = data['history']
for s in data["description"].split(".")[:-1]:
    print(s + ".\n")
Trained DenseNet121 architecture using the 'Chexpert_CWBCE_E5_B32_C0_N12_D256_Masked_NoFinding' benchmark.
The benchmark was initialized for the chexpert_full dataset with batch size of 32, shuffle set to True and images rescaled to dimension (256, 256).
The training was done for 5 epochs using the Adam optimizer and weighted_binary_crossentropy loss.
A total of 12 labels/pathologies were included in the training and encoded using the 'uzeroes' method.
The training set included 142320 samples, the validation set 36162, and the test set 44932.
# if there are any metrics that were renamed, add this new name here as ("default_name":"new_name")
metric_custom_names = {"auc": "AUC_ROC"}

metric_names = [re.sub(r"([a-z0-9])([A-Z])", r"\g<1> \g<2>", name) for name in data["benchmark"]["metrics"]]
metric_keys = [re.sub(r"([a-z0-9])([A-Z])", r"\g<1>_\g<2>", name).lower() for name in data["benchmark"]["metrics"]]

for default_name, custom_name in metric_custom_names.items():
    if default_name not in history.keys() and default_name in metric_keys:
        # replace default name with custom name
        metric_keys[metric_keys.index(default_name)] = custom_name
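For this run the metric names in data["benchmark"]["metrics"] are already lowercase snake_case, so the two substitutions above are effectively no-ops; they only matter for camelCase metric names. A quick illustration with a hypothetical name:

# Illustration only: 'BinaryAccuracy' is a hypothetical camelCase metric name.
name = "BinaryAccuracy"
re.sub(r"([a-z0-9])([A-Z])", r"\g<1> \g<2>", name)          # -> 'Binary Accuracy'
re.sub(r"([a-z0-9])([A-Z])", r"\g<1>_\g<2>", name).lower()  # -> 'binary_accuracy'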
def print_or_plot_metric(metric_key, metric_name, figure_name):
    if len(history[metric_key]) == 1:
        print("Data for {m_name} only available for a single epoch. \nSkipping plot and printing data...".format(m_name=metric_name))
        print('Train {}: '.format(metric_name), history[metric_key])
        print('Validation {}: '.format(metric_name), history['val_'+metric_key])
        print()
    else:
        plot_epoch_metric(metric_key, metric_name, figure_name)
def plot_epoch_metric(metric_key, metric_name, figure_name):
    figure(num=None, figsize=(10, 6))
    plt.plot(history[metric_key])
    if 'val_'+metric_key in history.keys():
        plt.plot(history['val_'+metric_key])
    plt.title(figure_name)
    plt.ylabel(metric_name)
    plt.xlabel('Epoch')
    if 'val_'+metric_key in history.keys():
        plt.legend(['Train', 'Validation'], loc='upper left')
    plt.show()
for i, metric_key in enumerate(metric_keys):
    print_or_plot_metric(metric_key, metric_names[i], "Model "+metric_names[i])
print_or_plot_metric("loss", "Loss", "Model loss")
if "lr" in history.keys():
plot_epoch_metric("lr", "Learning Rate", "Learning Rate")
if 'classification_report' in data.keys() and data['classification_report']:
    print(data['classification_report'])
                            precision    recall  f1-score   support

Enlarged Cardiomediastinum       0.05      0.84      0.09      2214
              Cardiomegaly       0.11      0.02      0.03      5294
              Lung Opacity       0.47      0.75      0.58     21324
               Lung Lesion       0.04      0.83      0.08      1901
                     Edema       0.24      0.10      0.14     10461
             Consolidation       0.07      0.43      0.12      3063
                 Pneumonia       0.03      0.53      0.05      1225
               Atelectasis       0.15      0.93      0.26      6912
              Pneumothorax       0.09      0.99      0.16      3894
          Pleural Effusion       0.39      0.54      0.45     17656
             Pleural Other       0.02      0.86      0.03       747
                  Fracture       0.04      0.96      0.08      1863

                 micro avg       0.13      0.58      0.21     76554
                 macro avg       0.14      0.65      0.17     76554
              weighted avg       0.29      0.58      0.33     76554
               samples avg       0.12      0.49      0.18     76554
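A report in this format can be produced from thresholded multi-label predictions with scikit-learn's classification_report. The sketch below uses random dummy data and an assumed 0.5 decision threshold; it only illustrates the mechanics, not the repository's evaluation code.

# Hedged sketch: multi-label classification report from sigmoid outputs (dummy data).
import numpy as np
from sklearn.metrics import classification_report

label_columns = ['Enlarged Cardiomediastinum', 'Cardiomegaly', 'Lung Opacity',
                 'Lung Lesion', 'Edema', 'Consolidation', 'Pneumonia', 'Atelectasis',
                 'Pneumothorax', 'Pleural Effusion', 'Pleural Other', 'Fracture']

rng = np.random.default_rng(0)
y_true = rng.integers(0, 2, size=(100, len(label_columns)))   # stand-in ground truth
y_prob = rng.random((100, len(label_columns)))                # stand-in sigmoid outputs

y_pred = (y_prob >= 0.5).astype(int)                          # assumed 0.5 threshold
print(classification_report(y_true, y_pred, target_names=label_columns, zero_division=0))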
if 'test' in data.keys() and data['test']:
    for score_name, score in data["test"].items():
        print('Test {}: '.format(score_name), score)
Test auc: 0.617991030216217
Test precision: 0.1574753373861313
Test recall: 0.718407928943634
Test f2_score: 0.41953110694885254
Test binary_accuracy: 0.41429322957992554
Test accuracy_enlarged_cardiomediastinum: 0.1953618824481964
Test accuracy_cardiomegaly: 0.8748775720596313
Test accuracy_lung_opacity: 0.6179782748222351
Test accuracy_lung_lesion: 0.18841806054115295
Test accuracy_edema: 0.7437238693237305
Test accuracy_consolidation: 0.6109899282455444
Test accuracy_pneumonia: 0.455755352973938
Test accuracy_atelectasis: 0.21394552290439606
Test accuracy_pneumothorax: 0.10150894522666931
Test accuracy_pleural_effusion: 0.7284340858459473
Test accuracy_pleural_other: 0.15799430012702942
Test accuracy_fracture: 0.08256921917200089
Test auc_enlarged_cardiomediastinum: 0.5088314414024353
Test auc_cardiomegaly: 0.6645708680152893
Test auc_lung_opacity: 0.6906000971794128
Test auc_lung_lesion: 0.5080466270446777
Test auc_edema: 0.6834651231765747
Test auc_consolidation: 0.6840184330940247
Test auc_pneumonia: 0.5935426950454712
Test auc_atelectasis: 0.6020093560218811
Test auc_pneumothorax: 0.7039108276367188
Test auc_pleural_effusion: 0.818939745426178
Test auc_pleural_other: 0.5319847464561462
Test auc_fracture: 0.42597246170043945
Test precision_enlarged_cardiomediastinum: 0.0494849756360054
Test precision_cardiomegaly: 0.2939698398113251
Test precision_lung_opacity: 0.561536431312561
Test precision_lung_lesion: 0.04318980872631073
Test precision_edema: 0.375
Test precision_consolidation: 0.10915301740169525
Test precision_pneumonia: 0.03286007419228554
Test precision_atelectasis: 0.16142642498016357
Test precision_pneumothorax: 0.08778393268585205
Test precision_pleural_effusion: 0.6134359240531921
Test precision_pleural_other: 0.01801310107111931
Test precision_fracture: 0.04151620343327522
Test recall_enlarged_cardiomediastinum: 0.8419150710105896
Test recall_cardiomegaly: 0.04420098289847374
Test recall_lung_opacity: 0.8898892998695374
Test recall_lung_lesion: 0.8595476150512695
Test recall_edema: 0.15113277733325958
Test recall_consolidation: 0.6571988463401794
Test recall_pneumonia: 0.6669387817382812
Test recall_atelectasis: 0.9797453880310059
Test recall_pneumothorax: 0.9974319338798523
Test recall_pleural_effusion: 0.8352401256561279
Test recall_pleural_other: 0.9277108311653137
Test recall_fracture: 0.95652174949646
Test f2_score_enlarged_cardiomediastinum: 0.20032671093940735
Test f2_score_cardiomegaly: 0.05324959009885788
Test f2_score_lung_opacity: 0.7967150211334229
Test f2_score_lung_lesion: 0.17980940639972687
Test f2_score_edema: 0.17162397503852844
Test f2_score_consolidation: 0.327914297580719
Test f2_score_pneumonia: 0.13725095987319946
Test f2_score_atelectasis: 0.48650121688842773
Test f2_score_pneumothorax: 0.32463517785072327
Test f2_score_pleural_effusion: 0.7789127826690674
Test f2_score_pleural_other: 0.08357453346252441
Test f2_score_fracture: 0.17687344551086426
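The f2_score entries above are the F-beta measure with beta = 2, which weights recall higher than precision. A quick check with the overall test precision and recall reported above (the helper below is ours, not the repository's metric implementation):

# F-beta with beta=2; the small eps only guards against division by zero.
def f_beta(precision, recall, beta=2.0, eps=1e-12):
    return (1 + beta**2) * precision * recall / (beta**2 * precision + recall + eps)

print(f_beta(0.1574753373861313, 0.718407928943634))  # ~0.4195, matching the reported f2_score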
pp = pprint.PrettyPrinter(indent=4)
if "benchmark" in data.keys():
pp.pprint(data["benchmark"])
{ 'batch_size': 32,
'benchmark_name': 'Chexpert_CWBCE_E5_B32_C0_N12_D256_Masked_NoFinding',
'crop': False,
'dataset_folder': 'data/chexpert/full',
'dataset_name': 'chexpert_full',
'dim': [256, 256],
'drop_last': True,
'epochs': 5,
'label_columns': [ 'Enlarged Cardiomediastinum',
'Cardiomegaly',
'Lung Opacity',
'Lung Lesion',
'Edema',
'Consolidation',
'Pneumonia',
'Atelectasis',
'Pneumothorax',
'Pleural Effusion',
'Pleural Other',
'Fracture'],
'loss': 'weighted_binary_crossentropy',
'metrics': [ 'auc',
'precision',
'recall',
'f2_score',
'binary_accuracy',
'accuracy_enlarged_cardiomediastinum',
'accuracy_cardiomegaly',
'accuracy_lung_opacity',
'accuracy_lung_lesion',
'accuracy_edema',
'accuracy_consolidation',
'accuracy_pneumonia',
'accuracy_atelectasis',
'accuracy_pneumothorax',
'accuracy_pleural_effusion',
'accuracy_pleural_other',
'accuracy_fracture',
'auc_enlarged_cardiomediastinum',
'auc_cardiomegaly',
'auc_lung_opacity',
'auc_lung_lesion',
'auc_edema',
'auc_consolidation',
'auc_pneumonia',
'auc_atelectasis',
'auc_pneumothorax',
'auc_pleural_effusion',
'auc_pleural_other',
'auc_fracture',
'precision_enlarged_cardiomediastinum',
'precision_cardiomegaly',
'precision_lung_opacity',
'precision_lung_lesion',
'precision_edema',
'precision_consolidation',
'precision_pneumonia',
'precision_atelectasis',
'precision_pneumothorax',
'precision_pleural_effusion',
'precision_pleural_other',
'precision_fracture',
'recall_enlarged_cardiomediastinum',
'recall_cardiomegaly',
'recall_lung_opacity',
'recall_lung_lesion',
'recall_edema',
'recall_consolidation',
'recall_pneumonia',
'recall_atelectasis',
'recall_pneumothorax',
'recall_pleural_effusion',
'recall_pleural_other',
'recall_fracture',
'f2_score_enlarged_cardiomediastinum',
'f2_score_cardiomegaly',
'f2_score_lung_opacity',
'f2_score_lung_lesion',
'f2_score_edema',
'f2_score_consolidation',
'f2_score_pneumonia',
'f2_score_atelectasis',
'f2_score_pneumothorax',
'f2_score_pleural_effusion',
'f2_score_pleural_other',
'f2_score_fracture'],
'models_dir': 'models',
'n_channels': 3,
'nan_replacement': -1,
'negative_weights': [ 1.222679853439331,
1.705918550491333,
4.265995025634766,
1.372056245803833,
2.003997325897217,
1.2122849225997925,
1.1395741701126099,
1.5882190465927124,
1.2877885103225708,
2.468735694885254,
1.1382708549499512,
1.3659368753433228],
'optimizer': 'Adam',
'path_column': 'Path',
'path_column_prefix': '',
'positive_weights': [ 5.49075174331665,
2.4165940284729004,
1.3061853647232056,
3.6877658367156982,
1.996018648147583,
5.710649490356445,
8.164649963378906,
2.7000467777252197,
4.474773406982422,
1.6808576583862305,
8.232182502746582,
3.7327115535736084],
'shuffle': True,
'split_seed': 6122156,
'test_num_samples': 44932,
'train_num_samples': 142320,
'u_enc': 'uzeroes',
'unc_value': -1,
'use_class_weights': False,
'valid_num_samples': 36162}
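The positive_weights and negative_weights above are consistent with the reciprocal of each label's class frequency in the training data (for every label, 1/w_pos + 1/w_neg = 1; e.g. 1/5.49 + 1/1.22 ≈ 1 for Enlarged Cardiomediastinum). A hedged sketch of that derivation, not taken from the repository:

# Hedged sketch: per-class weights from label frequencies. `labels` is assumed to be
# an (n_samples, n_labels) 0/1 array; the repository's computation may differ.
import numpy as np

def class_weights(labels):
    n = labels.shape[0]
    n_pos = labels.sum(axis=0)
    pos_w = n / n_pos          # rarer pathologies get larger positive weights
    neg_w = n / (n - n_pos)
    return pos_w, neg_w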
if 'benchmark' in data.keys() and 'split_seed' in data['benchmark']:
    benchmark = data['benchmark']
    dataset_path = Path(benchmark['dataset_folder'])
    train_labels = benchmark['train_labels'] if 'train_labels' in benchmark.keys() else 'train.csv'
    split_test_size = benchmark['split_test_size'] if 'split_test_size' in benchmark.keys() else 0.2
    split_valid_size = benchmark['split_valid_size'] if 'split_valid_size' in benchmark.keys() else 0.2
    split_group = benchmark['split_group'] if 'split_group' in benchmark.keys() else 'patient_id'
    split_seed = benchmark['split_seed']

    all_labels = pd.read_csv(dataset_path / train_labels)
    train_labels, test_labels = train_test_split(all_labels, test_size=split_test_size, group=split_group, seed=split_seed)
    train_labels, validation_labels = train_test_split(train_labels, test_size=split_valid_size, group=split_group, seed=split_seed)
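Because split_group defaults to 'patient_id', all studies from the same patient end up in the same subset. Purely to illustrate that idea (the repository uses its own train_test_split, imported above), a grouped split can also be expressed with scikit-learn's GroupShuffleSplit, assuming the DataFrame has a patient_id column:

# Hedged sketch of a patient-grouped split; illustration only, not the project's splitter.
from sklearn.model_selection import GroupShuffleSplit

def grouped_split(df, test_size=0.2, group='patient_id', seed=0):
    splitter = GroupShuffleSplit(n_splits=1, test_size=test_size, random_state=seed)
    train_idx, test_idx = next(splitter.split(df, groups=df[group]))
    return df.iloc[train_idx], df.iloc[test_idx]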
from src.datasets.u_encoding import uencode

def get_distribution(labels):
    if 'nan_replacement' in benchmark.keys():
        labels = labels.fillna(benchmark['nan_replacement'])
    data = labels.to_numpy()
    data = uencode(benchmark['u_enc'], data, unc_value=benchmark['unc_value'])
    data = pd.DataFrame(data, columns=labels.columns)

    labels = data[benchmark['label_columns']]
    d = {'Pathology': [], 'Positive': [], 'Positive %': [], 'Negative': [], 'Negative %': [],}
    for label in labels.columns:
        values = labels.groupby(label)
        d['Pathology'].append(label)

        positive = values.size()[1.0] if 1.0 in values.size() else 0
        positive_percent = positive / labels.shape[0] * 100
        d['Positive'].append(positive)
        d['Positive %'].append(round(positive_percent))

        negative = values.size()[-0.0] if -0.0 in values.size() else 0
        negative_percent = negative / labels.shape[0] * 100
        d['Negative'].append(negative)
        d['Negative %'].append(round(negative_percent))

    df = pd.DataFrame(d)
    df = df.set_index('Pathology')
    return df
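The 'uzeroes' policy applied by uencode above presumably maps the uncertainty value (-1, see unc_value and nan_replacement in the benchmark) to 0, i.e. uncertain findings count as negative, in line with CheXpert's "U-Zeros" approach. A minimal sketch of that behaviour under the same -1 convention; the repository's uencode may handle additional cases:

# Hedged sketch of the 'uzeroes' uncertainty encoding: uncertain labels (-1) become 0.
import numpy as np

def uzeroes(data, unc_value=-1):
    data = np.asarray(data, dtype=float).copy()
    data[data == unc_value] = 0.0
    return data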
if 'benchmark' in data.keys() and 'split_seed' in data['benchmark']:
    train = get_distribution(train_labels)
    val = get_distribution(validation_labels)
    test = get_distribution(test_labels)

    positives = train[['Positive %']].merge(val[['Positive %']], left_index=True, right_index=True).merge(test[['Positive %']], left_index=True, right_index=True).rename(columns={"Positive %_x": "Positives Train", "Positive %_y": "Positives Validation", "Positive %": "Positives Test", })
    positives.copy().plot(kind='bar', figsize=(10,7), title="Positive Labels Distribution")

    negatives = train[['Negative %']].merge(val[['Negative %']], left_index=True, right_index=True).merge(test[['Negative %']], left_index=True, right_index=True).rename(columns={"Negative %_x": "Negative Train", "Negative %_y": "Negative Validation", "Negative %": "Negative Test", })
    negatives.copy().plot(kind='bar', figsize=(10,7), title="Negative Labels Distribution")

    train[['Positive %', 'Negative %']].copy().plot(kind='bar', figsize=(10,7), title="Training set")
    val[['Positive %', 'Negative %']].copy().plot(kind='bar', figsize=(10,7), title="Validation set")
    test[['Positive %', 'Negative %']].copy().plot(kind='bar', figsize=(10,7), title="Test set")