DenseNet121_Chexpert_BCE_E5_B32_C0_N5_D256_DS9505_LR4 - TobiasSchmidtDE/DeepL-MedicalImaging GitHub Wiki
Version: 1
Trained DenseNet121 architecture using the 'Chexpert_BCE_E5_B32_C0_N5_D256_DS9505_LR4' benchmark. The benchmark was initialized for the chexpert_full dataset with a batch size of 32, shuffle set to True and images rescaled to dimension (256, 256). The training was done for 5 epochs using the Adam optimizer and binary_crossentropy loss. A total of 5 labels/pathologies were included in the training and encoded using the 'uzeroes' method. The training set included 211818 samples, the validation set 11596, and the test set 234.
from pathlib import Path
from dotenv import load_dotenv, find_dotenv
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
import json
import os
import re
import pprint
# Remember where the notebook was started from.
basepath = Path.cwd()

# When the working directory is not already named "idp-radio-1", move two
# directory levels up so the relative imports and data paths below resolve.
already_in_repo_root = basepath.name == "idp-radio-1"
if not already_in_repo_root:
    os.chdir(basepath.parent.parent)
print(os.getcwd())

# Load environment variables from the nearest .env file.
load_dotenv(find_dotenv())
from src.preprocessing.split.train_test_split import train_test_split
/srv/idp-radio-1
# The experiment record is handed to the notebook as a JSON string in the
# EXP_DATA environment variable.
data = json.loads(os.environ['EXP_DATA'])
history = data['history']

# Print the description one sentence at a time; the piece after the final
# "." is dropped.
description_parts = data["description"].split(".")
for sentence in description_parts[:-1]:
    print(sentence + ".\n")
Trained DenseNet121 architecture using the 'Chexpert_BCE_E5_B32_C0_N5_D256_DS9505_LR4' benchmark.
The benchmark was initialized for the chexpert_full dataset with batch size of 32, shuffle set to True and images rescaled to dimension (256, 256).
The training was done for 5 epochs using the Adam optimizer and binary_crossentropy loss.
A total of 5 labels/pathologies were included in the training and encoded using the 'uzeroes' method.
The traing set included 211818 number of sample, the validation set 11596, and the test set 234.
# if there are any metrics that were renamed, add this new name here as ("default_name":"new_name")
metric_custom_names = {"auc": "AUC_ROC"}

# Boundary between a lower-case letter/digit and a following upper-case letter.
# Raw strings are required here: "\g" in a normal string literal is an invalid
# escape sequence and triggers a SyntaxWarning on recent Python versions.
_camel_case_boundary = re.compile(r"([a-z0-9])([A-Z])")
_benchmark_metrics = data["benchmark"]["metrics"]

# Display names: camel-case boundaries are separated by a space.
metric_names = [
    _camel_case_boundary.sub(r"\g<1> \g<2>", name) for name in _benchmark_metrics
]
# History keys: camel-case boundaries are separated by "_" and lower-cased.
metric_keys = [
    _camel_case_boundary.sub(r"\g<1>_\g<2>", name).lower()
    for name in _benchmark_metrics
]

for default_name, custom_name in metric_custom_names.items():
    # Only substitute when the history does not contain the default key but
    # the benchmark lists it.
    if default_name not in history and default_name in metric_keys:
        # replace default name with custom name
        metric_keys[metric_keys.index(default_name)] = custom_name
def print_or_plot_metric(metric_key, metric_name, figure_name):
    """Print a metric recorded for a single epoch, otherwise plot it per epoch.

    Reads the series from the notebook-level ``history`` dict.

    Args:
        metric_key: Key of the training series in ``history``; the validation
            series is looked up under ``'val_' + metric_key``.
        metric_name: Human-readable metric name used in the printed text and
            passed on as the y-axis label.
        figure_name: Title passed on to ``plot_epoch_metric``.

    Raises:
        KeyError: If ``metric_key`` is not present in ``history``.
    """
    if len(history[metric_key]) != 1:
        plot_epoch_metric(metric_key, metric_name, figure_name)
        return

    print("Data for {m_name} only available for a single epoch. \nSkipping plot and printing data...".format(m_name=metric_name))
    print('Train {}: '.format(metric_name), history[metric_key])
    # A validation series is optional; skip it instead of raising KeyError,
    # consistent with the guard used in plot_epoch_metric.
    validation_key = 'val_' + metric_key
    if validation_key in history:
        print('Validation {}: '.format(metric_name), history[validation_key])
    print()
def plot_epoch_metric(metric_key, metric_name, figure_name):
    """Draw a per-epoch line plot for one entry of the notebook-level ``history``.

    Args:
        metric_key: Key of the training series in ``history``. If
            ``'val_' + metric_key`` is also present it is drawn as a second
            line and a legend is added.
        metric_name: Label for the y-axis.
        figure_name: Title of the figure.
    """
    validation_key = 'val_' + metric_key
    has_validation = validation_key in history

    figure(num=None, figsize=(10, 6))
    plt.plot(history[metric_key])
    if has_validation:
        plt.plot(history[validation_key])

    plt.title(figure_name)
    plt.ylabel(metric_name)
    plt.xlabel('Epoch')
    # The legend is only meaningful when both lines were drawn.
    if has_validation:
        plt.legend(['Train', 'Validation'], loc='upper left')
    plt.show()
# One figure (or printout) per benchmark metric; metric_keys and metric_names
# are parallel lists derived from the same benchmark metric list.
for metric_key, metric_name in zip(metric_keys, metric_names):
    print_or_plot_metric(metric_key, metric_name, "Model " + metric_name)

# The loss is always reported in addition to the configured metrics.
print_or_plot_metric("loss", "Loss", "Model loss")

# The learning rate is only plotted when the history recorded it.
if "lr" in history:
    plot_epoch_metric("lr", "Learning Rate", "Learning Rate")
# Show the stored classification report when the experiment record has a
# non-empty one.
classification_report_text = data.get('classification_report')
if classification_report_text:
    print(classification_report_text)
precision recall f1-score support
Cardiomegaly 0.44 0.12 0.19 68
Edema 0.20 0.29 0.24 45
Consolidation 0.00 0.00 0.00 33
Atelectasis 0.29 0.03 0.05 80
Pleural Effusion 0.36 0.31 0.34 67
micro avg 0.30 0.15 0.20 293
macro avg 0.26 0.15 0.16 293
weighted avg 0.29 0.15 0.17 293
samples avg 0.13 0.08 0.09 293
# List every stored test score when the experiment record has a non-empty
# 'test' entry.
test_scores = data.get('test')
if test_scores:
    for score_name, score in test_scores.items():
        print('Test {}: '.format(score_name), score)
Test loss: 0.5014373064041138
Test auc: 0.8351618051528931
Test precision: 0.662162184715271
Test recall: 0.3344709873199463
Test f2_score: 0.37121209502220154
Test binary_accuracy: 0.7905986905097961
Test accuracy_cardiomegaly: 0.7606837749481201
Test accuracy_edema: 0.8034188151359558
Test accuracy_consolidation: 0.8589743375778198
Test accuracy_atelectasis: 0.688034176826477
Test accuracy_pleural_effusion: 0.8418803215026855
Test auc_cardiomegaly: 0.7818036675453186
Test auc_edema: 0.877366304397583
Test auc_consolidation: 0.8826322555541992
Test auc_atelectasis: 0.7445617318153381
Test auc_pleural_effusion: 0.8894450068473816
Test precision_cardiomegaly: 0.8333333134651184
Test precision_edema: 0.4923076927661896
Test precision_consolidation: 0.0
Test precision_atelectasis: 1.0
Test precision_pleural_effusion: 0.7586206793785095
Test recall_cardiomegaly: 0.22058823704719543
Test recall_edema: 0.7111111283302307
Test recall_consolidation: 0.0
Test recall_atelectasis: 0.08749999850988388
Test recall_pleural_effusion: 0.6567164063453674
Test f2_score_cardiomegaly: 0.2586207091808319
Test f2_score_edema: 0.6530612111091614
Test f2_score_consolidation: 0.0
Test f2_score_atelectasis: 0.1070336326956749
Test f2_score_pleural_effusion: 0.6748466491699219
# Pretty-print the complete benchmark configuration, if the record has one.
pp = pprint.PrettyPrinter(indent=4)
benchmark_present = "benchmark" in data
if benchmark_present:
    pp.pprint(data["benchmark"])
{ 'augmentation': None,
'batch_size': 32,
'benchmark_name': 'Chexpert_BCE_E5_B32_C0_N5_D256_DS9505_LR4',
'crop': False,
'dataset_folder': 'data/chexpert/full',
'dataset_name': 'chexpert_full',
'dim': [256, 256],
'drop_last': False,
'epochs': 5,
'label_columns': [ 'Cardiomegaly',
'Edema',
'Consolidation',
'Atelectasis',
'Pleural Effusion'],
'learning_rate': 5e-05,
'loss': 'binary_crossentropy',
'metrics': [ 'auc',
'precision',
'recall',
'f2_score',
'binary_accuracy',
'accuracy_cardiomegaly',
'accuracy_edema',
'accuracy_consolidation',
'accuracy_atelectasis',
'accuracy_pleural_effusion',
'auc_cardiomegaly',
'auc_edema',
'auc_consolidation',
'auc_atelectasis',
'auc_pleural_effusion',
'precision_cardiomegaly',
'precision_edema',
'precision_consolidation',
'precision_atelectasis',
'precision_pleural_effusion',
'recall_cardiomegaly',
'recall_edema',
'recall_consolidation',
'recall_atelectasis',
'recall_pleural_effusion',
'f2_score_cardiomegaly',
'f2_score_edema',
'f2_score_consolidation',
'f2_score_atelectasis',
'f2_score_pleural_effusion'],
'models_dir': 'models',
'n_channels': 3,
'nan_replacement': 0,
'negative_weights': [ 1.136679768562317,
1.3051981925964355,
1.0711511373519897,
1.1755650043487549,
1.6276289224624634],
'num_samples_test': 234,
'num_samples_train': 211818,
'num_samples_validation': 11596,
'optimizer': 'Adam',
'path_column': 'Path',
'path_column_prefix': '',
'positive_weights': [ 8.31637191772461,
4.276559829711914,
15.054584503173828,
6.695896625518799,
2.5932981967926025],
'shuffle': True,
'split_seed': 6122156,
'u_enc': 'uzeroes',
'unc_value': -1,
'use_class_weights': False}
# Rebuild the train/validation/test label frames from the benchmark settings.
# This is only attempted when the benchmark recorded its split seed.
if 'benchmark' in data and 'split_seed' in data['benchmark']:
    benchmark = data['benchmark']
    dataset_path = Path(benchmark['dataset_folder'])

    # Optional benchmark settings fall back to these defaults when absent.
    train_labels = benchmark.get('train_labels', 'train.csv')
    test_labels = benchmark.get('test_labels', None)
    split_test_size = benchmark.get('split_test_size', 0.1)
    split_valid_size = benchmark.get('split_valid_size', 0.1)
    split_group = benchmark.get('split_group', 'patient_id')
    split_seed = benchmark['split_seed']

    if test_labels is None:
        # read all labels from one file and split off the test part
        all_labels = pd.read_csv(dataset_path / train_labels)
        train_labels, test_labels = train_test_split(
            all_labels, test_size=split_test_size, group=split_group, seed=split_seed)
    else:
        # train/valid labels come from one file, test labels from another
        train_labels = pd.read_csv(dataset_path / train_labels)
        test_labels = pd.read_csv(dataset_path / test_labels)

    # In both cases the validation part is split off the remaining training
    # labels with the same grouping column and seed.
    train_labels, validation_labels = train_test_split(
        train_labels, test_size=split_valid_size, group=split_group, seed=split_seed)
from src.datasets.u_encoding import uencode
def get_distribution(labels):
    """Summarise positive/negative label counts per pathology.

    Uses the notebook-level ``benchmark`` dict: missing values are filled with
    ``benchmark['nan_replacement']`` (when configured), the values are passed
    through ``uencode`` with ``benchmark['u_enc']`` and
    ``benchmark['unc_value']``, and only ``benchmark['label_columns']`` are
    summarised.
    NOTE(review): the exact effect of ``uencode`` is defined in
    ``src.datasets.u_encoding`` and is not visible here.

    Args:
        labels: DataFrame of labels; must contain the benchmark's label columns.

    Returns:
        DataFrame indexed by 'Pathology' with the columns 'Positive',
        'Positive %', 'Negative' and 'Negative %'. Percentages are relative to
        the number of rows and rounded to whole numbers; they are 0 for an
        empty input instead of failing on a division by zero.
    """
    if 'nan_replacement' in benchmark:
        labels = labels.fillna(benchmark['nan_replacement'])

    encoded = uencode(benchmark['u_enc'], labels.to_numpy(), unc_value=benchmark['unc_value'])
    encoded = pd.DataFrame(encoded, columns=labels.columns)
    labels = encoded[benchmark['label_columns']]

    total = labels.shape[0]

    def _percent(count):
        # Guard against an empty label frame.
        return round(count / total * 100) if total else 0

    summary = {'Pathology': [], 'Positive': [], 'Positive %': [], 'Negative': [], 'Negative %': []}
    for label in labels.columns:
        # Count the occurrences of each value once per label.
        counts = labels.groupby(label).size()
        positive = counts[1.0] if 1.0 in counts else 0
        # 0.0 compares equal to the -0.0 literal used previously.
        negative = counts[0.0] if 0.0 in counts else 0

        summary['Pathology'].append(label)
        summary['Positive'].append(positive)
        summary['Positive %'].append(_percent(positive))
        summary['Negative'].append(negative)
        summary['Negative %'].append(_percent(negative))

    return pd.DataFrame(summary).set_index('Pathology')
# Plot the label distributions of the three splits; only possible when the
# split could be rebuilt from the benchmark's seed.
if 'benchmark' in data and 'split_seed' in data['benchmark']:
    train = get_distribution(train_labels)
    val = get_distribution(validation_labels)
    test = get_distribution(test_labels)

    # All merges below join on the 'Pathology' index.
    on_index = {"left_index": True, "right_index": True}

    # Positive percentages of the three splits side by side. The first merge
    # suffixes the clashing column with _x/_y; the test column keeps its name.
    positives = train[['Positive %']].merge(val[['Positive %']], **on_index)
    positives = positives.merge(test[['Positive %']], **on_index)
    positives = positives.rename(columns={
        "Positive %_x": "Positives Train",
        "Positive %_y": "Positives Validation",
        "Positive %": "Positives Test",
    })
    positives.copy().plot(kind='bar', figsize=(10,7), title="Positive Labels Distribution")

    # Same layout for the negative percentages.
    negatives = train[['Negative %']].merge(val[['Negative %']], **on_index)
    negatives = negatives.merge(test[['Negative %']], **on_index)
    negatives = negatives.rename(columns={
        "Negative %_x": "Negative Train",
        "Negative %_y": "Negative Validation",
        "Negative %": "Negative Test",
    })
    negatives.copy().plot(kind='bar', figsize=(10,7), title="Negative Labels Distribution")

    # One positive-vs-negative chart per split.
    for split_frame, split_title in (
        (train, "Training set"),
        (val, "Validation set"),
        (test, "Test set"),
    ):
        split_frame[['Positive %', 'Negative %']].copy().plot(kind='bar', figsize=(10,7), title=split_title)