InceptionV3_Chexpert_CWBCE_E10_B32_C1_N12 - TobiasSchmidtDE/DeepL-MedicalImaging GitHub Wiki
Version: 1
Trained InceptionV3 architecture using the 'Chexpert_CWBCE_E10_B32_C1_N12' benchmark. The benchmark was initialized for the chexpert_preprocessed-256-crop dataset with batch size of 32, shuffle set to True and images rescaled to dimension (256, 256). The training was done for 10 epochs using the Adam optimizer and weighted_binary_crossentropy loss. A total of 12 labels/pathologies were included in the training and encoded using the 'uzeroes' method. The training set included 142320 samples, the validation set 36162, and the test set 44932.
from pathlib import Path
from dotenv import load_dotenv, find_dotenv
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
import json
import os
import re
import pprint
# The notebook expects to run with the repository root ("idp-radio-1") as the
# working directory; from anywhere else it climbs two directory levels.
basepath = Path.cwd()
if basepath.name != "idp-radio-1":
    repo_root = basepath.parent.parent
    os.chdir(repo_root)
print(os.getcwd())

# Load environment variables from the nearest .env file.
load_dotenv(find_dotenv())

# Deliberately imported after the chdir above -- presumably so that the
# project-local `src` package resolves from the repository root.
from src.preprocessing.split.train_test_split import train_test_split
/srv/idp-radio-1
# The experiment record is passed in JSON-encoded through the EXP_DATA
# environment variable.
data = json.loads(os.environ['EXP_DATA'])
history = data['history']

# Echo the description one sentence per paragraph. The fragment after the
# final "." is dropped by the [:-1] slice.
sentences = data["description"].split(".")
for sentence in sentences[:-1]:
    print(sentence + ".\n")
Trained InceptionV3 architecture using the 'Chexpert_CWBCE_E10_B32_C1_N12' benchmark.
The benchmark was initialized for the chexpert_preprocessed-256-crop dataset with batch size of 32, shuffle set to True and images rescaled to dimension (256, 256).
The training was done for 10 epochs using the Adam optimizer and weighted_binary_crossentropy loss.
A total of 12 labels/pathologies were included in the training and encoded using the 'uzeroes' method.
The traing set included 142320 number of sample, the validation set 36162, and the test set 44932.
# If any metrics were renamed, add the new name here as
# {"default_name": "new_name"}.
metric_custom_names = {"auc": "AUC_ROC"}

# A lower-case letter or digit directly followed by an upper-case letter,
# i.e. a camelCase word boundary. Compiled once and shared by both lists.
_camel_boundary = re.compile(r"([a-z0-9])([A-Z])")

# Display names: camelCase boundaries become spaces.
# The replacement templates are raw strings: "\g" is not a valid string escape
# and triggers a DeprecationWarning/SyntaxWarning in a plain literal.
metric_names = [
    _camel_boundary.sub(r"\g<1> \g<2>", name)
    for name in data["benchmark"]["metrics"]
]

# Lookup keys for `history`: camelCase boundaries become underscores, then
# everything is lower-cased.
metric_keys = [
    _camel_boundary.sub(r"\g<1>_\g<2>", name).lower()
    for name in data["benchmark"]["metrics"]
]

for default_name, custom_name in metric_custom_names.items():
    # Only swap in the custom name when the default one is a requested metric
    # but is absent from the recorded history.
    if default_name not in history and default_name in metric_keys:
        metric_keys[metric_keys.index(default_name)] = custom_name
def print_or_plot_metric(metric_key, metric_name, figure_name):
    """Print a metric recorded for a single epoch, otherwise plot it.

    Reads the series from the notebook-level ``history`` mapping. A series
    with exactly one entry is printed instead of plotted; any other length is
    handed to ``plot_epoch_metric``.

    Args:
        metric_key: Key of the training series in ``history``; the validation
            series is looked up under ``'val_' + metric_key``.
        metric_name: Human-readable metric name used in the printed text and
            as the plot's y-axis label.
        figure_name: Title passed on to ``plot_epoch_metric``.

    Raises:
        KeyError: If ``metric_key`` is not present in ``history``.
    """
    if len(history[metric_key]) == 1:
        print("Data for {m_name} only available for a single epoch. \nSkipping plot and printing data...".format(m_name=metric_name))
        print('Train {}: '.format(metric_name), history[metric_key])
        # The validation series is optional (plot_epoch_metric treats it the
        # same way), so a missing key must not raise here.
        validation_key = 'val_' + metric_key
        if validation_key in history:
            print('Validation {}: '.format(metric_name), history[validation_key])
        print()
    else:
        plot_epoch_metric(metric_key, metric_name, figure_name)
def plot_epoch_metric(metric_key, metric_name, figure_name):
    """Show a line chart of one ``history`` series over the epochs.

    The training series ``history[metric_key]`` is always drawn. When
    ``history`` also holds ``'val_' + metric_key``, that series is drawn as a
    second line and a Train/Validation legend is added.

    Args:
        metric_key: Key of the training series in ``history``.
        metric_name: Label for the y-axis.
        figure_name: Title of the figure.
    """
    figure(num=None, figsize=(10, 6))
    plt.plot(history[metric_key])

    validation_key = 'val_' + metric_key
    has_validation = validation_key in history
    if has_validation:
        plt.plot(history[validation_key])

    plt.title(figure_name)
    plt.ylabel(metric_name)
    plt.xlabel('Epoch')
    if has_validation:
        plt.legend(['Train', 'Validation'], loc='upper left')
    plt.show()
# One printout/chart per benchmark metric; keys and display names are
# parallel lists, so they are walked in lockstep.
for metric_key, metric_name in zip(metric_keys, metric_names):
    print_or_plot_metric(metric_key, metric_name, "Model " + metric_name)

# The loss is always reported.
print_or_plot_metric("loss", "Loss", "Model loss")

# The learning-rate series is only plotted when the history contains one.
if "lr" in history:
    plot_epoch_metric("lr", "Learning Rate", "Learning Rate")
# Show the stored classification report; a missing or empty entry is skipped.
classification_report = data.get('classification_report')
if classification_report:
    print(classification_report)
precision recall f1-score support
Enlarged Cardiomediastinum 0.05 0.50 0.09 2214
Cardiomegaly 0.11 0.35 0.17 5294
Lung Opacity 0.47 0.58 0.52 21324
Lung Lesion 0.04 0.35 0.07 1901
Edema 0.23 0.42 0.30 10461
Consolidation 0.07 0.42 0.12 3063
Pneumonia 0.03 0.33 0.05 1225
Atelectasis 0.16 0.46 0.23 6912
Pneumothorax 0.09 0.22 0.12 3894
Pleural Effusion 0.40 0.45 0.42 17656
Pleural Other 0.02 0.29 0.03 747
Fracture 0.04 0.42 0.08 1863
micro avg 0.16 0.46 0.24 76554
macro avg 0.14 0.40 0.18 76554
weighted avg 0.29 0.46 0.33 76554
samples avg 0.15 0.38 0.20 76554
# List every score recorded for the test set; a missing or empty entry is
# skipped.
test_scores = data.get("test")
if test_scores:
    for score_name, score in test_scores.items():
        print('Test {}: '.format(score_name), score)
Test loss: 1.2296303510665894
Test auc: 0.7467536330223083
Test precision: 0.26566487550735474
Test recall: 0.7385374903678894
Test f2_score: 0.5446474552154541
Test binary_accuracy: 0.6730316877365112
# Dump the full benchmark configuration, if the experiment record has one.
pp = pprint.PrettyPrinter(indent=4)
if "benchmark" in data:
    benchmark_config = data["benchmark"]
    pp.pprint(benchmark_config)
{ 'batch_size': 32,
'benchmark_name': 'Chexpert_CWBCE_E10_B32_C1_N12',
'crop': False,
'dataset_folder': 'data/chexpert/preprocessed-256-crop',
'dataset_name': 'chexpert_preprocessed-256-crop',
'dim': [256, 256],
'drop_last': True,
'epochs': 10,
'label_columns': [ 'Enlarged Cardiomediastinum',
'Cardiomegaly',
'Lung Opacity',
'Lung Lesion',
'Edema',
'Consolidation',
'Pneumonia',
'Atelectasis',
'Pneumothorax',
'Pleural Effusion',
'Pleural Other',
'Fracture'],
'loss': 'weighted_binary_crossentropy',
'metrics': ['auc', 'precision', 'recall', 'f2_score', 'binary_accuracy'],
'models_dir': 'models',
'n_channels': 3,
'nan_replacement': 0,
'negative_weights': [ 1.050268292427063,
1.1378039121627808,
1.8928438425064087,
1.0430703163146973,
1.3037590980529785,
1.0706472396850586,
1.0274509191513062,
1.1762994527816772,
1.0968482494354248,
1.6219568252563477,
1.0159201622009277,
1.041581630706787],
'optimizer': 'Adam',
'path_column': 'Path',
'path_column_prefix': '',
'positive_weights': [ 20.893260955810547,
8.25668716430664,
2.120016574859619,
24.21783447265625,
4.292082786560059,
15.154845237731934,
37.4287223815918,
6.672168254852295,
11.325428009033203,
2.6078288555145264,
63.813453674316406,
25.049110412597656],
'shuffle': True,
'split_seed': 6122156,
'test_num_samples': 44932,
'train_num_samples': 142320,
'u_enc': 'uzeroes',
'unc_value': -1,
'use_class_weights': False,
'valid_num_samples': 36162}
# Re-create the train/validation/test label split from the benchmark
# settings. This only runs when the benchmark recorded the seed it used.
if 'benchmark' in data and 'split_seed' in data['benchmark']:
    benchmark = data['benchmark']
    dataset_path = Path(benchmark['dataset_folder'])

    # Optional settings fall back to fixed defaults when absent.
    train_labels = benchmark.get('train_labels', 'train.csv')
    split_test_size = benchmark.get('split_test_size', 0.2)
    split_valid_size = benchmark.get('split_valid_size', 0.2)
    split_group = benchmark.get('split_group', 'patient_id')
    split_seed = benchmark['split_seed']

    all_labels = pd.read_csv(dataset_path / train_labels)

    # First carve off the test set, then split the remainder into training
    # and validation sets with the same grouping column and seed.
    train_labels, test_labels = train_test_split(
        all_labels,
        test_size=split_test_size,
        group=split_group,
        seed=split_seed,
    )
    train_labels, validation_labels = train_test_split(
        train_labels,
        test_size=split_valid_size,
        group=split_group,
        seed=split_seed,
    )
from src.datasets.u_encoding import uencode
def get_distribution(labels):
    """Count positive and negative labels per pathology for one data split.

    The frame is NaN-filled (only when the notebook-level ``benchmark``
    defines ``nan_replacement``), passed through ``uencode`` using the
    benchmark's ``u_enc`` and ``unc_value`` settings, and restricted to the
    benchmark's ``label_columns`` before counting.

    Args:
        labels: pandas DataFrame holding the label rows of one split; it must
            contain every column named in ``benchmark['label_columns']``.

    Returns:
        DataFrame indexed by 'Pathology' with the columns 'Positive',
        'Positive %', 'Negative' and 'Negative %'. Counts are the number of
        rows equal to 1.0 and 0.0 respectively; percentages are relative to
        the number of rows, rounded to whole numbers, and 0 for an empty
        split.
    """
    if 'nan_replacement' in benchmark:
        labels = labels.fillna(benchmark['nan_replacement'])

    # Named `encoded` rather than `data` so the notebook-level `data` record
    # is not shadowed inside the function.
    encoded = uencode(benchmark['u_enc'], labels.to_numpy(), unc_value=benchmark['unc_value'])
    encoded = pd.DataFrame(encoded, columns=labels.columns)
    labels = encoded[benchmark['label_columns']]

    total = labels.shape[0]

    def _percent(count):
        # An empty split would otherwise divide by zero.
        return round(count / total * 100) if total else 0

    d = {'Pathology': [], 'Positive': [], 'Positive %': [], 'Negative': [], 'Negative %': []}
    for label in labels.columns:
        # Group sizes keyed by label value, computed once per pathology.
        counts = labels.groupby(label).size()
        positive = counts[1.0] if 1.0 in counts else 0
        negative = counts[0.0] if 0.0 in counts else 0

        d['Pathology'].append(label)
        d['Positive'].append(positive)
        d['Positive %'].append(_percent(positive))
        d['Negative'].append(negative)
        d['Negative %'].append(_percent(negative))

    df = pd.DataFrame(d)
    df = df.set_index('Pathology')
    return df
# NOTE(review): the original indentation was lost; the plotting statements
# are assumed to belong under this guard because they depend on names that
# are only defined when it holds -- confirm against the notebook.
if 'benchmark' in data and 'split_seed' in data['benchmark']:
    train = get_distribution(train_labels)
    val = get_distribution(validation_labels)
    test = get_distribution(test_labels)

    # All three frames are joined on their 'Pathology' index.
    on_index = {"left_index": True, "right_index": True}

    # Share of positive labels per pathology, side by side for each split.
    # The first merge suffixes the clashing columns with _x/_y; the column
    # from the test frame keeps its plain name.
    positives = train[['Positive %']].merge(val[['Positive %']], **on_index)
    positives = positives.merge(test[['Positive %']], **on_index)
    positives = positives.rename(columns={
        "Positive %_x": "Positives Train",
        "Positive %_y": "Positives Validation",
        "Positive %": "Positives Test",
    })
    positives.copy().plot(kind='bar', figsize=(10, 7), title="Positive Labels Distribution")

    # Same comparison for the share of negative labels.
    negatives = train[['Negative %']].merge(val[['Negative %']], **on_index)
    negatives = negatives.merge(test[['Negative %']], **on_index)
    negatives = negatives.rename(columns={
        "Negative %_x": "Negative Train",
        "Negative %_y": "Negative Validation",
        "Negative %": "Negative Test",
    })
    negatives.copy().plot(kind='bar', figsize=(10, 7), title="Negative Labels Distribution")

    # Positive vs. negative share within each individual split.
    per_split = (
        (train, "Training set"),
        (val, "Validation set"),
        (test, "Test set"),
    )
    for split_frame, split_title in per_split:
        split_frame[['Positive %', 'Negative %']].copy().plot(kind='bar', figsize=(10, 7), title=split_title)