DenseNet121_Chexpert_BCE_E3_B32_C1_N12

Version: 1

Trained DenseNet121 architecture using the 'Chexpert_BCE_E3_B32_C1_N12' benchmark. The benchmark was initialized for the chexpert_preprocessed-256-crop dataset with a batch size of 32, shuffle set to True, and images rescaled to dimension (256, 256). The training was done for 3 epochs using the Adam optimizer and binary_crossentropy loss. A total of 12 labels/pathologies were included in the training and encoded using the 'uzeroes' method. The training set included 142320 samples, the validation set 36162, and the test set 44932.

from pathlib import Path
from dotenv import load_dotenv, find_dotenv
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
import json
import os
import re
import pprint

# Switch to the project root unless the current directory is already the
# "idp-radio-1" checkout, then load environment variables from the nearest
# .env file.
basepath = Path.cwd()
if basepath.name != "idp-radio-1":
    # NOTE(review): assumes this is run from two directory levels below the
    # project root -- confirm if the notebook is ever moved.
    os.chdir(basepath.parent.parent)
    print(os.getcwd())
load_dotenv(find_dotenv())

from src.preprocessing.split.train_test_split import train_test_split

/srv/idp-radio-1

# The experiment record is handed over as a JSON string in the EXP_DATA
# environment variable; a missing variable raises KeyError here.
data = json.loads(os.environ['EXP_DATA'])
# Recorded training series, looked up by metric key in the plotting cells below.
history = data['history']

Model and Benchmark Summary

# Print the description one sentence per paragraph. Splitting on "." leaves a
# trailing fragment after the final full stop, which is dropped; the full stop
# removed by the split is re-appended to every sentence.
for sentence in data["description"].split(".")[:-1]:
    print(sentence + ".\n")

Trained DenseNet121 architecture using the 'Chexpert_BCE_E3_B32_C1_N12' benchmark.

 The benchmark was initialized for the chexpert_preprocessed-256-crop dataset with batch size of 32, shuffle set to True and images rescaled to dimension (256, 256).


The training was done for 3 epochs using the Adam optimizer and binary_crossentropy loss.


A total of 12 labels/pathologies were included in the training and encoded using the 'uzeroes' method.


The traing set included 142320 number of sample, the validation set 36162, and the test set 44932.

Extract and format metrics to be plotted

# If any metric was renamed, register it here as {"default_name": "new_name"}.
metric_custom_names = {"auc": "AUC_ROC"}

# Derive display names ("camelCase" -> "camel Case") and history keys
# ("camelCase" -> "camel_case") from the metric names stored in the benchmark.
# The patterns and replacements are raw strings so that "\g<1>" reaches the
# regex engine as a group reference instead of being an invalid string escape.
metric_names = [
    re.sub(r"([a-z0-9])([A-Z])", r"\g<1> \g<2>", name)
    for name in data["benchmark"]["metrics"]
]
metric_keys = [
    re.sub(r"([a-z0-9])([A-Z])", r"\g<1>_\g<2>", name).lower()
    for name in data["benchmark"]["metrics"]
]

for default_name, custom_name in metric_custom_names.items():
    # Only swap in the custom name when the default key is absent from the
    # recorded history but was requested by the benchmark.
    if default_name not in history and default_name in metric_keys:
        metric_keys[metric_keys.index(default_name)] = custom_name

Plot training & validation metric values

def print_or_plot_metric(metric_key, metric_name, figure_name):
    """Print a metric that has a single recorded value, otherwise plot it.

    When ``history`` holds exactly one value for ``metric_key`` the raw
    numbers are printed; in every other case the series is handed to
    ``plot_epoch_metric``.

    Args:
        metric_key: Key of the training series in the module-level ``history``
            dict. The validation series is looked up as ``'val_' + metric_key``.
        metric_name: Human-readable metric name used in the printed text and
            forwarded to the plot.
        figure_name: Figure title forwarded to ``plot_epoch_metric``.

    Raises:
        KeyError: If ``metric_key`` is not present in ``history``.
    """
    if len(history[metric_key]) != 1:
        plot_epoch_metric(metric_key, metric_name, figure_name)
        return

    print("Data for {m_name} only available for a single epoch. \nSkipping plot and printing data...".format(m_name=metric_name))
    print('Train {}: '.format(metric_name), history[metric_key])
    # Not every series has a validation counterpart; skip the line instead of
    # raising KeyError, mirroring the guard in plot_epoch_metric.
    val_key = 'val_' + metric_key
    if val_key in history:
        print('Validation {}: '.format(metric_name), history[val_key])
    print()
        
def plot_epoch_metric(metric_key, metric_name, figure_name):
    """Draw the training series of a metric, plus validation when recorded.

    Args:
        metric_key: Key of the training series in the module-level ``history``
            dict; the validation series is expected under ``'val_' + metric_key``.
        metric_name: Label for the y-axis.
        figure_name: Title of the figure.
    """
    val_key = 'val_' + metric_key
    has_validation = val_key in history

    plt.figure(num=None, figsize=(10, 6))
    plt.plot(history[metric_key])
    if has_validation:
        plt.plot(history[val_key])
        # A legend is only added when there are two curves to tell apart.
        plt.legend(['Train', 'Validation'], loc='upper left')
    plt.title(figure_name)
    plt.ylabel(metric_name)
    plt.xlabel('Epoch')
    plt.show()

# metric_keys and metric_names are parallel lists, so walk them in lockstep.
for metric_key, metric_name in zip(metric_keys, metric_names):
    print_or_plot_metric(metric_key, metric_name, "Model " + metric_name)

png

Plot training & validation loss values

# The loss series is read from history under the fixed key "loss".
print_or_plot_metric("loss", "Loss", "Model loss")

png

# The learning rate is only plotted when the history contains an "lr" series.
if "lr" in history:
    plot_epoch_metric("lr", "Learning Rate", "Learning Rate")

png

Classification Report

# Show the stored classification report; a missing or empty entry is skipped.
report = data.get('classification_report')
if report:
    print(report)

                            precision    recall  f1-score   support

Enlarged Cardiomediastinum       0.00      0.00      0.00      2214
              Cardiomegaly       0.12      0.02      0.04      5294
              Lung Opacity       0.48      0.21      0.29     21324
               Lung Lesion       0.00      0.00      0.00      1901
                     Edema       0.24      0.04      0.07     10461
             Consolidation       0.00      0.00      0.00      3063
                 Pneumonia       0.00      0.00      0.00      1225
               Atelectasis       0.00      0.00      0.00      6912
              Pneumothorax       0.09      0.05      0.06      3894
          Pleural Effusion       0.39      0.42      0.41     17656
             Pleural Other       0.00      0.00      0.00       747
                  Fracture       0.00      0.00      0.00      1863

                 micro avg       0.38      0.17      0.23     76554
                 macro avg       0.11      0.06      0.07     76554
              weighted avg       0.27      0.17      0.19     76554
               samples avg       0.20      0.14      0.15     76554

Test Scores

# List every score stored under "test"; a missing or empty entry is skipped.
test_scores = data.get('test')
if test_scores:
    for score_name, score in test_scores.items():
        print('Test {}: '.format(score_name), score)

Test loss:  0.3062105178833008
Test auc:  0.7229323983192444
Test precision:  0.6811800599098206
Test recall:  0.29407998919487
Test f2_score:  0.3317897915840149
Test binary_accuracy:  0.8802287578582764
Test accuracy_enlarged_cardiomediastinum:  0.9507255554199219
Test accuracy_cardiomegaly:  0.8905012011528015
Test accuracy_lung_opacity:  0.6067613363265991
Test accuracy_lung_lesion:  0.9576916098594666
Test accuracy_edema:  0.7808243632316589
Test accuracy_consolidation:  0.9318303465843201
Test accuracy_pneumonia:  0.9727365970611572
Test accuracy_atelectasis:  0.8461675643920898
Test accuracy_pneumothorax:  0.9101976156234741
Test accuracy_pleural_effusion:  0.7734131813049316
Test accuracy_pleural_other:  0.9833748936653137
Test accuracy_fracture:  0.9585373401641846
Test auc_enlarged_cardiomediastinum:  0.6274955868721008
Test auc_cardiomegaly:  0.827772855758667
Test auc_lung_opacity:  0.70615553855896
Test auc_lung_lesion:  0.6908576488494873
Test auc_edema:  0.7949528694152832
Test auc_consolidation:  0.7065780758857727
Test auc_pneumonia:  0.6720331907272339
Test auc_atelectasis:  0.6417831778526306
Test auc_pneumothorax:  0.7809439301490784
Test auc_pleural_effusion:  0.8473460078239441
Test auc_pleural_other:  0.7237358093261719
Test auc_fracture:  0.6555337309837341
Test precision_enlarged_cardiomediastinum:  0.0
Test precision_cardiomegaly:  0.6904277205467224
Test precision_lung_opacity:  0.6962310671806335
Test precision_lung_lesion:  0.0
Test precision_edema:  0.6720943450927734
Test precision_consolidation:  0.0
Test precision_pneumonia:  0.0
Test precision_atelectasis:  0.0
Test precision_pneumothorax:  0.4641586244106293
Test precision_pleural_effusion:  0.6966381072998047
Test precision_pleural_other:  0.0
Test precision_fracture:  0.0
Test recall_enlarged_cardiomediastinum:  0.0
Test recall_cardiomegaly:  0.12806951999664307
Test recall_lung_opacity:  0.30407053232192993
Test recall_lung_lesion:  0.0
Test recall_edema:  0.11442500352859497
Test recall_consolidation:  0.0
Test recall_pneumonia:  0.0
Test recall_atelectasis:  0.0
Test recall_pneumothorax:  0.23446327447891235
Test recall_pleural_effusion:  0.7499433755874634
Test recall_pleural_other:  0.0
Test recall_fracture:  0.0
Test f2_score_enlarged_cardiomediastinum:  0.0
Test f2_score_cardiomegaly:  0.1529921442270279
Test f2_score_lung_opacity:  0.34267351031303406
Test f2_score_lung_lesion:  0.0
Test f2_score_edema:  0.1371919810771942
Test f2_score_consolidation:  0.0
Test f2_score_pneumonia:  0.0
Test f2_score_atelectasis:  0.0
Test f2_score_pneumothorax:  0.26021772623062134
Test f2_score_pleural_effusion:  0.7386395335197449
Test f2_score_pleural_other:  0.0
Test f2_score_fracture:  0.0

Benchmark Details

# Dump the benchmark configuration with a 4-space indent, if one was stored.
pp = pprint.PrettyPrinter(indent=4)
if "benchmark" in data:
    pp.pprint(data["benchmark"])

{   'batch_size': 32,
    'benchmark_name': 'Chexpert_BCE_E3_B32_C1_N12',
    'crop': False,
    'dataset_folder': 'data/chexpert/preprocessed-256-crop',
    'dataset_name': 'chexpert_preprocessed-256-crop',
    'dim': [256, 256],
    'drop_last': True,
    'epochs': 3,
    'label_columns': [   'Enlarged Cardiomediastinum',
                         'Cardiomegaly',
                         'Lung Opacity',
                         'Lung Lesion',
                         'Edema',
                         'Consolidation',
                         'Pneumonia',
                         'Atelectasis',
                         'Pneumothorax',
                         'Pleural Effusion',
                         'Pleural Other',
                         'Fracture'],
    'loss': 'binary_crossentropy',
    'metrics': [   'auc',
                   'precision',
                   'recall',
                   'f2_score',
                   'binary_accuracy',
                   'accuracy_enlarged_cardiomediastinum',
                   'accuracy_cardiomegaly',
                   'accuracy_lung_opacity',
                   'accuracy_lung_lesion',
                   'accuracy_edema',
                   'accuracy_consolidation',
                   'accuracy_pneumonia',
                   'accuracy_atelectasis',
                   'accuracy_pneumothorax',
                   'accuracy_pleural_effusion',
                   'accuracy_pleural_other',
                   'accuracy_fracture',
                   'auc_enlarged_cardiomediastinum',
                   'auc_cardiomegaly',
                   'auc_lung_opacity',
                   'auc_lung_lesion',
                   'auc_edema',
                   'auc_consolidation',
                   'auc_pneumonia',
                   'auc_atelectasis',
                   'auc_pneumothorax',
                   'auc_pleural_effusion',
                   'auc_pleural_other',
                   'auc_fracture',
                   'precision_enlarged_cardiomediastinum',
                   'precision_cardiomegaly',
                   'precision_lung_opacity',
                   'precision_lung_lesion',
                   'precision_edema',
                   'precision_consolidation',
                   'precision_pneumonia',
                   'precision_atelectasis',
                   'precision_pneumothorax',
                   'precision_pleural_effusion',
                   'precision_pleural_other',
                   'precision_fracture',
                   'recall_enlarged_cardiomediastinum',
                   'recall_cardiomegaly',
                   'recall_lung_opacity',
                   'recall_lung_lesion',
                   'recall_edema',
                   'recall_consolidation',
                   'recall_pneumonia',
                   'recall_atelectasis',
                   'recall_pneumothorax',
                   'recall_pleural_effusion',
                   'recall_pleural_other',
                   'recall_fracture',
                   'f2_score_enlarged_cardiomediastinum',
                   'f2_score_cardiomegaly',
                   'f2_score_lung_opacity',
                   'f2_score_lung_lesion',
                   'f2_score_edema',
                   'f2_score_consolidation',
                   'f2_score_pneumonia',
                   'f2_score_atelectasis',
                   'f2_score_pneumothorax',
                   'f2_score_pleural_effusion',
                   'f2_score_pleural_other',
                   'f2_score_fracture'],
    'models_dir': 'models',
    'n_channels': 3,
    'nan_replacement': 0,
    'negative_weights': [   1.0502760410308838,
                            1.1378039121627808,
                            1.8929194211959839,
                            1.0430703163146973,
                            1.3037710189819336,
                            1.0706310272216797,
                            1.0274434089660645,
                            1.1763091087341309,
                            1.0968482494354248,
                            1.6220307350158691,
                            1.0159274339675903,
                            1.04159677028656],
    'optimizer': 'Adam',
    'path_column': 'Path',
    'path_column_prefix': '',
    'positive_weights': [   20.890193939208984,
                            8.25668716430664,
                            2.119921922683716,
                            24.21783447265625,
                            4.291953086853027,
                            15.158074378967285,
                            37.438568115234375,
                            6.671855449676514,
                            11.325428009033203,
                            2.607637643814087,
                            63.78485107421875,
                            25.04029655456543],
    'shuffle': True,
    'split_seed': 6122156,
    'test_num_samples': 44932,
    'train_num_samples': 142320,
    'u_enc': 'uzeroes',
    'unc_value': -1,
    'use_class_weights': False,
    'valid_num_samples': 36162}

Data Distribution

# Rebuild the train/validation/test label frames from the stored split seed.
# Split settings missing from the benchmark fall back to the defaults below.
if 'benchmark' in data and 'split_seed' in data['benchmark']:
    benchmark = data['benchmark']

    dataset_path = Path(benchmark['dataset_folder'])
    train_labels = benchmark.get('train_labels', 'train.csv')
    split_test_size = benchmark.get('split_test_size', 0.2)
    split_valid_size = benchmark.get('split_valid_size', 0.2)
    split_group = benchmark.get('split_group', 'patient_id')
    split_seed = benchmark['split_seed']

    all_labels = pd.read_csv(dataset_path / train_labels)
    # First carve off the test set, then split the remainder into train and
    # validation. From here on train_labels holds a DataFrame, not a file name.
    train_labels, test_labels = train_test_split(
        all_labels,
        test_size=split_test_size,
        group=split_group,
        seed=split_seed,
    )
    train_labels, validation_labels = train_test_split(
        train_labels,
        test_size=split_valid_size,
        group=split_group,
        seed=split_seed,
    )

from src.datasets.u_encoding import uencode

def get_distribution(labels):
    """Count positive and negative entries per pathology for one split.

    Settings come from the module-level ``benchmark`` dict: missing values are
    filled with ``nan_replacement`` (when configured), the values are passed
    through ``uencode`` with the configured ``u_enc`` / ``unc_value``, and only
    the ``label_columns`` are counted.

    Args:
        labels: DataFrame of labels for one split.

    Returns:
        DataFrame indexed by 'Pathology' with the columns 'Positive',
        'Positive %', 'Negative' and 'Negative %'. Percentages are relative to
        the number of rows and rounded to whole numbers.
    """
    if 'nan_replacement' in benchmark:
        labels = labels.fillna(benchmark['nan_replacement'])

    encoded = uencode(benchmark['u_enc'], labels.to_numpy(), unc_value=benchmark['unc_value'])
    encoded_frame = pd.DataFrame(encoded, columns=labels.columns)
    labels = encoded_frame[benchmark['label_columns']]

    summary = {'Pathology': [], 'Positive': [], 'Positive %': [], 'Negative': [], 'Negative %': []}
    for label in labels.columns:
        # Number of rows per distinct value of this label column.
        sizes = labels.groupby(label).size()
        summary['Pathology'].append(label)

        positive = sizes[1.0] if 1.0 in sizes else 0
        summary['Positive'].append(positive)
        summary['Positive %'].append(round(positive / labels.shape[0] * 100))

        negative = sizes[-0.0] if -0.0 in sizes else 0
        summary['Negative'].append(negative)
        summary['Negative %'].append(round(negative / labels.shape[0] * 100))

    return pd.DataFrame(summary).set_index('Pathology')

# Compare the label balance of the three splits as bar charts.
if 'benchmark' in data and 'split_seed' in data['benchmark']:
    train = get_distribution(train_labels)
    val = get_distribution(validation_labels)
    test = get_distribution(test_labels)

    # Merging train with val yields the "_x"/"_y" suffixed columns; the test
    # column joins afterwards without a clash and keeps its plain name.
    positives = train[['Positive %']].merge(val[['Positive %']], left_index=True, right_index=True)
    positives = positives.merge(test[['Positive %']], left_index=True, right_index=True)
    positives = positives.rename(columns={
        "Positive %_x": "Positives Train",
        "Positive %_y": "Positives Validation",
        "Positive %": "Positives Test",
    })
    positives.copy().plot(kind='bar', figsize=(10, 7), title="Positive Labels Distribution")

    negatives = train[['Negative %']].merge(val[['Negative %']], left_index=True, right_index=True)
    negatives = negatives.merge(test[['Negative %']], left_index=True, right_index=True)
    negatives = negatives.rename(columns={
        "Negative %_x": "Negative Train",
        "Negative %_y": "Negative Validation",
        "Negative %": "Negative Test",
    })
    negatives.copy().plot(kind='bar', figsize=(10, 7), title="Negative Labels Distribution")

    # One positive-vs-negative chart per split.
    for split_frame, split_title in ((train, "Training set"), (val, "Validation set"), (test, "Test set")):
        split_frame[['Positive %', 'Negative %']].copy().plot(kind='bar', figsize=(10, 7), title=split_title)