DenseNet121_Chexpert_BCE_E10_B128 checkpoint - TobiasSchmidtDE/DeepL-MedicalImaging GitHub Wiki
Version: 1
Trained DenseNet121 architecture using the 'Chexpert BCE E10 B128' benchmark. The benchmark was initialized for the chexpert_full dataset with a batch size of 128, shuffle set to True, and images rescaled to dimension (256, 256). The training was done for 10 epochs using the Adam optimizer and binary_crossentropy loss. A total of 14 labels/pathologies were included in the training and encoded using the 'uzeroes' method. The training set included 144408 samples, the validation set 35074, and the test set 43932.
from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
import json
import os
import re
import pprint

# The experiment record is passed to this notebook as a JSON string in the
# EXP_DATA environment variable; later cells read its "description",
# "benchmark", "history", "classification_report" and "test" entries.
data = json.loads(os.environ['EXP_DATA'])
# Recorded values per metric key; plotted against the epoch axis further down.
history = data['history']
for s in data["description"].split(".")[:-1]:
    print(s + ".\n")
Trained DenseNet121 architecture using the 'Chexpert BCE E10 B128' benchmark.
The benchmark was initialized for the chexpert_full dataset with batch size of 128, shuffel set to True and images rescaled to dimension (256, 256).
The training was done for 10 epochs using the Adam optimizer and binary_crossentropy loss.
A total of 14 labels/pathologies were included in the training and encoded using the 'uzeroes' method.
The traing set included 144408 number of sample, the validation set 35074, and the test set 43932.
# if there are any metrics that were renamed, add this new name here as ("default_name":"new_name")
metric_custom_names = {"auc": "AUC_ROC"}

# Split the CamelCase metric names at every lowercase/digit -> uppercase
# boundary: "BinaryAccuracy" yields the label "Binary Accuracy" and the
# history key "binary_accuracy". Raw strings are required so that the
# "\g<n>" group references are not treated as (invalid) string escapes.
metric_names = [re.sub(r"([a-z0-9])([A-Z])", r"\g<1> \g<2>", name) for name in data["benchmark"]["metrics"]]
metric_keys = [re.sub(r"([a-z0-9])([A-Z])", r"\g<1>_\g<2>", name).lower() for name in data["benchmark"]["metrics"]]
for default_name, custom_name in metric_custom_names.items():
    if default_name not in history and default_name in metric_keys:
        # replace default name with custom name
        metric_keys[metric_keys.index(default_name)] = custom_name


def print_or_plot_metric(metric_key, metric_name, figure_name):
    """Show one training metric, as text for a single epoch or as a plot otherwise.

    Args:
        metric_key: Key of the metric in the module-level ``history`` mapping.
        metric_name: Human-readable metric name used in the printout / axis label.
        figure_name: Title passed on to ``plot_epoch_metric``.

    Raises:
        KeyError: If ``metric_key`` is not present in ``history``.
    """
    if len(history[metric_key]) == 1:
        # A one-point line chart carries no information, so print the values instead.
        print("Data for {m_name} only available for a single epoch. \nSkipping plot and printing data...".format(m_name=metric_name))
        print('Train {}: '.format(metric_name), history[metric_key])
        # Validation values are optional; plot_epoch_metric applies the same guard.
        val_key = 'val_' + metric_key
        if val_key in history:
            print('Validation {}: '.format(metric_name), history[val_key])
        print()
    else:
        plot_epoch_metric(metric_key, metric_name, figure_name)
        
def plot_epoch_metric(metric_key, metric_name, figure_name):
    """Plot the recorded values of one metric against the epoch index.

    The training curve is read from the module-level ``history`` under
    ``metric_key``. If ``history`` also holds a ``'val_' + metric_key`` entry,
    that curve is drawn as well and a Train/Validation legend is added.

    Args:
        metric_key: Key of the metric in ``history``.
        metric_name: Label for the y-axis.
        figure_name: Title of the figure.
    """
    validation_key = 'val_' + metric_key
    has_validation = validation_key in history.keys()

    figure(num=None, figsize=(10, 6))
    plt.plot(history[metric_key])
    if has_validation:
        plt.plot(history[validation_key])

    plt.title(figure_name)
    plt.ylabel(metric_name)
    plt.xlabel('Epoch')
    if has_validation:
        # Legend order follows the order in which the two curves were plotted.
        plt.legend(['Train', 'Validation'], loc='upper left')
    plt.show()
# Show every metric configured in the benchmark; metric_names and metric_keys
# are parallel lists, so the display name is looked up by position.
for i, metric_key in enumerate(metric_keys):
    display_name = metric_names[i]
    print_or_plot_metric(metric_key, display_name, "Model " + display_name)

# The loss is not part of the benchmark metric list and is shown separately.
print_or_plot_metric("loss", "Loss", "Model loss")
if "lr" in history.keys():
    plot_epoch_metric("lr", "Learning Rate", "Learning Rate")
if 'classification_report' in data.keys() and data['classification_report']:
    print(data['classification_report'])
                            precision    recall  f1-score   support
                No Finding       0.09      0.02      0.04      4485
Enlarged Cardiomediastinum       0.00      0.00      0.00      2139
              Cardiomegaly       0.13      0.02      0.03      5377
              Lung Opacity       0.47      0.55      0.51     20770
               Lung Lesion       0.00      0.00      0.00      1769
                     Edema       0.23      0.20      0.21     10181
             Consolidation       0.00      0.00      0.00      2985
                 Pneumonia       0.00      0.00      0.00      1186
               Atelectasis       0.00      0.00      0.00      6456
              Pneumothorax       0.09      0.01      0.01      3711
          Pleural Effusion       0.38      0.43      0.40     16653
             Pleural Other       0.00      0.00      0.00       738
                  Fracture       0.00      0.00      0.00      1834
           Support Devices       0.52      0.52      0.52     22837
                 micro avg       0.43      0.32      0.37    101121
                 macro avg       0.14      0.13      0.12    101121
              weighted avg       0.31      0.32      0.31    101121
               samples avg       0.32      0.30      0.28    101121
# Print the test scores, one per line, when the experiment record has a
# non-empty "test" entry.
if data.get('test'):
    for score_name, score in data['test'].items():
        print(f'Test {score_name}: ', score)
Test loss:  0.312576025724411
Test AUC:  0.7191928029060364
Test Precision:  0.6336762309074402
Test Recall:  0.4824714958667755
Test F2Score:  0.5066503882408142
Test BinaryAccuracy:  0.868972659111023
pp = pprint.PrettyPrinter(indent=4)
if "benchmark" in data.keys():
    pp.pprint(data["benchmark"])
{   'batch_size': 128,
    'benchmark_name': 'Chexpert BCE E10 B128',
    'dataset_folder': 'data/chexpert/full',
    'dataset_name': 'chexpert_full',
    'dim': [256, 256],
    'drop_last': True,
    'epochs': 10,
    'label_columns': [   'No Finding',
                         'Enlarged Cardiomediastinum',
                         'Cardiomegaly',
                         'Lung Opacity',
                         'Lung Lesion',
                         'Edema',
                         'Consolidation',
                         'Pneumonia',
                         'Atelectasis',
                         'Pneumothorax',
                         'Pleural Effusion',
                         'Pleural Other',
                         'Fracture',
                         'Support Devices'],
    'loss': 'binary_crossentropy',
    'metrics': ['AUC', 'Precision', 'Recall', 'F2Score', 'BinaryAccuracy'],
    'models_dir': 'models',
    'n_channels': 3,
    'nan_replacement': 0,
    'optimizer': 'Adam',
    'path_column': 'Path',
    'path_column_prefix': '',
    'shuffle': True,
    'test_num_samples': 43932,
    'train_num_samples': 144408,
    'u_enc': 'uzeroes',
    'unc_value': -1,
    'valid_num_samples': 35074}
Version: 1
Trained DenseNet121 architecture using the 'Chexpert BCE E10 B128' benchmark. The benchmark was initialized for the chexpert_full dataset with a batch size of 128, shuffle set to True, and images rescaled to dimension (256, 256). The training was done for 10 epochs using the Adam optimizer and binary_crossentropy loss. A total of 14 labels/pathologies were included in the training and encoded using the 'uzeroes' method. The training set included 144408 samples, the validation set 35074, and the test set 43932.
from pathlib import Path
from dotenv import load_dotenv, find_dotenv
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
import json
import os
import re
import pprint
# When the working directory is not the directory named "idp-radio-1", move
# two levels up and print where we ended up.
# NOTE(review): presumably this makes the `src.` imports below resolvable from
# the project root; it assumes the notebook lives two levels beneath it.
basepath = Path.cwd()
if basepath.name != "idp-radio-1":
    os.chdir(basepath.parent.parent)
    print(os.getcwd())
# Load the variables of the .env file located by find_dotenv().
load_dotenv(find_dotenv())
from src.preprocessing.split.train_test_split import train_test_split
/srv/idp-radio-1
data = json.loads(os.environ['EXP_DATA'])
history = data['history']
for s in data["description"].split(".")[:-1]:
    print(s + ".\n")
Trained DenseNet121 architecture using the 'Chexpert BCE E10 B128' benchmark.
The benchmark was initialized for the chexpert_full dataset with batch size of 128, shuffel set to True and images rescaled to dimension (256, 256).
The training was done for 10 epochs using the Adam optimizer and binary_crossentropy loss.
A total of 14 labels/pathologies were included in the training and encoded using the 'uzeroes' method.
The traing set included 144408 number of sample, the validation set 35074, and the test set 43932.
# if there are any metrics that were renamed, add this new name here as ("default_name":"new_name")
metric_custom_names = {"auc": "AUC_ROC"}

# Split the CamelCase metric names at every lowercase/digit -> uppercase
# boundary: "BinaryAccuracy" yields the label "Binary Accuracy" and the
# history key "binary_accuracy". Raw strings are required so that the
# "\g<n>" group references are not treated as (invalid) string escapes.
metric_names = [re.sub(r"([a-z0-9])([A-Z])", r"\g<1> \g<2>", name) for name in data["benchmark"]["metrics"]]
metric_keys = [re.sub(r"([a-z0-9])([A-Z])", r"\g<1>_\g<2>", name).lower() for name in data["benchmark"]["metrics"]]
for default_name, custom_name in metric_custom_names.items():
    if default_name not in history and default_name in metric_keys:
        # replace default name with custom name
        metric_keys[metric_keys.index(default_name)] = custom_name


def print_or_plot_metric(metric_key, metric_name, figure_name):
    """Show one training metric, as text for a single epoch or as a plot otherwise.

    Args:
        metric_key: Key of the metric in the module-level ``history`` mapping.
        metric_name: Human-readable metric name used in the printout / axis label.
        figure_name: Title passed on to ``plot_epoch_metric``.

    Raises:
        KeyError: If ``metric_key`` is not present in ``history``.
    """
    if len(history[metric_key]) == 1:
        # A one-point line chart carries no information, so print the values instead.
        print("Data for {m_name} only available for a single epoch. \nSkipping plot and printing data...".format(m_name=metric_name))
        print('Train {}: '.format(metric_name), history[metric_key])
        # Validation values are optional; plot_epoch_metric applies the same guard.
        val_key = 'val_' + metric_key
        if val_key in history:
            print('Validation {}: '.format(metric_name), history[val_key])
        print()
    else:
        plot_epoch_metric(metric_key, metric_name, figure_name)
        
def plot_epoch_metric(metric_key, metric_name, figure_name):
    """Plot the recorded values of one metric against the epoch index.

    The training curve is read from the module-level ``history`` under
    ``metric_key``. If ``history`` also holds a ``'val_' + metric_key`` entry,
    that curve is drawn as well and a Train/Validation legend is added.

    Args:
        metric_key: Key of the metric in ``history``.
        metric_name: Label for the y-axis.
        figure_name: Title of the figure.
    """
    validation_key = 'val_' + metric_key
    has_validation = validation_key in history.keys()

    figure(num=None, figsize=(10, 6))
    plt.plot(history[metric_key])
    if has_validation:
        plt.plot(history[validation_key])

    plt.title(figure_name)
    plt.ylabel(metric_name)
    plt.xlabel('Epoch')
    if has_validation:
        # Legend order follows the order in which the two curves were plotted.
        plt.legend(['Train', 'Validation'], loc='upper left')
    plt.show()
# Show every metric configured in the benchmark; metric_names and metric_keys
# are parallel lists, so the display name is looked up by position.
for i, metric_key in enumerate(metric_keys):
    display_name = metric_names[i]
    print_or_plot_metric(metric_key, display_name, "Model " + display_name)

# The loss is not part of the benchmark metric list and is shown separately.
print_or_plot_metric("loss", "Loss", "Model loss")
if "lr" in history.keys():
    plot_epoch_metric("lr", "Learning Rate", "Learning Rate")
if 'classification_report' in data.keys() and data['classification_report']:
    print(data['classification_report'])
                            precision    recall  f1-score   support
                No Finding       0.09      0.02      0.04      4485
Enlarged Cardiomediastinum       0.00      0.00      0.00      2139
              Cardiomegaly       0.13      0.02      0.03      5377
              Lung Opacity       0.47      0.55      0.51     20770
               Lung Lesion       0.00      0.00      0.00      1769
                     Edema       0.23      0.20      0.21     10181
             Consolidation       0.00      0.00      0.00      2985
                 Pneumonia       0.00      0.00      0.00      1186
               Atelectasis       0.00      0.00      0.00      6456
              Pneumothorax       0.09      0.01      0.01      3711
          Pleural Effusion       0.38      0.43      0.40     16653
             Pleural Other       0.00      0.00      0.00       738
                  Fracture       0.00      0.00      0.00      1834
           Support Devices       0.52      0.52      0.52     22837
                 micro avg       0.43      0.32      0.37    101121
                 macro avg       0.14      0.13      0.12    101121
              weighted avg       0.31      0.32      0.31    101121
               samples avg       0.32      0.30      0.28    101121
# Print the test scores, one per line, when the experiment record has a
# non-empty "test" entry.
if data.get('test'):
    for score_name, score in data['test'].items():
        print(f'Test {score_name}: ', score)
Test loss:  0.312576025724411
Test AUC:  0.7191928029060364
Test Precision:  0.6336762309074402
Test Recall:  0.4824714958667755
Test F2Score:  0.5066503882408142
Test BinaryAccuracy:  0.868972659111023
pp = pprint.PrettyPrinter(indent=4)
if "benchmark" in data.keys():
    pp.pprint(data["benchmark"])
{   'batch_size': 128,
    'benchmark_name': 'Chexpert BCE E10 B128',
    'dataset_folder': 'data/chexpert/full',
    'dataset_name': 'chexpert_full',
    'dim': [256, 256],
    'drop_last': True,
    'epochs': 10,
    'label_columns': [   'No Finding',
                         'Enlarged Cardiomediastinum',
                         'Cardiomegaly',
                         'Lung Opacity',
                         'Lung Lesion',
                         'Edema',
                         'Consolidation',
                         'Pneumonia',
                         'Atelectasis',
                         'Pneumothorax',
                         'Pleural Effusion',
                         'Pleural Other',
                         'Fracture',
                         'Support Devices'],
    'loss': 'binary_crossentropy',
    'metrics': ['AUC', 'Precision', 'Recall', 'F2Score', 'BinaryAccuracy'],
    'models_dir': 'models',
    'n_channels': 3,
    'nan_replacement': 0,
    'optimizer': 'Adam',
    'path_column': 'Path',
    'path_column_prefix': '',
    'shuffle': True,
    'test_num_samples': 43932,
    'train_num_samples': 144408,
    'u_enc': 'uzeroes',
    'unc_value': -1,
    'valid_num_samples': 35074}
# Rebuild the train/validation/test label frames. This is only attempted when
# the benchmark configuration carries a 'split_seed'.
if 'benchmark' in data.keys() and 'split_seed' in data['benchmark']:
    benchmark = data['benchmark']
    dataset_path = Path(benchmark['dataset_folder'])
    # Optional settings fall back to these defaults when the benchmark omits them.
    train_labels = benchmark.get('train_labels', 'train.csv')
    split_test_size = benchmark.get('split_test_size', 0.2)
    split_valid_size = benchmark.get('split_valid_size', 0.2)
    split_group = benchmark.get('split_group', 'patient_id')
    split_seed = benchmark['split_seed']
    all_labels = pd.read_csv(dataset_path / train_labels)
    # Split off the test set first, then take the validation set out of the
    # remaining training rows. Note that `train_labels` is rebound from the
    # CSV file name to the resulting DataFrame here.
    train_labels, test_labels = train_test_split(all_labels, test_size=split_test_size, group=split_group, seed=split_seed)
    train_labels, validation_labels = train_test_split(train_labels, test_size=split_valid_size, group=split_group, seed=split_seed)

from src.datasets.u_encoding import uencode
def get_distribution(labels):
    """Count positive (1.0) and negative (0.0) entries per label column.

    The frame is first passed through the benchmark's preprocessing: NaNs are
    replaced when the benchmark defines 'nan_replacement', then ``uencode`` is
    applied with the benchmark's 'u_enc' and 'unc_value' settings
    (presumably mapping the uncertainty marker to 0/1 -- confirm in
    src.datasets.u_encoding). Only benchmark['label_columns'] are counted.

    Args:
        labels: DataFrame of labels; must contain benchmark['label_columns'].

    Returns:
        DataFrame indexed by 'Pathology' with the columns 'Positive',
        'Positive %', 'Negative' and 'Negative %'. Percentages are relative to
        the number of rows and rounded with ``round``; they are 0 for an
        empty frame.
    """
    if 'nan_replacement' in benchmark.keys():
        labels = labels.fillna(benchmark['nan_replacement'])
    # Named `encoded` so the module-level `data` record is not shadowed.
    encoded = uencode(benchmark['u_enc'], labels.to_numpy(), unc_value=benchmark['unc_value'])
    encoded = pd.DataFrame(encoded, columns=labels.columns)
    labels = encoded[benchmark['label_columns']]
    total = labels.shape[0]

    d = {'Pathology': [], 'Positive': [], 'Positive %': [], 'Negative': [], 'Negative %': [],}
    for label in labels.columns:
        # Occurrences of each distinct value in this column, computed once.
        counts = labels.groupby(label).size()
        positive = counts[1.0] if 1.0 in counts else 0
        negative = counts[0.0] if 0.0 in counts else 0
        # Guard against 0/0 when the frame has no rows.
        positive_percent = positive / total * 100 if total else 0
        negative_percent = negative / total * 100 if total else 0
        d['Pathology'].append(label)
        d['Positive'].append(positive)
        d['Positive %'].append(round(positive_percent))
        d['Negative'].append(negative)
        d['Negative %'].append(round(negative_percent))

    df = pd.DataFrame(d)
    df = df.set_index('Pathology')
    return df


# Plot the label distributions of the three splits; requires the split above
# to have run, hence the identical condition.
if 'benchmark' in data.keys() and 'split_seed' in data['benchmark']:
    train = get_distribution(train_labels)
    val = get_distribution(validation_labels)
    test = get_distribution(test_labels)

    # The first merge produces the default "_x"/"_y" suffixes for train/val;
    # the test column joins without a clash and keeps its plain name.
    positives = (
        train[['Positive %']]
        .merge(val[['Positive %']], left_index=True, right_index=True)
        .merge(test[['Positive %']], left_index=True, right_index=True)
        .rename(columns={"Positive %_x": "Positives Train", "Positive %_y": "Positives Validation", "Positive %": "Positives Test", })
    )
    positives.copy().plot(kind='bar', figsize=(10,7), title="Positive Labels Distribution")

    negatives = (
        train[['Negative %']]
        .merge(val[['Negative %']], left_index=True, right_index=True)
        .merge(test[['Negative %']], left_index=True, right_index=True)
        .rename(columns={"Negative %_x": "Negative Train", "Negative %_y": "Negative Validation", "Negative %": "Negative Test", })
    )
    negatives.copy().plot(kind='bar', figsize=(10,7), title="Negative Labels Distribution")
    train[['Positive %', 'Negative %']].copy().plot(kind='bar', figsize=(10,7), title="Training set")
    val[['Positive %', 'Negative %']].copy().plot(kind='bar', figsize=(10,7), title="Validation set")
    test[['Positive %', 'Negative %']].copy().plot(kind='bar', figsize=(10,7), title="Test set")