Arch-specific options

Below are all arch-specific options. The values listed here are the defaults. You don't need to change any of these values and should use the default template configuration files, unless you have a specific use case.

[!NOTE] If you want to train a monochrome model, you need to switch the input and output number of channels to 1 and use the color = "y" option in your configuration file (for both the dataset and the validation). This does not work with OTF, only paired and single. Also note that some losses will not work under these conditions (such as color_opt) and some networks may expect more channels to work properly (such as DAT and OmniSR). A sketch is shown below.
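
For illustration, here is a minimal sketch of the relevant options for single-channel training. It assumes the template's [datasets.train]/[datasets.val] sections, a paired dataset and the compact generator; the channel option names (num_in_ch, in_chans, in_ch) vary per arch, so match them to the network you choose and check your template for the exact keys.

[datasets.train]
type = "paired"
color = "y"

[datasets.val]
type = "paired"
color = "y"

[network_g]
type = "compact"
num_in_ch = 1
num_out_ch = 1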

[!IMPORTANT] The option flash_attn breaks compatibility with official code. Take this into consideration if you want to deploy your model on third-party software such as chaiNNer (see the example below). rgb_mean has been modified to neutral values, to improve stability. If inference is done using ImageNet values, it may cause different results.
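
As an example, for archs that expose the option (such as craft or swinir), that particular incompatibility can be avoided by disabling it; a minimal sketch, using craft:

[network_g]
type = "craft"
flash_attn = false # keeps compatibility with the official code, e.g. for chaiNNer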


Discriminators

Below are the supported discriminators and their parameters.


metagan

[network_d]
type = "metagan"
in_ch = 3
n_class = 1
dims = [48, 96, 192, 288]
blocks = [3, 3, 9, 3]
downs = [4, 4, 2, 2]
se_mode = "SSE"
mlp_ratio = 2.0
attention = true
attn_drop = 0
proj_drop = 0
head_dim = 32
drop = 0.2
sigmoid = false

ea2fpn

[network_d]
type = "ea2fpn"
class_num = 6
encoder_channels = [512, 256, 128, 64]
pyramid_channels = 64
segmentation_channels = 64
dropout = 0.2

[!NOTE] The discriminator ea2fpn uses a learning rate of 1e-4 and a GAN weight of 0.1. Different settings may result in incorrect convergence or NaN. See the sketch below.
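
A hedged sketch of where those values go, assuming the template's [train.optim_d] and [train.gan_opt] sections and an AdamW optimizer (check your template for the exact section names and keys):

[train.optim_d]
type = "adamw"
lr = 1e-4

[train.gan_opt]
type = "gan_loss"
loss_weight = 0.1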


unet

[network_d]
type = "unet"
num_in_ch = 3
num_feat = 64
skip_connection = true

[!NOTE] The discriminator unet uses a learning rate of 1e-4 and a GAN weight of 0.1. Different settings may result in incorrect convergence or NaN.


dunet

[network_d]
type = "dunet"
in_ch = 3
dim = 64

[!NOTE] The discriminator dunet uses a learning rate of 1e-4 and a GAN weight of 0.1. Different settings may result in incorrect convergence or NaN.


patchgan

[network_d]
type = "patchgan"
num_in_ch = 3
num_feat = 64
num_layers = 3
max_nf_mult = 8
use_sigmoid = false
use_sn = true
#norm_type = # None

[!NOTE] The discriminator patchgan uses a learning rate of 1e-4 and a GAN weight of 0.1. Different settings may result in incorrect convergence or NaN.


Generators

Below are the supported generators and their parameters.


asid, asid_d8

[network_g]
type = "asid"
#type = "asid_d8"
num_in_ch = 3
num_out_ch = 3
num_feat = 48
res_num = 3
block_num = 1
window_size = 8
flash_attn = true
d8 = false
pe = false
bias = true
drop = 0.0

[!NOTE] ASID is not fully compatible with the original implementation.


atd, atd_light

[network_g]
type = "atd"
#type = "atd_light"
norm = false
img_size = 96
patch_size = 1
in_chans = 3
embed_dim = 210
depths = [ 6, 6, 6, 6, 6, 6 ]
num_heads = [ 6, 6, 6, 6, 6, 6 ]
window_size = 16
category_size = 256
num_tokens = 128
reducted_dim = 20
convffn_kernel_size = 5
mlp_ratio = 2.0
qkv_bias = true
norm_layer = "nn.LayerNorm"
ape = false
patch_norm = true
img_range = 1.0
upsampler = "pixelshuffle"
resi_connection = "1conv"

[!NOTE] By default, ATD is not fully compatible with the original implementation. To make it compatible, use norm = true.


cfsr

[network_g]
type = "cfsr"
in_chans = 3
embed_dim = 48
depths = [6, 6]
dw_size = 9
mlp_ratio = 2.0
img_range = 1.0
upsampler = "pixelshuffledirect"
mean_norm = false

compact

[network_g]
type = "compact"
num_in_ch = 3
num_out_ch = 3
num_feat = 64
num_conv = 16
act_type = "prelu"

craft

[network_g]
type = "craft"
flash_attn = true
in_chans = 3
img_size = 64
window_size = 16
embed_dim = 48
depths = [ 2, 2, 2, 2 ]
num_heads = [ 6, 6, 6, 6 ]
split_size_0 = 4
split_size_1 = 16
mlp_ratio = 2.0
qkv_bias = true
#qk_scale = # None
img_range = 1.0
resi_connection = "1conv"

[!NOTE] The option flash_attn = true makes it incompatible with the official implementation. rgb_mean has been modified to neutral values, to improve stability. If inference is done using ImageNet values, it may cause different results.


cugan

[network_g]
type = "cugan"
in_channels = 3
out_channels = 3
pro = true

dat_small, dat_medium, dat_2

[network_g]
type = "dat_small"
#type = "dat_medium"
#type = "dat_2"
img_size = 64
in_chans = 3
embed_dim = 180
split_size = [ 2, 4 ]
depth = [ 2, 2, 2, 2 ]
num_heads = [ 2, 2, 2, 2 ]
expansion_factor = 4
qkv_bias = true
#qk_scale = # None
drop_rate = 0.0
attn_drop_rate = 0.0
drop_path_rate = 0.1
use_chk = false
img_range = 1.0
resi_connection = "1conv"
upsampler = "pixelshuffle"

[!NOTE] rgb_mean has been modified to neutral values, to improve stability. If inference is done using ImageNet values, it may cause different results.


dct

[network_g]
type = "dct"
img_size = 64
in_chans = 3
embed_dim = 80
depth = [20]
num_heads = [8]
expansion_factor = 4.0
qkv_bias = true
drop_rate = 0
attn_drop_rate = 0
drop_path_rate = 0.1
act_layer = "nn.GELU"
norm_layer = "nn.LayerNorm"
img_range = 1.0
resi_connection = "3conv"
upsampler = "pixelshuffledirect"

dctlsa

[network_g]
type = "dctlsa"
in_nc = 3
out_nc = 3
nf = 55
num_modules = 6
num_head = 5

[!NOTE] The Dropout added to DCTLSA may not be compatible with the official implementation.


ditn

[network_g]
type = "ditn"
patch_size = 8
inp_channels = 3
dim = 60
ITL_blocks = 4
SAL_blocks = 4
UFONE_blocks = 1
ffn_expansion_factor = 2.0
bias = false
LayerNorm_type = "WithBias"

drct, drct_l, drct_s

[network_g]
type = "drct"
#type = "drct_l"
#type = "drct_s"
img_size = 64
patch_size = 1
in_chans = 3
embed_dim = 180
depths = [ 6, 6, 6, 6, 6, 6 ]
num_heads = [ 6, 6, 6, 6, 6, 6 ]
window_size = 16
compress_ratio = 3
squeeze_factor = 30
conv_scale = 0.01
overlap_ratio = 0.5
mlp_ratio = 2.0
qkv_bias = true
#qk_scale = # None
drop_rate = 0.0
attn_drop_rate = 0.0
drop_path_rate = 0.1
norm_layer = "nn.LayerNorm"
ape = false
patch_norm = true
img_range = 1.0
upsampler = "pixelshuffle"
resi_connection = "1conv"
gc = 32

[!NOTE] rgb_mean has been modified to neutral values, to improve stability. If inference is done using ImageNet values, it may cause different results.


esc, esc_light, esc_large, esc_fp

[network_g]
type = "esc"
#type = "esc_light"
#type = "esc_large"
#type = "esc_fp"
dim = 64
pdim = 16
kernel_size = 13
n_blocks = 5
conv_blocks = 5
window_size = 32
num_heads = 4
exp_ratio = 1.25
attn_type = "sdpa" # "flex", "naive"
is_fp = false
use_dysample = true
realsr = true

flexnet, metaflexnet

[network_g]
type = "flexnet"
#type = "metaflexnet"
inp_channels = 3
out_channels = 3
dim = 64
num_blocks = [6,6,6,6,6,6]
window_size = 8
hidden_rate = 4
channel_norm = false
attn_drop = 0
proj_drop = 0
pipeline_type = "linear"
upsampler = "ps"
flash_attn = true

hasn

[network_g]
type = "hasn"
in_channels = 3
out_channels = 3
feature_channels = 52

hat_s, hat_m, hat_l

[network_g]
type = "hat_m"
#type = "hat_s"
#type = "hat_l"
window_size = 16
img_size = 64
patch_size = 1
in_chans = 3
embed_dim = 96
depths = [ 6, 6, 6, 6 ]
num_heads = [ 6, 6, 6, 6 ]
compress_ratio = 3
squeeze_factor = 30
conv_scale = 0.01
overlap_ratio = 0.5
mlp_ratio = 4.0
qkv_bias = true
#qk_scale = # None
drop_rate = 0.0
attn_drop_rate = 0.0
drop_path_rate = 0.1
ape = false
patch_norm = true
img_range = 1.0
upsampler = "pixelshuffle"
resi_connection = "1conv"

[!NOTE] rgb_mean has been modified to neutral values, to improve stability. If inference is done using ImageNet values, it may cause different results.


hit_srf, hit_srf_medium, hit_srf_large

[network_g]
type = "hit_srf"
#type = "hit_srf_medium"
#type = "hit_srf_large"
img_size = 64
patch_size = 1
in_chans = 3
embed_dim = 60
depths = [6, 6, 6, 6]
num_heads = [6, 6, 6, 6]
base_win_size = [8, 8]
mlp_ratio = 2.0
drop_rate = 0.0
value_drop_rate = 0.0
drop_path_rate = 0.0
ape = false
patch_norm = true
use_checkpoint = false
img_range = 1.0
upsampler = "pixelshuffledirect"
resi_connection = "1conv"
hier_win_ratios = [0.5, 1, 2, 4, 6, 8]

[!NOTE] rgb_mean has been modified to neutral values, to improve stability. If inference is done using ImageNet values, it may cause different results.


hma, hma_medium, hma_large

[network_g]
type = "hma"
#type = "hma_medium"
#type = "hma_large"
img_size = 64
patch_size = 1
in_chans = 3
embed_dim = 60
depths = [6, 6, 6, 6]
num_heads = [6, 6, 6, 6]
window_size = 8
interval_size = 4
mlp_ratio = 2.0
qkv_bias = true
#qk_scale= # None
drop_rate = 0.0
attn_drop_rate = 0.0
drop_path_rate = 0.1
ape = false
patch_norm = true
img_range = 1.0
upsampler = "pixelshuffle"
resi_connection = "1conv"

[!NOTE] rgb_mean has been modified to neutral values, to improve stability. If inference is done using ImageNet values, it may cause different results.


krgn

[network_g]
type = "krgn"
n_colors = 3
n_feats = 64
n_resgroups = 9
act = "lrelu"
rgb_range = 1.0
dilation = 3

lmlt, lmlt_tiny, lmlt_large

[network_g]
type = "lmlt"
#type = "lmlt_tiny"
#type = "lmlt_large"
dim = 60
n_blocks = 8
ffn_scale = 2.0
window_size = 8
drop_rate = 0
attn_drop_rate = 0
drop_path_rate = 0

man, man_tiny, man_light

[network_g]
type = "man"
#type = "man_tiny"
#type = "man_light"
n_resblocks = 36
n_resgroups = 1
n_colors = 3
n_feats = 180
res_scale = 1.0

mosrv2

[network_g]
type = "mosrv2"
in_ch = 3
scale = 4
n_block = 24
dim = 64
upsampler = "pixelshuffledirect" # "conv", "pixelshuffle", "nearest+conv", "dysample"
expansion_ratio = 1.5
mid_dim = 32
unshuffle_mod = true

moesr

[network_g]
type = "moesr"
in_ch = 3
out_ch = 3
dim = 64
n_blocks = 9
n_block = 4
expansion_factor = 2.6
expansion_msg = 1.5
upsampler = "pixelshuffledirect"
upsample_dim = 64

msdan

[network_g]
type = "msdan"
channels = 48
num_DFEB = 8

omnisr

[network_g]
type = "omnisr"
upsampling = 4 # value required, no defaults
window_size = 8 # value required, no defaults
num_in_ch = 3
num_out_ch = 3
num_feat = 64
res_num = 5
block_num = 1
bias = true
pe = true
ffn_bias = true

plainusr, plainusr_ultra, plainusr_large

[network_g]
type = "plainusr"
#type = "plainusr_ultra"
#type = "plainusr_large"
n_feat = 64
im_feat = [64, 48, 32]
attn_feat = 16

plksr, plksr_tiny

[network_g]
type = "plksr"
#type = "plksr_tiny"
dim = 64
n_blocks = 28
kernel_size = 17
split_ratio = 0.25
use_ea = true
ccm_type = "DCCM" # "CCM", "ICCM", "DCCM"
lk_type = "PLK" # "PLK", "SparsePLK", "RectSparsePLK"
sparse_kernels = [ 5, 5, 5, 5 ]
sparse_dilations = [ 1, 2, 3, 4 ]
with_idt = false

[!NOTE] The generator plksr uses a learning rate of 5e-4 or lower. Different settings may result in incorrect convergence or NaN. See the sketch below.
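
A minimal sketch of capping the generator learning rate, assuming the template's [train.optim_g] section and an AdamW optimizer (check your template for the exact keys):

[train.optim_g]
type = "adamw"
lr = 5e-4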


realplksr, realplksr_s, realplksr_l

[network_g]
type = "realplksr"
#type = "realplksr_s"
#type = "realplksr_l"
dysample = false
dim = 64
n_blocks = 28
kernel_size = 17
split_ratio = 0.25
use_ea = true
norm_groups = 4
dropout = 0.0

[!NOTE] The generator realplksr uses a learning rate of 5e-4 or lower. Different settings may result in incorrect convergence or NaN.


rcan

[network_g]
type = "rcan"
n_resgroups = 10
n_resblocks = 20
n_feats = 64
kernel_size = 3
reduction = 16
n_colors = 3
norm = false

rgt, rgt_s

[network_g]
type = "rgt"
#type = "rgt_s"
img_size = 64
in_chans = 3
embed_dim = 180
depth = [ 6, 6, 6, 6, 6, 6, 6, 6 ]
num_heads = [ 6, 6, 6, 6, 6, 6, 6, 6 ]
mlp_ratio = 2.0
qkv_bias = true
#qk_scale = # None
drop_rate = 0.0
attn_drop_rate = 0.0
drop_path_rate = 0.1
act_layer = "nn.GELU"
norm_layer = "nn.LayerNorm"
use_chk = false
img_range = 1.0
resi_connection = "1conv"
split_size = [ 8, 32 ]
c_ratio = 0.5

[!NOTE] rgb_mean has been modified to neutral values, to improve stability. If inference is done using ImageNet values, it may cause different results.


eimn, eimn_a, eimn_l

[network_g]
type = "eimn"
#type = "eimn_a"
#type = "eimn_l"
embed_dims = 64
depths = 1
mlp_ratios = 2.66
drop_rate = 0.0
drop_path_rate = 0.0
num_stages = 16
freeze_param = false
norm = "nn.BatchNorm2d"

esrgan

[network_g]
type = "esrgan"
num_in_ch = 3
num_out_ch = 3
num_feat = 64
num_block = 23
num_grow_ch = 32

grformer, grformer_medium, grformer_large

[network_g]
type = "grformer"
#type = "grformer_medium"
#type = "grformer_large"
img_size = 64
in_chans = 3
window_size = [8, 32]
embed_dim = 60
depths = [ 6, 6, 6, 6 ]
num_heads = [ 3, 3, 3, 3 ]
mlp_ratio = 2.0
qkv_bias = true
#qk_scale = # None
drop_rate = 0.0
attn_drop_rate = 0.0
drop_path_rate = 0.1
norm_layer = "nn.LayerNorm"
ape = false
patch_norm = true
img_range = 1.0

safmn, safmn_l, light_safmnpp

[network_g]
type = "safmn"
#type = "safmn_l"
#type = "light_safmnpp"
bcie = false
dim = 36
n_blocks = 8
ffn_scale = 2.0

span

[network_g]
type = "span"
num_in_ch = 3
num_out_ch = 3
feature_channels = 48
bias = true
norm = false
img_range = 255 # only applied if norm = true
rgb_mean = [ 0.4488, 0.4371, 0.4040 ] # only applied if norm = true

[!NOTE] By default, SPAN is not fully compatible with the original implementation. To make it compatible, use norm = true. rgb_mean has been modified to neutral values, to improve stability. If inference is done using ImageNet values, it may cause different results.


spanplus, spanplus_sts, spanplus_s, spanplus_st

[network_g]
type = "spanplus"
#type = "spanplus_sts"
#type = "spanplus_s"
#type = "spanplus_st"
num_in_ch = 3
num_out_ch = 3
blocks = [4]
feature_channels = 48
drop_rate = 0.0
upsampler = "dys" # "lp", "ps", "conv" - only 1x

srformer_light, srformer_medium

[network_g]
type = "srformer_light"
#type = "srformer_medium"
window_size = 16
img_size = 64
patch_size = 1.0
in_chans = 3
embed_dim = 60
depths = [ 6, 6, 6, 6 ]
num_heads = [ 6, 6, 6, 6 ]
mlp_ratio = 2.0
qkv_bias = true
#qk_scale = # None
drop_rate = 0.0
attn_drop_rate = 0.0
drop_path_rate = 0.1
ape = false
patch_norm = true
use_checkpoint = false
img_range = 1.0
upsampler = "pixelshuffledirect"
resi_connection = "1conv"

[!NOTE] rgb_mean has been modified to neutral values, to improve stability. If inference is done using ImageNet values, it may cause different results.


swinir_small, swinir_medium, swinir_large

[network_g]
type = "swinir_small"
#type = "swinir_medium"
#type = "swinir_large"
flash_attn = false
window_size = 8
img_size = 32
patch_size = 1
in_chans = 3
embed_dim = 60
depths = [ 6, 6, 6, 6 ]
num_heads = [ 6, 6, 6, 6 ]
mlp_ratio = 2
qkv_bias = true
#qk_scale = # None
drop_rate = 0.0
attn_drop_rate = 0.0
drop_path_rate = 0.1
ape = false
patch_norm = true
use_checkpoint = false
img_range = 1.0
upsampler = "pixelshuffle"
resi_connection = "1conv"

[!NOTE] The option flash_attn = true makes it incompatible with the official implementation. rgb_mean has been modified to neutral values, to improve stability. If inference is done using ImageNet values, it may cause different results.