import numpy as np
from torch import nn
from torch.nn.utils.parametrizations import weight_norm


class MelganDiscriminator(nn.Module):
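    """MelGAN waveform discriminator.

    A stack of strided 1D convolutions that maps a raw waveform to a
    sequence of patch-wise real/fake scores. The intermediate feature
    maps of every layer are returned alongside the scores (e.g. for a
    feature-matching loss).
    """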
    def __init__(
        self,
        in_channels=1,
        out_channels=1,
        kernel_sizes=(5, 3),
        base_channels=16,
        max_channels=1024,
        downsample_factors=(4, 4, 4, 4),
        groups_denominator=4,
    ):
        super().__init__()
        self.layers = nn.ModuleList()

        layer_kernel_size = np.prod(kernel_sizes)
        layer_padding = (layer_kernel_size - 1) // 2

        # initial layer: reflection-padded conv that keeps the time resolution
        self.layers += [
            nn.Sequential(
                nn.ReflectionPad1d(layer_padding),
                weight_norm(nn.Conv1d(in_channels, base_channels, layer_kernel_size, stride=1)),
                nn.LeakyReLU(0.2, inplace=True),
            )
        ]

        # downsampling layers: strided, grouped convs that shrink the time axis
        layer_in_channels = base_channels
        for downsample_factor in downsample_factors:
            layer_out_channels = min(layer_in_channels * downsample_factor, max_channels)
            layer_kernel_size = downsample_factor * 10 + 1
            layer_padding = (layer_kernel_size - 1) // 2
            layer_groups = layer_in_channels // groups_denominator
            self.layers += [
                nn.Sequential(
                    weight_norm(
                        nn.Conv1d(
                            layer_in_channels,
                            layer_out_channels,
                            kernel_size=layer_kernel_size,
                            stride=downsample_factor,
                            padding=layer_padding,
                            groups=layer_groups,
                        )
                    ),
                    nn.LeakyReLU(0.2, inplace=True),
                )
            ]
            layer_in_channels = layer_out_channels

        # last 2 layers: stride-1 convs; the final conv emits the score map
        layer_padding1 = (kernel_sizes[0] - 1) // 2
        layer_padding2 = (kernel_sizes[1] - 1) // 2
        self.layers += [
            nn.Sequential(
                weight_norm(
                    nn.Conv1d(
                        layer_out_channels,
                        layer_out_channels,
                        kernel_size=kernel_sizes[0],
                        stride=1,
                        padding=layer_padding1,
                    )
                ),
                nn.LeakyReLU(0.2, inplace=True),
            ),
            weight_norm(
                nn.Conv1d(
                    layer_out_channels, out_channels, kernel_size=kernel_sizes[1], stride=1, padding=layer_padding2
                )
            ),
        ]

    def forward(self, x):
        feats = []
        for layer in self.layers:
            x = layer(x)
            feats.append(x)  # keep every intermediate activation
        return x, feats
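

# A minimal usage sketch (an assumption, not part of the original file):
# run a dummy batch of 1-channel audio through the discriminator and
# inspect the outputs. With the default downsample_factors=(4, 4, 4, 4),
# the time axis shrinks by a factor of 4 * 4 * 4 * 4 = 256.
if __name__ == "__main__":
    import torch

    disc = MelganDiscriminator()
    dummy_audio = torch.randn(2, 1, 16384)  # (batch, in_channels, samples)
    scores, feats = disc(dummy_audio)
    print(scores.shape)  # torch.Size([2, 1, 64]) for this input length
    print(len(feats))    # 7 feature maps: initial + 4 downsampling + last 2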