class GroupNorm(nn.HybridBlock):
    """Group Normalization (GN) layer.

    If the batch size is small, it's better to use GroupNorm instead of
    BatchNorm. GroupNorm achieves good results even at small batch sizes.

    Reference: https://arxiv.org/pdf/1803.08494.pdf

    Parameters
    ----------
    num_channels : int
        Number of channels ``C`` of the input. The learnable scale and shift
        are stored with shape ``(1, C, 1, 1)`` so they broadcast over a
        4-D ``(N, C, H, W)`` input.
    num_groups : int, default 32
        Number of groups ``G`` the channels are split into. Must be positive
        and divide ``num_channels`` evenly.
    eps : float, default 1e-5
        Constant added to the variance before the square root to avoid
        division by zero.
    **kwargs
        Forwarded unchanged to the parent block constructor.

    Raises
    ------
    ValueError
        If ``num_groups`` is not positive or does not divide
        ``num_channels``.
    """

    def __init__(self, num_channels, num_groups=32, eps=1e-5, **kwargs):
        # Validate with a real exception (asserts vanish under ``python -O``)
        # and do it before any parameter is registered.
        if num_groups <= 0 or num_channels % num_groups != 0:
            raise ValueError(
                'num_channels (%d) must be divisible by a positive '
                'num_groups (%d)' % (num_channels, num_groups))
        super(GroupNorm, self).__init__(**kwargs)
        with self.name_scope():
            # Scale starts at 1 and shift at 0 so the freshly initialized
            # layer is a pure normalization, independent of whichever global
            # initializer the caller passes to ``initialize()``.
            self.weight = self.params.get('weight', grad_req='write',
                                          shape=(1, num_channels, 1, 1),
                                          init='ones')
            self.bias = self.params.get('bias', grad_req='write',
                                        shape=(1, num_channels, 1, 1),
                                        init='zeros')
        self.C = num_channels
        self.G = num_groups
        self.eps = eps

    def hybrid_forward(self, F, x, weight, bias):
        """Normalize ``x`` per group, then apply the learned scale and shift.

        Parameters
        ----------
        F : module
            Operator namespace supplied by the framework; every operator
            used here is looked up on it.
        x : tensor
            Input, expected to be laid out as ``(N, C, H, W)``.
        weight : tensor
            Per-channel scale of shape ``(1, C, 1, 1)``.
        bias : tensor
            Per-channel shift of shape ``(1, C, 1, 1)``.

        Returns
        -------
        tensor
            Normalized output with the same shape as ``x``.
        """
        # (N, C, H, W) -> (N, G, H*W*C//G): ``0`` keeps the batch axis and
        # ``-1`` folds everything belonging to one group into the last axis.
        grouped = F.reshape(x, (0, self.G, -1))
        mean = F.mean(grouped, axis=-1, keepdims=True)
        centered = F.broadcast_minus(grouped, mean)
        # Biased variance, computed from the already-centered values.
        var = F.mean(F.square(centered), axis=-1, keepdims=True)
        normalized = F.broadcast_div(centered, F.sqrt(var + self.eps))
        # (N, G, H*W*C//G) -> (N, C, H, W)
        normalized = F.reshape_like(normalized, x)
        return F.broadcast_add(F.broadcast_mul(normalized, weight), bias)