I'm quantizing the Swin transformer (static PTQ) using the following function:
def static_quantize(m, data_loader):
    backend = 'qnnpack'
    torch.backends.quantized.engine = backend
    m.eval()
    m.qconfig = torch.quantization.get_default_qconfig(backend)
    torch.quantization.prepare(m, inplace=True)
    with torch.no_grad():
        for i, data in enumerate(data_loader):
            if i >= 100:
                break
            result = m(return_loss=False, **data)
    torch.quantization.convert(m, inplace=True)
    return m
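For reference, a minimal sketch of how the function would be called; build_model and build_calib_loader are hypothetical stand-ins for however you construct the Swin model and the calibration loader in your own setup:

import torch

model = build_model(cfg)                 # hypothetical: however you build the Swin model
calib_loader = build_calib_loader(cfg)   # hypothetical: loader yielding the ~100 calibration batches

model = static_quantize(model, calib_loader)
torch.save(model.state_dict(), 'swin_int8.pth')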
Most modules, including linear layers, do get quantized. However, some linear layers of a SwinBlock are skipped, as you can see here:
(3): SwinBlockSequence(
  (blocks): ModuleList(
    (0): SwinBlock(
      (quant): Quantize(scale=tensor([0.3938]), zero_point=tensor([122]), dtype=torch.quint8)
      (dequant): DeQuantize()
      (norm1): QuantizedLayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (attn): ShiftWindowMSA(
        (w_msa): WindowMSA(
          (quant): Quantize(scale=tensor([0.0294]), zero_point=tensor([155]), dtype=torch.quint8)
          (dequant): DeQuantize()
          (qkv): QuantizedLinear(in_features=768, out_features=2304, scale=0.039033032953739166, zero_point=133, qscheme=torch.per_tensor_affine)
          (attn_drop): Dropout(p=0, inplace=False)
          (proj): QuantizedLinear(in_features=768, out_features=768, scale=0.0369536317884922, zero_point=110, qscheme=torch.per_tensor_affine)
          (proj_drop): Dropout(p=0, inplace=False)
          (softmax): Softmax(dim=-1)
        )
        (drop): DropPath()
      )
      (norm2): QuantizedLayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (ffn): FFN( // <------- HERE (children not quantized)
        (activate): GELU()
        (layers): Sequential(
          (0): Sequential(
            (0): Linear(in_features=768, out_features=3072, bias=True)
            (1): GELU()
            (2): Dropout(p=0, inplace=False)
          )
          (1): Linear(in_features=3072, out_features=768, bias=True)
          (2): Dropout(p=0, inplace=False)
        )
        (dropout_layer): DropPath()
      )
    )
I am referring to the FFN submodule, none of whose children get quantized, even though it contains ordinary linear layers, which ought to pose no problem for quantization.
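As a quick sanity check (just a sketch, not part of the model code), any float nn.Linear that survives conversion can be listed like this; the FFN layers show up while the attention layers do not:

import torch.nn as nn

def list_unquantized_linears(model):
    # After torch.quantization.convert, quantized layers become
    # torch.nn.quantized.Linear (not a subclass of nn.Linear),
    # so anything matching here was skipped by the converter.
    for name, mod in model.named_modules():
        if isinstance(mod, nn.Linear):
            print(name, type(mod))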
Here's how FFN is added to the module:
_ffn_cfgs = {
    'embed_dims': embed_dims,
    'feedforward_channels': int(embed_dims * ffn_ratio),
    'num_fcs': 2,
    'ffn_drop': 0,
    'dropout_layer': dict(type='DropPath', drop_prob=drop_path),
    'act_cfg': dict(type='GELU'),
    **ffn_cfgs
}
self.norm2 = build_norm_layer(norm_cfg, embed_dims)[1]
self.ffn = FFN(**_ffn_cfgs)
Here's the source code for FFN:
@FEEDFORWARD_NETWORK.register_module()
class FFN(BaseModule):
    """Implements feed-forward networks (FFNs) with identity connection.

    Args:
        embed_dims (int): The feature dimension. Same as
            `MultiheadAttention`. Defaults: 256.
        feedforward_channels (int): The hidden dimension of FFNs.
            Defaults: 1024.
        num_fcs (int, optional): The number of fully-connected layers in
            FFNs. Default: 2.
        act_cfg (dict, optional): The activation config for FFNs.
            Default: dict(type='ReLU')
        ffn_drop (float, optional): Probability of an element to be
            zeroed in FFN. Default 0.0.
        add_identity (bool, optional): Whether to add the
            identity connection. Default: `True`.
        dropout_layer (obj:`ConfigDict`): The dropout_layer used
            when adding the shortcut.
        init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization.
            Default: None.
    """

    @deprecated_api_warning(
        {
            'dropout': 'ffn_drop',
            'add_residual': 'add_identity'
        },
        cls_name='FFN')
    def __init__(self,
                 embed_dims=256,
                 feedforward_channels=1024,
                 num_fcs=2,
                 act_cfg=dict(type='ReLU', inplace=True),
                 ffn_drop=0.,
                 dropout_layer=None,
                 add_identity=True,
                 init_cfg=None,
                 **kwargs):
        super().__init__(init_cfg)
        assert num_fcs >= 2, 'num_fcs should be no less ' \
            f'than 2. got {num_fcs}.'
        self.embed_dims = embed_dims
        self.feedforward_channels = feedforward_channels
        self.num_fcs = num_fcs
        self.act_cfg = act_cfg
        self.activate = build_activation_layer(act_cfg)

        layers = []
        in_channels = embed_dims
        for _ in range(num_fcs - 1):
            layers.append(
                Sequential(
                    Linear(in_channels, feedforward_channels), self.activate,
                    nn.Dropout(ffn_drop)))
            in_channels = feedforward_channels
        layers.append(Linear(feedforward_channels, embed_dims))
        layers.append(nn.Dropout(ffn_drop))
        self.layers = Sequential(*layers)
        self.dropout_layer = build_dropout(
            dropout_layer) if dropout_layer else torch.nn.Identity()
        self.add_identity = add_identity

    @deprecated_api_warning({'residual': 'identity'}, cls_name='FFN')
    def forward(self, x, identity=None):
        """Forward function for `FFN`.

        The function would add x to the output tensor if residue is None.
        """
        out = self.layers(x)
        if not self.add_identity:
            return self.dropout_layer(out)
        if identity is None:
            identity = x
        return identity + self.dropout_layer(out)
The problem is very silly: Linear in this case referred to an mmcv wrapper class for nn.Linear, and quantizing the wrapper class is not supported. By the looks of it (since I'm using PyTorch 1.8.1), this can be easily remedied by modifying the FFN class to use nn.Linear instead.
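For completeness, a minimal sketch of that workaround, under the assumption (true for mmcv's wrapper) that the wrapper is a thin subclass of nn.Linear exposing the same weight and bias attributes. Eager-mode convert() looks each module up by its exact type, which is why the subclass gets skipped; swapping the instances for plain nn.Linear before calling static_quantize avoids editing mmcv itself:

import torch.nn as nn

def unwrap_linears(module):
    # Replace any nn.Linear *subclass* (e.g. mmcv's Linear wrapper)
    # with a plain nn.Linear sharing the same parameters, so that
    # torch.quantization.convert recognizes it by exact type.
    for name, child in module.named_children():
        if isinstance(child, nn.Linear) and type(child) is not nn.Linear:
            replacement = nn.Linear(child.in_features, child.out_features,
                                    bias=child.bias is not None)
            replacement.weight = child.weight
            replacement.bias = child.bias
            setattr(module, name, replacement)
        else:
            unwrap_linears(child)

# Called before quantization, e.g.:
# unwrap_linears(model)
# model = static_quantize(model, calib_loader)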