I'm quantizing the Swin transformer (static PTQ) using the following function:
import torch

def static_quantize(m, data_loader):
    backend = 'qnnpack'
    torch.backends.quantized.engine = backend
    m.eval()
    m.qconfig = torch.quantization.get_default_qconfig(backend)
    torch.quantization.prepare(m, inplace=True)
    # Calibrate the observers on a subset of the data
    with torch.no_grad():
        for i, data in enumerate(data_loader):
            if i >= 100:
                break
            result = m(return_loss=False, **data)
    torch.quantization.convert(m, inplace=True)
    return m
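For reference, this is roughly how I call it; a minimal usage sketch where model and calib_loader are placeholder names for my model and calibration DataLoader, not part of the snippet above:

# Usage sketch -- `model` and `calib_loader` are placeholder names.
# Eager-mode quantized kernels (qnnpack/fbgemm) run on CPU only.
model = model.cpu().eval()
quantized_model = static_quantize(model, calib_loader)
print(quantized_model)  # inspect which submodules were converted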
Most modules, including linear layers, do get quantized. However, some linear layers of a SwinBlock are skipped, as you can see here:
(3): SwinBlockSequence(
  (blocks): ModuleList(
    (0): SwinBlock(
      (quant): Quantize(scale=tensor([0.3938]), zero_point=tensor([122]), dtype=torch.quint8)
      (dequant): DeQuantize()
      (norm1): QuantizedLayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (attn): ShiftWindowMSA(
        (w_msa): WindowMSA(
          (quant): Quantize(scale=tensor([0.0294]), zero_point=tensor([155]), dtype=torch.quint8)
          (dequant): DeQuantize()
          (qkv): QuantizedLinear(in_features=768, out_features=2304, scale=0.039033032953739166, zero_point=133, qscheme=torch.per_tensor_affine)
          (attn_drop): Dropout(p=0, inplace=False)
          (proj): QuantizedLinear(in_features=768, out_features=768, scale=0.0369536317884922, zero_point=110, qscheme=torch.per_tensor_affine)
          (proj_drop): Dropout(p=0, inplace=False)
          (softmax): Softmax(dim=-1)
        )
        (drop): DropPath()
      )
      (norm2): QuantizedLayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (ffn): FFN( // <------- HERE (children not quantized)
        (activate): GELU()
        (layers): Sequential(
          (0): Sequential(
            (0): Linear(in_features=768, out_features=3072, bias=True)
            (1): GELU()
            (2): Dropout(p=0, inplace=False)
          )
          (1): Linear(in_features=3072, out_features=768, bias=True)
          (2): Dropout(p=0, inplace=False)
        )
        (dropout_layer): DropPath()
      )
    )
I am referring to the FFN submodule, where nothing is quantized. However, it contains linear layers, which ought to pose no problems for quantization.
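As far as I understand, convert() only swaps a module whose exact class is a key in the default static-quantization module mapping, so a diagnostic like the following can reveal which linear layers fall outside that mapping. This is a sketch under the assumption that the mapping helper is importable from this path in PyTorch 1.8.x (it moved under torch.ao.quantization in later releases); report_unmapped_linears is just a name I made up:

import torch.nn as nn
from torch.quantization.quantization_mappings import \
    get_default_static_quant_module_mappings

def report_unmapped_linears(model):
    # convert() looks up type(module) in this mapping; wrapper classes or
    # subclasses that are not keys of the mapping are left untouched.
    mapping = get_default_static_quant_module_mappings()
    for name, mod in model.named_modules():
        if isinstance(mod, nn.Linear) and type(mod) not in mapping:
            print(f'{name}: {type(mod).__name__} will not be converted')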
Here's how FFN is added to the module:
_ffn_cfgs = {
    'embed_dims': embed_dims,
    'feedforward_channels': int(embed_dims * ffn_ratio),
    'num_fcs': 2,
    'ffn_drop': 0,
    'dropout_layer': dict(type='DropPath', drop_prob=drop_path),
    'act_cfg': dict(type='GELU'),
    **ffn_cfgs
}
self.norm2 = build_norm_layer(norm_cfg, embed_dims)[1]
self.ffn = FFN(**_ffn_cfgs)
Here's the source code for FFN:
@FEEDFORWARD_NETWORK.register_module()
class FFN(BaseModule):
    """Implements feed-forward networks (FFNs) with identity connection.

    Args:
        embed_dims (int): The feature dimension. Same as
            `MultiheadAttention`. Defaults: 256.
        feedforward_channels (int): The hidden dimension of FFNs.
            Defaults: 1024.
        num_fcs (int, optional): The number of fully-connected layers in
            FFNs. Default: 2.
        act_cfg (dict, optional): The activation config for FFNs.
            Default: dict(type='ReLU')
        ffn_drop (float, optional): Probability of an element to be
            zeroed in FFN. Default 0.0.
        add_identity (bool, optional): Whether to add the
            identity connection. Default: `True`.
        dropout_layer (obj:`ConfigDict`): The dropout_layer used
            when adding the shortcut.
        init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization.
            Default: None.
    """

    @deprecated_api_warning(
        {
            'dropout': 'ffn_drop',
            'add_residual': 'add_identity'
        },
        cls_name='FFN')
    def __init__(self,
                 embed_dims=256,
                 feedforward_channels=1024,
                 num_fcs=2,
                 act_cfg=dict(type='ReLU', inplace=True),
                 ffn_drop=0.,
                 dropout_layer=None,
                 add_identity=True,
                 init_cfg=None,
                 **kwargs):
        super().__init__(init_cfg)
        assert num_fcs >= 2, 'num_fcs should be no less ' \
            f'than 2. got {num_fcs}.'
        self.embed_dims = embed_dims
        self.feedforward_channels = feedforward_channels
        self.num_fcs = num_fcs
        self.act_cfg = act_cfg
        self.activate = build_activation_layer(act_cfg)

        layers = []
        in_channels = embed_dims
        for _ in range(num_fcs - 1):
            layers.append(
                Sequential(
                    Linear(in_channels, feedforward_channels), self.activate,
                    nn.Dropout(ffn_drop)))
            in_channels = feedforward_channels
        layers.append(Linear(feedforward_channels, embed_dims))
        layers.append(nn.Dropout(ffn_drop))
        self.layers = Sequential(*layers)
        self.dropout_layer = build_dropout(
            dropout_layer) if dropout_layer else torch.nn.Identity()
        self.add_identity = add_identity

    @deprecated_api_warning({'residual': 'identity'}, cls_name='FFN')
    def forward(self, x, identity=None):
        """Forward function for `FFN`.

        The function would add x to the output tensor if residue is None.
        """
        out = self.layers(x)
        if not self.add_identity:
            return self.dropout_layer(out)
        if identity is None:
            identity = x
        return identity + self.dropout_layer(out)
The problem is very silly: Linear in this case referred to an mmcv wrapper class for nn.Linear. Quantizing the wrapper class is not supported. By the looks of it (since I'm using PyTorch 1.8.1) this can be easily remedied by modifying the FFN class to use nn.Linear.
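If editing the FFN class is not convenient, swapping the wrapper modules for plain nn.Linear copies just before quantization should work equally well. A hedged sketch of that alternative; the helper name replace_mmcv_linears is mine, and the import path of the wrapper may differ between mmcv versions:

import torch.nn as nn
from mmcv.cnn.bricks.wrappers import Linear as MMCVLinear  # path may vary by mmcv version

def replace_mmcv_linears(module):
    # Recursively replace mmcv's Linear wrapper with an equivalent nn.Linear,
    # reusing the existing parameters, so that prepare()/convert() see the
    # exact class they know how to quantize.
    for name, child in module.named_children():
        if type(child) is MMCVLinear:
            new = nn.Linear(child.in_features, child.out_features,
                            bias=child.bias is not None)
            new.weight = child.weight
            new.bias = child.bias
            setattr(module, name, new)
        else:
            replace_mmcv_linears(child)
    return module

Calling replace_mmcv_linears(model) before static_quantize(model, data_loader) should leave the outputs unchanged, since (as far as I can tell) the wrapper only special-cases empty inputs and otherwise defers to nn.Linear.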