I'm quantizing the Swin transformer (static PTQ) using the following function:
def static_quantize(m, data_loader, num_calibration_batches=100):
    """Statically quantize ``m`` in place (eager-mode post-training quantization).

    The model is switched to eval mode, given the default ``qnnpack`` qconfig,
    prepared, run on a bounded number of calibration batches, and finally
    converted.

    Args:
        m: The model to quantize. It is modified in place and must accept
            ``m(return_loss=False, **batch)``.
        data_loader: Iterable yielding calibration batches; each batch is
            unpacked as keyword arguments for the model.
        num_calibration_batches: Maximum number of batches fed to the model
            during calibration. ``None`` consumes the whole loader.
            Defaults to 100.

    Returns:
        The same ``m`` object, after conversion.
    """
    # Local import so the function carries its own dependency.
    from itertools import islice

    backend = 'qnnpack'
    torch.backends.quantized.engine = backend
    m.eval()
    m.qconfig = torch.quantization.get_default_qconfig(backend)
    torch.quantization.prepare(m, inplace=True)

    with torch.no_grad():
        # islice stops after the requested number of batches without pulling
        # an extra one from the loader; the outputs themselves are not needed.
        for data in islice(data_loader, num_calibration_batches):
            m(return_loss=False, **data)

    torch.quantization.convert(m, inplace=True)
    return m
Most modules, including linear layers, do get quantized. However, some linear layers of a SwinBlock are skipped, as you can see here:
(3): SwinBlockSequence(
  (blocks): ModuleList(
    (0): SwinBlock(
      (quant): Quantize(scale=tensor([0.3938]), zero_point=tensor([122]), dtype=torch.quint8)
      (dequant): DeQuantize()
      (norm1): QuantizedLayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (attn): ShiftWindowMSA(
        (w_msa): WindowMSA(
          (quant): Quantize(scale=tensor([0.0294]), zero_point=tensor([155]), dtype=torch.quint8)
          (dequant): DeQuantize()
          (qkv): QuantizedLinear(in_features=768, out_features=2304, scale=0.039033032953739166, zero_point=133, qscheme=torch.per_tensor_affine)
          (attn_drop): Dropout(p=0, inplace=False)
          (proj): QuantizedLinear(in_features=768, out_features=768, scale=0.0369536317884922, zero_point=110, qscheme=torch.per_tensor_affine)
          (proj_drop): Dropout(p=0, inplace=False)
          (softmax): Softmax(dim=-1)
        )
        (drop): DropPath()
      )
      (norm2): QuantizedLayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (ffn): FFN( // <------- HERE (children not quantized)
        (activate): GELU()
        (layers): Sequential(
          (0): Sequential(
            (0): Linear(in_features=768, out_features=3072, bias=True)
            (1): GELU()
            (2): Dropout(p=0, inplace=False)
          )
          (1): Linear(in_features=3072, out_features=768, bias=True)
          (2): Dropout(p=0, inplace=False)
        )
        (dropout_layer): DropPath()
      )
    )
I am referring to the FFN submodule, where nothing is quantized. However, it contains linear layers, which ought to pose no problems for quantization.
Here's how FFN is added to the module:
        # Default FFN settings for this block. `**ffn_cfgs` is unpacked last,
        # so any key it supplies overrides the corresponding default above it.
        _ffn_cfgs = {
            'embed_dims': embed_dims,
            # Hidden width: `embed_dims` scaled by `ffn_ratio`, truncated to int.
            'feedforward_channels': int(embed_dims * ffn_ratio),
            'num_fcs': 2,
            'ffn_drop': 0,
            'dropout_layer': dict(type='DropPath', drop_prob=drop_path),
            'act_cfg': dict(type='GELU'),
            **ffn_cfgs
        }
        # Only element [1] of the `build_norm_layer` result is kept
        # (presumably the layer module itself -- confirm against its return value).
        self.norm2 = build_norm_layer(norm_cfg, embed_dims)[1]
        self.ffn = FFN(**_ffn_cfgs)
Here's the source code for FFN:
@FEEDFORWARD_NETWORK.register_module()
class FFN(BaseModule):
    """Feed-forward network (FFN) with an optional identity shortcut.

    The stack consists of ``num_fcs - 1`` hidden stages, each being
    ``Linear -> activation -> Dropout``, followed by an output ``Linear``
    back to ``embed_dims`` and a final ``Dropout``.

    Args:
        embed_dims (int): Feature dimension of the input and the output.
            Default: 256.
        feedforward_channels (int): Hidden dimension of the FFN.
            Default: 1024.
        num_fcs (int, optional): Number of fully-connected layers; must be
            at least 2. Default: 2.
        act_cfg (dict, optional): Config handed to
            ``build_activation_layer``.
            Default: dict(type='ReLU', inplace=True).
        ffn_drop (float, optional): Probability used for every
            ``nn.Dropout`` in the stack. Default: 0.0.
        dropout_layer (dict, optional): Config handed to ``build_dropout``
            for the layer applied to the FFN output before the shortcut is
            added. A falsy value selects ``torch.nn.Identity``.
            Default: None.
        add_identity (bool, optional): Whether to add the identity
            connection. Default: True.
        init_cfg (dict, optional): Initialization config forwarded to the
            base class constructor. Default: None.
        **kwargs: Accepted but not used by the constructor body.
    """

    @deprecated_api_warning(
        {
            'dropout': 'ffn_drop',
            'add_residual': 'add_identity'
        },
        cls_name='FFN')
    def __init__(self,
                 embed_dims=256,
                 feedforward_channels=1024,
                 num_fcs=2,
                 act_cfg=dict(type='ReLU', inplace=True),
                 ffn_drop=0.,
                 dropout_layer=None,
                 add_identity=True,
                 init_cfg=None,
                 **kwargs):
        super().__init__(init_cfg)
        assert num_fcs >= 2, \
            f'num_fcs should be no less than 2. got {num_fcs}.'

        self.embed_dims = embed_dims
        self.feedforward_channels = feedforward_channels
        self.num_fcs = num_fcs
        self.act_cfg = act_cfg
        self.activate = build_activation_layer(act_cfg)

        # Input width of each hidden stage: the first one sees `embed_dims`,
        # every later one sees `feedforward_channels`.
        stage_inputs = (embed_dims if idx == 0 else feedforward_channels
                        for idx in range(num_fcs - 1))

        # NOTE(review): `Linear` is whatever this module imported under that
        # name (import not shown here). If it is a wrapper subclass rather
        # than `torch.nn.Linear` itself, eager-mode quantization may leave
        # these layers as float -- confirm before relying on quantization.
        stack = [
            Sequential(
                Linear(width, feedforward_channels), self.activate,
                nn.Dropout(ffn_drop))
            for width in stage_inputs
        ]
        # Output projection back to the embedding width, then dropout.
        stack.append(Linear(feedforward_channels, embed_dims))
        stack.append(nn.Dropout(ffn_drop))
        self.layers = Sequential(*stack)

        if dropout_layer:
            self.dropout_layer = build_dropout(dropout_layer)
        else:
            self.dropout_layer = torch.nn.Identity()
        self.add_identity = add_identity

    @deprecated_api_warning({'residual': 'identity'}, cls_name='FFN')
    def forward(self, x, identity=None):
        """Forward function for `FFN`.

        Runs ``x`` through the layer stack and the dropout layer. When
        ``add_identity`` is set, the result is added to ``identity``, or to
        ``x`` itself if ``identity`` is None.
        """
        out = self.dropout_layer(self.layers(x))
        if not self.add_identity:
            return out
        shortcut = x if identity is None else identity
        return shortcut + out
 
                        
The problem is very silly:
`Linear` in this case referred to an mmcv wrapper class for `nn.Linear`. Quantizing the wrapper class is not supported. By the looks of it (since I'm using PyTorch 1.8.1), this can be easily remedied by modifying the
`FFN` class to use `nn.Linear`.