Source code for megengine.module.quantized.linear

import numpy as np

from ... import functional as F
from ... import module as Float
from ...core.tensor import dtype
from ...tensor import Parameter
from ..qat import linear as QAT
from .module import QuantizedModule


[docs]class Linear(QuantizedModule): r"""Quantized version of :class:`~.qat.Linear`.""" def __init__(self, dtype: np.dtype = None, **kwargs): super().__init__(**kwargs) self.weight = None self.bias = None self.output_dtype = dtype def calc_linear_quantized(self, inp, nonlinear_mode="identity"): if self.training: raise ValueError("quantized module only support inference.") assert nonlinear_mode in ["identity", "relu"] inp_scale = dtype.get_scale(inp.dtype) w_scale = dtype.get_scale(self.weight.dtype) bias_dtype = dtype.qint32(inp_scale * w_scale) ret = F.linear( inp, self.weight, None if self.bias is None else self.bias.astype(bias_dtype), ) ret = ret if self.output_dtype is None else ret.astype(self.output_dtype) if nonlinear_mode == "relu": ret = F.relu(ret) return ret def forward(self, inp): return self.calc_linear_quantized(inp)
[docs] @classmethod def from_qat_module(cls, qat_module: QAT.Linear): r""" Return a :class:`~.QuantizedModule` instance converted from a :class:`~.QATModule` instance. """ output_dtype = qat_module.get_activation_dtype() qmod = cls(dtype=output_dtype, name=qat_module.name) qmod.name = qat_module.name weight = qat_module.weight.astype(qat_module.get_weight_dtype()) qmod.weight = Parameter(weight.numpy(), name=qat_module.weight.name) if qat_module.bias is not None: qmod.bias = Parameter(qat_module.bias.numpy(), name=qat_module.bias.name) return qmod
class LinearRelu(Linear): r"""Quantized version of :class:`~.qat.LinearRelu`.""" def forward(self, inp): return self.calc_linear_quantized(inp, nonlinear_mode="relu")