@@ -72,10 +72,47 @@ def export(model: Union[torch.nn.Module, QuantizationSimModel],
            export_int32_bias: bool = True,
            **kwargs):
     """
-    Export QuantizationSimModel to onnx model with
-    QuantizeLinear/DequantizeLinear embedded in the graph.
-
-    This function takes set of same arguments as torch.onnx.export()
+    Export :class:`QuantizationSimModel` to an onnx model with
+    onnx `QuantizeLinear`_ and `DequantizeLinear`_ embedded in the graph.
+
+    This function takes the same set of arguments as `torch.onnx.export()`_.
+
+    Args:
+        model: The model to be exported
+        args: Same as `torch.onnx.export()`
+        f: Same as `torch.onnx.export()`
+        export_int32_bias (bool, optional):
+            If True, generate and export int32 bias encoding on the fly (default: `True`)
+        **kwargs: Same as `torch.onnx.export()`
+
+
+    .. note::
+        Unlike `torch.onnx.export()`, this function allows opset versions up to 21
+        to support 4/16-bit quantization, which is only available in opset 21.
+        However, exporting to opset 21 is a beta feature and not fully stable yet.
+        For robustness, opset 20 or lower is recommended whenever possible.
+
+    .. note::
+        Dynamo-based export (`dynamo=True`) is not supported yet.
+
+    .. _torch.onnx.export(): https://docs.pytorch.org/docs/stable/onnx_torchscript.html#torch.onnx.export
+    .. _QuantizeLinear: https://onnx.ai/onnx/operators/onnx__QuantizeLinear.html
+    .. _DequantizeLinear: https://onnx.ai/onnx/operators/onnx__DequantizeLinear.html
+
+    Examples:
+
+        >>> aimet_torch.onnx.export(sim.model, x, f="model.onnx",
+        ...                         input_names=["input"], output_names=["output"],
+        ...                         opset_version=21, export_int32_bias=True)
+        ...
+        >>> import onnxruntime as ort
+        >>> options = ort.SessionOptions()
+        >>> options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_DISABLE_ALL
+        >>> sess = ort.InferenceSession("model.onnx", sess_options=options)
+        >>> onnx_output, = sess.run(None, {"input": x.detach().numpy()})
+        >>> torch.nn.functional.cosine_similarity(torch.from_numpy(onnx_output), sim.model(x))
+        tensor([1.0000, 0.9999, 1.0000, ..., 1.0000, 1.0000, 1.0000],
+               grad_fn=<AliasBackward0>)
     """
     if isinstance(model, QuantizationSimModel):
         model = model.model
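
For context beyond the diff, here is a minimal end-to-end sketch of how this export entry point is typically reached. The surrounding calls (`QuantizationSimModel(...)`, `sim.compute_encodings(...)`) are assumptions based on the aimet_torch quantsim API and are not part of this change; check your installed version's documentation for the exact signatures.

```python
# Minimal usage sketch (assumed API, not part of this diff): build a toy
# QuantizationSimModel, calibrate it, then export with QuantizeLinear /
# DequantizeLinear ops embedded in the onnx graph.
import torch
from aimet_torch.quantsim import QuantizationSimModel
import aimet_torch.onnx

model = torch.nn.Sequential(torch.nn.Linear(16, 16), torch.nn.ReLU()).eval()
x = torch.randn(1, 16)

sim = QuantizationSimModel(model, dummy_input=x)

# Calibrate quantizer encodings with representative data; the two-argument
# callback form shown here follows the classic API and may differ across
# aimet_torch releases.
sim.compute_encodings(lambda m, _: m(x), None)

# Export with int32 bias encodings generated on the fly. Opset 20 is chosen
# because opset 21 export is still a beta feature per the docstring above.
aimet_torch.onnx.export(sim.model, x, f="model.onnx",
                        input_names=["input"], output_names=["output"],
                        opset_version=20, export_int32_bias=True)
```

The exported "model.onnx" can then be validated against `sim.model` with onnxruntime, as in the docstring's own example.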