
Commit 770d1a6

michaelgtuttle authored and quic-mtuttle committed
Pass providers directly in aimet-onnx QuantSim init
Signed-off-by: Michael Tuttle <quic_mtuttle@quicinc.com>
Co-authored-by: Michael Tuttle <quic_mtuttle@quicinc.com>
1 parent 27a52e0 commit 770d1a6

11 files changed: +145, -74 lines

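In short, the commit swaps the boolean `use_cuda`/`device` pair in the aimet-onnx `QuantizationSimModel` constructor for an explicit onnxruntime `providers` argument. A minimal before/after sketch of a call site (the `model` and `dummy_input` values are placeholders, not taken from this commit):

    from aimet_onnx.quantsim import QuantizationSimModel

    # Before: device selection via use_cuda / device (now deprecated)
    sim = QuantizationSimModel(model, dummy_input=dummy_input, use_cuda=True, device=0)

    # After: pass onnxruntime execution providers directly
    sim = QuantizationSimModel(model, dummy_input=dummy_input,
                               providers=[('CUDAExecutionProvider', {'device_id': 0}),
                                          'CPUExecutionProvider'])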

Examples/onnx/quantization/AMP.ipynb

Lines changed: 2 additions & 4 deletions
@@ -235,10 +235,8 @@
 "# cudnn_conv_algo_search is fixing it to default to avoid changing in accuracies/outputs at every inference\n",
 "if 'CUDAExecutionProvider' in ort.get_available_providers():\n",
 "    providers = [('CUDAExecutionProvider', {'cudnn_conv_algo_search': 'DEFAULT'}), 'CPUExecutionProvider']\n",
-"    use_cuda = True\n",
 "else:\n",
-"    providers = ['CPUExecutionProvider']\n",
-"    use_cuda = False"
+"    providers = ['CPUExecutionProvider']"
 ]
 },
 {
@@ -341,7 +339,7 @@
 "    quant_scheme=QuantScheme.post_training_tf_enhanced,\n",
 "    default_activation_bw=8,\n",
 "    default_param_bw=8,\n",
-"    use_cuda=use_cuda)"
+"    providers=providers)"
 ]
 },
 {
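Stripped of the notebook JSON escaping, the updated cell reduces to the provider-selection pattern below (the same pattern recurs in adaround.ipynb and cle.ipynb):

    import onnxruntime as ort

    # Pin cudnn_conv_algo_search to DEFAULT so accuracies/outputs do not change between inferences
    if 'CUDAExecutionProvider' in ort.get_available_providers():
        providers = [('CUDAExecutionProvider', {'cudnn_conv_algo_search': 'DEFAULT'}), 'CPUExecutionProvider']
    else:
        providers = ['CPUExecutionProvider']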

Examples/onnx/quantization/adaround.ipynb

Lines changed: 3 additions & 5 deletions
@@ -221,10 +221,8 @@
 "# cudnn_conv_algo_search is fixing it to default to avoid changing in accuracies/outputs at every inference\n",
 "if 'CUDAExecutionProvider' in ort.get_available_providers():\n",
 "    providers = [('CUDAExecutionProvider', {'cudnn_conv_algo_search': 'DEFAULT'}), 'CPUExecutionProvider']\n",
-"    use_cuda = True\n",
 "else:\n",
-"    providers = ['CPUExecutionProvider']\n",
-"    use_cuda = False"
+"    providers = ['CPUExecutionProvider']"
 ]
 },
 {
@@ -306,7 +304,7 @@
 "    quant_scheme=QuantScheme.post_training_tf_enhanced,\n",
 "    default_activation_bw=8,\n",
 "    default_param_bw=8,\n",
-"    use_cuda=use_cuda)"
+"    providers=providers)"
 ]
 },
 {
@@ -485,7 +483,7 @@
 "    quant_scheme=QuantScheme.post_training_tf_enhanced,\n",
 "    default_activation_bw=8,\n",
 "    default_param_bw=8,\n",
-"    use_cuda=use_cuda)\n",
+"    providers=providers)\n",
 "\n",
 "sim.set_and_freeze_param_encodings(encoding_path=os.path.join(\"output\", 'adaround.encodings'))\n",
 "\n",

Examples/onnx/quantization/cle.ipynb

Lines changed: 3 additions & 5 deletions
@@ -256,10 +256,8 @@
 "# cudnn_conv_algo_search is fixing it to default to avoid changing in accuracies/outputs at every inference\n",
 "if 'CUDAExecutionProvider' in ort.get_available_providers():\n",
 "    providers = [('CUDAExecutionProvider', {'cudnn_conv_algo_search': 'DEFAULT'}), 'CPUExecutionProvider']\n",
-"    use_cuda = True\n",
 "else:\n",
-"    providers = ['CPUExecutionProvider']\n",
-"    use_cuda = False"
+"    providers = ['CPUExecutionProvider']"
 ]
 },
 {
@@ -355,7 +353,7 @@
 "    quant_scheme=QuantScheme.post_training_tf_enhanced,\n",
 "    default_activation_bw=8,\n",
 "    default_param_bw=8,\n",
-"    use_cuda=use_cuda)"
+"    providers=providers)"
 ]
 },
 {
@@ -556,7 +554,7 @@
 "    quant_scheme=QuantScheme.post_training_tf_enhanced,\n",
 "    default_activation_bw=8,\n",
 "    default_param_bw=8,\n",
-"    use_cuda=use_cuda)\n",
+"    providers=providers)\n",
 "\n",
 "sim.compute_encodings(forward_pass_callback=pass_calibration_data,\n",
 "    forward_pass_callback_args=1000)\n",

Examples/onnx/quantization/quantsim.ipynb

Lines changed: 3 additions & 5 deletions
@@ -248,11 +248,9 @@
 "source": [
 "# cudnn_conv_algo_search is fixing it to default to avoid changing in accuracies/outputs at every inference\n",
 "if 'CUDAExecutionProvider' in ort.get_available_providers():\n",
-"    providers = [('CUDAExecutionProvider', {'cudnn_conv_algo_search': 'DEFAULT'}), 'CPUExecutionProvider']\n",
-"    use_cuda = True\n",
+"    providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']\n",
 "else:\n",
-"    providers = ['CPUExecutionProvider']\n",
-"    use_cuda = False"
+"    providers = ['CPUExecutionProvider']"
 ]
 },
 {
@@ -353,7 +351,7 @@
 "    quant_scheme=QuantScheme.post_training_tf_enhanced,\n",
 "    default_activation_bw=8,\n",
 "    default_param_bw=8,\n",
-"    use_cuda=use_cuda)"
+"    providers=providers)"
 ]
 },
 {

TrainingExtensions/onnx/src/python/aimet_onnx/quantsim.py

Lines changed: 64 additions & 21 deletions
@@ -41,7 +41,7 @@
 import tempfile
 from pathlib import Path
 import os
-from typing import Any, Callable, Dict, List, Optional, overload, Tuple, TypeVar, Union
+from typing import Any, Callable, Dict, List, Optional, overload, Tuple, TypeVar, Union, Sequence
 import itertools
 import json
 import warnings
@@ -160,39 +160,92 @@ class QuantizationSimModel:
     :param model: ONNX model
     :param dummy_input: Dummy input to the model. If None, will attempt to auto-generate a dummy input
     :param quant_scheme: Quantization scheme (e.g. QuantScheme.post_training_tf)
-    :param rounding_mode: Rounding mode (e.g. nearest)
+    :param rounding_mode: Deprecated
     :param default_param_bw: Quantization bitwidth for parameter
     :param default_activation_bw: Quantization bitwidth for activation
-    :param use_symmetric_encodings: True if symmetric encoding is used. False otherwise.
-    :param use_cuda: True if using CUDA to run quantization op. False otherwise.
+    :param use_symmetric_encodings: Deprecated, symmetry is controlled by the config_file
+    :param use_cuda: Deprecated, use `providers` instead
     :param config_file: File path or alias of the configuration file.
         Alias can be one of {{ {', '.join(_config_file_aliases.keys())} }} (Default: `"default"`)
     :param default_data_type: Default data type to use for quantizing all layer inputs, outputs and parameters.
         Possible options are QuantizationDataType.int and QuantizationDataType.float.
         Note that the mode default_data_type=QuantizationDataType.float is only supported with
         default_output_bw=16 and default_param_bw=16
     :param user_onnx_libs: List of paths to all compiled ONNX custom ops libraries
+    :param providers: Onnxruntime execution providers to use when building InferenceSession.
+        If `None`, falls back to `onnxruntime.get_available_providers()`
     :param path: Directory to save the artifacts.
     """
 
     def __init__(self,
                  model: Union[ModelProto, ONNXModel],
-                 dummy_input: Dict[str, np.ndarray] = None,
+                 dummy_input: Optional[Dict[str, np.ndarray]] = None,
                  quant_scheme: QuantScheme = QuantScheme.min_max,
-                 rounding_mode: str = 'nearest',
+                 rounding_mode: str = None, # Deprecated
                  default_param_bw: int = 8,
                  default_activation_bw: int = 8,
-                 use_symmetric_encodings: bool = False, use_cuda: bool = True,
-                 device: int = 0, config_file: str = None,
+                 use_symmetric_encodings: bool = None, # Deprecated
+                 use_cuda: bool = None, # Deprecated
+                 device: int = None, # Deprecated
+                 config_file: Optional[str] = None,
                  default_data_type: QuantizationDataType = QuantizationDataType.int,
-                 user_onnx_libs: List[str] = None, path: str = None):
+                 user_onnx_libs: List[str] = None,
+                 providers: Optional[Sequence[str | Tuple[str, Dict[Any, Any]]]] = None,
+                 path: Optional[str] = None):
+        # pylint: disable = too-many-branches, too-many-statements
+        if rounding_mode is not None:
+            if rounding_mode == 'nearest':
+                warnings.warn(_red("Passing rounding_mode='nearest' is no longer needed " \
+                                   "and will be deprecated soon in the later versions."),
+                              DeprecationWarning, stacklevel=2)
+            else:
+                raise TypeError("'rounding_mode' parameter is no longer supported.")
+
+        if use_symmetric_encodings is not None:
+            warnings.warn(_red("Passing `use_symmetric_encodings` is not needed and will be deprecated in later versions."),
+                          DeprecationWarning, stacklevel=2)
+
+        if device is not None:
+            warnings.warn(_red("Passing `device` will be deprecated in later versions. " \
+                               "Please use the `providers` argument instead to specify cuda device."),
+                          DeprecationWarning, stacklevel=2)
+            if providers is not None:
+                raise RuntimeError("Cannot provide `device` and `providers` at the same time.")
+
+        if use_cuda is not None:
+            warnings.warn(_red("Passing `use_cuda` will be deprecated in later versions. " \
+                               "Please use the `providers` argument instead."),
+                          DeprecationWarning, stacklevel=2)
+            if providers is not None:
+                raise RuntimeError("Cannot provide `use_cuda` and `providers` at the same time.")
+
+            # Legacy behavior of use_cuda
+            if "CUDAExecutionProvider" not in ort.get_available_providers():
+                use_cuda = False
+
+            device = device or 0
+            if use_cuda:
+                providers = [('CUDAExecutionProvider', {'device_id': device}), 'CPUExecutionProvider']
+            else:
+                providers = ['CPUExecutionProvider']
+
+        if not providers:
+            providers = ort.get_available_providers()
+
         if isinstance(quant_scheme, str):
             quant_scheme = QuantScheme.from_str(quant_scheme)
 
         if isinstance(model, ModelProto):
             model = ONNXModel(model)
 
+        op_domain = "aimet.customop.cpu"
+        for provider in providers:
+            if provider == "CUDAExecutionProvider" or provider[0] == "CUDAExecutionProvider":
+                op_domain = "aimet.customop.cuda"
+
         self.model = model
+        self._op_domain = op_domain
+        self.providers = providers
 
         if not dummy_input:
             dummy_input = make_dummy_input(self.model.model)
@@ -204,16 +257,6 @@ def __init__(self,
         self._default_param_bw = default_param_bw
         self._default_activation_bw = default_activation_bw
         self._default_quantization_data_type = default_data_type
-        self._use_symmetric_encodings = use_symmetric_encodings
-        self._use_cuda = use_cuda
-        if 'CUDAExecutionProvider' not in ort.get_available_providers():
-            self._use_cuda = False
-        if self._use_cuda:
-            self._op_domain = "aimet.customop.cuda"
-            self.providers = [('CUDAExecutionProvider', {'device_id': device, 'cudnn_conv_algo_search': 'DEFAULT'}), 'CPUExecutionProvider']
-        else:
-            self._op_domain = "aimet.customop.cpu"
-            self.providers = ['CPUExecutionProvider']
         self._user_onnx_libs = user_onnx_libs
         self.param_names = []
         self.input_quantizers_name = []
@@ -465,7 +508,7 @@ def _insert_param_quantization_nodes(self):
                 rounding_mode=self._rounding_mode,
                 op_mode=OpMode.oneShotQuantizeDequantize,
                 bitwidth=self._default_param_bw,
-                use_symmetric_encodings=self._use_symmetric_encodings,
+                use_symmetric_encodings=False,
                 tensor_quantizer_params=tensor_quantizer_params)
 
     def _create_quant_info_object_for_param(self, param_name: str):
@@ -533,7 +576,7 @@ def _insert_activation_quantization_nodes(self):
                 rounding_mode=self._rounding_mode,
                 op_mode=OpMode.updateStats,
                 bitwidth=self._default_activation_bw,
-                use_symmetric_encodings=self._use_symmetric_encodings)
+                use_symmetric_encodings=False)
 
     @staticmethod
     def build_session(model: onnx.ModelProto, providers: List, user_onnx_libs: List[str] = None, path: str = None):
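The new branches keep the legacy arguments working while steering callers toward `providers`; note the op-domain loop accepts providers given either as plain strings or as (name, options) tuples. A sketch of the resulting constructor behavior, assuming placeholder `model` and `dummy_input` values:

    import warnings
    from aimet_onnx.quantsim import QuantizationSimModel

    # Deprecated arguments still work but emit a DeprecationWarning
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        sim = QuantizationSimModel(model, dummy_input=dummy_input, use_cuda=True)
    assert any(issubclass(w.category, DeprecationWarning) for w in caught)

    # Mixing old and new styles raises immediately
    try:
        QuantizationSimModel(model, dummy_input=dummy_input,
                             use_cuda=True, providers=['CPUExecutionProvider'])
    except RuntimeError:
        pass  # "Cannot provide `use_cuda` and `providers` at the same time."

    # With no provider-related arguments, the constructor falls back to
    # onnxruntime.get_available_providers()
    sim = QuantizationSimModel(model, dummy_input=dummy_input)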

TrainingExtensions/onnx/test/python/test_auto_quant_v2.py

Lines changed: 9 additions & 4 deletions
@@ -249,11 +249,16 @@ def __init__(self,
                  rounding_mode: str = 'nearest',
                  default_param_bw: int = 8,
                  default_activation_bw: int = 8,
-                 use_symmetric_encodings: bool = False, use_cuda: bool = True,
-                 device: int = 0, config_file: str = None, default_data_type: QuantizationDataType = QuantizationDataType.int,
-                 user_onnx_libs: List[str] = None):
+                 use_symmetric_encodings: bool = None, # Deprecated
+                 use_cuda: bool = None, # Deprecated
+                 device: int = None, # Deprecated
+                 config_file = None,
+                 default_data_type: QuantizationDataType = QuantizationDataType.int,
+                 user_onnx_libs: List[str] = None,
+                 providers = None,
+                 path = None):
         super(_QuantizationSimModel, self).__init__(model, dummy_input, quant_scheme, rounding_mode, default_param_bw, default_activation_bw,
-                                                    use_symmetric_encodings, use_cuda, device, config_file, default_data_type, user_onnx_libs)
+                                                    use_symmetric_encodings, use_cuda, device, config_file, default_data_type, user_onnx_libs, providers, path)
 
         self.session = {'applied_bn_folding': getattr(model, 'applied_bn_folding'),
                         'applied_cle': getattr(model, 'applied_cle'),

TrainingExtensions/onnx/test/python/test_auto_quant_v2_with_amp.py

Lines changed: 9 additions & 4 deletions
@@ -288,11 +288,16 @@ def __init__(self,
                  rounding_mode: str = 'nearest',
                  default_param_bw: int = 8,
                  default_activation_bw: int = 8,
-                 use_symmetric_encodings: bool = False, use_cuda: bool = True,
-                 device: int = 0, config_file: str = None, default_data_type: QuantizationDataType = QuantizationDataType.int,
-                 user_onnx_libs: List[str] = None):
+                 use_symmetric_encodings: bool = None, # Deprecated
+                 use_cuda: bool = None, # Deprecated
+                 device: int = None, # Deprecated
+                 config_file = None,
+                 default_data_type: QuantizationDataType = QuantizationDataType.int,
+                 user_onnx_libs: List[str] = None,
+                 providers = None,
+                 path = None):
         super(_QuantizationSimModel, self).__init__(model, dummy_input, quant_scheme, rounding_mode, default_param_bw, default_activation_bw,
-                                                    use_symmetric_encodings, use_cuda, device, config_file, default_data_type, user_onnx_libs)
+                                                    use_symmetric_encodings, use_cuda, device, config_file, default_data_type, user_onnx_libs, providers, path)
 
         self.session = {'applied_bn_folding': getattr(model, 'applied_bn_folding'),
                         'applied_cle': getattr(model, 'applied_cle'),

TrainingExtensions/onnx/test/python/test_layer_output_utils.py

Lines changed: 1 addition & 1 deletion
@@ -71,7 +71,7 @@ def get_quantsim_artifacts():
     def callback(session, input_dict):
         session.run(None, input_dict)
 
-    quantsim = QuantizationSimModel(model=model, dummy_input=input_dict, use_cuda=False)
+    quantsim = QuantizationSimModel(model=model, dummy_input=input_dict, providers=["CPUExecutionProvider"])
     quantsim.compute_encodings(callback, input_dict)
 
     output_names = [node.name for node in quantsim.model.model.graph.input]
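For completeness, the stored providers list is what ultimately reaches onnxruntime when `build_session` creates the InferenceSession. A minimal sketch of that hand-off under the `build_session` signature shown earlier; the custom-op registration and session options are omitted, so this is an approximation rather than the library's actual implementation:

    import onnxruntime as ort

    def build_session_sketch(model_bytes: bytes, providers):
        # providers may mix plain strings and (name, options) tuples,
        # the same forms onnxruntime.InferenceSession accepts
        return ort.InferenceSession(model_bytes, providers=providers)

    session = build_session_sketch(quantsim.model.model.SerializeToString(),
                                   providers=['CPUExecutionProvider'])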
