Hi team,
Recently I’ve been involved in a research project applying Concrete ML to a neural network model. When I tried to perform Post-Training Quantization, a ValueError occurred:
ValueError: matmul: Input operand 0 does not have enough dimensions (has 0, gufunc core with signature (n?,k),(k,m?)->(n?,m?) requires 1)
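If I read the NumPy error correctly, it means the first operand of a matmul is a 0-dimensional array (a scalar). Just to illustrate what I mean, this tiny standalone snippet (nothing to do with Concrete ML itself) raises the exact same error:

import numpy

a = numpy.array(2.0)    # 0-d array: a bare scalar with no dimensions
b = numpy.ones((3, 4))  # ordinary 2-d matrix

# Raises the same ValueError as above: operand 0 has 0 dimensions,
# but the matmul gufunc signature (n?,k),(k,m?)->(n?,m?) requires
# at least 1.
numpy.matmul(a, b)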
Here is the full traceback:
---> 13 quantized_model = compile_onnx_model(onnx_model, input_set, n_bits)
File ~/.local/lib/python3.10/site-packages/concrete/ml/torch/compile.py:371, in compile_onnx_model(onnx_model, torch_inputset, import_qat, configuration, artifacts, show_mlir, n_bits, rounding_threshold_bits, p_error, global_p_error, verbose, inputs_encryption_status, reduce_sum_copy)
364 onnx_model_opset_version = get_onnx_opset_version(onnx_model)
365 assert_true(
366 onnx_model_opset_version == OPSET_VERSION_FOR_ONNX_EXPORT,
367 f"ONNX version must be {OPSET_VERSION_FOR_ONNX_EXPORT} "
368 f"but it is {onnx_model_opset_version}",
369 )
--> 371 return _compile_torch_or_onnx_model(
372 onnx_model,
373 torch_inputset,
374 import_qat,
375 configuration=configuration,
376 artifacts=artifacts,
377 show_mlir=show_mlir,
378 n_bits=n_bits,
379 rounding_threshold_bits=rounding_threshold_bits,
380 p_error=p_error,
381 global_p_error=global_p_error,
382 verbose=verbose,
383 inputs_encryption_status=inputs_encryption_status,
384 reduce_sum_copy=reduce_sum_copy,
385 )
File ~/.local/lib/python3.10/site-packages/concrete/ml/torch/compile.py:187, in _compile_torch_or_onnx_model(model, torch_inputset, import_qat, configuration, artifacts, show_mlir, n_bits, rounding_threshold_bits, p_error, global_p_error, verbose, inputs_encryption_status, reduce_sum_copy)
182 inputset_as_numpy_tuple = tuple(
183 convert_torch_tensor_or_numpy_array_to_numpy_array(val) for val in to_tuple(torch_inputset)
184 )
186 # Build the quantized module
--> 187 quantized_module = build_quantized_module(
188 model=model,
189 torch_inputset=inputset_as_numpy_tuple,
190 import_qat=import_qat,
191 n_bits=n_bits,
192 rounding_threshold_bits=rounding_threshold_bits,
193 reduce_sum_copy=reduce_sum_copy,
194 )
196 # Check that p_error or global_p_error is not set in both the configuration and in the direct
197 # parameters
198 check_there_is_no_p_error_options_in_configuration(configuration)
File ~/.local/lib/python3.10/site-packages/concrete/ml/torch/compile.py:122, in build_quantized_module(model, torch_inputset, import_qat, n_bits, rounding_threshold_bits, reduce_sum_copy)
116 post_training_quant = post_training(n_bits, numpy_model, rounding_threshold_bits)
118 # Build the quantized module
119 # FIXME: mismatch here. We traced with dummy_input_for_tracing which made some operator
120 # only work over shape of (1, ., .). For example, some reshape have newshape hardcoded based
121 # on the inputset we sent in the NumpyModule.
--> 122 quantized_module = post_training_quant.quantize_module(*inputset_as_numpy_tuple)
123 # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/4127
124 if reduce_sum_copy:
File ~/.local/lib/python3.10/site-packages/concrete/ml/quantization/post_training.py:602, in ONNXConverter.quantize_module(self, *calibration_data)
599 # First transform all parameters to their quantized version
600 self._quantize_params()
--> 602 self._quantize_layers(*calibration_data)
604 # Create quantized module from self.quant_layers_dict
605 quantized_module = QuantizedModule(
606 ordered_module_input_names=(
607 graph_input.name for graph_input in self.numpy_model.onnx_model.graph.input
(...)
613 onnx_model=self.numpy_model.onnx_model,
614 )
File ~/.local/lib/python3.10/site-packages/concrete/ml/quantization/post_training.py:529, in ONNXConverter._quantize_layers(self, *input_calibration_data)
520 self.quant_ops_dict[output_name] = (
521 tuple(variable_input_names),
522 quantized_op_instance,
523 )
525 layer_quant = list(
526 node_override_quantizer.get(input_name, None)
527 for input_name in variable_input_names
528 )
--> 529 output_calibration_data, layer_quantizer = self._process_layer(
530 quantized_op_instance, *curr_calibration_data, quantizers=layer_quant
531 )
532 node_results[output_name] = output_calibration_data
533 node_override_quantizer[output_name] = layer_quantizer
File ~/.local/lib/python3.10/site-packages/concrete/ml/quantization/post_training.py:900, in PostTrainingQATImporter._process_layer(self, quantized_op, quantizers, *calibration_data)
879 def _process_layer(
880 self,
881 quantized_op: QuantizedOp,
882 *calibration_data: numpy.ndarray,
883 quantizers: List[Optional[UniformQuantizer]],
884 ) -> Tuple[numpy.ndarray, Optional[UniformQuantizer]]:
885 """Configure a graph operation by calibrating it for Quantization Aware Training.
886
887 Args:
(...)
897 numpy.ndarray: calibration data for the following operators
898 """
--> 900 return self._calibrate_layers_activation(
901 False, quantized_op, *calibration_data, quantizers=quantizers
902 )
File ~/.local/lib/python3.10/site-packages/concrete/ml/quantization/post_training.py:224, in ONNXConverter._calibrate_layers_activation(self, calibrate_quantized, quantized_op, quantizers, *calibration_data)
208 """Calibrate the QuantizedOp with the previous layer's output calibration data.
209
210 Args:
(...)
221 numpy.ndarray: the output of the newly calibrated layer.
222 """
223 # Calibrate the output of the layer
--> 224 raw_result = quantized_op.calibrate(*calibration_data)
226 # Some operators need to quantize their inputs using model_outputs instead of op_inputs in
227 # order to reduce the impact of quantization.
228 if quantized_op.quantize_inputs_with_model_outputs_precision:
File ~/.local/lib/python3.10/site-packages/concrete/ml/quantization/base_quantized_op.py:714, in QuantizedOp.calibrate(self, *inputs)
708 # Here we need the actual values of the constants, we need to pass through
709 # the numpy.ndarrays in the computation graph
710 prepared_inputs = self._prepare_inputs_with_constants(
711 *inputs, calibrate=True, quantize_actual_values=False
712 )
--> 714 raw_result = self.call_impl(*prepared_inputs, **self.attrs)
715 if isinstance(raw_result, RawOpOutput):
716 return raw_result
File ~/.local/lib/python3.10/site-packages/concrete/ml/quantization/base_quantized_op.py:773, in QuantizedOp.call_impl(self, *inputs, **attrs)
771 else:
772 impl_func = self.impl.__func__ # type: ignore
--> 773 outputs = impl_func(*inputs) if not self._has_attr else impl_func(*inputs, **attrs)
774 assert_true(
775 isinstance(outputs, tuple),
776 f"The output of {impl_func.__name__} needs to be a tuple. Got {outputs}",
777 )
778 num_outputs = len(outputs)
File ~/.local/lib/python3.10/site-packages/concrete/ml/onnx/ops_impl.py:291, in numpy_gemm(a, b, c, alpha, beta, transA, transB)
288 b_prime = numpy.transpose(b) if transB else b
289 c_prime: Union[numpy.ndarray, float] = c if c is not None else 0
--> 291 y = processed_alpha * numpy.matmul(a_prime, b_prime) + processed_beta * c_prime
293 return (y,)
ValueError: matmul: Input operand 0 does not have enough dimensions (has 0, gufunc core with signature (n?,k),(k,m?)->(n?,m?) requires 1)
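For context, this is roughly how the compilation is invoked in my notebook. The variable names are mine, and the model path, calibration-set shape, and n_bits value shown here are only illustrative placeholders:

import onnx
import numpy
from concrete.ml.torch.compile import compile_onnx_model

# My exported network (path is illustrative)
onnx_model = onnx.load("model.onnx")

# Calibration inputs, one sample per row (shape is illustrative;
# my real input_set is built the same way)
input_set = numpy.random.uniform(size=(100, 10)).astype(numpy.float32)

n_bits = 6  # illustrative value

# The call that appears at the top of the traceback
quantized_model = compile_onnx_model(onnx_model, input_set, n_bits)

From the last frame, my guess is that one of the inputs reaching numpy_gemm (a, b, or c) ends up as a 0-d array during calibration, but I can't tell which node produces it or why.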
I’m struggling to debug this issue and would really appreciate any pointers.
If you need any further information, please let me know.
Thank you!