Hello,
I am trying to train Concrete ML's XGBClassifier on some embedded text data (768-dimensional embeddings). Here is my code.
# My train and test data are NumPy arrays with the following shapes:
# X_train: (53783, 768)
# y_train: (53783,)
# X_test:  (13446, 768)
# y_test:  (13446,)
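In case it helps to reproduce, arrays with the same shapes can be faked like this (random stand-ins, not my actual embeddings; the labels are binary, which matches my task):

import numpy as np

# Placeholder data with the same shapes as mine (random values, binary labels)
rng = np.random.default_rng(15)
X_train = rng.normal(size=(53783, 768)).astype(np.float32)
y_train = rng.integers(0, 2, size=53783)
X_test = rng.normal(size=(13446, 768)).astype(np.float32)
y_test = rng.integers(0, 2, size=13446)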
from concrete.ml.sklearn import XGBClassifier as ConcreteXGBClassifier

# Model
enc_model = ConcreteXGBClassifier(max_depth=5, n_estimators=120, n_bits=6, random_state=15).fit(X_train, y_train)

# A circuit needs to be compiled to enable FHE execution
circuit = enc_model.compile(X_train)

y_pred_enc = enc_model.predict(X_test, fhe="execute")
The last line gives an error:

----> 5 y_pred_enc = enc_model.predict(X_test, fhe="execute")

TypeError: predict() got an unexpected keyword argument 'fhe'
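In case the installed version matters here, this is how I check it (using pkg_resources since I am on Python 3.7, and assuming the distribution is named "concrete-ml"):

import pkg_resources

# Print the installed Concrete ML version (distribution name assumed to be "concrete-ml")
print(pkg_resources.get_distribution("concrete-ml").version)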
So I am doing it the long way instead, step by step:
import numpy as np

# Predictions in the clear, for comparison later
y_pred = enc_model.predict(X_test)

# Compile the model on a representative set
fhe_circuit = enc_model.compile(X_train)

# Generate the keys (set force to True in order to generate new keys at each execution)
fhe_circuit.keygen(force=True)

y_pred_fhe_step = []

for f_input in X_test:
    # Quantize an input (float)
    q_input = enc_model.quantize_input([f_input])

    # Encrypt the input
    q_input_enc = fhe_circuit.encrypt(q_input)

    # Run the model in FHE on the encrypted input
    q_y_enc = fhe_circuit.run(q_input_enc)

    # Decrypt the result (integer)
    q_y = fhe_circuit.decrypt(q_y_enc)

    # De-quantize the result
    y = enc_model.dequantize_output(q_y)

    # Apply the sigmoid for a binary classification task (which is the case here),
    # or a softmax, in order to get the probabilities (in the clear)
    y_proba = enc_model.post_processing(y)

    # Since this model does classification, apply the argmax to get the class
    # predictions (in the clear); regression models would not need this line
    y_class = np.argmax(y_proba, axis=1)

    y_pred_fhe_step += list(y_class)

y_pred_fhe_step = np.array(y_pred_fhe_step)

print("Predictions in clear:", y_pred)
print("Predictions in FHE  :", y_pred_fhe_step)
print(f"Similarity: {int((y_pred_fhe_step == y_pred).mean() * 100)}%")
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-21-5ac661ca2f61> in <module>
11 for f_input in X_test:
12 # Quantize an input (float)
---> 13 q_input = enc_model.quantize_input([f_input])
14
15 # Encrypt the input
/opt/conda/lib/python3.7/site-packages/concrete/ml/sklearn/base.py in quantize_input(self, X)
606 # Quantize using the learned quantization parameters for each feature
607 for i, q_x_ in enumerate(self.input_quantizers):
--> 608 qX[:, i] = q_x_.quant(X[:, i])
609 return qX.astype(numpy.int64)
610
TypeError: list indices must be integers or slices, not tuple
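As far as I can tell, the same thing should happen outside the loop as well, with a single test row (a minimal check, reusing the fitted enc_model from above):

# Presumably raises the same TypeError, since the argument is a plain Python list here too
q_single = enc_model.quantize_input([X_test[0]])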
Why am I getting this error?