I am trying to quantize a fine-tuned model to 8-bit using the optimum.intel library. I used post-training quantization like this:
from optimum.intel import OVModelForTokenClassification, OVWeightQuantizationConfig

model_id = "/kaggle/input/open-vinno-format/ov_model"
# load_in_8bit=True applies 8-bit weight quantization while loading the OpenVINO model
model = OVModelForTokenClassification.from_pretrained(model_id, load_in_8bit=True)
model.save_pretrained("ov_8-bit_model")
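For reference, I think the explicit equivalent using the OVWeightQuantizationConfig I imported would be the following (just a sketch; please correct me if load_in_8bit=True does something different):

# assumed equivalent: configure 8-bit weight-only quantization explicitly
q_config = OVWeightQuantizationConfig(bits=8)
model = OVModelForTokenClassification.from_pretrained(model_id, quantization_config=q_config)
model.save_pretrained("ov_8-bit_model")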
When I try to run inference on CPU using OVTrainer and OVTrainingArguments, I get the error below.
Inference code (Colab notebook): Google Colab
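In case the link above does not open, this is roughly what the failing part of the notebook does (a stripped-down sketch, not the exact code; the real x is built from the competition data inside process_dataset, and the CFG handling is omitted):

from datasets import Dataset
from transformers import AutoTokenizer
from optimum.intel import OVModelForTokenClassification, OVTrainer, OVTrainingArguments

# reload the quantized model and the original tokenizer (model_id as above)
ov_model = OVModelForTokenClassification.from_pretrained("ov_8-bit_model")
tokenizer = AutoTokenizer.from_pretrained(model_id)

# tiny stand-in for the tokenized split that process_dataset() builds in the notebook
x = Dataset.from_dict(dict(tokenizer(["example text"], truncation=True)))

training_args = OVTrainingArguments(output_dir="tmp")
trainer = OVTrainer(model=ov_model, args=training_args, tokenizer=tokenizer)

# this is the call that raises the AttributeError below
pred = trainer.predict(x).predictions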
INFO:nncf:Statistics of the bitwidth distribution:
+--------------+---------------------------+-----------------------------------+
| Num bits (N) | % all parameters (layers) | % ratio-defining parameters |
| | | (layers) |
+==============+===========================+===================================+
+--------------+---------------------------+-----------------------------------+
Compiling the model to CPU ...
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<timed exec> in <module>
<ipython-input-18-ba61a4fc5431> in process_dataset(self, ds, tokenizer, CFG)
77 })
78 # Predict for the split
---> 79 pred = trainer.predict(x).predictions
80 # Remove stride and additional CLS & SEP tokens
81 row_preds.append(self.backwards_map_preds(pred, len(row["offset_mapping"]), CFG))
/usr/local/lib/python3.10/dist-packages/transformers/trainer.py in predict(self, test_dataset, ignore_keys, metric_key_prefix)
3303
3304 eval_loop = self.prediction_loop if self.args.use_legacy_prediction_loop else self.evaluation_loop
-> 3305 output = eval_loop(
3306 test_dataloader, description="Prediction", ignore_keys=ignore_keys, metric_key_prefix=metric_key_prefix
3307 )
/usr/local/lib/python3.10/dist-packages/transformers/trainer.py in evaluation_loop(self, dataloader, description, prediction_loss_only, ignore_keys, metric_key_prefix)
3350 self.accelerator.prepare(model)
3351 if self.is_deepspeed_enabled
-> 3352 else self.accelerator.prepare_model(model, evaluation_mode=True)
3353 )
3354
/usr/local/lib/python3.10/dist-packages/accelerate/accelerator.py in prepare_model(self, model, device_placement, evaluation_mode)
1327 # TODO: Look at enabling native TP training directly with a proper config
1328 if (
-> 1329 self.verify_device_map(model)
1330 and self.distributed_type != DistributedType.NO
1331 and os.environ.get("ACCELERATE_BYPASS_DEVICE_MAP", "false") != "true"
/usr/local/lib/python3.10/dist-packages/accelerate/accelerator.py in verify_device_map(self, model)
3253 """
3254 # Checks if any of the child modules has the attribute `hf_device_map` and this map has more than one entry.
-> 3255 for m in model.modules():
3256 if hasattr(m, "hf_device_map") and len(m.hf_device_map) > 1:
3257 return True
AttributeError: 'OVModelForTokenClassification' object has no attribute 'modules'