CUDA OOM when export a large model to ONNX

I tried it but still got an OOM error. The `zsh: killed` error is a classic Out-Of-Memory (OOM) kill from the OS. Your insight about `torch.inference_mode()` (you wrote `torch.reference_made()` — the correct name is `torch.inference_mode()`) is right: it helps

# Step 2: run the ONNX export inside torch.inference_mode().
# inference_mode() disables all gradient tracking, which significantly lowers
# peak memory while loading and exporting — this is the main defense against
# the OS-level OOM kill ("zsh: killed").
print("\nStep 2: Preparing and running the ONNX export...")
try:
    with torch.inference_mode():
        # --- Pre-load check and fix for empty index files ---
        # A zero-byte model.safetensors.index.json can make from_pretrained()
        # fail, so remove it before loading.
        index_path = Path(pytorch_model_path) / "model.safetensors.index.json"
        if index_path.exists() and index_path.stat().st_size == 0:
            print(f"⚠️  Found an empty index file at: {index_path}")
            print("   This can cause loading errors. Removing it to proceed.")
            os.remove(index_path)
            print("   ✅ Empty index file removed.")

        # --- MPS DEBUGGING ---
        # Forcing CPU to bypass any MPS-specific bugs.
        device = "cpu"
        print(f"Using device: {device}")

        # Load the model and config ONCE to have better control over memory.
        print("Loading model and config from disk...")
        main_config = AutoConfig.from_pretrained(pytorch_model_path, trust_remote_code=True)
        model = AutoModelForCausalLM.from_pretrained(
            pytorch_model_path,
            config=main_config,
            trust_remote_code=True,
        ).to(device)
        print("Model loaded.")

        # One ONNX config per exported submodel: the first pass has no KV cache,
        # the second declares past-key/value inputs (use_past=True).
        custom_onnx_configs = {
            "decoder_model": CustomGemma3NMultimodalOnnxConfig(config=main_config, task="text-generation", use_past=False),
            "decoder_with_past_model": CustomGemma3NMultimodalOnnxConfig(config=main_config, task="text-generation", use_past=True),
        }
        # Use the more direct `onnx_export_from_model` which takes a pre-loaded
        # model object, avoiding a second load (and a second copy in memory).
        onnx_export_from_model(
            model=model,
            output=Path(onnx_output_path),
            task="text-generation-with-past",
            custom_onnx_configs=custom_onnx_configs,
            fn_get_submodels=get_submodels,
            opset=14,
            do_validation=False,
            device=device,
        )
        print("\n✅ ONNX conversion process completed successfully!")
        print(f"   The exported model is saved in: {Path(onnx_output_path).resolve()}")

except Exception:
    # Surface the full traceback instead of letting the failure disappear.
    print("\n❌ An error occurred during the ONNX conversion process.")
    print("— FULL TRACEBACK —")
    traceback.print_exc()
    print("— END OF TRACEBACK —")

1 Like