@@ -25,16 +25,32 @@ def get_onnx_model_path(
25
25
local_model_dir : Optional [str ] = None ,
26
26
):
27
27
"""
28
- exports the model to onnx if pre-exported file is not found and returns onnx_model_path
29
-
30
- ``Mandatory`` Args:
31
- :model_name (str): Hugging Face Model Card name, Example: ``gpt2``.
32
- ``Optional`` Args:
33
- :cache_dir (str): Cache dir where downloaded HuggingFace files are stored. ``Defaults to None.``
34
- :tokenizer (Union[PreTrainedTokenizer, PreTrainedTokenizerFast]): Pass model tokenizer. ``Defaults to None.``
35
- :hf_token (str): HuggingFace login token to access private repos. ``Defaults to None.``
36
- :local_model_dir (str): Path to custom model weights and config files. ``Defaults to None.``
37
- :full_batch_size (int): Set full batch size to enable continuous batching mode. ``Defaults to None.``
28
+ Exports the PyTorch model to ONNX format if a pre-exported file is not found,
29
+ and returns the path to the ONNX model.
30
+
31
+ This function loads a Hugging Face model via QEFFCommonLoader, then calls
32
+ its export method to generate the ONNX graph.
33
+
34
+ Parameters
35
+ ----------
36
+ model_name : str
37
+ Hugging Face Model Card name (e.g., ``gpt2``).
38
+
39
+ Other Parameters
40
+ ----------------
41
+ cache_dir : str, optional
42
+ Cache directory where downloaded HuggingFace files are stored. Default is None.
43
+ hf_token : str, optional
44
+ HuggingFace login token to access private repositories. Default is None.
45
+ full_batch_size : int, optional
46
+ Sets the full batch size to enable continuous batching mode. Default is None.
47
+ local_model_dir : str, optional
48
+ Path to custom model weights and config files. Default is None.
49
+
50
+ Returns
51
+ -------
52
+ str
53
+ Path of the generated ONNX graph file.
38
54
"""
39
55
logger .info (f"Exporting Pytorch { model_name } model to ONNX..." )
40
56
@@ -58,20 +74,35 @@ def main(
58
74
full_batch_size : Optional [int ] = None ,
59
75
) -> None :
60
76
"""
61
- Helper function used by export CLI app for exporting to ONNX Model.
62
-
63
- ``Mandatory`` Args:
64
- :model_name (str): Hugging Face Model Card name, Example: ``gpt2``.
65
-
66
- ``Optional`` Args:
67
- :cache_dir (str): Cache dir where downloaded HuggingFace files are stored. ``Defaults to None.``
68
- :hf_token (str): HuggingFace login token to access private repos. ``Defaults to None.``
69
- :local_model_dir (str): Path to custom model weights and config files. ``Defaults to None.``
70
- :full_batch_size (int): Set full batch size to enable continuous batching mode. ``Defaults to None.``
77
+ Main function for the QEfficient ONNX export CLI application.
78
+
79
+ This function serves as the entry point for exporting a PyTorch model, loaded
80
+ via QEFFCommonLoader, to the ONNX format. It prepares the necessary
81
+ paths and calls ``get_onnx_model_path``.
82
+
83
+ Parameters
84
+ ----------
85
+ model_name : str
86
+ Hugging Face Model Card name (e.g., ``gpt2``).
87
+
88
+ Other Parameters
89
+ ----------------
90
+ cache_dir : str, optional
91
+ Cache directory where downloaded HuggingFace files are stored. Default is None.
92
+ hf_token : str, optional
93
+ HuggingFace login token to access private repositories. Default is None.
94
+ local_model_dir : str, optional
95
+ Path to custom model weights and config files. Default is None.
96
+ full_batch_size : int, optional
97
+ Sets the full batch size to enable continuous batching mode. Default is None.
98
+
99
+ Examples
100
+ --------
101
+ To export a model from the command line:
71
102
72
103
.. code-block:: bash
73
104
74
- python -m QEfficient.cloud.export OPTIONS
105
+ python -m QEfficient.cloud.export --model_name gpt2 --cache_dir /path/to/cache
75
106
76
107
"""
77
108
cache_dir = check_and_assign_cache_dir (local_model_dir , cache_dir )
0 commit comments