diff --git a/.local/share/pytorch_pod/python-apps/ai-model.py b/.local/share/pytorch_pod/python-apps/ai-model.py
index f5ce68e..3ea9b76 100755
--- a/.local/share/pytorch_pod/python-apps/ai-model.py
+++ b/.local/share/pytorch_pod/python-apps/ai-model.py
@@ -291,9 +291,11 @@ def _load_model_locked(model_id: str):
     else:
         # Standard Text Model (GPT-OSS)
         print(f"Loading {model_id} with AutoModelForCausalLM...")
+        # GPT-OSS-20B uses native MXFP4 quantization and needs "auto" dtype
+        use_dtype = "auto" if "gpt-oss-20b" in model_id else dtype
         model = AutoModelForCausalLM.from_pretrained(
             model_id,
-            torch_dtype=dtype,
+            torch_dtype=use_dtype,
             device_map=device_map,
             attn_implementation=attn_impl,
             trust_remote_code=True,
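For context, a minimal standalone sketch of the same dtype-selection pattern, outside the patched loader (assumes transformers and torch are installed; the helper name load_text_model is hypothetical and not part of ai-model.py):

import torch
from transformers import AutoModelForCausalLM

def load_text_model(model_id: str, dtype=torch.bfloat16, device_map="auto"):
    # GPT-OSS-20B ships with natively MXFP4-quantized weights; passing "auto"
    # lets transformers keep that quantization instead of casting to `dtype`.
    use_dtype = "auto" if "gpt-oss-20b" in model_id else dtype
    return AutoModelForCausalLM.from_pretrained(
        model_id,
        torch_dtype=use_dtype,
        device_map=device_map,
        trust_remote_code=True,
    )

# Example: "openai/gpt-oss-20b" gets torch_dtype="auto",
# any other text model falls back to bfloat16.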