# Example: Quantization-Aware Training (QAT) with Unsloth + torchao.
from unsloth import FastLanguageModel
from torchao.quantization import quantize_
from torchao.quantization.qat import QATConfig

# Load the base model in 16-bit so QAT fake-quantization ops can be inserted
# during fine-tuning (QAT trains against simulated quantization noise).
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Qwen3-4B-Instruct-2507",
    max_seq_length = 2048,
    load_in_16bit = True,
)

# Attach LoRA adapters to the attention and MLP projections, with a QAT
# scheme so the forward pass simulates the target quantization format.
model = FastLanguageModel.get_peft_model(
    model,
    r = 16,
    target_modules = [
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
    lora_alpha = 32,
    # We support fp8-int4, fp8-fp8, int8-int4, int4
    qat_scheme = "int4",
)

# After training, convert the fake-quantized modules into real quantized ones.
quantize_(model, QATConfig(step = "convert"))
# Save the quantized model. Use the exact same config as QAT (convenient function).
# NOTE(review): Unsloth's save_pretrained_torchao takes (save_directory, tokenizer,
# torchao_config=...); the original text had the first two arguments garbled as
# (model, "tokenizer") — confirm against the installed Unsloth version.
model.save_pretrained_torchao(
    "model", tokenizer,
    torchao_config = model._torchao_config.base_config,
)

# Int4 QAT
from torchao.quantization import Int4WeightOnlyConfig
model.save_pretrained_torchao(
    "model", tokenizer,
    torchao_config = Int4WeightOnlyConfig(),
)

# Int8 QAT
from torchao.quantization import Int8DynamicActivationInt8WeightConfig
model.save_pretrained_torchao(
    "model", tokenizer,
    torchao_config = Int8DynamicActivationInt8WeightConfig(),
)

# Float8 (per-row dynamic activation + weight quantization)
from torchao.quantization import PerRow
from torchao.quantization import Float8DynamicActivationFloat8WeightConfig
torchao_config = Float8DynamicActivationFloat8WeightConfig(granularity = PerRow())
model.save_pretrained_torchao("model", tokenizer, torchao_config = torchao_config)

# Required packages (run these in a shell, not in Python):
#   pip install --upgrade --no-cache-dir --force-reinstall unsloth unsloth_zoo
#   pip install torchao==0.14.0 fbgemm-gpu-genai==1.3.0






