```python
import torch
from diffusers import DiffusionPipeline
from diffusers.quantizers import PipelineQuantizationConfig

# Configure NF4 quantization for selected pipeline components
quant_config = PipelineQuantizationConfig(
    quant_backend="bitsandbytes_4bit",
    quant_kwargs={
        "load_in_4bit": True,
        "bnb_4bit_quant_type": "nf4",
        "bnb_4bit_compute_dtype": torch.bfloat16,
    },
    components_to_quantize=["transformer", "text_encoder"],
)

# Load the pipeline with NF4 quantization
# (model_name and device are assumed to be defined earlier)
pipe = DiffusionPipeline.from_pretrained(
    model_name,
    quantization_config=quant_config,
    torch_dtype=torch.bfloat16,
    use_safetensors=True,
    low_cpu_mem_usage=True,
).to(device)
```
Update: this doesn't work well. The approach in this PR seems to be recommended instead: https://github.com/QwenLM/Qwen-Image/pull/6/files
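I haven't copied the PR's diff verbatim, but the usual alternative to pipeline-level quantization is to quantize each component separately with its own library's `BitsAndBytesConfig` and then assemble the pipeline from the pre-quantized parts. A minimal sketch under that assumption; the `Qwen/Qwen-Image` model id and the use of `QwenImageTransformer2DModel` / `Qwen2_5_VLForConditionalGeneration` as the component classes are my assumptions, not taken from the PR:

```python
import torch
from diffusers import DiffusionPipeline, QwenImageTransformer2DModel
from diffusers import BitsAndBytesConfig as DiffusersBitsAndBytesConfig
from transformers import Qwen2_5_VLForConditionalGeneration
from transformers import BitsAndBytesConfig as TransformersBitsAndBytesConfig

model_id = "Qwen/Qwen-Image"  # assumed checkpoint id

# Quantize the diffusion transformer with diffusers' BitsAndBytesConfig
transformer = QwenImageTransformer2DModel.from_pretrained(
    model_id,
    subfolder="transformer",
    quantization_config=DiffusersBitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
    ),
    torch_dtype=torch.bfloat16,
)

# Quantize the text encoder with transformers' BitsAndBytesConfig
text_encoder = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    model_id,
    subfolder="text_encoder",
    quantization_config=TransformersBitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
    ),
    torch_dtype=torch.bfloat16,
)

# Assemble the pipeline from the pre-quantized components
pipe = DiffusionPipeline.from_pretrained(
    model_id,
    transformer=transformer,
    text_encoder=text_encoder,
    torch_dtype=torch.bfloat16,
)
pipe.enable_model_cpu_offload()  # optional: stream components to GPU as needed
```

The upside of this pattern is explicit control: each component is quantized with the config class from its own library (diffusers' for the transformer, transformers' for the text encoder), and anything not passed in loads unquantized.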