import torch  # required: bnb_4bit_compute_dtype below references torch.bfloat16
from transformers import BitsAndBytesConfig

# 4-bit NF4 quantization config used when loading the model via bitsandbytes.
# NOTE(review): the traceback below shows bitsandbytes is missing/outdated in
# this kernel's environment — it must be installed in the SAME environment
# (`pip install -U bitsandbytes`), then the kernel restarted.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    # Nested (double) quantization: a second quantization pass after the first,
    # saving an extra ~0.4 bits per parameter.
    bnb_4bit_use_double_quant=True,
    # Compute dtype for the quantized model's matmuls; bfloat16 speeds up
    # training/inference on hardware that supports it.
    bnb_4bit_compute_dtype=torch.bfloat16,
)
# Register the local Qwen1.5-7B-Chat model as the global LlamaIndex LLM,
# loaded through HuggingFace with the 4-bit quantization config defined above.
Settings.llm = HuggingFaceLLM(
    context_window=4096,
    max_new_tokens=2048,
    # Greedy decoding: with do_sample=False the temperature value is inert.
    generate_kwargs={"temperature": 0.0, "do_sample": False},
    query_wrapper_prompt=query_wrapper_prompt,
    tokenizer_name="/root/autodl-tmp/llamaindex/tcm-ai-rag/Qwen/Qwen1.5-7B-Chat",
    model_name="/root/autodl-tmp/llamaindex/tcm-ai-rag/Qwen/Qwen1.5-7B-Chat",
    # Other accepted values: "balanced", "balanced_low_0", "sequential"
    device_map="auto",
    model_kwargs={
        "trust_remote_code": True,
        "quantization_config": quantization_config,
    },
)  # fix: the closing parenthesis of this call was missing in the snippet
Running this cell raises the error below, and it is NOT resolved by running the install command the message suggests (执行出现如下错误,无法通过它的提示安装解决):
---------------------------------------------------------------------------
ImportError Traceback (most recent call last)
Cell In[24], line 10 1 from transformers import BitsAndBytesConfig 3 quantization_config = BitsAndBytesConfig( 4 load_in_4bit = True, 5 bnb_4bit_quant_type = "nf4", 6 bnb_4bit_use_double_quant = True, # 启用嵌套量化,在第一轮量化之后会进行第二轮量化,为每个参数额外节省 0.4 比特 7 bnb_4bit_compute_dtype = torch.bfloat16, # 更改量化模型的计算数据类型来加速训练 8 )---> 10 Settings.llm = HuggingFaceLLM( 11 context_window = 4096, 12 max_new_tokens = 2048, 13 generate_kwargs = {"temperature": 0.0, "do_sample": False}, 14 query_wrapper_prompt = query_wrapper_prompt, 15 tokenizer_name = "/root/autodl-tmp/llamaindex/tcm-ai-rag/Qwen/Qwen1.5-7B-Chat", 16 model_name = "/root/autodl-tmp/llamaindex/tcm-ai-rag/Qwen/Qwen1.5-7B-Chat", 17 device_map = "auto", #"auto","balanced","balanced_low_0","sequential" 18 model_kwargs = { 19 "trust_remote_code":True, 20 "quantization_config": quantization_config 21 } 22 )
File ~/miniconda3/envs/llamaindex/lib/python3.10/site-packages/llama_index/llms/huggingface/base.py:210, in HuggingFaceLLM.__init__(self, context_window, max_new_tokens, query_wrapper_prompt, tokenizer_name, model_name, model, tokenizer, device_map, stopping_ids, tokenizer_kwargs, tokenizer_outputs_to_remove, model_kwargs, generate_kwargs, is_chat_model, callback_manager, system_prompt, messages_to_prompt, completion_to_prompt, pydantic_program_mode, output_parser)
...
81 "The bitsandbytes library requires PyTorch but it was not found in your environment. " 82 "You can install it with `pip install torch`." 83 )ImportError: Using `bitsandbytes` 4-bit quantization requires the latest version of bitsandbytes: `pip install -U bitsandbytes`