Large Language Models - alpaca-lora - fine-tuning

Related posts in this series:
- Large Language Models - ChatGLM-Tuning
- Large Language Models - fine-tuning ChatGLM-6B
- Large Language Models - Chinese ChatGLM / LLaMA fine-tuning
- Large Language Models - alpaca-lora - local knowledge base
- Large Language Models 2 - Document AI walkthrough
- Large Language Models - DocumentSearch walkthrough
- Large Language Models - Chinese LangChain
This article reads through the code of
https://github.com/tloen/alpaca-lora
fine tune
The script uses fire for its command-line entry point: a single call to Fire turns any Python component into a command-line interface.
- https://github.com/google/python-fire
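finetune.py follows this pattern: it defines a single train(...) function and hands it to fire.Fire, so every keyword argument becomes a command-line flag. A minimal sketch of the pattern; the three parameters shown here are only a small, illustrative subset of the real ones.

import fire

def train(base_model: str = "", data_path: str = "", output_dir: str = "./lora-alpaca"):
    # Stand-in body; the real train() builds the model, tokenizer and Trainer.
    print(f"training {base_model} on {data_path}, saving to {output_dir}")

if __name__ == "__main__":
    # `python finetune.py --base_model ... --data_path ...` now maps CLI flags
    # directly onto train()'s keyword arguments.
    fire.Fire(train)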
prompt
import json
import os.path as osp
from typing import Union


class Prompter(object):
    __slots__ = ("template", "_verbose")

    def __init__(self, template_name: str = "", verbose: bool = False):
        self._verbose = verbose
        if not template_name:
            # Enforce the default here, so the constructor can be called with '' and will not break.
            template_name = "alpaca"
        file_name = osp.join("templates", f"{template_name}.json")
        if not osp.exists(file_name):
            raise ValueError(f"Can't read {file_name}")
        with open(file_name) as fp:
            self.template = json.load(fp)
        if self._verbose:
            print(
                f"Using prompt template {template_name}: {self.template['description']}"
            )

    def generate_prompt(
        self,
        instruction: str,
        input: Union[None, str] = None,
        label: Union[None, str] = None,
    ) -> str:
        # returns the full prompt from instruction and optional input
        # if a label (=response, =output) is provided, it's also appended.
        if input:
            res = self.template["prompt_input"].format(
                instruction=instruction, input=input
            )
        else:
            res = self.template["prompt_no_input"].format(instruction=instruction)
        if label:
            res = f"{res}{label}"
        if self._verbose:
            print(res)
        return res

    def get_response(self, output: str) -> str:
        return output.split(self.template["response_split"])[1].strip()
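A short usage sketch, assuming templates/alpaca.json is present as in the repo and contains the prompt_input, prompt_no_input and response_split strings referenced above; the instruction/input/label strings are made up.

# Usage sketch of Prompter with illustrative inputs.
prompter = Prompter("alpaca")
full_prompt = prompter.generate_prompt(
    instruction="Translate the sentence to French.",
    input="Good morning.",
    label="Bonjour.",
)
print(full_prompt)  # instruction + input + "### Response:" + label
# At inference time, get_response() keeps only the text after the template's
# response_split marker ("### Response:" in the alpaca template).
print(prompter.get_response(full_prompt))  # -> "Bonjour."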
callback
import traceback
from queue import Queue
from threading import Thread

import transformers


class Stream(transformers.StoppingCriteria):
    def __init__(self, callback_func=None):
        self.callback_func = callback_func

    def __call__(self, input_ids, scores) -> bool:
        if self.callback_func is not None:
            self.callback_func(input_ids[0])
        return False


class Iteratorize:
    """
    Transforms a function that takes a callback
    into a lazy iterator (generator).
    """

    def __init__(self, func, kwargs={}, callback=None):
        self.mfunc = func
        self.c_callback = callback
        self.q = Queue()
        self.sentinel = object()
        self.kwargs = kwargs
        self.stop_now = False

        def _callback(val):
            if self.stop_now:
                raise ValueError
            self.q.put(val)

        def gentask():
            try:
                ret = self.mfunc(callback=_callback, **self.kwargs)
            except ValueError:
                pass
            except Exception:
                traceback.print_exc()
            self.q.put(self.sentinel)
            if self.c_callback:
                self.c_callback(ret)

        self.thread = Thread(target=gentask)
        self.thread.start()

    def __iter__(self):
        return self

    def __next__(self):
        obj = self.q.get(True, None)
        if obj is self.sentinel:
            raise StopIteration
        else:
            return obj

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.stop_now = True
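Stream is a StoppingCriteria that never stops generation; its only job is to call back with the tokens produced so far after every step. Iteratorize then turns any callback-based function into a lazy iterator by running it on a background thread and pushing each callback value onto a queue. A toy sketch of the pattern; generate_with_callback below is a stand-in, not the real model.generate call used in generate.py.

# Toy stand-in for a callback-based generation call.
def generate_with_callback(callback=None, **kwargs):
    for token_id in [306, 4091, 263]:  # pretend these are newly generated token ids
        callback(token_id)

with Iteratorize(generate_with_callback, kwargs={}) as generator:
    for token_id in generator:
        print(token_id)  # tokens arrive one by one, as they are produced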
model
model = LlamaForCausalLM.from_pretrained(
    base_model,
    load_in_8bit=True,
    torch_dtype=torch.float16,
    device_map=device_map,
)
model = prepare_model_for_int8_training(model)

config = LoraConfig(
    r=lora_r,
    lora_alpha=lora_alpha,
    target_modules=lora_target_modules,
    lora_dropout=lora_dropout,
    bias="none",
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, config)
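prepare_model_for_int8_training() does the housekeeping needed to fine-tune an 8-bit base model (e.g. casting layer norms to fp32), and get_peft_model() wraps it so that only the low-rank LoRA adapters on the attention projections are trained. A self-contained sketch with the repo's default hyperparameters (r=8, alpha=16, dropout=0.05, target modules q_proj/v_proj); the base model name is only an example.

import torch
from peft import LoraConfig, get_peft_model, prepare_model_for_int8_training
from transformers import LlamaForCausalLM

model = LlamaForCausalLM.from_pretrained(
    "decapoda-research/llama-7b-hf",  # example base model
    load_in_8bit=True,
    torch_dtype=torch.float16,
    device_map="auto",
)
model = prepare_model_for_int8_training(model)
config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, config)
# Only the adapter weights are trainable; for LLaMA-7B with these settings this
# reports roughly 0.06% of the total parameters.
model.print_trainable_parameters()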
tokenizer
tokenizer = LlamaTokenizer.from_pretrained(base_model)
tokenizer.pad_token_id = 0  # unk. we want this to be different from the eos token
tokenizer.padding_side = "left"  # Allow batched inference

def tokenize(prompt, add_eos_token=True):
    # there's probably a way to do this with the tokenizer settings
    # but again, gotta move fast
    result = tokenizer(
        prompt,
        truncation=True,
        max_length=cutoff_len,
        padding=False,
        return_tensors=None,
    )
    if (
        result["input_ids"][-1] != tokenizer.eos_token_id
        and len(result["input_ids"]) < cutoff_len
        and add_eos_token
    ):
        result["input_ids"].append(tokenizer.eos_token_id)
        result["attention_mask"].append(1)

    result["labels"] = result["input_ids"].copy()

    return result

def generate_and_tokenize_prompt(data_point):
    full_prompt = prompter.generate_prompt(
        data_point["instruction"],
        data_point["input"],
        data_point["output"],
    )
    tokenized_full_prompt = tokenize(full_prompt)
    if not train_on_inputs:
        user_prompt = prompter.generate_prompt(
            data_point["instruction"], data_point["input"]
        )
        tokenized_user_prompt = tokenize(user_prompt, add_eos_token=add_eos_token)
        user_prompt_len = len(tokenized_user_prompt["input_ids"])

        if add_eos_token:
            user_prompt_len -= 1

        tokenized_full_prompt["labels"] = [-100] * user_prompt_len + tokenized_full_prompt[
            "labels"
        ][user_prompt_len:]  # could be sped up, probably
    return tokenized_full_prompt
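When train_on_inputs is False, the labels for the prompt portion are set to -100, the index that Hugging Face's cross-entropy loss ignores, so the loss is computed only on the response tokens. A toy illustration; the token ids below are made up.

# Toy illustration of the -100 label masking above.
input_ids = [1, 887, 526, 263, 13563, 7451, 2]  # prompt tokens + response tokens
user_prompt_len = 4                             # tokens belonging to the prompt only
labels = [-100] * user_prompt_len + input_ids[user_prompt_len:]
print(labels)  # [-100, -100, -100, -100, 13563, 7451, 2] -> loss only on the response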
training
if val_set_size > 0:
    train_val = data["train"].train_test_split(
        test_size=val_set_size, shuffle=True, seed=42
    )
    train_data = train_val["train"].shuffle().map(generate_and_tokenize_prompt)
    val_data = train_val["test"].shuffle().map(generate_and_tokenize_prompt)
else:
    train_data = data["train"].shuffle().map(generate_and_tokenize_prompt)
    val_data = None

if not ddp and torch.cuda.device_count() > 1:
    # keeps Trainer from trying its own DataParallelism when more than 1 gpu is available
    model.is_parallelizable = True
    model.model_parallel = True

trainer = transformers.Trainer(
    model=model,
    train_dataset=train_data,
    eval_dataset=val_data,
    args=transformers.TrainingArguments(
        per_device_train_batch_size=micro_batch_size,
        gradient_accumulation_steps=gradient_accumulation_steps,
        warmup_steps=100,
        num_train_epochs=num_epochs,
        learning_rate=learning_rate,
        fp16=True,
        logging_steps=10,
        optim="adamw_torch",
        evaluation_strategy="steps" if val_set_size > 0 else "no",
        save_strategy="steps",
        eval_steps=200 if val_set_size > 0 else None,
        save_steps=200,
        output_dir=output_dir,
        save_total_limit=3,
        load_best_model_at_end=True if val_set_size > 0 else False,
        ddp_find_unused_parameters=False if ddp else None,
        group_by_length=group_by_length,
        report_to="wandb" if use_wandb else None,
        run_name=wandb_run_name if use_wandb else None,
    ),
    data_collator=transformers.DataCollatorForSeq2Seq(
        tokenizer, pad_to_multiple_of=8, return_tensors="pt", padding=True
    ),
)
model.config.use_cache = False

# Patch state_dict so that saving the model writes only the LoRA adapter weights.
old_state_dict = model.state_dict
model.state_dict = (
    lambda self, *_, **__: get_peft_model_state_dict(self, old_state_dict())
).__get__(model, type(model))

if torch.__version__ >= "2" and sys.platform != "win32":
    model = torch.compile(model)

trainer.train(resume_from_checkpoint=resume_from_checkpoint)

model.save_pretrained(output_dir)

print("\n If there's a warning about missing keys above, please disregard :)")
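The per_device_train_batch_size and gradient_accumulation_steps values above come from two user-facing knobs in finetune.py, batch_size and micro_batch_size: the effective batch size stays at batch_size while only micro_batch_size examples sit in GPU memory at a time. Roughly (the defaults shown are the repo's; treat this as a sketch of the derivation, not the full script):

import os

batch_size = 128
micro_batch_size = 4
gradient_accumulation_steps = batch_size // micro_batch_size  # 32
# Under DDP, the accumulation steps are further divided across processes.
world_size = int(os.environ.get("WORLD_SIZE", 1))
if world_size != 1:
    gradient_accumulation_steps = gradient_accumulation_steps // world_size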
inference: generate
- generate.py uses import gradio as gr to serve the web UI.
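Stripped of the gradio UI, the inference path is roughly: load the 8-bit base model, attach the trained LoRA adapter with PeftModel.from_pretrained, build the prompt with the Prompter shown earlier, and keep only the text after the response marker. The model and adapter names below are the examples the repo uses; the rest is a hedged sketch rather than the exact script.

import torch
from peft import PeftModel
from transformers import GenerationConfig, LlamaForCausalLM, LlamaTokenizer

base_model = "decapoda-research/llama-7b-hf"
lora_weights = "tloen/alpaca-lora-7b"

tokenizer = LlamaTokenizer.from_pretrained(base_model)
model = LlamaForCausalLM.from_pretrained(
    base_model, load_in_8bit=True, torch_dtype=torch.float16, device_map="auto"
)
model = PeftModel.from_pretrained(model, lora_weights, torch_dtype=torch.float16)
model.eval()

prompter = Prompter("alpaca")  # the Prompter class from the prompt section
prompt = prompter.generate_prompt("Tell me about alpacas.")
input_ids = tokenizer(prompt, return_tensors="pt")["input_ids"].to(model.device)

with torch.no_grad():
    generation_output = model.generate(
        input_ids=input_ids,
        generation_config=GenerationConfig(temperature=0.1, top_p=0.75, num_beams=4),
        max_new_tokens=128,
    )
# Decode the full sequence, then cut at the "### Response:" marker.
print(prompter.get_response(tokenizer.decode(generation_output[0])))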