import logging
import os
import json
import torch
from datasets import load_from_disk
from transformers import TrainingArguments
from trl import SFTTrainer
from unsloth import FastLanguageModel
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
DATA_HOME = "/home/sidney/app"
MAX_SEQ_LENGTH = 5000
# Defining the configuration for the base model, LoRA, and training
config = {
"hugging_face_username":"Shekswess",
"model_config": {
"base_model":os.path.join(DATA_HOME, "model_root/model_en"), # The base model
"finetuned_model":os.path.join(DATA_HOME, "model_root/model_en/adapters/model_gen"), # The fine-tuned model
"max_seq_length": MAX_SEQ_LENGTH, # The maximum sequence length
"dtype":torch.float16, # The data type
"load_in_4bit": True, # Load the model in 4-bit
},
"lora_config": {
"r": 16, # The number of LoRA layers 8, 16, 32, 64
"target_modules": ["q_proj", "k_proj", "v_proj", "o_proj",
"gate_proj", "up_proj", "down_proj"], # The target modules
"lora_alpha":16, # The alpha value for LoRA
"lora_dropout":0, # The dropout value for LoRA
"bias":"none", # The bias for LoRA
"use_gradient_checkpointing":True, # Use gradient checkpointing
"use_rslora":False, # Use RSLora
"use_dora":False, # Use DoRa
"loftq_config":None # The LoFTQ configuration
},
"training_dataset":{
"name":os.path.join(DATA_HOME, "dataset_gen"), # The dataset name(huggingface/datasets)
"split":"train", # The dataset split
"input_field":"prompt", # The input field
},
"training_config": {
"per_device_train_batch_size": 1, # The batch size
"gradient_accumulation_steps": 1, # The gradient accumulation steps
"warmup_steps": 5, # The warmup steps
"max_steps":0, # The maximum steps (0 if the epochs are defined)
"num_train_epochs": 1, # The number of training epochs(0 if the maximum steps are defined)
"learning_rate": 2e-4, # The learning rate
"fp16": not torch.cuda.is_bf16_supported(), # The fp16
"bf16": torch.cuda.is_bf16_supported(), # The bf16
"logging_steps": 1, # The logging steps
"optim" :"adamw_8bit", # The optimizer
"weight_decay" : 0.01, # The weight decay
"lr_scheduler_type": "linear", # The learning rate scheduler
"seed" : 42, # The seed
"output_dir" : "outputs", # The output directory
}
}
# Loading the model and the tokenizer
model, tokenizer = FastLanguageModel.from_pretrained(
model_name = config.get("model_config").get("base_model"),
max_seq_length = config.get("model_config").get("max_seq_length"),
dtype = config.get("model_config").get("dtype"),
load_in_4bit = config.get("model_config").get("load_in_4bit"),
)
# Setting up QLoRA/LoRA PEFT on the base model
model = FastLanguageModel.get_peft_model(
model,
r = config.get("lora_config").get("r"),
target_modules = config.get("lora_config").get("target_modules"),
lora_alpha = config.get("lora_config").get("lora_alpha"),
lora_dropout = config.get("lora_config").get("lora_dropout"),
bias = config.get("lora_config").get("bias"),
use_gradient_checkpointing = config.get("lora_config").get("use_gradient_checkpointing"),
random_state = 42,
use_rslora = config.get("lora_config").get("use_rslora"),
use_dora = config.get("lora_config").get("use_dora"),
loftq_config = config.get("lora_config").get("loftq_config"),
)
# Loading the training dataset
dataset_train = load_from_disk(config.get("training_dataset").get("name"))[config.get("training_dataset").get("split")]
# Setting up the trainer for the model
trainer = SFTTrainer(
model = model,
tokenizer = tokenizer,
train_dataset = dataset_train,
dataset_text_field = config.get("training_dataset").get("input_field"),
max_seq_length = config.get("model_config").get("max_seq_length"),
dataset_num_proc = 1,
packing = False,
args = TrainingArguments(
per_device_train_batch_size = config.get("training_config").get("per_device_train_batch_size"),
gradient_accumulation_steps = config.get("training_config").get("gradient_accumulation_steps"),
warmup_steps = config.get("training_config").get("warmup_steps"),
max_steps = config.get("training_config").get("max_steps"),
num_train_epochs= config.get("training_config").get("num_train_epochs"),
learning_rate = config.get("training_config").get("learning_rate"),
fp16 = config.get("training_config").get("fp16"),
bf16 = config.get("training_config").get("bf16"),
logging_steps = config.get("training_config").get("logging_steps"),
optim = config.get("training_config").get("optim"),
weight_decay = config.get("training_config").get("weight_decay"),
lr_scheduler_type = config.get("training_config").get("lr_scheduler_type"),
seed = config.get("training_config").get("seed"),
output_dir = config.get("training_config").get("output_dir"),
),
)
# Training the model
trainer_stats = trainer.train()
# Saving the trainer stats (trainer.train() returns a TrainOutput namedtuple, which serializes as a JSON array)
os.makedirs(os.path.join(DATA_HOME, "outputs"), exist_ok=True)
with open(os.path.join(DATA_HOME, "outputs/trainer_stats_gen.json"), "w") as f:
    json.dump(trainer_stats, f, indent=4)
# Locally saving the fine-tuned LoRA adapters; the Hugging Face Hub push is sketched below
model.save_pretrained(config.get("model_config").get("finetuned_model"))
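# The heading above also mentions pushing the adapters to the Hugging Face Hub, but the
# original snippet stops at the local save. A hedged sketch of that step is left here as
# commented-out code; the repo name "model_gen" and the HF_TOKEN environment variable are
# assumptions, not taken from the original script:
# model.push_to_hub(f"{config.get('hugging_face_username')}/model_gen", token=os.environ.get("HF_TOKEN"))
# tokenizer.push_to_hub(f"{config.get('hugging_face_username')}/model_gen", token=os.environ.get("HF_TOKEN"))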
Can I amend this code to run on the CPU?
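Unsloth's FastLanguageModel, 4-bit loading (bitsandbytes), and adamw_8bit all target CUDA GPUs, so a CPU run generally means falling back to plain transformers + peft and dropping quantization. A minimal sketch under those assumptions follows (it reuses the config dict from above, expects a recent transformers release for use_cpu, and will be far slower than GPU training):

import torch
from datasets import load_from_disk
from peft import LoraConfig, get_peft_model
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments
from trl import SFTTrainer

# Load the base model and tokenizer on the CPU in full precision (no 4-bit quantization)
base_model = config.get("model_config").get("base_model")
tokenizer = AutoTokenizer.from_pretrained(base_model)
model = AutoModelForCausalLM.from_pretrained(base_model, torch_dtype=torch.float32)

# Recreate the LoRA setup with plain peft instead of Unsloth
peft_config = LoraConfig(
    r = config.get("lora_config").get("r"),
    lora_alpha = config.get("lora_config").get("lora_alpha"),
    lora_dropout = config.get("lora_config").get("lora_dropout"),
    bias = config.get("lora_config").get("bias"),
    target_modules = config.get("lora_config").get("target_modules"),
    task_type = "CAUSAL_LM",
)
model = get_peft_model(model, peft_config)

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = load_from_disk(config.get("training_dataset").get("name"))[config.get("training_dataset").get("split")],
    dataset_text_field = config.get("training_dataset").get("input_field"),
    max_seq_length = config.get("model_config").get("max_seq_length"),
    args = TrainingArguments(
        per_device_train_batch_size = 1,
        num_train_epochs = 1,
        learning_rate = 2e-4,
        optim = "adamw_torch",  # adamw_8bit relies on bitsandbytes, which needs a GPU
        fp16 = False,           # mixed precision is a GPU feature in this setup
        bf16 = False,
        use_cpu = True,         # older transformers releases used no_cuda=True instead
        output_dir = "outputs",
    ),
)
trainer.train()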