Fine-tune and infer Llama 3 with CPU #1037

Open
SidneyLann opened this issue Sep 18, 2024 · 0 comments

import logging
import os
import json
import torch
from datasets import load_from_disk
from transformers import TrainingArguments
from trl import SFTTrainer
from unsloth import FastLanguageModel

logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

DATA_HOME = "/home/sidney/app"
MAX_SEQ_LENGTH = 5000

# Defining the configuration for the base model, LoRA and training

config = {
"hugging_face_username":"Shekswess",
"model_config": {
"base_model":os.path.join(DATA_HOME, "model_root/model_en"), # The base model
"finetuned_model":os.path.join(DATA_HOME, "model_root/model_en/adapters/model_gen"), # The fine-tuned model
"max_seq_length": MAX_SEQ_LENGTH, # The maximum sequence length
"dtype":torch.float16, # The data type
"load_in_4bit": True, # Load the model in 4-bit
},
"lora_config": {
"r": 16, # The number of LoRA layers 8, 16, 32, 64
"target_modules": ["q_proj", "k_proj", "v_proj", "o_proj",
"gate_proj", "up_proj", "down_proj"], # The target modules
"lora_alpha":16, # The alpha value for LoRA
"lora_dropout":0, # The dropout value for LoRA
"bias":"none", # The bias for LoRA
"use_gradient_checkpointing":True, # Use gradient checkpointing
"use_rslora":False, # Use RSLora
"use_dora":False, # Use DoRa
"loftq_config":None # The LoFTQ configuration
},
"training_dataset":{
"name":os.path.join(DATA_HOME, "dataset_gen"), # The dataset name(huggingface/datasets)
"split":"train", # The dataset split
"input_field":"prompt", # The input field
},
"training_config": {
"per_device_train_batch_size": 1, # The batch size
"gradient_accumulation_steps": 1, # The gradient accumulation steps
"warmup_steps": 5, # The warmup steps
"max_steps":0, # The maximum steps (0 if the epochs are defined)
"num_train_epochs": 1, # The number of training epochs(0 if the maximum steps are defined)
"learning_rate": 2e-4, # The learning rate
"fp16": not torch.cuda.is_bf16_supported(), # The fp16
"bf16": torch.cuda.is_bf16_supported(), # The bf16
"logging_steps": 1, # The logging steps
"optim" :"adamw_8bit", # The optimizer
"weight_decay" : 0.01, # The weight decay
"lr_scheduler_type": "linear", # The learning rate scheduler
"seed" : 42, # The seed
"output_dir" : "outputs", # The output directory
}
}

# Loading the model and the tokenizer

model, tokenizer = FastLanguageModel.from_pretrained(
model_name = config.get("model_config").get("base_model"),
max_seq_length = config.get("model_config").get("max_seq_length"),
dtype = config.get("model_config").get("dtype"),
load_in_4bit = config.get("model_config").get("load_in_4bit"),
)

# Setup for QLoRA/LoRA PEFT of the base model

model = FastLanguageModel.get_peft_model(
model,
r = config.get("lora_config").get("r"),
target_modules = config.get("lora_config").get("target_modules"),
lora_alpha = config.get("lora_config").get("lora_alpha"),
lora_dropout = config.get("lora_config").get("lora_dropout"),
bias = config.get("lora_config").get("bias"),
use_gradient_checkpointing = config.get("lora_config").get("use_gradient_checkpointing"),
random_state = 42,
use_rslora = config.get("lora_config").get("use_rslora"),
use_dora = config.get("lora_config").get("use_dora"),
loftq_config = config.get("lora_config").get("loftq_config"),
)

# Loading the training dataset

dataset_train = load_from_disk(config.get("training_dataset").get("name"))['train']

# Setting up the trainer for the model

trainer = SFTTrainer(
model = model,
tokenizer = tokenizer,
train_dataset = dataset_train,
dataset_text_field = config.get("training_dataset").get("input_field"),
max_seq_length = config.get("model_config").get("max_seq_length"),
dataset_num_proc = 1,
packing = False,
args = TrainingArguments(
per_device_train_batch_size = config.get("training_config").get("per_device_train_batch_size"),
gradient_accumulation_steps = config.get("training_config").get("gradient_accumulation_steps"),
warmup_steps = config.get("training_config").get("warmup_steps"),
max_steps = config.get("training_config").get("max_steps"),
num_train_epochs= config.get("training_config").get("num_train_epochs"),
learning_rate = config.get("training_config").get("learning_rate"),
fp16 = config.get("training_config").get("fp16"),
bf16 = config.get("training_config").get("bf16"),
logging_steps = config.get("training_config").get("logging_steps"),
optim = config.get("training_config").get("optim"),
weight_decay = config.get("training_config").get("weight_decay"),
lr_scheduler_type = config.get("training_config").get("lr_scheduler_type"),
seed = 42,
output_dir = config.get("training_config").get("output_dir"),
),
)

# Training the model

trainer_stats = trainer.train()

# Saving the trainer stats

with open(os.path.join(DATA_HOME, "outputs/trainer_stats_gen.json"), "w") as f:
json.dump(trainer_stats, f, indent=4)

# Locally saving the fine-tuned model (only the LoRA adapters)

model.save_pretrained(config.get("model_config").get("finetuned_model"))
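
The title also mentions inference, which the script above stops short of. Here is a minimal, untested sketch of loading the saved adapters back for generation, assuming FastLanguageModel.from_pretrained accepts the adapter directory saved above and that FastLanguageModel.for_inference is available in the installed Unsloth version; the prompt string and max_new_tokens are just placeholders:

import torch
from unsloth import FastLanguageModel

# Assumption: from_pretrained can load the LoRA adapter folder saved above
# (pulling in the base model recorded in adapter_config.json).
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "/home/sidney/app/model_root/model_en/adapters/model_gen",
    max_seq_length = 5000,
    dtype = torch.float16,
    load_in_4bit = True,
)
FastLanguageModel.for_inference(model)  # switch Unsloth into its inference mode

inputs = tokenizer("Example prompt", return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))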

Can this code be amended to run on the CPU?
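
If Unsloth's FastLanguageModel turns out to require a CUDA GPU, one fallback (not Unsloth-specific) would be to run the same LoRA fine-tune on CPU with plain transformers + peft + trl. The sketch below is an untested adaptation of the config above: paths, rank and hyperparameters are copied verbatim, while 4-bit loading and the adamw_8bit optimizer are dropped because, as far as I know, they depend on bitsandbytes with CUDA. Expect fp32 CPU training of a Llama 3 sized model to be very slow.

import torch
from datasets import load_from_disk
from peft import LoraConfig, get_peft_model
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments
from trl import SFTTrainer

base_model = "/home/sidney/app/model_root/model_en"                       # copied from the config above
adapter_dir = "/home/sidney/app/model_root/model_en/adapters/model_gen"   # copied from the config above

# Full-precision weights on CPU; no 4-bit quantization (bitsandbytes needs a GPU).
model = AutoModelForCausalLM.from_pretrained(base_model, torch_dtype=torch.float32)
tokenizer = AutoTokenizer.from_pretrained(base_model)

# Same LoRA settings as the lora_config above.
model = get_peft_model(model, LoraConfig(
    r=16, lora_alpha=16, lora_dropout=0.0, bias="none",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj"],
))

dataset_train = load_from_disk("/home/sidney/app/dataset_gen")["train"]

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset_train,
    dataset_text_field = "prompt",
    max_seq_length = 5000,
    packing = False,
    args = TrainingArguments(
        per_device_train_batch_size = 1,
        gradient_accumulation_steps = 1,
        warmup_steps = 5,
        num_train_epochs = 1,
        learning_rate = 2e-4,
        fp16 = False, bf16 = False,   # keep plain fp32; the original torch.cuda.is_bf16_supported() check assumes a GPU
        optim = "adamw_torch",        # instead of adamw_8bit, which needs bitsandbytes + CUDA
        use_cpu = True,               # force CPU (older transformers versions use no_cuda=True)
        logging_steps = 1,
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 42,
        output_dir = "outputs",
    ),
)
trainer.train()
model.save_pretrained(adapter_dir)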
