Fine-tune and infer Llama 3 with CPU #1037

Open
SidneyLann opened this issue Sep 18, 2024 · 0 comments

import logging
import os
import json
import torch
from datasets import load_from_disk
from transformers import TrainingArguments
from trl import SFTTrainer
from unsloth import FastLanguageModel

logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

DATA_HOME = "/home/sidney/app"
MAX_SEQ_LENGTH = 5000

# Defining the configuration for the base model, LoRA and training

config = {
"hugging_face_username":"Shekswess",
"model_config": {
"base_model":os.path.join(DATA_HOME, "model_root/model_en"), # The base model
"finetuned_model":os.path.join(DATA_HOME, "model_root/model_en/adapters/model_gen"), # The fine-tuned model
"max_seq_length": MAX_SEQ_LENGTH, # The maximum sequence length
"dtype":torch.float16, # The data type
"load_in_4bit": True, # Load the model in 4-bit
},
"lora_config": {
"r": 16, # The number of LoRA layers 8, 16, 32, 64
"target_modules": ["q_proj", "k_proj", "v_proj", "o_proj",
"gate_proj", "up_proj", "down_proj"], # The target modules
"lora_alpha":16, # The alpha value for LoRA
"lora_dropout":0, # The dropout value for LoRA
"bias":"none", # The bias for LoRA
"use_gradient_checkpointing":True, # Use gradient checkpointing
"use_rslora":False, # Use RSLora
"use_dora":False, # Use DoRa
"loftq_config":None # The LoFTQ configuration
},
"training_dataset":{
"name":os.path.join(DATA_HOME, "dataset_gen"), # The dataset name(huggingface/datasets)
"split":"train", # The dataset split
"input_field":"prompt", # The input field
},
"training_config": {
"per_device_train_batch_size": 1, # The batch size
"gradient_accumulation_steps": 1, # The gradient accumulation steps
"warmup_steps": 5, # The warmup steps
"max_steps":0, # The maximum steps (0 if the epochs are defined)
"num_train_epochs": 1, # The number of training epochs(0 if the maximum steps are defined)
"learning_rate": 2e-4, # The learning rate
"fp16": not torch.cuda.is_bf16_supported(), # The fp16
"bf16": torch.cuda.is_bf16_supported(), # The bf16
"logging_steps": 1, # The logging steps
"optim" :"adamw_8bit", # The optimizer
"weight_decay" : 0.01, # The weight decay
"lr_scheduler_type": "linear", # The learning rate scheduler
"seed" : 42, # The seed
"output_dir" : "outputs", # The output directory
}
}

# Loading the model and the tokenizer

model, tokenizer = FastLanguageModel.from_pretrained(
model_name = config.get("model_config").get("base_model"),
max_seq_length = config.get("model_config").get("max_seq_length"),
dtype = config.get("model_config").get("dtype"),
load_in_4bit = config.get("model_config").get("load_in_4bit"),
)

# Setup for QLoRA/LoRA PEFT of the base model

model = FastLanguageModel.get_peft_model(
model,
r = config.get("lora_config").get("r"),
target_modules = config.get("lora_config").get("target_modules"),
lora_alpha = config.get("lora_config").get("lora_alpha"),
lora_dropout = config.get("lora_config").get("lora_dropout"),
bias = config.get("lora_config").get("bias"),
use_gradient_checkpointing = config.get("lora_config").get("use_gradient_checkpointing"),
random_state = 42,
use_rslora = config.get("lora_config").get("use_rslora"),
use_dora = config.get("lora_config").get("use_dora"),
loftq_config = config.get("lora_config").get("loftq_config"),
)

# Loading the training dataset

dataset_train = load_from_disk(config.get("training_dataset").get("name"))['train']

# Setting up the trainer for the model

trainer = SFTTrainer(
model = model,
tokenizer = tokenizer,
train_dataset = dataset_train,
dataset_text_field = config.get("training_dataset").get("input_field"),
max_seq_length = config.get("model_config").get("max_seq_length"),
dataset_num_proc = 1,
packing = False,
args = TrainingArguments(
per_device_train_batch_size = config.get("training_config").get("per_device_train_batch_size"),
gradient_accumulation_steps = config.get("training_config").get("gradient_accumulation_steps"),
warmup_steps = config.get("training_config").get("warmup_steps"),
max_steps = config.get("training_config").get("max_steps"),
num_train_epochs= config.get("training_config").get("num_train_epochs"),
learning_rate = config.get("training_config").get("learning_rate"),
fp16 = config.get("training_config").get("fp16"),
bf16 = config.get("training_config").get("bf16"),
logging_steps = config.get("training_config").get("logging_steps"),
optim = config.get("training_config").get("optim"),
weight_decay = config.get("training_config").get("weight_decay"),
lr_scheduler_type = config.get("training_config").get("lr_scheduler_type"),
seed = 42,
output_dir = config.get("training_config").get("output_dir"),
),
)

# Training the model

trainer_stats = trainer.train()

# Saving the trainer stats

with open(os.path.join(DATA_HOME, "outputs/trainer_stats_gen.json"), "w") as f:
json.dump(trainer_stats, f, indent=4)

# Locally saving the fine-tuned model (only the LoRA adapters)

model.save_pretrained(config.get("model_config").get("finetuned_model"))
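
The title also mentions inference, which the script above stops short of. Here is a minimal, untested sketch of loading the saved adapters back for generation, assuming FastLanguageModel.from_pretrained accepts the adapter directory saved above and that FastLanguageModel.for_inference is available in the installed Unsloth version; the prompt string and max_new_tokens are just placeholders:

import torch
from unsloth import FastLanguageModel

# Assumption: from_pretrained can load the LoRA adapter folder saved above
# (pulling in the base model recorded in adapter_config.json).
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "/home/sidney/app/model_root/model_en/adapters/model_gen",
    max_seq_length = 5000,
    dtype = torch.float16,
    load_in_4bit = True,
)
FastLanguageModel.for_inference(model)  # switch Unsloth into its inference mode

inputs = tokenizer("Example prompt", return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))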

Can this code be amended to run on the CPU?
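
If Unsloth's FastLanguageModel turns out to require a CUDA GPU, one fallback (not Unsloth-specific) would be to run the same LoRA fine-tune on CPU with plain transformers + peft + trl. The sketch below is an untested adaptation of the config above: paths, rank and hyperparameters are copied verbatim, while 4-bit loading and the adamw_8bit optimizer are dropped because, as far as I know, they depend on bitsandbytes with CUDA. Expect fp32 CPU training of a Llama 3 sized model to be very slow.

import torch
from datasets import load_from_disk
from peft import LoraConfig, get_peft_model
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments
from trl import SFTTrainer

base_model = "/home/sidney/app/model_root/model_en"                       # copied from the config above
adapter_dir = "/home/sidney/app/model_root/model_en/adapters/model_gen"   # copied from the config above

# Full-precision weights on CPU; no 4-bit quantization (bitsandbytes needs a GPU).
model = AutoModelForCausalLM.from_pretrained(base_model, torch_dtype=torch.float32)
tokenizer = AutoTokenizer.from_pretrained(base_model)

# Same LoRA settings as the lora_config above.
model = get_peft_model(model, LoraConfig(
    r=16, lora_alpha=16, lora_dropout=0.0, bias="none",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj"],
))

dataset_train = load_from_disk("/home/sidney/app/dataset_gen")["train"]

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset_train,
    dataset_text_field = "prompt",
    max_seq_length = 5000,
    packing = False,
    args = TrainingArguments(
        per_device_train_batch_size = 1,
        gradient_accumulation_steps = 1,
        warmup_steps = 5,
        num_train_epochs = 1,
        learning_rate = 2e-4,
        fp16 = False, bf16 = False,   # keep plain fp32; the original torch.cuda.is_bf16_supported() check assumes a GPU
        optim = "adamw_torch",        # instead of adamw_8bit, which needs bitsandbytes + CUDA
        use_cpu = True,               # force CPU (older transformers versions use no_cuda=True)
        logging_steps = 1,
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 42,
        output_dir = "outputs",
    ),
)
trainer.train()
model.save_pretrained(adapter_dir)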
