Skip to content

add llama 3 support to llm.c #332

add llama 3 support to llm.c

add llama 3 support to llm.c #332

Workflow file for this run

# CI workflow with two jobs:
#   * dataloader_test    - builds and runs dev/test/test_dataloader, first as
#                          a regular build and then with AddressSanitizer.
#   * ptx_and_sass_files - builds train_gpt2cu and the dev/cuda targets inside
#                          a CUDA devel container, dumps PTX/SASS for the
#                          default architecture and for compute capability
#                          80 and 90, and uploads each set as an artifact.
name: Unit, Static and other Tests

# Runs on ref creation, on manual dispatch, and on pushes / pull requests
# that target master.
on:
  create:
  workflow_dispatch:
  push:
    branches:
      - master
  pull_request:
    branches:
      - master

jobs:
  dataloader_test:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      # The `make clean` between the two builds is what lets the second make
      # invocation (with the sanitizer flags) produce a fresh binary instead
      # of reusing the first one.
      - name: test the dataloader without / with sanitize address
        run: |
          cd dev/test
          make PRECISION=BF16 test_dataloader
          ./test_dataloader
          make clean
          make PRECISION=BF16 TEST_CFLAGS="-fsanitize=address -fno-omit-frame-pointer" test_dataloader
          ./test_dataloader

  ptx_and_sass_files:
    runs-on: ubuntu-latest
    container:
      image: nvidia/cuda:12.4.1-devel-ubuntu22.04

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Install OpenMP and OpenMPI
        run: apt-get update && apt-get install -y libomp-dev libopenmpi-dev

      # First build in a fresh checkout: nothing exists yet, so plain `make`
      # builds everything for the Makefile's default architecture.
      - name: Generate ptx/sass files and upload them to persistent storage
        run: |
          mkdir -p dev/cuda/ptx_sass_logs
          make train_gpt2cu
          cuobjdump --dump-ptx train_gpt2cu > dev/cuda/train_gpt2cu.ptx
          cuobjdump --dump-sass train_gpt2cu > dev/cuda/train_gpt2cu.sass
          cd dev/cuda
          make -j all_ptx
          make -j all_sass
          cp *.ptx ptx_sass_logs/
          cp *.sass ptx_sass_logs/
          ls ptx_sass_logs/

      # The next two steps reuse the workspace of the step above, where
      # train_gpt2cu and the dev/cuda .ptx/.sass outputs already exist.
      # make decides whether to rebuild from file timestamps only; passing a
      # different GPU_COMPUTE_CAPABILITY on the command line does not by
      # itself invalidate those outputs. `-B` (--always-make) forces the
      # rebuild so the dumps really correspond to the requested architecture.
      # NOTE(review): assumes these are ordinary file targets; if the
      # Makefiles already declare them .PHONY, -B is redundant but harmless.
      - name: Generate ptx/sass files for A100 and upload them to persistent storage
        run: |
          mkdir -p dev/cuda/ptx_sass_logs_A100
          make -B train_gpt2cu GPU_COMPUTE_CAPABILITY=80
          cuobjdump --dump-ptx train_gpt2cu > dev/cuda/train_gpt2cu.ptx
          cuobjdump --dump-sass train_gpt2cu > dev/cuda/train_gpt2cu.sass
          cd dev/cuda
          make -B -j GPU_COMPUTE_CAPABILITY=80 all_ptx
          make -B -j GPU_COMPUTE_CAPABILITY=80 all_sass
          cp *.ptx ptx_sass_logs_A100/
          cp *.sass ptx_sass_logs_A100/
          ls ptx_sass_logs_A100/

      - name: Generate ptx/sass files for H100 and upload them to persistent storage
        run: |
          mkdir -p dev/cuda/ptx_sass_logs_H100
          make -B train_gpt2cu GPU_COMPUTE_CAPABILITY=90
          cuobjdump --dump-ptx train_gpt2cu > dev/cuda/train_gpt2cu.ptx
          cuobjdump --dump-sass train_gpt2cu > dev/cuda/train_gpt2cu.sass
          cd dev/cuda
          make -B -j GPU_COMPUTE_CAPABILITY=90 all_ptx
          make -B -j GPU_COMPUTE_CAPABILITY=90 all_sass
          cp *.ptx ptx_sass_logs_H100/
          cp *.sass ptx_sass_logs_H100/
          ls ptx_sass_logs_H100/

      # One artifact per architecture; each is kept for 30 days.
      - name: Upload ptx/sass files
        uses: actions/upload-artifact@v4
        with:
          name: ptx_sass_files
          path: dev/cuda/ptx_sass_logs/
          retention-days: 30  # days to retain

      - name: Upload ptx/sass files for A100
        uses: actions/upload-artifact@v4
        with:
          name: ptx_sass_files_A100
          path: dev/cuda/ptx_sass_logs_A100/
          retention-days: 30  # days to retain

      - name: Upload ptx/sass files for H100
        uses: actions/upload-artifact@v4
        with:
          name: ptx_sass_files_H100
          path: dev/cuda/ptx_sass_logs_H100/
          retention-days: 30  # days to retain