Skip to content

Instantly share code, notes, and snippets.

@MrCroxx
Last active March 22, 2024 03:13
Show Gist options
  • Select an option

  • Save MrCroxx/767ab7bd3022b6991dd502d39d9ba5dc to your computer and use it in GitHub Desktop.

Select an option

Save MrCroxx/767ab7bd3022b6991dd502d39d9ba5dc to your computer and use it in GitHub Desktop.
// baseline: 4.383167ms
// alloc_f16_buf: 29.965166ms
// alloc_f16_buf_fix: 4.217625ms
// set_len: 6.583µs
use half::f16;
use std::hint::black_box;
use std::time::Instant;
// it's slow to initialize a vec![f16::ZERO; buf_size], nearly 80~200ms on preparing kv cache.
// we can initialize a vec![0 as u16; buf_size] and reinterpret it into Vec<f16> to make it
// faster, please note that the zerod f16 is not f16::ZERO, but f16(0x0000), do not read the
// uninitialized data in this buf.
// the code is modified from half's reinterpret_into function.
pub fn alloc_f16_buf(len: usize) -> Vec<f16> {
let mut vec_u16 = vec![0; len];
let length = vec_u16.len();
let capacity = vec_u16.capacity();
let pointer = vec_u16.as_mut_ptr() as *mut f16;
// Prevent running a destructor on the old Vec<u16>, so the pointer won't be deleted
std::mem::forget(vec_u16);
// Finally construct a new Vec<f16> from the raw pointer
// SAFETY: We are reconstructing full length and capacity of original vector,
// using its original pointer, and the size of elements are identical.
unsafe { Vec::from_raw_parts(pointer, length, capacity) }
}
pub fn alloc_f16_buf_fix(len: usize) -> Vec<f16> {
let mut vec_u16: Vec<u16> = vec![0; len];
let length = vec_u16.len();
let capacity = vec_u16.capacity();
let pointer = vec_u16.as_mut_ptr() as *mut f16;
// Prevent running a destructor on the old Vec<u16>, so the pointer won't be deleted
std::mem::forget(vec_u16);
// Finally construct a new Vec<f16> from the raw pointer
// SAFETY: We are reconstructing full length and capacity of original vector,
// using its original pointer, and the size of elements are identical.
unsafe { Vec::from_raw_parts(pointer, length, capacity) }
}
pub fn set_len(len: usize) -> Vec<f16> {
let mut v = Vec::<f16>::with_capacity(len);
unsafe { v.set_len(len) };
v
}
pub fn baseline(len: usize) -> Vec<f16> {
vec![f16::ZERO; len]
}
const LEN: usize = 2048 * 3200;
const LOOP: usize = 25;
fn main() {
let now = Instant::now();
for _ in 0..LOOP {
black_box(baseline(LEN));
}
println!("baseline: {:?}", now.elapsed());
let now = Instant::now();
for _ in 0..LOOP {
black_box(alloc_f16_buf(LEN));
}
println!("alloc_f16_buf: {:?}", now.elapsed());
let now = Instant::now();
for _ in 0..LOOP {
black_box(alloc_f16_buf_fix(LEN));
}
println!("alloc_f16_buf_fix: {:?}", now.elapsed());
let now = Instant::now();
for _ in 0..LOOP {
black_box(set_len(LEN));
}
println!("set_len: {:?}", now.elapsed());
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment