ROCm / HIP
AMD's open compute stack. HIP is a thin C++ runtime that mirrors the CUDA API almost one-to-one, so CUDA code ports with hipify and the same source compiles for AMD or NVIDIA. PyTorch and TensorFlow ship ROCm builds.
AMD · C/C++ · Python · open source · CUDA-portable
Install
# Ubuntu 22.04 (jammy) — install ROCm + HIP
# The installer package is fetched from the versioned 6.2 directory so the
# pinned filename keeps resolving after the "latest" directory moves on.
wget https://repo.radeon.com/amdgpu-install/6.2/ubuntu/jammy/amdgpu-install_6.2.60200-1_all.deb
# Installing the .deb registers AMD's apt repositories and the amdgpu-install tool.
sudo apt-get install -y ./amdgpu-install_*.deb
# Pull in the ROCm stack together with the HIP runtime and compiler.
sudo amdgpu-install --usecase=rocm,hip
hipcc --version # verify
Hello, GPU
vecadd.cpp — the same kernel in HIP
#include <hip/hip_runtime.h>
#include <cstdio>
// Element-wise vector sum: stores a[k] + b[k] into c[k] for k in [0, n).
// Each thread handles at most one element, chosen by its flat 1-D global
// index; threads whose index is n or larger return without touching memory.
__global__ void add(const float* a, const float* b, float* c, int n) {
    const int idx = threadIdx.x + blockDim.x * blockIdx.x;
    if (idx >= n) {
        return;  // tail threads of the last block have nothing to do
    }
    c[idx] = a[idx] + b[idx];
}
int main() {
int n = 1 << 20; size_t bytes = n * sizeof(float);
float *a, *b, *c;
hipMallocManaged(&a, bytes);
hipMallocManaged(&b, bytes);
hipMallocManaged(&c, bytes);
for (int i = 0; i < n; i++) { a[i] = 1.0f; b[i] = 2.0f; }
int threads = 256, blocks = (n + threads - 1) / threads;
add<<<blocks, threads>>>(a, b, c, n);
hipDeviceSynchronize();
printf("c[0] = %f\n", c[0]); // 3.0
hipFree(a); hipFree(b); hipFree(c);
}
Run it:
# Compile vecadd.cpp with hipcc; the binary runs only if the build succeeds (&&).
hipcc vecadd.cpp -o vecadd && ./vecadd