/*
 * Copyright (C) 2002-2021 the Network-Based Computing Laboratory
 * (NBCL), The Ohio State University.
 *
 * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu)
 *
 * For detailed copyright and licensing information, please refer to the
 * copyright file COPYRIGHT in the top level OMB directory.
 */

/* Threads per block used by every launch wrapper in this file. */
static const unsigned int THREADS_PER_BLOCK = 256;

/*
 * Number of blocks of THREADS_PER_BLOCK threads needed to cover `count`
 * elements (ceiling division).
 *
 * `count` must be greater than zero. The "(count - 1) / T + 1" form is used
 * instead of "(count + T - 1) / T" so the intermediate sum cannot overflow
 * for very large counts. The result is narrowed to unsigned int because that
 * is the type of a grid dimension.
 */
static unsigned int blocks_for(size_t count)
{
    return (unsigned int)((count - 1) / THREADS_PER_BLOCK + 1);
}

/*
 * For each element i < N, repeats the update y[i] = a * x[i] + y[i]
 * exactly N / 8 times (integer division, so nothing is written when N < 8).
 *
 * Expects a 1D launch with one thread per element; threads whose global
 * index is >= N do nothing.
 * NOTE(review): the repeated update presumably exists to generate GPU load
 * for the benchmark - confirm with the callers.
 *
 * a - scalar multiplier
 * x - input array with at least N elements
 * y - input/output array with at least N elements
 * N - number of elements to process
 */
__global__ void compute_kernel(float a, float *x, float *y, int N)
{
    int i = blockIdx.x * blockDim.x + threadIdx.x;

    if (i < N) {
        for (int count = 0; count < (N / 8); count++) {
            y[i] = a * x[i] + y[i];
        }
    }
}

/*
 * Increments every byte of buf[0 .. len-1] by one.
 *
 * Expects a 1D launch with one thread per byte; threads whose global index
 * is >= len do nothing.
 *
 * buf - buffer to modify
 * len - number of bytes in buf
 */
__global__ void touch_managed_kernel(char *buf, size_t len)
{
    /*
     * Build the index in size_t: the 32-bit product blockIdx.x * blockDim.x
     * wraps for buffers of 4 GiB and more, and storing it in an int makes
     * the comparison against len fail for every byte past 2 GiB.
     */
    size_t i = (size_t)blockIdx.x * blockDim.x + threadIdx.x;

    if (i < len) {
        buf[i] = buf[i] + 1;
    }
}

/*
 * Kernel with an empty body; both arguments are ignored.
 */
__global__ void empty_kernel(char *buf, size_t len)
{
}

/*
 * Launches compute_kernel over N elements on *stream.
 *
 * The launch is asynchronous with respect to the host; this function does
 * not synchronize. Nothing is launched when N <= 0, because a grid with
 * zero blocks is an invalid launch configuration.
 *
 * a      - scalar multiplier forwarded to the kernel
 * d_x    - input array forwarded to the kernel
 * d_y    - input/output array forwarded to the kernel
 * N      - number of elements
 * stream - pointer to the stream to launch on; dereferenced, so it must
 *          not be NULL
 */
extern "C" void call_kernel(float a, float *d_x, float *d_y, int N,
                            cudaStream_t *stream)
{
    if (N <= 0) {
        return;
    }

    compute_kernel<<<blocks_for((size_t)N), THREADS_PER_BLOCK, 0, *stream>>>(
        a, d_x, d_y, N);
}

/*
 * Launches touch_managed_kernel over `length` bytes of buf on *stream.
 *
 * The launch is asynchronous with respect to the host; this function does
 * not synchronize. Nothing is launched when length is 0, because a grid
 * with zero blocks is an invalid launch configuration.
 *
 * buf    - buffer forwarded to the kernel
 * length - number of bytes in buf
 * stream - pointer to the stream to launch on; dereferenced, so it must
 *          not be NULL
 */
extern "C" void call_touch_managed_kernel(char *buf, size_t length,
                                          cudaStream_t *stream)
{
    if (length == 0) {
        return;
    }

    touch_managed_kernel<<<blocks_for(length), THREADS_PER_BLOCK, 0,
                           *stream>>>(buf, length);
}

/*
 * Launches empty_kernel on *stream with a grid sized for `length` elements.
 *
 * The launch is asynchronous with respect to the host; this function does
 * not synchronize. Nothing is launched when length is 0, because a grid
 * with zero blocks is an invalid launch configuration.
 *
 * buf    - forwarded to the kernel, which ignores it
 * length - element count used to size the grid
 * stream - pointer to the stream to launch on; dereferenced, so it must
 *          not be NULL
 */
extern "C" void call_empty_kernel(char *buf, size_t length,
                                  cudaStream_t *stream)
{
    if (length == 0) {
        return;
    }

    empty_kernel<<<blocks_for(length), THREADS_PER_BLOCK, 0, *stream>>>(
        buf, length);
}