YAKL
YAKL_fence.h
Go to the documentation of this file.
1 
7 #pragma once
8 // Included by YAKL.h
9 
11 namespace yakl {
12 
/// Block the host code until all device code has completed.
///
/// Exactly one backend branch is compiled, selected by the first YAKL_ARCH_*
/// macro that is defined (checked in this order):
///   - YAKL_ARCH_CUDA   : cudaDeviceSynchronize()
///   - YAKL_ARCH_HIP    : hipDeviceSynchronize()
///   - YAKL_ARCH_SYCL   : wait_and_throw() on the object returned by sycl_default_stream()
///   - YAKL_ARCH_OPENMP : an `omp barrier` directive
/// When none of these macros is defined the body is empty and the call does nothing.
/// The return values of the CUDA and HIP calls are not inspected here.
inline void fence() {
#if defined(YAKL_ARCH_CUDA)
  cudaDeviceSynchronize();
#elif defined(YAKL_ARCH_HIP)
  hipDeviceSynchronize();
#elif defined(YAKL_ARCH_SYCL)
  sycl_default_stream().wait_and_throw();
#elif defined(YAKL_ARCH_OPENMP)
  #pragma omp barrier
#else
  // No device backend selected: nothing to wait for.
#endif  // YAKL_ARCH_* backend selection
}
27 
// fence_inner: block inner threads until all inner threads have completed.
// NOTE(review): the opening signature line (listing line 34) is not present in this
// rendering; the cross-reference index gives it as
// `YAKL_INLINE void fence_inner(InnerHandler &handler)` -- confirm against the header.
//
// Backend selection (first defined YAKL_ARCH_* macro wins); every barrier statement is
// passed through YAKL_EXECUTE_ON_DEVICE_ONLY (presumably so it is only emitted when
// compiling device code -- verify against YAKL_defines.h):
//   CUDA / HIP : __syncthreads()
//   SYCL       : handler.get_item().barrier(...) with fence_space::local_space
//   OpenMP     : no statement (see the comment in that branch)
// With none of these macros defined, the body is empty.
35  #if defined(YAKL_ARCH_CUDA)
36  YAKL_EXECUTE_ON_DEVICE_ONLY( __syncthreads(); )
37  #elif defined(YAKL_ARCH_HIP)
38  YAKL_EXECUTE_ON_DEVICE_ONLY( __syncthreads(); )
39  #elif defined(YAKL_ARCH_SYCL)
40  YAKL_EXECUTE_ON_DEVICE_ONLY( handler.get_item().barrier(sycl::access::fence_space::local_space); )
41  #elif defined(YAKL_ARCH_OPENMP)
42  // OpenMP doesn't do parallelism at the inner level, so nothing needed here
43  #endif
44  }
45 
46 }
48 
49 
YAKL_EXECUTE_ON_DEVICE_ONLY
#define YAKL_EXECUTE_ON_DEVICE_ONLY(...)
[NOT COMMONLY USED] Function-like macro used to determine whether the current code is being compiled for the device.
Definition: YAKL_defines.h:158
__YAKL_NAMESPACE_WRAPPER_END__
#define __YAKL_NAMESPACE_WRAPPER_END__
Definition: YAKL.h:20
__YAKL_NAMESPACE_WRAPPER_BEGIN__
#define __YAKL_NAMESPACE_WRAPPER_BEGIN__
Definition: YAKL.h:19
yakl::InnerHandlerEmpty
This class is necessary for coordination of two-level parallelism.
Definition: YAKL_LaunchConfig.h:111
YAKL_INLINE
#define YAKL_INLINE
Used to decorate functions called from kernels (parallel_for and parallel_outer) or from CPU function...
Definition: YAKL_defines.h:140
yakl::fence
void fence()
Block the host code until all device code has completed.
Definition: YAKL_fence.h:16
yakl
yakl::fence_inner
YAKL_INLINE void fence_inner(InnerHandler &handler)
Block inner threads until all inner threads have completed.
Definition: YAKL_fence.h:34