link-device-bitcode.cu 3.2 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576
  // Test for linking with CUDA's libdevice as outlined in
  // http://llvm.org/docs/NVPTXUsage.html#linking-with-libdevice
  //
  // REQUIRES: nvptx-registered-target
  //
  // Prepare the bitcode files to link with.
  // RUN: %clang_cc1 -triple nvptx-unknown-cuda -emit-llvm-bc \
  // RUN: -disable-llvm-passes -o %t.bc %S/Inputs/device-code.ll
  // RUN: %clang_cc1 -triple nvptx-unknown-cuda -emit-llvm-bc \
  // RUN: -disable-llvm-passes -o %t-2.bc %S/Inputs/device-code-2.ll
  //
  // Make sure function in device-code gets linked in and internalized.
  // RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \
  // RUN: -mlink-builtin-bitcode %t.bc -emit-llvm \
  // RUN: -disable-llvm-passes -o - %s \
  // RUN: | FileCheck %s -check-prefix CHECK-IR
  // Make sure the legacy flag name works.
  // RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \
  // RUN: -mlink-cuda-bitcode %t.bc -emit-llvm \
  // RUN: -disable-llvm-passes -o - %s \
  // RUN: | FileCheck %s -check-prefix CHECK-IR
  //
  // Make sure we can link two bitcode files.
  // RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \
  // RUN: -mlink-builtin-bitcode %t.bc -mlink-builtin-bitcode %t-2.bc \
  // RUN: -emit-llvm -disable-llvm-passes -o - %s \
  // RUN: | FileCheck %s -check-prefix CHECK-IR -check-prefix CHECK-IR-2
  //
  // Make sure function in device-code gets linked but is not internalized
  // when it is linked with plain -mlink-bitcode-file instead of
  // -mlink-builtin-bitcode.
  // RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \
  // RUN: -mlink-bitcode-file %t.bc -emit-llvm \
  // RUN: -disable-llvm-passes -o - %s \
  // RUN: | FileCheck %s -check-prefix CHECK-IR-NLD
  //
  // Make sure NVVMReflect pass is enabled in NVPTX back-end.
  // RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \
  // RUN: -mlink-builtin-bitcode %t.bc -S -o /dev/null %s \
  // RUN: -mllvm -debug-pass=Structure 2>&1 \
  // RUN: | FileCheck %s -check-prefix CHECK-REFLECT
  #include "Inputs/cuda.h"

  // Declaration only: no body for device_mul_or_add() appears in this file.
  // The checks at the bottom of the test expect its definition to show up in
  // the emitted IR, i.e. to be linked in from the bitcode passed on the
  // command line.
  __device__ float device_mul_or_add(float a, float b);

  // Declaration-only, C-linkage math entry points. The CHECK-IR-2 checks
  // expect their definitions to be linked in (and internalized) when the
  // second bitcode file is also supplied.
  extern "C" __device__ double __nv_sin(double x);
  extern "C" __device__ double __nv_exp(double x);
  // A device function defined directly in this translation unit. The checks
  // below expect it to be emitted as a plain `define` (not `define internal`)
  // even though bitcode is being linked into the module.
  //
  // NOTE(review): no RUN line visible in this file enables the CHECK-PTX
  // prefix, so the CHECK-PTX lines are currently inert -- confirm whether a
  // PTX-emitting run was intended.
  // CHECK-IR-LABEL: define void @_Z26should_not_be_internalizedPf(
  // CHECK-PTX-LABEL: .visible .func _Z26should_not_be_internalizedPf(
  __device__ void should_not_be_internalized(float *data) {}
  // Make sure the kernel itself has not been internalized.
  // CHECK-IR-LABEL: define void @_Z6kernelPfS_
  // CHECK-PTX-LABEL: .visible .entry _Z6kernelPfS_(
  //
  // The body references every function the test inspects in the output:
  // device_mul_or_add(), __nv_sin() and __nv_exp() are only declared in this
  // file, while should_not_be_internalized() is defined above.
  __global__ __attribute__((used)) void kernel(float *out, float *in) {
    *out = device_mul_or_add(in[0], in[1]);
    // __nv_sin/__nv_exp take and return double, so the float in *out is
    // widened for the calls and the sum is narrowed back on the store.
    *out += __nv_exp(__nv_sin(*out));
    should_not_be_internalized(out);
  }
  // Make sure device_mul_or_add() is present in IR, is internal and
  // calls __nvvm_reflect().
  // CHECK-IR-LABEL: define internal float @_Z17device_mul_or_addff(
  // CHECK-IR-NLD-LABEL: define float @_Z17device_mul_or_addff(
  // CHECK-IR: call i32 @__nvvm_reflect
  // CHECK-IR: ret float
  // Make sure we've linked in and internalized only needed functions
  // from the second bitcode file.
  // CHECK-IR-2-LABEL: define internal double @__nv_sin
  // CHECK-IR-2-LABEL: define internal double @__nv_exp
  // CHECK-IR-2-NOT: double @__unused
  // Verify that NVVMReflect pass is among the passes run by NVPTX back-end.
  // CHECK-REFLECT: Replace occurrences of __nvvm_reflect() calls with 0/1