// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck %s
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-apple-darwin10 -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=LAMBDA %s
// RUN: %clang_cc1 -verify -fopenmp -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=BLOCKS %s
// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-apple-darwin10 -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
// expected-no-diagnostics
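// This test checks the LLVM IR emitted for '#pragma omp parallel' with
// 'reduction' clauses over scalars, class objects, bit-fields, reference
// members, and an array section, in plain functions, constructors, lambdas
// (-DLAMBDA), and blocks (-DBLOCKS).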
#ifndef HEADER
#define HEADER
volatile int g __attribute__((aligned(128))) = 1212;
template <class T>
struct S {
  T f;
  S(T a) : f(a + g) {}
  S() : f(g) {}
  operator T() { return T(); }
  S &operator&(const S &) { return *this; }
  ~S() {}
};
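// SS checks reductions over members reached through 'this' inside a
// constructor: a plain int, a 4-bit bit-field, and a reference member.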
struct SS {
  int a;
  int b : 4;
  int &c;
  SS(int &d) : a(0), b(0), c(d) {
#pragma omp parallel reduction(+: a, b, c)
#ifdef LAMBDA
    [&]() {
      ++this->a, --b, (this)->c /= 1;
#pragma omp parallel reduction(&: a, b, c)
      ++(this)->a, --b, this->c /= 1;
    }();
#elif defined(BLOCKS)
    ^{
      ++a;
      --this->b;
      (this)->c /= 1;
#pragma omp parallel reduction(-: a, b, c)
      ++(this)->a, --b, this->c /= 1;
    }();
#else
    ++this->a, --b, c /= 1;
#endif
  }
};
template<typename T>
struct SST {
  T a;
  SST() : a(T()) {
#pragma omp parallel reduction(*: a)
#ifdef LAMBDA
    [&]() {
      [&]() {
        ++this->a;
#pragma omp parallel reduction(&& :a)
        ++(this)->a;
      }();
    }();
#elif defined(BLOCKS)
    ^{
      ^{
        ++a;
#pragma omp parallel reduction(|: a)
        ++(this)->a;
      }();
    }();
#else
    ++(this)->a;
#endif
  }
};
// CHECK: [[SS_TY:%.+]] = type { i{{[0-9]+}}, i8
// LAMBDA: [[SS_TY:%.+]] = type { i{{[0-9]+}}, i8
// BLOCKS: [[SS_TY:%.+]] = type { i{{[0-9]+}}, i8
// CHECK-DAG: [[S_FLOAT_TY:%.+]] = type { float }
// CHECK-DAG: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} }
// CHECK-DAG: [[REDUCTION_LOC:@.+]] = private unnamed_addr global %{{.+}} { i32 0, i32 18, i32 0, i32 0, i8*
// CHECK-DAG: [[REDUCTION_LOCK:@.+]] = common global [8 x i32] zeroinitializer
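// The [8 x i32] lock matches the runtime's kmp_critical_name type (an array
// of eight 32-bit ints) used to serialize the non-atomic reduction path.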
// CHECK: foo_array_sect
// CHECK: call void {{.+}}@__kmpc_fork_call(
// CHECK: ret void
void foo_array_sect(short x[1]) {
#pragma omp parallel reduction(+ : x[:])
  {}
}
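// An array-section reduction 'x[:]' over a pointer parameter is outlined
// through __kmpc_fork_call like any other reduction, as checked above, even
// though the region body is empty.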
template <typename T>
T tmain() {
  T t;
  S<T> test;
  SST<T> sst;
  T t_var __attribute__((aligned(128))) = T(), t_var1 __attribute__((aligned(128)));
  T vec[] = {1, 2};
  S<T> s_arr[] = {1, 2};
  S<T> var __attribute__((aligned(128))) (3), var1 __attribute__((aligned(128)));
#pragma omp parallel reduction(+:t_var) reduction(&:var) reduction(&& : var1) reduction(min: t_var1)
  {
    vec[0] = t_var;
    s_arr[0] = var;
  }
  return T();
}
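// tmain is instantiated with T = int from main; its IR is checked below under
// [[TMAIN_INT]] and [[TMAIN_MICROTASK]].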
int sivar;
int main() {
  SS ss(sivar);
#ifdef LAMBDA
  // LAMBDA: [[G:@.+]] = global i{{[0-9]+}} 1212,
  // LAMBDA-LABEL: @main
  // LAMBDA: alloca [[SS_TY]],
  // LAMBDA: alloca [[CAP_TY:%.+]],
  // LAMBDA: call{{.*}} void [[OUTER_LAMBDA:@[^(]+]]([[CAP_TY]]*
  [&]() {
  // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
  // LAMBDA: call void {{.+}} @__kmpc_fork_call({{.+}}, i32 1, {{.+}}* [[OMP_REGION:@.+]] to {{.+}}, i32* [[G]])
#pragma omp parallel reduction(+:g)
  {
    // LAMBDA: define {{.+}} @{{.+}}([[SS_TY]]*
    // LAMBDA: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 0
    // LAMBDA: store i{{[0-9]+}} 0, i{{[0-9]+}}* %
    // LAMBDA: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 1
    // LAMBDA: store i8
    // LAMBDA: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 2
    // LAMBDA: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 0
    // LAMBDA-NOT: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 1
    // LAMBDA: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 2
    // LAMBDA: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 4, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [[SS_TY]]*, i32*, i32*, i32*)* [[SS_MICROTASK:@.+]] to void
    // LAMBDA: [[B_REF:%.+]] = getelementptr {{.*}}[[SS_TY]], [[SS_TY]]* %{{.*}}, i32 0, i32 1
    // LAMBDA: store i8 %{{.+}}, i8* [[B_REF]],
    // LAMBDA: ret
    // LAMBDA: define internal void [[SS_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [[SS_TY]]* %{{.+}}, i32* {{.+}}, i32* {{.+}}, i32* {{.+}})
    // LAMBDA-NOT: getelementptr {{.*}}[[SS_TY]], [[SS_TY]]* %
    // LAMBDA: call{{.*}} void
    // LAMBDA: ret void
    // LAMBDA: define internal void @{{.+}}(i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [[SS_TY]]*
    // LAMBDA: [[A_PRIV:%.+]] = alloca i{{[0-9]+}},
    // LAMBDA: [[B_PRIV:%.+]] = alloca i{{[0-9]+}},
    // LAMBDA: [[C_PRIV:%.+]] = alloca i{{[0-9]+}},
    // LAMBDA: store i{{[0-9]+}} -1, i{{[0-9]+}}* [[A_PRIV]],
    // LAMBDA: store i{{[0-9]+}}* [[A_PRIV]], i{{[0-9]+}}** [[REFA:%.+]],
    // LAMBDA: store i{{[0-9]+}} -1, i{{[0-9]+}}* [[B_PRIV]],
    // LAMBDA: store i{{[0-9]+}} -1, i{{[0-9]+}}* [[C_PRIV]],
    // LAMBDA: store i{{[0-9]+}}* [[C_PRIV]], i{{[0-9]+}}** [[REFC:%.+]],
    // LAMBDA: [[A_PRIV:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[REFA]],
    // LAMBDA-NEXT: [[A_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[A_PRIV]],
    // LAMBDA-NEXT: [[INC:%.+]] = add nsw i{{[0-9]+}} [[A_VAL]], 1
    // LAMBDA-NEXT: store i{{[0-9]+}} [[INC]], i{{[0-9]+}}* [[A_PRIV]],
    // LAMBDA-NEXT: [[B_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[B_PRIV]],
    // LAMBDA-NEXT: [[DEC:%.+]] = add nsw i{{[0-9]+}} [[B_VAL]], -1
    // LAMBDA-NEXT: store i{{[0-9]+}} [[DEC]], i{{[0-9]+}}* [[B_PRIV]],
    // LAMBDA-NEXT: [[C_PRIV:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[REFC]],
    // LAMBDA-NEXT: [[C_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[C_PRIV]],
    // LAMBDA-NEXT: [[DIV:%.+]] = sdiv i{{[0-9]+}} [[C_VAL]], 1
    // LAMBDA-NEXT: store i{{[0-9]+}} [[DIV]], i{{[0-9]+}}* [[C_PRIV]],
    // LAMBDA: call i32 @__kmpc_reduce_nowait(
    // LAMBDA: ret void
    // LAMBDA: define{{.*}} internal{{.*}} void [[OMP_REGION]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* dereferenceable(4) %{{.+}})
    // LAMBDA: [[G_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}},
    // Reduction list for runtime.
    // LAMBDA: [[RED_LIST:%.+]] = alloca [1 x i8*],
    // LAMBDA: [[G_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G_REF_ADDR:%.+]]
    // LAMBDA: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[G_PRIVATE_ADDR]], align 128
    g = 1;
    // LAMBDA: store i{{[0-9]+}} 1, i{{[0-9]+}}* [[G_PRIVATE_ADDR]], align 128
    // LAMBDA: [[G_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
    // LAMBDA: store i{{[0-9]+}}* [[G_PRIVATE_ADDR]], i{{[0-9]+}}** [[G_PRIVATE_ADDR_REF]]
    // LAMBDA: call void [[INNER_LAMBDA:@.+]](%{{.+}}* [[ARG]])
    // LAMBDA: [[G_PRIV_REF:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[RED_LIST]], i64 0, i64 0
    // LAMBDA: [[BITCAST:%.+]] = bitcast i32* [[G_PRIVATE_ADDR]] to i8*
    // LAMBDA: store i8* [[BITCAST]], i8** [[G_PRIV_REF]],
    // LAMBDA: call i32 @__kmpc_reduce_nowait(
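    // __kmpc_reduce_nowait returns 1 when this thread must combine the
    // private copies itself (followed by __kmpc_end_reduce_nowait), 2 when it
    // must combine them with atomics, and 0 when there is nothing left to do,
    // which is why a three-way switch is emitted: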
    // LAMBDA: switch i32 %{{.+}}, label %[[REDUCTION_DONE:.+]] [
    // LAMBDA: i32 1, label %[[CASE1:.+]]
    // LAMBDA: i32 2, label %[[CASE2:.+]]
    // LAMBDA: [[CASE1]]
    // LAMBDA: [[G_VAL:%.+]] = load i32, i32* [[G_REF]]
    // LAMBDA: [[G_PRIV_VAL:%.+]] = load i32, i32* [[G_PRIVATE_ADDR]]
    // LAMBDA: [[ADD:%.+]] = add nsw i32 [[G_VAL]], [[G_PRIV_VAL]]
    // LAMBDA: store i32 [[ADD]], i32* [[G_REF]]
    // LAMBDA: call void @__kmpc_end_reduce_nowait(
    // LAMBDA: br label %[[REDUCTION_DONE]]
    // LAMBDA: [[CASE2]]
    // LAMBDA: [[G_PRIV_VAL:%.+]] = load i32, i32* [[G_PRIVATE_ADDR]]
    // LAMBDA: atomicrmw add i32* [[G_REF]], i32 [[G_PRIV_VAL]] monotonic
    // LAMBDA: br label %[[REDUCTION_DONE]]
    // LAMBDA: [[REDUCTION_DONE]]
    // LAMBDA: ret void
    [&]() {
      // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]])
      // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]],
      g = 2;
      // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]]
      // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
      // LAMBDA: [[G_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G_PTR_REF]]
      // LAMBDA: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[G_REF]]
    }();
  }
  }();
  return 0;
#elif defined(BLOCKS)
  // BLOCKS: [[G:@.+]] = global i{{[0-9]+}} 1212,
  // BLOCKS-LABEL: @main
  // BLOCKS: call
  // BLOCKS: call void {{%.+}}(i8
  ^{
  // BLOCKS: define{{.*}} internal{{.*}} void {{.+}}(i8*
  // BLOCKS: call void {{.+}} @__kmpc_fork_call({{.+}}, i32 1, {{.+}}* [[OMP_REGION:@.+]] to {{.+}}, i32* [[G]])
#pragma omp parallel reduction(-:g)
  {
    // BLOCKS: define{{.*}} internal{{.*}} void [[OMP_REGION]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* dereferenceable(4) %{{.+}})
    // BLOCKS: [[G_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}},
    // Reduction list for runtime.
    // BLOCKS: [[RED_LIST:%.+]] = alloca [1 x i8*],
    // BLOCKS: [[G_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G_REF_ADDR:%.+]]
    // BLOCKS: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[G_PRIVATE_ADDR]], align 128
    g = 1;
    // BLOCKS: store i{{[0-9]+}} 1, i{{[0-9]+}}* [[G_PRIVATE_ADDR]], align 128
    // BLOCKS-NOT: [[G]]{{[[^:word:]]}}
    // BLOCKS: i{{[0-9]+}}* [[G_PRIVATE_ADDR]]
    // BLOCKS-NOT: [[G]]{{[[^:word:]]}}
    // BLOCKS: call void {{%.+}}(i8
    // BLOCKS: [[G_PRIV_REF:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[RED_LIST]], i64 0, i64 0
    // BLOCKS: [[BITCAST:%.+]] = bitcast i32* [[G_PRIVATE_ADDR]] to i8*
    // BLOCKS: store i8* [[BITCAST]], i8** [[G_PRIV_REF]],
    // BLOCKS: call i32 @__kmpc_reduce_nowait(
    // BLOCKS: switch i32 %{{.+}}, label %[[REDUCTION_DONE:.+]] [
    // BLOCKS: i32 1, label %[[CASE1:.+]]
    // BLOCKS: i32 2, label %[[CASE2:.+]]
    // BLOCKS: [[CASE1]]
    // BLOCKS: [[G_VAL:%.+]] = load i32, i32* [[G_REF]]
    // BLOCKS: [[G_PRIV_VAL:%.+]] = load i32, i32* [[G_PRIVATE_ADDR]]
    // BLOCKS: [[ADD:%.+]] = add nsw i32 [[G_VAL]], [[G_PRIV_VAL]]
    // BLOCKS: store i32 [[ADD]], i32* [[G_REF]]
    // BLOCKS: call void @__kmpc_end_reduce_nowait(
    // BLOCKS: br label %[[REDUCTION_DONE]]
    // BLOCKS: [[CASE2]]
    // BLOCKS: [[G_PRIV_VAL:%.+]] = load i32, i32* [[G_PRIVATE_ADDR]]
    // BLOCKS: atomicrmw add i32* [[G_REF]], i32 [[G_PRIV_VAL]] monotonic
    // BLOCKS: br label %[[REDUCTION_DONE]]
    // BLOCKS: [[REDUCTION_DONE]]
    // BLOCKS: ret void
    ^{
      // BLOCKS: define {{.+}} void {{@.+}}(i8*
      g = 2;
      // BLOCKS-NOT: [[G]]{{[[^:word:]]}}
      // BLOCKS: store i{{[0-9]+}} 2, i{{[0-9]+}}*
      // BLOCKS-NOT: [[G]]{{[[^:word:]]}}
      // BLOCKS: ret
    }();
  }
  }();
  return 0;
// BLOCKS: define {{.+}} @{{.+}}([[SS_TY]]*
// BLOCKS: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 0
// BLOCKS: store i{{[0-9]+}} 0, i{{[0-9]+}}* %
// BLOCKS: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 1
// BLOCKS: store i8
// BLOCKS: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 2
// BLOCKS: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 0
// BLOCKS-NOT: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 1
// BLOCKS: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 2
// BLOCKS: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 4, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [[SS_TY]]*, i32*, i32*, i32*)* [[SS_MICROTASK:@.+]] to void
// BLOCKS: [[B_REF:%.+]] = getelementptr {{.*}}[[SS_TY]], [[SS_TY]]* %{{.*}}, i32 0, i32 1
// BLOCKS: store i8 %{{.+}}, i8* [[B_REF]],
// BLOCKS: ret
// BLOCKS: define internal void [[SS_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [[SS_TY]]* %{{.+}}, i32* {{.+}}, i32* {{.+}}, i32* {{.+}})
// BLOCKS-NOT: getelementptr {{.*}}[[SS_TY]], [[SS_TY]]* %
// BLOCKS: call{{.*}} void
// BLOCKS: ret void
// BLOCKS: define internal void @{{.+}}(i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [[SS_TY]]* %{{.+}}, i32* {{.+}}, i32* {{.+}}, i32* {{.+}})
// BLOCKS: [[A_PRIV:%.+]] = alloca i{{[0-9]+}},
// BLOCKS: [[B_PRIV:%.+]] = alloca i{{[0-9]+}},
// BLOCKS: [[C_PRIV:%.+]] = alloca i{{[0-9]+}},
// BLOCKS: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[A_PRIV]],
// BLOCKS: store i{{[0-9]+}}* [[A_PRIV]], i{{[0-9]+}}** [[REFA:%.+]],
// BLOCKS: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[B_PRIV]],
// BLOCKS: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[C_PRIV]],
// BLOCKS: store i{{[0-9]+}}* [[C_PRIV]], i{{[0-9]+}}** [[REFC:%.+]],
// BLOCKS: [[A_PRIV:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[REFA]],
// BLOCKS-NEXT: [[A_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[A_PRIV]],
// BLOCKS-NEXT: [[INC:%.+]] = add nsw i{{[0-9]+}} [[A_VAL]], 1
// BLOCKS-NEXT: store i{{[0-9]+}} [[INC]], i{{[0-9]+}}* [[A_PRIV]],
// BLOCKS-NEXT: [[B_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[B_PRIV]],
// BLOCKS-NEXT: [[DEC:%.+]] = add nsw i{{[0-9]+}} [[B_VAL]], -1
// BLOCKS-NEXT: store i{{[0-9]+}} [[DEC]], i{{[0-9]+}}* [[B_PRIV]],
// BLOCKS-NEXT: [[C_PRIV:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[REFC]],
// BLOCKS-NEXT: [[C_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[C_PRIV]],
// BLOCKS-NEXT: [[DIV:%.+]] = sdiv i{{[0-9]+}} [[C_VAL]], 1
// BLOCKS-NEXT: store i{{[0-9]+}} [[DIV]], i{{[0-9]+}}* [[C_PRIV]],
// BLOCKS: call i32 @__kmpc_reduce_nowait(
// BLOCKS: ret void
#else
  S<float> test;
  float t_var = 0, t_var1;
  int vec[] = {1, 2};
  S<float> s_arr[] = {1, 2};
  S<float> var(3), var1;
  float _Complex cf;
#pragma omp parallel reduction(+:t_var) reduction(&:var) reduction(&& : var1) reduction(min: t_var1)
  {
    vec[0] = t_var;
    s_arr[0] = var;
  }
  if (var1)
#pragma omp parallel reduction(+ : t_var) reduction(& : var) reduction(&& : var1) reduction(min : t_var1)
    while (1) {
      vec[0] = t_var;
      s_arr[0] = var;
    }
#pragma omp parallel reduction(+ : cf)
  ;
  return tmain<int>();
#endif
}
// CHECK: define {{.*}}i{{[0-9]+}} @main()
// CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]],
// CHECK: call {{.*}} [[S_FLOAT_TY_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]])
// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 6, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [2 x i32]*, float*, [2 x [[S_FLOAT_TY]]]*, [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]*, float*)* [[MAIN_MICROTASK:@.+]] to void
// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 6, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [2 x i32]*, float*, [2 x [[S_FLOAT_TY]]]*, [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]*, float*)* [[MAIN_MICROTASK1:@.+]] to void
// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, { float, float }*)* [[MAIN_MICROTASK2:@.+]] to void
// CHECK: = call {{.*}}i{{.+}} [[TMAIN_INT:@.+]]()
// CHECK: call {{.*}} [[S_FLOAT_TY_DESTR:@.+]]([[S_FLOAT_TY]]*
// CHECK: ret
//
// CHECK: define internal void [[MAIN_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}},
// CHECK: [[T_VAR_PRIV:%.+]] = alloca float,
// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
// CHECK: [[VAR1_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
// CHECK: [[T_VAR1_PRIV:%.+]] = alloca float,
// Reduction list for runtime.
// CHECK: [[RED_LIST:%.+]] = alloca [4 x i8*],
// CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]],
// CHECK: [[T_VAR_REF:%.+]] = load float*, float** %
// CHECK: [[VAR_REF:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** %
// CHECK: [[VAR1_REF:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** %
// CHECK: [[T_VAR1_REF:%.+]] = load float*, float** %
// For the + reduction operation the initial value of the private variable is 0.
// CHECK: store float 0.0{{.+}}, float* [[T_VAR_PRIV]],
// For the & reduction operation the initial value of the private variable is all ones (every bit set).
// CHECK: call {{.*}} [[S_FLOAT_TY_CONSTR:@.+]]([[S_FLOAT_TY]]* [[VAR_PRIV]])
// For the && reduction operation the initial value of the private variable is 1.0.
// CHECK: call {{.*}} [[S_FLOAT_TY_CONSTR:@.+]]([[S_FLOAT_TY]]* [[VAR1_PRIV]])
// For the min reduction operation the initial value of the private variable is the largest representable value (here FLT_MAX).
// CHECK: store float 0x47EFFFFFE0000000, float* [[T_VAR1_PRIV]],
// Skip checks for internal operations.
// void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
// CHECK: [[T_VAR_PRIV_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST]], i64 0, i64 0
// CHECK: [[BITCAST:%.+]] = bitcast float* [[T_VAR_PRIV]] to i8*
// CHECK: store i8* [[BITCAST]], i8** [[T_VAR_PRIV_REF]],
// CHECK: [[VAR_PRIV_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST]], i64 0, i64 1
// CHECK: [[BITCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR_PRIV]] to i8*
// CHECK: store i8* [[BITCAST]], i8** [[VAR_PRIV_REF]],
// CHECK: [[VAR1_PRIV_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST]], i64 0, i64 2
// CHECK: [[BITCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR1_PRIV]] to i8*
// CHECK: store i8* [[BITCAST]], i8** [[VAR1_PRIV_REF]],
// CHECK: [[T_VAR1_PRIV_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST]], i64 0, i64 3
// CHECK: [[BITCAST:%.+]] = bitcast float* [[T_VAR1_PRIV]] to i8*
// CHECK: store i8* [[BITCAST]], i8** [[T_VAR1_PRIV_REF]],
// res = __kmpc_reduce_nowait(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>);
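// An illustrative sketch of the entry point as declared in the OpenMP
// runtime (kmp.h); the parameter names here are assumptions, not checked output:
//   kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 gtid,
//                                  kmp_int32 num_vars, size_t reduce_size,
//                                  void *reduce_data,
//                                  void (*reduce_func)(void *lhs, void *rhs),
//                                  kmp_critical_name *lck);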
// CHECK: [[GTID_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[GTID_ADDR_ADDR]]
// CHECK: [[GTID:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_REF]]
// CHECK: [[BITCAST:%.+]] = bitcast [4 x i8*]* [[RED_LIST]] to i8*
// CHECK: [[RES:%.+]] = call i32 @__kmpc_reduce_nowait(%{{.+}}* [[REDUCTION_LOC]], i32 [[GTID]], i32 4, i64 32, i8* [[BITCAST]], void (i8*, i8*)* [[REDUCTION_FUNC:@.+]], [8 x i32]* [[REDUCTION_LOCK]])
// switch(res)
// CHECK: switch i32 [[RES]], label %[[RED_DONE:.+]] [
// CHECK: i32 1, label %[[CASE1:.+]]
// CHECK: i32 2, label %[[CASE2:.+]]
// CHECK: ]
// case 1:
// t_var += t_var_reduction;
// CHECK: [[T_VAR_VAL:%.+]] = load float, float* [[T_VAR_REF]],
// CHECK: [[T_VAR_PRIV_VAL:%.+]] = load float, float* [[T_VAR_PRIV]],
// CHECK: [[UP:%.+]] = fadd float [[T_VAR_VAL]], [[T_VAR_PRIV_VAL]]
// CHECK: store float [[UP]], float* [[T_VAR_REF]],
// var = var.operator &(var_reduction);
// CHECK: [[UP:%.+]] = call dereferenceable(4) [[S_FLOAT_TY]]* @{{.+}}([[S_FLOAT_TY]]* [[VAR_REF]], [[S_FLOAT_TY]]* dereferenceable(4) [[VAR_PRIV]])
// CHECK: [[BC1:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR_REF]] to i8*
// CHECK: [[BC2:%.+]] = bitcast [[S_FLOAT_TY]]* [[UP]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false)
// var1 = var1.operator &&(var1_reduction);
// CHECK: [[TO_FLOAT:%.+]] = call float @{{.+}}([[S_FLOAT_TY]]* [[VAR1_REF]])
// CHECK: [[VAR1_BOOL:%.+]] = fcmp une float [[TO_FLOAT]], 0.0
// CHECK: br i1 [[VAR1_BOOL]], label %[[TRUE:.+]], label %[[END2:.+]]
// CHECK: [[TRUE]]
// CHECK: [[TO_FLOAT:%.+]] = call float @{{.+}}([[S_FLOAT_TY]]* [[VAR1_PRIV]])
// CHECK: [[VAR1_REDUCTION_BOOL:%.+]] = fcmp une float [[TO_FLOAT]], 0.0
// CHECK: br label %[[END2]]
// CHECK: [[END2]]
// CHECK: [[COND_LVALUE:%.+]] = phi i1 [ false, %{{.+}} ], [ [[VAR1_REDUCTION_BOOL]], %[[TRUE]] ]
// CHECK: [[CONV:%.+]] = uitofp i1 [[COND_LVALUE]] to float
// CHECK: call void @{{.+}}([[S_FLOAT_TY]]* [[COND_LVALUE:%.+]], float [[CONV]])
// CHECK: [[BC1:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR1_REF]] to i8*
// CHECK: [[BC2:%.+]] = bitcast [[S_FLOAT_TY]]* [[COND_LVALUE]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false)
// t_var1 = min(t_var1, t_var1_reduction);
// CHECK: [[T_VAR1_VAL:%.+]] = load float, float* [[T_VAR1_REF]],
// CHECK: [[T_VAR1_PRIV_VAL:%.+]] = load float, float* [[T_VAR1_PRIV]],
// CHECK: [[CMP:%.+]] = fcmp olt float [[T_VAR1_VAL]], [[T_VAR1_PRIV_VAL]]
// CHECK: br i1 [[CMP]]
// CHECK: [[UP:%.+]] = phi float
// CHECK: store float [[UP]], float* [[T_VAR1_REF]],
// __kmpc_end_reduce_nowait(<loc>, <gtid>, &<lock>);
// CHECK: call void @__kmpc_end_reduce_nowait(%{{.+}}* [[REDUCTION_LOC]], i32 [[GTID]], [8 x i32]* [[REDUCTION_LOCK]])
// break;
// CHECK: br label %[[RED_DONE]]
// case 2:
// t_var += t_var_reduction;
// CHECK: load float, float* [[T_VAR_PRIV]]
// CHECK: [[T_VAR_REF_INT:%.+]] = bitcast float* [[T_VAR_REF]] to i32*
// CHECK: [[OLD1:%.+]] = load atomic i32, i32* [[T_VAR_REF_INT]] monotonic,
// CHECK: br label %[[CONT:.+]]
// CHECK: [[CONT]]
// CHECK: [[ORIG_OLD_INT:%.+]] = phi i32 [ [[OLD1]], %{{.+}} ], [ [[OLD2:%.+]], %[[CONT]] ]
// CHECK: fadd float
// CHECK: [[UP_INT:%.+]] = load i32
// CHECK: [[T_VAR_REF_INT:%.+]] = bitcast float* [[T_VAR_REF]] to i32*
// CHECK: [[RES:%.+]] = cmpxchg i32* [[T_VAR_REF_INT]], i32 [[ORIG_OLD_INT]], i32 [[UP_INT]] monotonic monotonic
// CHECK: [[OLD2:%.+]] = extractvalue { i32, i1 } [[RES]], 0
// CHECK: [[SUCCESS_FAIL:%.+]] = extractvalue { i32, i1 } [[RES]], 1
// CHECK: br i1 [[SUCCESS_FAIL]], label %[[ATOMIC_DONE:.+]], label %[[CONT]]
// CHECK: [[ATOMIC_DONE]]
// var = var.operator &(var_reduction);
// CHECK: call void @__kmpc_critical(
// CHECK: [[UP:%.+]] = call dereferenceable(4) [[S_FLOAT_TY]]* @{{.+}}([[S_FLOAT_TY]]* [[VAR_REF]], [[S_FLOAT_TY]]* dereferenceable(4) [[VAR_PRIV]])
// CHECK: [[BC1:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR_REF]] to i8*
// CHECK: [[BC2:%.+]] = bitcast [[S_FLOAT_TY]]* [[UP]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false)
// CHECK: call void @__kmpc_end_critical(
// var1 = var1.operator &&(var1_reduction);
// CHECK: call void @__kmpc_critical(
// CHECK: [[TO_FLOAT:%.+]] = call float @{{.+}}([[S_FLOAT_TY]]* [[VAR1_REF]])
// CHECK: [[VAR1_BOOL:%.+]] = fcmp une float [[TO_FLOAT]], 0.0
// CHECK: br i1 [[VAR1_BOOL]], label %[[TRUE:.+]], label %[[END2:.+]]
// CHECK: [[TRUE]]
// CHECK: [[TO_FLOAT:%.+]] = call float @{{.+}}([[S_FLOAT_TY]]* [[VAR1_PRIV]])
// CHECK: [[VAR1_REDUCTION_BOOL:%.+]] = fcmp une float [[TO_FLOAT]], 0.0
// CHECK: br label %[[END2]]
// CHECK: [[END2]]
// CHECK: [[COND_LVALUE:%.+]] = phi i1 [ false, %{{.+}} ], [ [[VAR1_REDUCTION_BOOL]], %[[TRUE]] ]
// CHECK: [[CONV:%.+]] = uitofp i1 [[COND_LVALUE]] to float
// CHECK: call void @{{.+}}([[S_FLOAT_TY]]* [[COND_LVALUE:%.+]], float [[CONV]])
// CHECK: [[BC1:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR1_REF]] to i8*
// CHECK: [[BC2:%.+]] = bitcast [[S_FLOAT_TY]]* [[COND_LVALUE]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false)
// CHECK: call void @__kmpc_end_critical(
// t_var1 = min(t_var1, t_var1_reduction);
// CHECK: load float, float* [[T_VAR1_PRIV]]
// CHECK: [[T_VAR1_REF_INT:%.+]] = bitcast float* [[T_VAR1_REF]] to i32*
// CHECK: [[OLD1:%.+]] = load atomic i32, i32* [[T_VAR1_REF_INT]] monotonic,
// CHECK: br label %[[CONT:.+]]
// CHECK: [[CONT]]
// CHECK: [[ORIG_OLD_INT:%.+]] = phi i32 [ [[OLD1]], %{{.+}} ], [ [[OLD2:%.+]], %{{.+}} ]
// CHECK: [[CMP:%.+]] = fcmp olt float
// CHECK: br i1 [[CMP]]
// CHECK: [[UP:%.+]] = phi float
// CHECK: [[UP_INT:%.+]] = load i32
// CHECK: [[T_VAR1_REF_INT:%.+]] = bitcast float* [[T_VAR1_REF]] to i32*
// CHECK: [[RES:%.+]] = cmpxchg i32* [[T_VAR1_REF_INT]], i32 [[ORIG_OLD_INT]], i32 [[UP_INT]] monotonic monotonic
// CHECK: [[OLD2:%.+]] = extractvalue { i32, i1 } [[RES]], 0
// CHECK: [[SUCCESS_FAIL:%.+]] = extractvalue { i32, i1 } [[RES]], 1
// CHECK: br i1 [[SUCCESS_FAIL]], label %[[ATOMIC_DONE:.+]], label %[[CONT]]
// CHECK: [[ATOMIC_DONE]]
// break;
// CHECK: br label %[[RED_DONE]]
// CHECK: [[RED_DONE]]
// CHECK-DAG: call {{.*}} [[S_FLOAT_TY_DESTR]]([[S_FLOAT_TY]]* [[VAR_PRIV]])
// CHECK-DAG: call {{.*}} [[S_FLOAT_TY_DESTR]]([[S_FLOAT_TY]]*
// CHECK: ret void
// void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
// *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
// ...
// *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
// *(Type<n>-1*)rhs[<n>-1]);
// }
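// For the reduction list above, the generated reduce_func conceptually does
// (an illustrative sketch, not literal IR):
//   *(float*)lhs[0] += *(float*)rhs[0];                                       // + for t_var
//   *(S<float>*)lhs[1] = ((S<float>*)lhs[1])->operator&(*(S<float>*)rhs[1]);  // & for var
//   *(S<float>*)lhs[2] = S<float>(*(S<float>*)lhs[2] && *(S<float>*)rhs[2]);  // && for var1
//   *(float*)lhs[3] = fminf(*(float*)lhs[3], *(float*)rhs[3]);                // min for t_var1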
// CHECK: define internal void [[REDUCTION_FUNC]](i8*, i8*)
// t_var_lhs = (float*)lhs[0];
// CHECK: [[T_VAR_RHS_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST_RHS:%.+]], i64 0, i64 0
// CHECK: [[T_VAR_RHS_VOID:%.+]] = load i8*, i8** [[T_VAR_RHS_REF]],
// CHECK: [[T_VAR_RHS:%.+]] = bitcast i8* [[T_VAR_RHS_VOID]] to float*
// t_var_rhs = (float*)rhs[0];
// CHECK: [[T_VAR_LHS_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST_LHS:%.+]], i64 0, i64 0
// CHECK: [[T_VAR_LHS_VOID:%.+]] = load i8*, i8** [[T_VAR_LHS_REF]],
// CHECK: [[T_VAR_LHS:%.+]] = bitcast i8* [[T_VAR_LHS_VOID]] to float*
// var_lhs = (S<float>*)lhs[1];
// CHECK: [[VAR_RHS_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST_RHS]], i64 0, i64 1
// CHECK: [[VAR_RHS_VOID:%.+]] = load i8*, i8** [[VAR_RHS_REF]],
// CHECK: [[VAR_RHS:%.+]] = bitcast i8* [[VAR_RHS_VOID]] to [[S_FLOAT_TY]]*
// var_rhs = (S<float>*)rhs[1];
// CHECK: [[VAR_LHS_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST_LHS]], i64 0, i64 1
// CHECK: [[VAR_LHS_VOID:%.+]] = load i8*, i8** [[VAR_LHS_REF]],
// CHECK: [[VAR_LHS:%.+]] = bitcast i8* [[VAR_LHS_VOID]] to [[S_FLOAT_TY]]*
// var1_lhs = (S<float>*)lhs[2];
// CHECK: [[VAR1_RHS_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST_RHS]], i64 0, i64 2
// CHECK: [[VAR1_RHS_VOID:%.+]] = load i8*, i8** [[VAR1_RHS_REF]],
// CHECK: [[VAR1_RHS:%.+]] = bitcast i8* [[VAR1_RHS_VOID]] to [[S_FLOAT_TY]]*
// var1_rhs = (S<float>*)rhs[2];
// CHECK: [[VAR1_LHS_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST_LHS]], i64 0, i64 2
// CHECK: [[VAR1_LHS_VOID:%.+]] = load i8*, i8** [[VAR1_LHS_REF]],
// CHECK: [[VAR1_LHS:%.+]] = bitcast i8* [[VAR1_LHS_VOID]] to [[S_FLOAT_TY]]*
// t_var1_lhs = (float*)lhs[3];
// CHECK: [[T_VAR1_RHS_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST_RHS]], i64 0, i64 3
// CHECK: [[T_VAR1_RHS_VOID:%.+]] = load i8*, i8** [[T_VAR1_RHS_REF]],
// CHECK: [[T_VAR1_RHS:%.+]] = bitcast i8* [[T_VAR1_RHS_VOID]] to float*
// t_var1_rhs = (float*)rhs[3];
// CHECK: [[T_VAR1_LHS_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST_LHS]], i64 0, i64 3
// CHECK: [[T_VAR1_LHS_VOID:%.+]] = load i8*, i8** [[T_VAR1_LHS_REF]],
// CHECK: [[T_VAR1_LHS:%.+]] = bitcast i8* [[T_VAR1_LHS_VOID]] to float*
// t_var_lhs += t_var_rhs;
// CHECK: [[T_VAR_LHS_VAL:%.+]] = load float, float* [[T_VAR_LHS]],
// CHECK: [[T_VAR_RHS_VAL:%.+]] = load float, float* [[T_VAR_RHS]],
// CHECK: [[UP:%.+]] = fadd float [[T_VAR_LHS_VAL]], [[T_VAR_RHS_VAL]]
// CHECK: store float [[UP]], float* [[T_VAR_LHS]],
// var_lhs = var_lhs.operator &(var_rhs);
// CHECK: [[UP:%.+]] = call dereferenceable(4) [[S_FLOAT_TY]]* @{{.+}}([[S_FLOAT_TY]]* [[VAR_LHS]], [[S_FLOAT_TY]]* dereferenceable(4) [[VAR_RHS]])
// CHECK: [[BC1:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR_LHS]] to i8*
// CHECK: [[BC2:%.+]] = bitcast [[S_FLOAT_TY]]* [[UP]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false)
// var1_lhs = var1_lhs.operator &&(var1_rhs);
// CHECK: [[TO_FLOAT:%.+]] = call float @{{.+}}([[S_FLOAT_TY]]* [[VAR1_LHS]])
// CHECK: [[VAR1_BOOL:%.+]] = fcmp une float [[TO_FLOAT]], 0.0
// CHECK: br i1 [[VAR1_BOOL]], label %[[TRUE:.+]], label %[[END2:.+]]
// CHECK: [[TRUE]]
// CHECK: [[TO_FLOAT:%.+]] = call float @{{.+}}([[S_FLOAT_TY]]* [[VAR1_RHS]])
// CHECK: [[VAR1_REDUCTION_BOOL:%.+]] = fcmp une float [[TO_FLOAT]], 0.0
// CHECK: br label %[[END2]]
// CHECK: [[END2]]
// CHECK: [[COND_LVALUE:%.+]] = phi i1 [ false, %{{.+}} ], [ [[VAR1_REDUCTION_BOOL]], %[[TRUE]] ]
// CHECK: [[CONV:%.+]] = uitofp i1 [[COND_LVALUE]] to float
// CHECK: call void @{{.+}}([[S_FLOAT_TY]]* [[COND_LVALUE:%.+]], float [[CONV]])
// CHECK: [[BC1:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR1_LHS]] to i8*
// CHECK: [[BC2:%.+]] = bitcast [[S_FLOAT_TY]]* [[COND_LVALUE]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false)
// t_var1_lhs = min(t_var1_lhs, t_var1_rhs);
// CHECK: [[T_VAR1_LHS_VAL:%.+]] = load float, float* [[T_VAR1_LHS]],
// CHECK: [[T_VAR1_RHS_VAL:%.+]] = load float, float* [[T_VAR1_RHS]],
// CHECK: [[CMP:%.+]] = fcmp olt float [[T_VAR1_LHS_VAL]], [[T_VAR1_RHS_VAL]]
// CHECK: br i1 [[CMP]]
// CHECK: [[UP:%.+]] = phi float
// CHECK: store float [[UP]], float* [[T_VAR1_LHS]],
// CHECK: ret void
// CHECK: define internal void [[MAIN_MICROTASK1]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}},
// CHECK: [[T_VAR_PRIV:%.+]] = alloca float,
// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
// CHECK: [[VAR1_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
// CHECK: [[T_VAR1_PRIV:%.+]] = alloca float,
// CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]],
// CHECK: [[T_VAR_REF:%.+]] = load float*, float** %
// CHECK: [[VAR_REF:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** %
// CHECK: [[VAR1_REF:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** %
// CHECK: [[T_VAR1_REF:%.+]] = load float*, float** %
// For the + reduction operation the initial value of the private variable is 0.
// CHECK: store float 0.0{{.+}}, float* [[T_VAR_PRIV]],
// For the & reduction operation the initial value of the private variable is all ones (every bit set).
// CHECK: call {{.*}} [[S_FLOAT_TY_CONSTR:@.+]]([[S_FLOAT_TY]]* [[VAR_PRIV]])
// For the && reduction operation the initial value of the private variable is 1.0.
// CHECK: call {{.*}} [[S_FLOAT_TY_CONSTR:@.+]]([[S_FLOAT_TY]]* [[VAR1_PRIV]])
// For the min reduction operation the initial value of the private variable is the largest representable value (here FLT_MAX).
// CHECK: store float 0x47EFFFFFE0000000, float* [[T_VAR1_PRIV]],
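// The second parallel region's body is 'while (1)' with no exit, so its
// reduction epilogue is unreachable and no __kmpc_reduce call is emitted: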
// CHECK-NOT: call i32 @__kmpc_reduce
// CHECK: }
// CHECK: define {{.*}} i{{[0-9]+}} [[TMAIN_INT]]()
// CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]],
// CHECK: call {{.*}} [[S_INT_TY_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]])
// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 6, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [2 x i32]*, i32*, [2 x [[S_INT_TY]]]*, [[S_INT_TY]]*, [[S_INT_TY]]*, i32*)* [[TMAIN_MICROTASK:@.+]] to void
// CHECK: call {{.*}} [[S_INT_TY_DESTR:@.+]]([[S_INT_TY]]*
// CHECK: ret
//
// CHECK: define {{.+}} @{{.+}}([[SS_TY]]*
// CHECK: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 0
// CHECK: store i{{[0-9]+}} 0, i{{[0-9]+}}* %
// CHECK: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 1
// CHECK: store i8
// CHECK: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 2
// CHECK: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 0
// CHECK-NOT: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 1
// CHECK: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 2
// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 4, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [[SS_TY]]*, i{{[0-9]+}}*, i{{[0-9]+}}*, i{{[0-9]+}}*)* [[SS_MICROTASK:@.+]] to void
// CHECK: [[B_REF:%.+]] = getelementptr {{.*}}[[SS_TY]], [[SS_TY]]* %{{.*}}, i32 0, i32 1
// CHECK: store i8 %{{.+}}, i8* [[B_REF]],
// CHECK: ret
// CHECK: define internal void [[SS_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [[SS_TY]]*
// CHECK: [[A_PRIV:%.+]] = alloca i{{[0-9]+}},
// CHECK: [[B_PRIV:%.+]] = alloca i{{[0-9]+}},
// CHECK: [[C_PRIV:%.+]] = alloca i{{[0-9]+}},
// CHECK: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[A_PRIV]],
// CHECK: store i{{[0-9]+}}* [[A_PRIV]], i{{[0-9]+}}** [[REFA:%.+]],
// CHECK: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[B_PRIV]],
// CHECK: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[C_PRIV]],
// CHECK: store i{{[0-9]+}}* [[C_PRIV]], i{{[0-9]+}}** [[REFC:%.+]],
// CHECK: [[A_PRIV:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[REFA]],
// CHECK-NEXT: [[A_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[A_PRIV]],
// CHECK-NEXT: [[INC:%.+]] = add nsw i{{[0-9]+}} [[A_VAL]], 1
// CHECK-NEXT: store i{{[0-9]+}} [[INC]], i{{[0-9]+}}* [[A_PRIV]],
// CHECK-NEXT: [[B_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[B_PRIV]],
// CHECK-NEXT: [[DEC:%.+]] = add nsw i{{[0-9]+}} [[B_VAL]], -1
// CHECK-NEXT: store i{{[0-9]+}} [[DEC]], i{{[0-9]+}}* [[B_PRIV]],
// CHECK-NEXT: [[C_PRIV:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[REFC]],
// CHECK-NEXT: [[C_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[C_PRIV]],
// CHECK-NEXT: [[DIV:%.+]] = sdiv i{{[0-9]+}} [[C_VAL]], 1
// CHECK-NEXT: store i{{[0-9]+}} [[DIV]], i{{[0-9]+}}* [[C_PRIV]],
// CHECK: call i32 @__kmpc_reduce_nowait(
// CHECK: ret void
// CHECK: define internal void [[TMAIN_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}},
// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, align 128
// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]], align 128
// CHECK: [[VAR1_PRIV:%.+]] = alloca [[S_INT_TY]], align 128
// CHECK: [[T_VAR1_PRIV:%.+]] = alloca i{{[0-9]+}}, align 128
// Reduction list for runtime.
// CHECK: [[RED_LIST:%.+]] = alloca [4 x i8*],
// CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]],
// CHECK: [[T_VAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** %
// CHECK: [[VAR_REF:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** %
// CHECK: [[VAR1_REF:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** %
// CHECK: [[T_VAR1_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** %
// For the + reduction operation the initial value of the private variable is 0.
// CHECK: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[T_VAR_PRIV]],
// For the & reduction operation the initial value of the private variable is all ones (every bit set).
// CHECK: call {{.*}} [[S_INT_TY_CONSTR:@.+]]([[S_INT_TY]]* [[VAR_PRIV]])
// For the && reduction operation the initial value of the private variable is 1.
// CHECK: call {{.*}} [[S_INT_TY_CONSTR:@.+]]([[S_INT_TY]]* [[VAR1_PRIV]])
// For the min reduction operation the initial value of the private variable is the largest representable value (here INT_MAX, 2147483647).
// CHECK: store i{{[0-9]+}} 2147483647, i{{[0-9]+}}* [[T_VAR1_PRIV]],
// Skip checks for internal operations.
// void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
// CHECK: [[T_VAR_PRIV_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST]], i64 0, i64 0
// CHECK: [[BITCAST:%.+]] = bitcast i{{[0-9]+}}* [[T_VAR_PRIV]] to i8*
// CHECK: store i8* [[BITCAST]], i8** [[T_VAR_PRIV_REF]],
// CHECK: [[VAR_PRIV_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST]], i64 0, i64 1
// CHECK: [[BITCAST:%.+]] = bitcast [[S_INT_TY]]* [[VAR_PRIV]] to i8*
// CHECK: store i8* [[BITCAST]], i8** [[VAR_PRIV_REF]],
// CHECK: [[VAR1_PRIV_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST]], i64 0, i64 2
// CHECK: [[BITCAST:%.+]] = bitcast [[S_INT_TY]]* [[VAR1_PRIV]] to i8*
// CHECK: store i8* [[BITCAST]], i8** [[VAR1_PRIV_REF]],
// CHECK: [[T_VAR1_PRIV_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST]], i64 0, i64 3
// CHECK: [[BITCAST:%.+]] = bitcast i{{[0-9]+}}* [[T_VAR1_PRIV]] to i8*
// CHECK: store i8* [[BITCAST]], i8** [[T_VAR1_PRIV_REF]],
// res = __kmpc_reduce_nowait(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>);
// CHECK: [[GTID_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[GTID_ADDR_ADDR]]
// CHECK: [[GTID:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_REF]]
// CHECK: [[BITCAST:%.+]] = bitcast [4 x i8*]* [[RED_LIST]] to i8*
// CHECK: [[RES:%.+]] = call i32 @__kmpc_reduce_nowait(%{{.+}}* [[REDUCTION_LOC]], i32 [[GTID]], i32 4, i64 32, i8* [[BITCAST]], void (i8*, i8*)* [[REDUCTION_FUNC:@.+]], [8 x i32]* [[REDUCTION_LOCK]])
// switch(res)
// CHECK: switch i32 [[RES]], label %[[RED_DONE:.+]] [
// CHECK: i32 1, label %[[CASE1:.+]]
// CHECK: i32 2, label %[[CASE2:.+]]
// CHECK: ]
// case 1:
// t_var += t_var_reduction;
// CHECK: [[T_VAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_REF]],
// CHECK: [[T_VAR_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV]],
// CHECK: [[UP:%.+]] = add nsw i{{[0-9]+}} [[T_VAR_VAL]], [[T_VAR_PRIV_VAL]]
// CHECK: store i{{[0-9]+}} [[UP]], i{{[0-9]+}}* [[T_VAR_REF]],
// var = var.operator &(var_reduction);
// CHECK: [[UP:%.+]] = call dereferenceable(4) [[S_INT_TY]]* @{{.+}}([[S_INT_TY]]* [[VAR_REF]], [[S_INT_TY]]* dereferenceable(4) [[VAR_PRIV]])
// CHECK: [[BC1:%.+]] = bitcast [[S_INT_TY]]* [[VAR_REF]] to i8*
// CHECK: [[BC2:%.+]] = bitcast [[S_INT_TY]]* [[UP]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false)
// var1 = var1.operator &&(var1_reduction);
// CHECK: [[TO_INT:%.+]] = call i{{[0-9]+}} @{{.+}}([[S_INT_TY]]* [[VAR1_REF]])
// CHECK: [[VAR1_BOOL:%.+]] = icmp ne i{{[0-9]+}} [[TO_INT]], 0
// CHECK: br i1 [[VAR1_BOOL]], label %[[TRUE:.+]], label %[[END2:.+]]
// CHECK: [[TRUE]]
// CHECK: [[TO_INT:%.+]] = call i{{[0-9]+}} @{{.+}}([[S_INT_TY]]* [[VAR1_PRIV]])
// CHECK: [[VAR1_REDUCTION_BOOL:%.+]] = icmp ne i{{[0-9]+}} [[TO_INT]], 0
// CHECK: [[END2]]
// CHECK: [[COND_LVALUE:%.+]] = phi i1 [ false, %{{.+}} ], [ [[VAR1_REDUCTION_BOOL]], %[[TRUE]] ]
// CHECK: [[CONV:%.+]] = zext i1 [[COND_LVALUE]] to i32
// CHECK: call void @{{.+}}([[S_INT_TY]]* [[COND_LVALUE:%.+]], i32 [[CONV]])
// CHECK: [[BC1:%.+]] = bitcast [[S_INT_TY]]* [[VAR1_REF]] to i8*
// CHECK: [[BC2:%.+]] = bitcast [[S_INT_TY]]* [[COND_LVALUE]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false)
// t_var1 = min(t_var1, t_var1_reduction);
// CHECK: [[T_VAR1_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR1_REF]],
// CHECK: [[T_VAR1_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR1_PRIV]],
// CHECK: [[CMP:%.+]] = icmp slt i{{[0-9]+}} [[T_VAR1_VAL]], [[T_VAR1_PRIV_VAL]]
// CHECK: br i1 [[CMP]]
// CHECK: [[UP:%.+]] = phi i32
// CHECK: store i{{[0-9]+}} [[UP]], i{{[0-9]+}}* [[T_VAR1_REF]],
// __kmpc_end_reduce_nowait(<loc>, <gtid>, &<lock>);
// CHECK: call void @__kmpc_end_reduce_nowait(%{{.+}}* [[REDUCTION_LOC]], i32 [[GTID]], [8 x i32]* [[REDUCTION_LOCK]])
// break;
// CHECK: br label %[[RED_DONE]]
// case 2:
// t_var += t_var_reduction;
// CHECK: [[T_VAR_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV]]
// CHECK: atomicrmw add i32* [[T_VAR_REF]], i32 [[T_VAR_PRIV_VAL]] monotonic
// var = var.operator &(var_reduction);
// CHECK: call void @__kmpc_critical(
// CHECK: [[UP:%.+]] = call dereferenceable(4) [[S_INT_TY]]* @{{.+}}([[S_INT_TY]]* [[VAR_REF]], [[S_INT_TY]]* dereferenceable(4) [[VAR_PRIV]])
// CHECK: [[BC1:%.+]] = bitcast [[S_INT_TY]]* [[VAR_REF]] to i8*
// CHECK: [[BC2:%.+]] = bitcast [[S_INT_TY]]* [[UP]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false)
// CHECK: call void @__kmpc_end_critical(
// var1 = var1.operator &&(var1_reduction);
// CHECK: call void @__kmpc_critical(
// CHECK: [[TO_INT:%.+]] = call i{{[0-9]+}} @{{.+}}([[S_INT_TY]]* [[VAR1_REF]])
// CHECK: [[VAR1_BOOL:%.+]] = icmp ne i{{[0-9]+}} [[TO_INT]], 0
// CHECK: br i1 [[VAR1_BOOL]], label %[[TRUE:.+]], label %[[END2:.+]]
// CHECK: [[TRUE]]
// CHECK: [[TO_INT:%.+]] = call i{{[0-9]+}} @{{.+}}([[S_INT_TY]]* [[VAR1_PRIV]])
// CHECK: [[VAR1_REDUCTION_BOOL:%.+]] = icmp ne i{{[0-9]+}} [[TO_INT]], 0
// CHECK: br label %[[END2]]
// CHECK: [[END2]]
// CHECK: [[COND_LVALUE:%.+]] = phi i1 [ false, %{{.+}} ], [ [[VAR1_REDUCTION_BOOL]], %[[TRUE]] ]
// CHECK: [[CONV:%.+]] = zext i1 [[COND_LVALUE]] to i32
// CHECK: call void @{{.+}}([[S_INT_TY]]* [[COND_LVALUE:%.+]], i32 [[CONV]])
// CHECK: [[BC1:%.+]] = bitcast [[S_INT_TY]]* [[VAR1_REF]] to i8*
// CHECK: [[BC2:%.+]] = bitcast [[S_INT_TY]]* [[COND_LVALUE]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false)
// CHECK: call void @__kmpc_end_critical(
// t_var1 = min(t_var1, t_var1_reduction);
// CHECK: [[T_VAR1_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR1_PRIV]]
// CHECK: atomicrmw min i32* [[T_VAR1_REF]], i32 [[T_VAR1_PRIV_VAL]] monotonic
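// Unlike the float 'min' case above, which needed a cmpxchg retry loop,
// integer 'min' lowers directly to a single atomicrmw operation.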
// break;
// CHECK: br label %[[RED_DONE]]
// CHECK: [[RED_DONE]]
// CHECK-DAG: call {{.*}} [[S_INT_TY_DESTR]]([[S_INT_TY]]* [[VAR_PRIV]])
// CHECK-DAG: call {{.*}} [[S_INT_TY_DESTR]]([[S_INT_TY]]*
// CHECK: ret void
// void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
// *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
// ...
// *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
// *(Type<n>-1*)rhs[<n>-1]);
// }
// CHECK: define internal void [[REDUCTION_FUNC]](i8*, i8*)
// t_var_lhs = (i{{[0-9]+}}*)lhs[0];
// CHECK: [[T_VAR_RHS_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST_RHS:%.+]], i64 0, i64 0
// CHECK: [[T_VAR_RHS_VOID:%.+]] = load i8*, i8** [[T_VAR_RHS_REF]],
// CHECK: [[T_VAR_RHS:%.+]] = bitcast i8* [[T_VAR_RHS_VOID]] to i{{[0-9]+}}*
// t_var_rhs = (i{{[0-9]+}}*)rhs[0];
// CHECK: [[T_VAR_LHS_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST_LHS:%.+]], i64 0, i64 0
// CHECK: [[T_VAR_LHS_VOID:%.+]] = load i8*, i8** [[T_VAR_LHS_REF]],
// CHECK: [[T_VAR_LHS:%.+]] = bitcast i8* [[T_VAR_LHS_VOID]] to i{{[0-9]+}}*
// var_lhs = (S<i{{[0-9]+}}>*)lhs[1];
// CHECK: [[VAR_RHS_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST_RHS]], i64 0, i64 1
// CHECK: [[VAR_RHS_VOID:%.+]] = load i8*, i8** [[VAR_RHS_REF]],
// CHECK: [[VAR_RHS:%.+]] = bitcast i8* [[VAR_RHS_VOID]] to [[S_INT_TY]]*
// var_rhs = (S<i{{[0-9]+}}>*)rhs[1];
// CHECK: [[VAR_LHS_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST_LHS]], i64 0, i64 1
// CHECK: [[VAR_LHS_VOID:%.+]] = load i8*, i8** [[VAR_LHS_REF]],
// CHECK: [[VAR_LHS:%.+]] = bitcast i8* [[VAR_LHS_VOID]] to [[S_INT_TY]]*
// var1_lhs = (S<i{{[0-9]+}}>*)lhs[2];
// CHECK: [[VAR1_RHS_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST_RHS]], i64 0, i64 2
// CHECK: [[VAR1_RHS_VOID:%.+]] = load i8*, i8** [[VAR1_RHS_REF]],
// CHECK: [[VAR1_RHS:%.+]] = bitcast i8* [[VAR1_RHS_VOID]] to [[S_INT_TY]]*
// var1_rhs = (S<i{{[0-9]+}}>*)rhs[2];
// CHECK: [[VAR1_LHS_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST_LHS]], i64 0, i64 2
// CHECK: [[VAR1_LHS_VOID:%.+]] = load i8*, i8** [[VAR1_LHS_REF]],
// CHECK: [[VAR1_LHS:%.+]] = bitcast i8* [[VAR1_LHS_VOID]] to [[S_INT_TY]]*
// t_var1_lhs = (i{{[0-9]+}}*)lhs[3];
// CHECK: [[T_VAR1_RHS_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST_RHS]], i64 0, i64 3
// CHECK: [[T_VAR1_RHS_VOID:%.+]] = load i8*, i8** [[T_VAR1_RHS_REF]],
// CHECK: [[T_VAR1_RHS:%.+]] = bitcast i8* [[T_VAR1_RHS_VOID]] to i{{[0-9]+}}*
// t_var1_rhs = (i{{[0-9]+}}*)rhs[3];
// CHECK: [[T_VAR1_LHS_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST_LHS]], i64 0, i64 3
// CHECK: [[T_VAR1_LHS_VOID:%.+]] = load i8*, i8** [[T_VAR1_LHS_REF]],
// CHECK: [[T_VAR1_LHS:%.+]] = bitcast i8* [[T_VAR1_LHS_VOID]] to i{{[0-9]+}}*
// t_var_lhs += t_var_rhs;
// CHECK: [[T_VAR_LHS_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_LHS]],
// CHECK: [[T_VAR_RHS_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_RHS]],
// CHECK: [[UP:%.+]] = add nsw i{{[0-9]+}} [[T_VAR_LHS_VAL]], [[T_VAR_RHS_VAL]]
// CHECK: store i{{[0-9]+}} [[UP]], i{{[0-9]+}}* [[T_VAR_LHS]],
// var_lhs = var_lhs.operator &(var_rhs);
// CHECK: [[UP:%.+]] = call dereferenceable(4) [[S_INT_TY]]* @{{.+}}([[S_INT_TY]]* [[VAR_LHS]], [[S_INT_TY]]* dereferenceable(4) [[VAR_RHS]])
// CHECK: [[BC1:%.+]] = bitcast [[S_INT_TY]]* [[VAR_LHS]] to i8*
// CHECK: [[BC2:%.+]] = bitcast [[S_INT_TY]]* [[UP]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false)
// var1_lhs = var1_lhs.operator &&(var1_rhs);
// CHECK: [[TO_INT:%.+]] = call i{{[0-9]+}} @{{.+}}([[S_INT_TY]]* [[VAR1_LHS]])
// CHECK: [[VAR1_BOOL:%.+]] = icmp ne i{{[0-9]+}} [[TO_INT]], 0
// CHECK: br i1 [[VAR1_BOOL]], label %[[TRUE:.+]], label %[[END2:.+]]
// CHECK: [[TRUE]]
// CHECK: [[TO_INT:%.+]] = call i{{[0-9]+}} @{{.+}}([[S_INT_TY]]* [[VAR1_RHS]])
// CHECK: [[VAR1_REDUCTION_BOOL:%.+]] = icmp ne i{{[0-9]+}} [[TO_INT]], 0
// CHECK: br label %[[END2]]
// CHECK: [[END2]]
// CHECK: [[COND_LVALUE:%.+]] = phi i1 [ false, %{{.+}} ], [ [[VAR1_REDUCTION_BOOL]], %[[TRUE]] ]
// CHECK: [[CONV:%.+]] = zext i1 [[COND_LVALUE]] to i32
// CHECK: call void @{{.+}}([[S_INT_TY]]* [[COND_LVALUE:%.+]], i32 [[CONV]])
// CHECK: [[BC1:%.+]] = bitcast [[S_INT_TY]]* [[VAR1_LHS]] to i8*
// CHECK: [[BC2:%.+]] = bitcast [[S_INT_TY]]* [[COND_LVALUE]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false)
// t_var1_lhs = min(t_var1_lhs, t_var1_rhs);
// CHECK: [[T_VAR1_LHS_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR1_LHS]],
// CHECK: [[T_VAR1_RHS_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR1_RHS]],
// CHECK: [[CMP:%.+]] = icmp slt i{{[0-9]+}} [[T_VAR1_LHS_VAL]], [[T_VAR1_RHS_VAL]]
// CHECK: br i1 [[CMP]]
// CHECK: [[UP:%.+]] = phi i32
// CHECK: store i{{[0-9]+}} [[UP]], i{{[0-9]+}}* [[T_VAR1_LHS]],
// CHECK: ret void
#endif