| 1 | // RUN: %clang_cc1 -fcuda-is-device \ |
| 2 | // RUN: -triple nvptx-nvidia-cuda -emit-llvm -o - %s | \ |
| 3 | // RUN: FileCheck %s -check-prefix CHECK -check-prefix NOFTZ |
| 4 | // RUN: %clang_cc1 -fcuda-is-device -fcuda-flush-denormals-to-zero \ |
| 5 | // RUN: -triple nvptx-nvidia-cuda -emit-llvm -o - %s | \ |
| 6 | // RUN: FileCheck %s -check-prefix CHECK -check-prefix FTZ |
| 7 | |
| 8 | // RUN: %clang_cc1 -fcuda-is-device -x hip \ |
| 9 | // RUN: -triple amdgcn-amd-amdhsa -target-cpu gfx900 -emit-llvm -o - %s | \ |
| 10 | // RUN: FileCheck %s -check-prefix CHECK -check-prefix AMDNOFTZ |
| 11 | // RUN: %clang_cc1 -fcuda-is-device -x hip -fcuda-flush-denormals-to-zero \ |
| 12 | // RUN: -triple amdgcn-amd-amdhsa -target-cpu gfx900 -emit-llvm -o - %s | \ |
| 13 | // RUN: FileCheck %s -check-prefix CHECK -check-prefix AMDFTZ |
| 14 | |
| 15 | #include "Inputs/cuda.h" |
| 16 | |
| 17 | // Checks that device function calls get emitted with the "nvptx-f32ftz" |
| 18 | // attribute set to "true" when we compile CUDA device code with |
| 19 | // -fcuda-flush-denormals-to-zero. Further, check that we reflect the presence |
| 20 | // or absence of -fcuda-flush-denormals-to-zero in a module flag. |
| 21 | |
| 22 | // AMDGCN targets always have +fp64-fp16-denormals. |
| 23 | // AMDGCN targets without fast FMAF (e.g. gfx803) always have +fp32-denormals. |
| 24 | // AMDGCN targets with fast FMAF (e.g. gfx900) have +fp32-denormals by |
| 25 | // default, and -fp32-denormals when -fcuda-flush-denormals-to-zero is |
| 26 | // passed. |
| 27 | |
| 28 | // CHECK-LABEL: define void @foo() #0 |
| 29 | extern "C" __device__ void foo() {} /* Empty device function with C linkage; the directives in this file match the IR definition and the attribute group #0 emitted for it. */ |
| 30 | |
| 31 | // FTZ: attributes #0 = {{.*}} "nvptx-f32ftz"="true" |
| 32 | // NOFTZ-NOT: attributes #0 = {{.*}} "nvptx-f32ftz" |
| 33 | // AMDNOFTZ: attributes #0 = {{.*}}+fp32-denormals{{.*}}+fp64-fp16-denormals |
| 34 | // AMDFTZ: attributes #0 = {{.*}}+fp64-fp16-denormals{{.*}}-fp32-denormals |
| 35 | |
| 36 | // FTZ:!llvm.module.flags = !{{{.*}}[[MODFLAG:![0-9]+]]} |
| 37 | // FTZ:[[MODFLAG]] = !{i32 4, !"nvvm-reflect-ftz", i32 1} |
| 38 | |
| 39 | // NOFTZ:!llvm.module.flags = !{{{.*}}[[MODFLAG:![0-9]+]]} |
| 40 | // NOFTZ:[[MODFLAG]] = !{i32 4, !"nvvm-reflect-ftz", i32 0} |
| 41 | |