; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_32 | FileCheck %s
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_32 | %ptxas-verify %}


; atom0: seq_cst `atomicrmw add` of an i32 through a default-address-space
; pointer. The expected PTX loads both parameters, issues a single
; atom.add.u32, and stores that instruction's result register as the return
; value.
; CHECK-LABEL: atom0
define i32 @atom0(ptr %addr, i32 %val) {
; CHECK-LABEL: atom0(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<3>;
; CHECK-NEXT:    .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b64 %rd1, [atom0_param_0];
; CHECK-NEXT:    ld.param.b32 %r1, [atom0_param_1];
; CHECK-NEXT:    atom.add.u32 %r2, [%rd1], %r1;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
; CHECK-NEXT:    ret;
  %ret = atomicrmw add ptr %addr, i32 %val seq_cst
  ret i32 %ret
}

; atom1: 64-bit counterpart of atom0. A seq_cst `atomicrmw add` of an i64 is
; expected to become a single atom.add.u64; pointer, operand and result all
; live in the %rd register class.
; CHECK-LABEL: atom1
define i64 @atom1(ptr %addr, i64 %val) {
; CHECK-LABEL: atom1(
; CHECK:       {
; CHECK-NEXT:    .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b64 %rd1, [atom1_param_0];
; CHECK-NEXT:    ld.param.b64 %rd2, [atom1_param_1];
; CHECK-NEXT:    atom.add.u64 %rd3, [%rd1], %rd2;
; CHECK-NEXT:    st.param.b64 [func_retval0], %rd3;
; CHECK-NEXT:    ret;
  %ret = atomicrmw add ptr %addr, i64 %val seq_cst
  ret i64 %ret
}

; atom2: seq_cst `atomicrmw sub` of an i32. The expected PTX contains no
; atomic subtract: the operand is negated with neg.s32 and the negated value
; is passed to atom.add.u32, which is why one extra %r register is declared
; compared with atom0.
; CHECK-LABEL: atom2
define i32 @atom2(ptr %subr, i32 %val) {
; CHECK-LABEL: atom2(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<4>;
; CHECK-NEXT:    .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b64 %rd1, [atom2_param_0];
; CHECK-NEXT:    ld.param.b32 %r1, [atom2_param_1];
; CHECK-NEXT:    neg.s32 %r2, %r1;
; CHECK-NEXT:    atom.add.u32 %r3, [%rd1], %r2;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
; CHECK-NEXT:    ret;
  %ret = atomicrmw sub ptr %subr, i32 %val seq_cst
  ret i32 %ret
}

; atom3: 64-bit counterpart of atom2. A seq_cst `atomicrmw sub` of an i64 is
; expected to be emitted as neg.s64 on the operand followed by atom.add.u64
; of the negated value.
; CHECK-LABEL: atom3
define i64 @atom3(ptr %subr, i64 %val) {
; CHECK-LABEL: atom3(
; CHECK:       {
; CHECK-NEXT:    .reg .b64 %rd<5>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b64 %rd1, [atom3_param_0];
; CHECK-NEXT:    ld.param.b64 %rd2, [atom3_param_1];
; CHECK-NEXT:    neg.s64 %rd3, %rd2;
; CHECK-NEXT:    atom.add.u64 %rd4, [%rd1], %rd3;
; CHECK-NEXT:    st.param.b64 [func_retval0], %rd4;
; CHECK-NEXT:    ret;
  %ret = atomicrmw sub ptr %subr, i64 %val seq_cst
  ret i64 %ret
}

; atom4: seq_cst `atomicrmw and` of an i32. The expected PTX is a single
; atom.and.b32 whose result register is stored as the return value.
; CHECK-LABEL: atom4
define i32 @atom4(ptr %subr, i32 %val) {
; CHECK-LABEL: atom4(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<3>;
; CHECK-NEXT:    .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b64 %rd1, [atom4_param_0];
; CHECK-NEXT:    ld.param.b32 %r1, [atom4_param_1];
; CHECK-NEXT:    atom.and.b32 %r2, [%rd1], %r1;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
; CHECK-NEXT:    ret;
  %ret = atomicrmw and ptr %subr, i32 %val seq_cst
  ret i32 %ret
}

; atom5: 64-bit counterpart of atom4. A seq_cst `atomicrmw and` of an i64 is
; expected to become a single atom.and.b64.
; CHECK-LABEL: atom5
define i64 @atom5(ptr %subr, i64 %val) {
; CHECK-LABEL: atom5(
; CHECK:       {
; CHECK-NEXT:    .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b64 %rd1, [atom5_param_0];
; CHECK-NEXT:    ld.param.b64 %rd2, [atom5_param_1];
; CHECK-NEXT:    atom.and.b64 %rd3, [%rd1], %rd2;
; CHECK-NEXT:    st.param.b64 [func_retval0], %rd3;
; CHECK-NEXT:    ret;
  %ret = atomicrmw and ptr %subr, i64 %val seq_cst
  ret i64 %ret
}

;; NAND is not yet supported, so the atom6/atom7 tests below stay commented out.
;define i32 @atom6(ptr %subr, i32 %val) {
;  %ret = atomicrmw nand ptr %subr, i32 %val seq_cst
;  ret i32 %ret
;}

;define i64 @atom7(ptr %subr, i64 %val) {
;  %ret = atomicrmw nand ptr %subr, i64 %val seq_cst
;  ret i64 %ret
;}

; atom8: seq_cst `atomicrmw or` of an i32. The expected PTX is a single
; atom.or.b32 whose result register is stored as the return value.
; CHECK-LABEL: atom8
define i32 @atom8(ptr %subr, i32 %val) {
; CHECK-LABEL: atom8(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<3>;
; CHECK-NEXT:    .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b64 %rd1, [atom8_param_0];
; CHECK-NEXT:    ld.param.b32 %r1, [atom8_param_1];
; CHECK-NEXT:    atom.or.b32 %r2, [%rd1], %r1;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
; CHECK-NEXT:    ret;
  %ret = atomicrmw or ptr %subr, i32 %val seq_cst
  ret i32 %ret
}

; atom9: 64-bit counterpart of atom8. A seq_cst `atomicrmw or` of an i64 is
; expected to become a single atom.or.b64.
; CHECK-LABEL: atom9
define i64 @atom9(ptr %subr, i64 %val) {
; CHECK-LABEL: atom9(
; CHECK:       {
; CHECK-NEXT:    .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b64 %rd1, [atom9_param_0];
; CHECK-NEXT:    ld.param.b64 %rd2, [atom9_param_1];
; CHECK-NEXT:    atom.or.b64 %rd3, [%rd1], %rd2;
; CHECK-NEXT:    st.param.b64 [func_retval0], %rd3;
; CHECK-NEXT:    ret;
  %ret = atomicrmw or ptr %subr, i64 %val seq_cst
  ret i64 %ret
}

; atom10: seq_cst `atomicrmw xor` of an i32. The expected PTX is a single
; atom.xor.b32 whose result register is stored as the return value.
; CHECK-LABEL: atom10
define i32 @atom10(ptr %subr, i32 %val) {
; CHECK-LABEL: atom10(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<3>;
; CHECK-NEXT:    .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b64 %rd1, [atom10_param_0];
; CHECK-NEXT:    ld.param.b32 %r1, [atom10_param_1];
; CHECK-NEXT:    atom.xor.b32 %r2, [%rd1], %r1;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
; CHECK-NEXT:    ret;
  %ret = atomicrmw xor ptr %subr, i32 %val seq_cst
  ret i32 %ret
}

; atom11: 64-bit counterpart of atom10. A seq_cst `atomicrmw xor` of an i64
; is expected to become a single atom.xor.b64.
; CHECK-LABEL: atom11
define i64 @atom11(ptr %subr, i64 %val) {
; CHECK-LABEL: atom11(
; CHECK:       {
; CHECK-NEXT:    .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b64 %rd1, [atom11_param_0];
; CHECK-NEXT:    ld.param.b64 %rd2, [atom11_param_1];
; CHECK-NEXT:    atom.xor.b64 %rd3, [%rd1], %rd2;
; CHECK-NEXT:    st.param.b64 [func_retval0], %rd3;
; CHECK-NEXT:    ret;
  %ret = atomicrmw xor ptr %subr, i64 %val seq_cst
  ret i64 %ret
}

; atom12: seq_cst signed `atomicrmw max` of an i32. The expected PTX uses the
; signed-typed form atom.max.s32 (compare atom16, where umax yields .u32).
; CHECK-LABEL: atom12
define i32 @atom12(ptr %subr, i32 %val) {
; CHECK-LABEL: atom12(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<3>;
; CHECK-NEXT:    .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b64 %rd1, [atom12_param_0];
; CHECK-NEXT:    ld.param.b32 %r1, [atom12_param_1];
; CHECK-NEXT:    atom.max.s32 %r2, [%rd1], %r1;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
; CHECK-NEXT:    ret;
  %ret = atomicrmw max ptr %subr, i32 %val seq_cst
  ret i32 %ret
}

; atom13: 64-bit counterpart of atom12. A seq_cst signed `atomicrmw max` of
; an i64 is expected to become a single atom.max.s64.
; CHECK-LABEL: atom13
define i64 @atom13(ptr %subr, i64 %val) {
; CHECK-LABEL: atom13(
; CHECK:       {
; CHECK-NEXT:    .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b64 %rd1, [atom13_param_0];
; CHECK-NEXT:    ld.param.b64 %rd2, [atom13_param_1];
; CHECK-NEXT:    atom.max.s64 %rd3, [%rd1], %rd2;
; CHECK-NEXT:    st.param.b64 [func_retval0], %rd3;
; CHECK-NEXT:    ret;
  %ret = atomicrmw max ptr %subr, i64 %val seq_cst
  ret i64 %ret
}

; atom14: seq_cst signed `atomicrmw min` of an i32. The expected PTX uses the
; signed-typed form atom.min.s32 (compare atom18, where umin yields .u32).
; CHECK-LABEL: atom14
define i32 @atom14(ptr %subr, i32 %val) {
; CHECK-LABEL: atom14(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<3>;
; CHECK-NEXT:    .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b64 %rd1, [atom14_param_0];
; CHECK-NEXT:    ld.param.b32 %r1, [atom14_param_1];
; CHECK-NEXT:    atom.min.s32 %r2, [%rd1], %r1;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
; CHECK-NEXT:    ret;
  %ret = atomicrmw min ptr %subr, i32 %val seq_cst
  ret i32 %ret
}

; atom15: 64-bit counterpart of atom14. A seq_cst signed `atomicrmw min` of
; an i64 is expected to become a single atom.min.s64.
; CHECK-LABEL: atom15
define i64 @atom15(ptr %subr, i64 %val) {
; CHECK-LABEL: atom15(
; CHECK:       {
; CHECK-NEXT:    .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b64 %rd1, [atom15_param_0];
; CHECK-NEXT:    ld.param.b64 %rd2, [atom15_param_1];
; CHECK-NEXT:    atom.min.s64 %rd3, [%rd1], %rd2;
; CHECK-NEXT:    st.param.b64 [func_retval0], %rd3;
; CHECK-NEXT:    ret;
  %ret = atomicrmw min ptr %subr, i64 %val seq_cst
  ret i64 %ret
}

; atom16: seq_cst unsigned `atomicrmw umax` of an i32. The expected PTX uses
; the unsigned-typed form atom.max.u32 (compare atom12, where max yields
; .s32).
; CHECK-LABEL: atom16
define i32 @atom16(ptr %subr, i32 %val) {
; CHECK-LABEL: atom16(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<3>;
; CHECK-NEXT:    .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b64 %rd1, [atom16_param_0];
; CHECK-NEXT:    ld.param.b32 %r1, [atom16_param_1];
; CHECK-NEXT:    atom.max.u32 %r2, [%rd1], %r1;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
; CHECK-NEXT:    ret;
  %ret = atomicrmw umax ptr %subr, i32 %val seq_cst
  ret i32 %ret
}

; atom17: 64-bit counterpart of atom16. A seq_cst unsigned `atomicrmw umax`
; of an i64 is expected to become a single atom.max.u64.
; CHECK-LABEL: atom17
define i64 @atom17(ptr %subr, i64 %val) {
; CHECK-LABEL: atom17(
; CHECK:       {
; CHECK-NEXT:    .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b64 %rd1, [atom17_param_0];
; CHECK-NEXT:    ld.param.b64 %rd2, [atom17_param_1];
; CHECK-NEXT:    atom.max.u64 %rd3, [%rd1], %rd2;
; CHECK-NEXT:    st.param.b64 [func_retval0], %rd3;
; CHECK-NEXT:    ret;
  %ret = atomicrmw umax ptr %subr, i64 %val seq_cst
  ret i64 %ret
}

; atom18: seq_cst unsigned `atomicrmw umin` of an i32. The expected PTX uses
; the unsigned-typed form atom.min.u32 (compare atom14, where min yields
; .s32).
; CHECK-LABEL: atom18
define i32 @atom18(ptr %subr, i32 %val) {
; CHECK-LABEL: atom18(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<3>;
; CHECK-NEXT:    .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b64 %rd1, [atom18_param_0];
; CHECK-NEXT:    ld.param.b32 %r1, [atom18_param_1];
; CHECK-NEXT:    atom.min.u32 %r2, [%rd1], %r1;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
; CHECK-NEXT:    ret;
  %ret = atomicrmw umin ptr %subr, i32 %val seq_cst
  ret i32 %ret
}

; atom19: 64-bit counterpart of atom18. A seq_cst unsigned `atomicrmw umin`
; of an i64 is expected to become a single atom.min.u64.
; CHECK-LABEL: atom19
define i64 @atom19(ptr %subr, i64 %val) {
; CHECK-LABEL: atom19(
; CHECK:       {
; CHECK-NEXT:    .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b64 %rd1, [atom19_param_0];
; CHECK-NEXT:    ld.param.b64 %rd2, [atom19_param_1];
; CHECK-NEXT:    atom.min.u64 %rd3, [%rd1], %rd2;
; CHECK-NEXT:    st.param.b64 [func_retval0], %rd3;
; CHECK-NEXT:    ret;
  %ret = atomicrmw umin ptr %subr, i64 %val seq_cst
  ret i64 %ret
}

; atom20: seq_cst `atomicrmw uinc_wrap` of an i32. The expected PTX is a
; single atom.inc.u32 taking the IR operand directly as its bound argument.
define i32 @atom20(ptr %subr, i32 %val) {
; CHECK-LABEL: atom20(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<3>;
; CHECK-NEXT:    .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b64 %rd1, [atom20_param_0];
; CHECK-NEXT:    ld.param.b32 %r1, [atom20_param_1];
; CHECK-NEXT:    atom.inc.u32 %r2, [%rd1], %r1;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
; CHECK-NEXT:    ret;
  %ret = atomicrmw uinc_wrap ptr %subr, i32 %val seq_cst
  ret i32 %ret
}

; atom21: seq_cst `atomicrmw udec_wrap` of an i32. The expected PTX is a
; single atom.dec.u32 taking the IR operand directly as its bound argument.
define i32 @atom21(ptr %subr, i32 %val) {
; CHECK-LABEL: atom21(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<3>;
; CHECK-NEXT:    .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b64 %rd1, [atom21_param_0];
; CHECK-NEXT:    ld.param.b32 %r1, [atom21_param_1];
; CHECK-NEXT:    atom.dec.u32 %r2, [%rd1], %r1;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
; CHECK-NEXT:    ret;
  %ret = atomicrmw udec_wrap ptr %subr, i32 %val seq_cst
  ret i32 %ret
}

; Float atomic add expressed through the llvm.nvvm.atomic.load.add.f32
; intrinsic, here in its .p0 (default address space) overload.
declare float @llvm.nvvm.atomic.load.add.f32.p0(ptr %addr, float %val)

; atomic_add_f32_generic: calls the .p0 intrinsic. The expected PTX is a
; single atom.add.f32 with no state-space qualifier; the float operand and
; result are carried in .b32 registers.
; CHECK-LABEL: atomic_add_f32_generic
define float @atomic_add_f32_generic(ptr %addr, float %val) {
; CHECK-LABEL: atomic_add_f32_generic(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<3>;
; CHECK-NEXT:    .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b64 %rd1, [atomic_add_f32_generic_param_0];
; CHECK-NEXT:    ld.param.b32 %r1, [atomic_add_f32_generic_param_1];
; CHECK-NEXT:    atom.add.f32 %r2, [%rd1], %r1;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
; CHECK-NEXT:    ret;
  %ret = call float @llvm.nvvm.atomic.load.add.f32.p0(ptr %addr, float %val)
  ret float %ret
}

; The .p1 overload of the float atomic-add intrinsic takes an addrspace(1)
; pointer.
declare float @llvm.nvvm.atomic.load.add.f32.p1(ptr addrspace(1) %addr, float %val)

; atomic_add_f32_addrspace1: calls the .p1 intrinsic. With an addrspace(1)
; pointer the expected instruction carries the .global qualifier
; (atom.global.add.f32).
; CHECK-LABEL: atomic_add_f32_addrspace1
define float @atomic_add_f32_addrspace1(ptr addrspace(1) %addr, float %val) {
; CHECK-LABEL: atomic_add_f32_addrspace1(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<3>;
; CHECK-NEXT:    .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b64 %rd1, [atomic_add_f32_addrspace1_param_0];
; CHECK-NEXT:    ld.param.b32 %r1, [atomic_add_f32_addrspace1_param_1];
; CHECK-NEXT:    atom.global.add.f32 %r2, [%rd1], %r1;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
; CHECK-NEXT:    ret;
  %ret = call float @llvm.nvvm.atomic.load.add.f32.p1(ptr addrspace(1) %addr, float %val)
  ret float %ret
}

; The .p3 overload of the float atomic-add intrinsic takes an addrspace(3)
; pointer.
declare float @llvm.nvvm.atomic.load.add.f32.p3(ptr addrspace(3) %addr, float %val)

; atomic_add_f32_addrspace3: calls the .p3 intrinsic. With an addrspace(3)
; pointer the expected instruction carries the .shared qualifier
; (atom.shared.add.f32); the pointer parameter is still loaded as 64 bits.
; CHECK-LABEL: atomic_add_f32_addrspace3
define float @atomic_add_f32_addrspace3(ptr addrspace(3) %addr, float %val) {
; CHECK-LABEL: atomic_add_f32_addrspace3(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<3>;
; CHECK-NEXT:    .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b64 %rd1, [atomic_add_f32_addrspace3_param_0];
; CHECK-NEXT:    ld.param.b32 %r1, [atomic_add_f32_addrspace3_param_1];
; CHECK-NEXT:    atom.shared.add.f32 %r2, [%rd1], %r1;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
; CHECK-NEXT:    ret;
  %ret = call float @llvm.nvvm.atomic.load.add.f32.p3(ptr addrspace(3) %addr, float %val)
  ret float %ret
}

; atomicrmw_add_f32_generic: the same float add as atomic_add_f32_generic,
; but written as a seq_cst `atomicrmw fadd` instead of an intrinsic call.
; The expected PTX is the same single atom.add.f32.
; CHECK-LABEL: atomicrmw_add_f32_generic
define float @atomicrmw_add_f32_generic(ptr %addr, float %val) {
; CHECK-LABEL: atomicrmw_add_f32_generic(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<3>;
; CHECK-NEXT:    .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b64 %rd1, [atomicrmw_add_f32_generic_param_0];
; CHECK-NEXT:    ld.param.b32 %r1, [atomicrmw_add_f32_generic_param_1];
; CHECK-NEXT:    atom.add.f32 %r2, [%rd1], %r1;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
; CHECK-NEXT:    ret;
  %ret = atomicrmw fadd ptr %addr, float %val seq_cst
  ret float %ret
}

; atomicrmw_add_f16_generic: seq_cst `atomicrmw fadd` on a half. Unlike the
; f32 cases, the expected PTX is not a single atom instruction but a
; compare-and-swap retry loop on the enclosing 32-bit word:
;   * setup: the address is masked with -4 to get the word address, the low
;     two address bits times 8 give the bit shift, and 65535 shifted by that
;     amount (then inverted) gives the mask that preserves the other half of
;     the word; the half operand is widened to f32 once, before the loop;
;   * loop (%atomicrmw.start): the current half is shifted out of the word,
;     widened to f32, added with add.rn.f32, narrowed back to f16, merged
;     into the word, and published with membar.sys + atom.cas.b32; the loop
;     repeats while the value returned by the CAS differs from the expected
;     word;
;   * exit (%atomicrmw.end): the word returned by the last CAS is shifted
;     right to produce the returned half.
; The loop label's index (BB24) is tied to this function's position in the
; file, so adding or removing functions above it requires regenerating the
; assertions.
; CHECK-LABEL: atomicrmw_add_f16_generic
define half @atomicrmw_add_f16_generic(ptr %addr, half %val) {
; CHECK-LABEL: atomicrmw_add_f16_generic(
; CHECK:       {
; CHECK-NEXT:    .reg .pred %p<2>;
; CHECK-NEXT:    .reg .b16 %rs<4>;
; CHECK-NEXT:    .reg .b32 %r<20>;
; CHECK-NEXT:    .reg .b64 %rd<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b16 %rs1, [atomicrmw_add_f16_generic_param_1];
; CHECK-NEXT:    ld.param.b64 %rd2, [atomicrmw_add_f16_generic_param_0];
; CHECK-NEXT:    and.b64 %rd1, %rd2, -4;
; CHECK-NEXT:    cvt.u32.u64 %r6, %rd2;
; CHECK-NEXT:    and.b32 %r7, %r6, 3;
; CHECK-NEXT:    shl.b32 %r1, %r7, 3;
; CHECK-NEXT:    mov.b32 %r8, 65535;
; CHECK-NEXT:    shl.b32 %r9, %r8, %r1;
; CHECK-NEXT:    not.b32 %r2, %r9;
; CHECK-NEXT:    ld.b32 %r19, [%rd1];
; CHECK-NEXT:    cvt.f32.f16 %r12, %rs1;
; CHECK-NEXT:  $L__BB24_1: // %atomicrmw.start
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    shr.u32 %r10, %r19, %r1;
; CHECK-NEXT:    cvt.u16.u32 %rs2, %r10;
; CHECK-NEXT:    cvt.f32.f16 %r11, %rs2;
; CHECK-NEXT:    add.rn.f32 %r13, %r11, %r12;
; CHECK-NEXT:    cvt.rn.f16.f32 %rs3, %r13;
; CHECK-NEXT:    cvt.u32.u16 %r14, %rs3;
; CHECK-NEXT:    shl.b32 %r15, %r14, %r1;
; CHECK-NEXT:    and.b32 %r16, %r19, %r2;
; CHECK-NEXT:    or.b32 %r17, %r16, %r15;
; CHECK-NEXT:    membar.sys;
; CHECK-NEXT:    atom.cas.b32 %r5, [%rd1], %r19, %r17;
; CHECK-NEXT:    setp.ne.b32 %p1, %r5, %r19;
; CHECK-NEXT:    mov.b32 %r19, %r5;
; CHECK-NEXT:    @%p1 bra $L__BB24_1;
; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
; CHECK-NEXT:    shr.u32 %r18, %r5, %r1;
; CHECK-NEXT:    st.param.b16 [func_retval0], %r18;
; CHECK-NEXT:    ret;
  %ret = atomicrmw fadd ptr %addr, half %val seq_cst
  ret half %ret
}

; atomicrmw_add_f32_addrspace1: seq_cst `atomicrmw fadd` of a float through
; an addrspace(1) pointer. The expected instruction carries the .global
; qualifier (atom.global.add.f32), matching the intrinsic-based
; atomic_add_f32_addrspace1 test.
; CHECK-LABEL: atomicrmw_add_f32_addrspace1
define float @atomicrmw_add_f32_addrspace1(ptr addrspace(1) %addr, float %val) {
; CHECK-LABEL: atomicrmw_add_f32_addrspace1(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<3>;
; CHECK-NEXT:    .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b64 %rd1, [atomicrmw_add_f32_addrspace1_param_0];
; CHECK-NEXT:    ld.param.b32 %r1, [atomicrmw_add_f32_addrspace1_param_1];
; CHECK-NEXT:    atom.global.add.f32 %r2, [%rd1], %r1;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
; CHECK-NEXT:    ret;
  %ret = atomicrmw fadd ptr addrspace(1) %addr, float %val seq_cst
  ret float %ret
}

; atomicrmw_add_f32_addrspace3: seq_cst `atomicrmw fadd` of a float through
; an addrspace(3) pointer. The expected instruction carries the .shared
; qualifier (atom.shared.add.f32), matching the intrinsic-based
; atomic_add_f32_addrspace3 test.
; CHECK-LABEL: atomicrmw_add_f32_addrspace3
define float @atomicrmw_add_f32_addrspace3(ptr addrspace(3) %addr, float %val) {
; CHECK-LABEL: atomicrmw_add_f32_addrspace3(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<3>;
; CHECK-NEXT:    .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b64 %rd1, [atomicrmw_add_f32_addrspace3_param_0];
; CHECK-NEXT:    ld.param.b32 %r1, [atomicrmw_add_f32_addrspace3_param_1];
; CHECK-NEXT:    atom.shared.add.f32 %r2, [%rd1], %r1;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
; CHECK-NEXT:    ret;
  %ret = atomicrmw fadd ptr addrspace(3) %addr, float %val seq_cst
  ret float %ret
}

; atomic_cmpxchg_i32: seq_cst/seq_cst `cmpxchg` on an i32. The expected PTX
; is a membar.sys followed by a single atom.cas.b32 with the compare value
; and the new value as operands.
; NOTE(review): the cmpxchg result (%pairold) is never used; the function
; returns %new, so the expected return store reads the register loaded from
; parameter 2 rather than the atom.cas.b32 destination. Presumably this is
; deliberate, to cover only instruction selection - confirm before relying on
; this test for the returned old value.
; CHECK-LABEL: atomic_cmpxchg_i32
define i32 @atomic_cmpxchg_i32(ptr %addr, i32 %cmp, i32 %new) {
; CHECK-LABEL: atomic_cmpxchg_i32(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<4>;
; CHECK-NEXT:    .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b64 %rd1, [atomic_cmpxchg_i32_param_0];
; CHECK-NEXT:    membar.sys;
; CHECK-NEXT:    ld.param.b32 %r1, [atomic_cmpxchg_i32_param_1];
; CHECK-NEXT:    ld.param.b32 %r2, [atomic_cmpxchg_i32_param_2];
; CHECK-NEXT:    atom.cas.b32 %r3, [%rd1], %r1, %r2;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
; CHECK-NEXT:    ret;
  %pairold = cmpxchg ptr %addr, i32 %cmp, i32 %new seq_cst seq_cst
  ret i32 %new
}

; atomic_cmpxchg_i64: seq_cst/seq_cst `cmpxchg` on an i64. The expected PTX
; is a membar.sys followed by a single atom.cas.b64 with the compare value
; and the new value as operands.
; NOTE(review): the cmpxchg result (%pairold) is never used; the function
; returns %new, so the expected return store reads the register loaded from
; parameter 2 rather than the atom.cas.b64 destination. Presumably this is
; deliberate, to cover only instruction selection - confirm before relying on
; this test for the returned old value.
; CHECK-LABEL: atomic_cmpxchg_i64
define i64 @atomic_cmpxchg_i64(ptr %addr, i64 %cmp, i64 %new) {
; CHECK-LABEL: atomic_cmpxchg_i64(
; CHECK:       {
; CHECK-NEXT:    .reg .b64 %rd<5>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b64 %rd1, [atomic_cmpxchg_i64_param_0];
; CHECK-NEXT:    membar.sys;
; CHECK-NEXT:    ld.param.b64 %rd2, [atomic_cmpxchg_i64_param_1];
; CHECK-NEXT:    ld.param.b64 %rd3, [atomic_cmpxchg_i64_param_2];
; CHECK-NEXT:    atom.cas.b64 %rd4, [%rd1], %rd2, %rd3;
; CHECK-NEXT:    st.param.b64 [func_retval0], %rd3;
; CHECK-NEXT:    ret;
  %pairold = cmpxchg ptr %addr, i64 %cmp, i64 %new seq_cst seq_cst
  ret i64 %new
}
