; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-unknown -mcpu=pwr8 \
; RUN:   -ppc-asm-full-reg-names -ppc-quadword-atomics \
; RUN:   -ppc-track-subreg-liveness < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-unknown -mcpu=pwr7 \
; RUN:   -ppc-asm-full-reg-names -ppc-quadword-atomics \
; RUN:   -ppc-track-subreg-liveness < %s | FileCheck --check-prefix=PWR7 %s


; 128-bit sequentially consistent atomic exchange: writes %x to %a and returns
; the atomicrmw result (the previous contents of %a).
; pwr8 run: expects an inline lqarx/stqcx. retry loop, with a sync before the
; loop and an lwsync after it.
; pwr7 run: expects a call to __sync_lock_test_and_set_16 bracketed by the
; same sync/lwsync pair.
define i128 @swap(i128* %a, i128 %x) {
; CHECK-LABEL: swap:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    sync
; CHECK-NEXT:  .LBB0_1: # %entry
; CHECK-NEXT:    #
; CHECK-NEXT:    lqarx r6, 0, r3
; CHECK-NEXT:    mr r9, r5
; CHECK-NEXT:    mr r8, r4
; CHECK-NEXT:    stqcx. r8, 0, r3
; CHECK-NEXT:    bne cr0, .LBB0_1
; CHECK-NEXT:  # %bb.2: # %entry
; CHECK-NEXT:    lwsync
; CHECK-NEXT:    mr r3, r6
; CHECK-NEXT:    mr r4, r7
; CHECK-NEXT:    blr
;
; PWR7-LABEL: swap:
; PWR7:       # %bb.0: # %entry
; PWR7-NEXT:    mflr r0
; PWR7-NEXT:    std r0, 16(r1)
; PWR7-NEXT:    stdu r1, -112(r1)
; PWR7-NEXT:    .cfi_def_cfa_offset 112
; PWR7-NEXT:    .cfi_offset lr, 16
; PWR7-NEXT:    sync
; PWR7-NEXT:    bl __sync_lock_test_and_set_16
; PWR7-NEXT:    nop
; PWR7-NEXT:    lwsync
; PWR7-NEXT:    addi r1, r1, 112
; PWR7-NEXT:    ld r0, 16(r1)
; PWR7-NEXT:    mtlr r0
; PWR7-NEXT:    blr
entry:
  ; The explicit 16-byte alignment matches the size of the i128 operand.
  %0 = atomicrmw xchg i128* %a, i128 %x seq_cst, align 16
  ret i128 %0
}

; 128-bit sequentially consistent atomic fetch-and-add on %a with operand %x;
; returns the atomicrmw result (the previous contents of %a).
; pwr8 run: expects an inline lqarx/stqcx. retry loop whose body is an
; addc/adde pair (carry-propagating add across the two 64-bit halves), with a
; sync before the loop and an lwsync after it.
; pwr7 run: expects a call to __sync_fetch_and_add_16 bracketed by the same
; sync/lwsync pair.
define i128 @add(i128* %a, i128 %x) {
; CHECK-LABEL: add:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    sync
; CHECK-NEXT:  .LBB1_1: # %entry
; CHECK-NEXT:    #
; CHECK-NEXT:    lqarx r6, 0, r3
; CHECK-NEXT:    addc r9, r5, r7
; CHECK-NEXT:    adde r8, r4, r6
; CHECK-NEXT:    stqcx. r8, 0, r3
; CHECK-NEXT:    bne cr0, .LBB1_1
; CHECK-NEXT:  # %bb.2: # %entry
; CHECK-NEXT:    lwsync
; CHECK-NEXT:    mr r3, r6
; CHECK-NEXT:    mr r4, r7
; CHECK-NEXT:    blr
;
; PWR7-LABEL: add:
; PWR7:       # %bb.0: # %entry
; PWR7-NEXT:    mflr r0
; PWR7-NEXT:    std r0, 16(r1)
; PWR7-NEXT:    stdu r1, -112(r1)
; PWR7-NEXT:    .cfi_def_cfa_offset 112
; PWR7-NEXT:    .cfi_offset lr, 16
; PWR7-NEXT:    sync
; PWR7-NEXT:    bl __sync_fetch_and_add_16
; PWR7-NEXT:    nop
; PWR7-NEXT:    lwsync
; PWR7-NEXT:    addi r1, r1, 112
; PWR7-NEXT:    ld r0, 16(r1)
; PWR7-NEXT:    mtlr r0
; PWR7-NEXT:    blr
entry:
  ; The explicit 16-byte alignment matches the size of the i128 operand.
  %0 = atomicrmw add i128* %a, i128 %x seq_cst, align 16
  ret i128 %0
}

; 128-bit sequentially consistent atomic fetch-and-subtract on %a with operand
; %x; returns the atomicrmw result (the previous contents of %a).
; pwr8 run: expects an inline lqarx/stqcx. retry loop whose body is a
; subc/subfe pair (borrow-propagating subtract across the two 64-bit halves),
; with a sync before the loop and an lwsync after it.
; pwr7 run: expects a call to __sync_fetch_and_sub_16 bracketed by the same
; sync/lwsync pair.
define i128 @sub(i128* %a, i128 %x) {
; CHECK-LABEL: sub:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    sync
; CHECK-NEXT:  .LBB2_1: # %entry
; CHECK-NEXT:    #
; CHECK-NEXT:    lqarx r6, 0, r3
; CHECK-NEXT:    subc r9, r7, r5
; CHECK-NEXT:    subfe r8, r4, r6
; CHECK-NEXT:    stqcx. r8, 0, r3
; CHECK-NEXT:    bne cr0, .LBB2_1
; CHECK-NEXT:  # %bb.2: # %entry
; CHECK-NEXT:    lwsync
; CHECK-NEXT:    mr r3, r6
; CHECK-NEXT:    mr r4, r7
; CHECK-NEXT:    blr
;
; PWR7-LABEL: sub:
; PWR7:       # %bb.0: # %entry
; PWR7-NEXT:    mflr r0
; PWR7-NEXT:    std r0, 16(r1)
; PWR7-NEXT:    stdu r1, -112(r1)
; PWR7-NEXT:    .cfi_def_cfa_offset 112
; PWR7-NEXT:    .cfi_offset lr, 16
; PWR7-NEXT:    sync
; PWR7-NEXT:    bl __sync_fetch_and_sub_16
; PWR7-NEXT:    nop
; PWR7-NEXT:    lwsync
; PWR7-NEXT:    addi r1, r1, 112
; PWR7-NEXT:    ld r0, 16(r1)
; PWR7-NEXT:    mtlr r0
; PWR7-NEXT:    blr
entry:
  ; The explicit 16-byte alignment matches the size of the i128 operand.
  %0 = atomicrmw sub i128* %a, i128 %x seq_cst, align 16
  ret i128 %0
}

; 128-bit sequentially consistent atomic fetch-and-AND on %a with operand %x;
; returns the atomicrmw result (the previous contents of %a).
; pwr8 run: expects an inline lqarx/stqcx. retry loop whose body applies `and`
; to each 64-bit half, with a sync before the loop and an lwsync after it.
; pwr7 run: expects a call to __sync_fetch_and_and_16 bracketed by the same
; sync/lwsync pair.
define i128 @and(i128* %a, i128 %x) {
; CHECK-LABEL: and:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    sync
; CHECK-NEXT:  .LBB3_1: # %entry
; CHECK-NEXT:    #
; CHECK-NEXT:    lqarx r6, 0, r3
; CHECK-NEXT:    and r9, r5, r7
; CHECK-NEXT:    and r8, r4, r6
; CHECK-NEXT:    stqcx. r8, 0, r3
; CHECK-NEXT:    bne cr0, .LBB3_1
; CHECK-NEXT:  # %bb.2: # %entry
; CHECK-NEXT:    lwsync
; CHECK-NEXT:    mr r3, r6
; CHECK-NEXT:    mr r4, r7
; CHECK-NEXT:    blr
;
; PWR7-LABEL: and:
; PWR7:       # %bb.0: # %entry
; PWR7-NEXT:    mflr r0
; PWR7-NEXT:    std r0, 16(r1)
; PWR7-NEXT:    stdu r1, -112(r1)
; PWR7-NEXT:    .cfi_def_cfa_offset 112
; PWR7-NEXT:    .cfi_offset lr, 16
; PWR7-NEXT:    sync
; PWR7-NEXT:    bl __sync_fetch_and_and_16
; PWR7-NEXT:    nop
; PWR7-NEXT:    lwsync
; PWR7-NEXT:    addi r1, r1, 112
; PWR7-NEXT:    ld r0, 16(r1)
; PWR7-NEXT:    mtlr r0
; PWR7-NEXT:    blr
entry:
  ; The explicit 16-byte alignment matches the size of the i128 operand.
  %0 = atomicrmw and i128* %a, i128 %x seq_cst, align 16
  ret i128 %0
}

; 128-bit sequentially consistent atomic fetch-and-OR on %a with operand %x;
; returns the atomicrmw result (the previous contents of %a).
; pwr8 run: expects an inline lqarx/stqcx. retry loop whose body applies `or`
; to each 64-bit half, with a sync before the loop and an lwsync after it.
; pwr7 run: expects a call to __sync_fetch_and_or_16 bracketed by the same
; sync/lwsync pair.
define i128 @or(i128* %a, i128 %x) {
; CHECK-LABEL: or:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    sync
; CHECK-NEXT:  .LBB4_1: # %entry
; CHECK-NEXT:    #
; CHECK-NEXT:    lqarx r6, 0, r3
; CHECK-NEXT:    or r9, r5, r7
; CHECK-NEXT:    or r8, r4, r6
; CHECK-NEXT:    stqcx. r8, 0, r3
; CHECK-NEXT:    bne cr0, .LBB4_1
; CHECK-NEXT:  # %bb.2: # %entry
; CHECK-NEXT:    lwsync
; CHECK-NEXT:    mr r3, r6
; CHECK-NEXT:    mr r4, r7
; CHECK-NEXT:    blr
;
; PWR7-LABEL: or:
; PWR7:       # %bb.0: # %entry
; PWR7-NEXT:    mflr r0
; PWR7-NEXT:    std r0, 16(r1)
; PWR7-NEXT:    stdu r1, -112(r1)
; PWR7-NEXT:    .cfi_def_cfa_offset 112
; PWR7-NEXT:    .cfi_offset lr, 16
; PWR7-NEXT:    sync
; PWR7-NEXT:    bl __sync_fetch_and_or_16
; PWR7-NEXT:    nop
; PWR7-NEXT:    lwsync
; PWR7-NEXT:    addi r1, r1, 112
; PWR7-NEXT:    ld r0, 16(r1)
; PWR7-NEXT:    mtlr r0
; PWR7-NEXT:    blr
entry:
  ; The explicit 16-byte alignment matches the size of the i128 operand.
  %0 = atomicrmw or i128* %a, i128 %x seq_cst, align 16
  ret i128 %0
}

; 128-bit sequentially consistent atomic fetch-and-XOR on %a with operand %x;
; returns the atomicrmw result (the previous contents of %a).
; pwr8 run: expects an inline lqarx/stqcx. retry loop whose body applies `xor`
; to each 64-bit half, with a sync before the loop and an lwsync after it.
; pwr7 run: expects a call to __sync_fetch_and_xor_16 bracketed by the same
; sync/lwsync pair.
define i128 @xor(i128* %a, i128 %x) {
; CHECK-LABEL: xor:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    sync
; CHECK-NEXT:  .LBB5_1: # %entry
; CHECK-NEXT:    #
; CHECK-NEXT:    lqarx r6, 0, r3
; CHECK-NEXT:    xor r9, r5, r7
; CHECK-NEXT:    xor r8, r4, r6
; CHECK-NEXT:    stqcx. r8, 0, r3
; CHECK-NEXT:    bne cr0, .LBB5_1
; CHECK-NEXT:  # %bb.2: # %entry
; CHECK-NEXT:    lwsync
; CHECK-NEXT:    mr r3, r6
; CHECK-NEXT:    mr r4, r7
; CHECK-NEXT:    blr
;
; PWR7-LABEL: xor:
; PWR7:       # %bb.0: # %entry
; PWR7-NEXT:    mflr r0
; PWR7-NEXT:    std r0, 16(r1)
; PWR7-NEXT:    stdu r1, -112(r1)
; PWR7-NEXT:    .cfi_def_cfa_offset 112
; PWR7-NEXT:    .cfi_offset lr, 16
; PWR7-NEXT:    sync
; PWR7-NEXT:    bl __sync_fetch_and_xor_16
; PWR7-NEXT:    nop
; PWR7-NEXT:    lwsync
; PWR7-NEXT:    addi r1, r1, 112
; PWR7-NEXT:    ld r0, 16(r1)
; PWR7-NEXT:    mtlr r0
; PWR7-NEXT:    blr
entry:
  ; The explicit 16-byte alignment matches the size of the i128 operand.
  %0 = atomicrmw xor i128* %a, i128 %x seq_cst, align 16
  ret i128 %0
}

; 128-bit sequentially consistent atomic fetch-and-NAND on %a with operand %x;
; returns the atomicrmw result (the previous contents of %a).
; pwr8 run: expects an inline lqarx/stqcx. retry loop whose body applies `nand`
; to each 64-bit half, with a sync before the loop and an lwsync after it.
; pwr7 run: expects a call to __sync_fetch_and_nand_16 bracketed by the same
; sync/lwsync pair.
define i128 @nand(i128* %a, i128 %x) {
; CHECK-LABEL: nand:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    sync
; CHECK-NEXT:  .LBB6_1: # %entry
; CHECK-NEXT:    #
; CHECK-NEXT:    lqarx r6, 0, r3
; CHECK-NEXT:    nand r9, r5, r7
; CHECK-NEXT:    nand r8, r4, r6
; CHECK-NEXT:    stqcx. r8, 0, r3
; CHECK-NEXT:    bne cr0, .LBB6_1
; CHECK-NEXT:  # %bb.2: # %entry
; CHECK-NEXT:    lwsync
; CHECK-NEXT:    mr r3, r6
; CHECK-NEXT:    mr r4, r7
; CHECK-NEXT:    blr
;
; PWR7-LABEL: nand:
; PWR7:       # %bb.0: # %entry
; PWR7-NEXT:    mflr r0
; PWR7-NEXT:    std r0, 16(r1)
; PWR7-NEXT:    stdu r1, -112(r1)
; PWR7-NEXT:    .cfi_def_cfa_offset 112
; PWR7-NEXT:    .cfi_offset lr, 16
; PWR7-NEXT:    sync
; PWR7-NEXT:    bl __sync_fetch_and_nand_16
; PWR7-NEXT:    nop
; PWR7-NEXT:    lwsync
; PWR7-NEXT:    addi r1, r1, 112
; PWR7-NEXT:    ld r0, 16(r1)
; PWR7-NEXT:    mtlr r0
; PWR7-NEXT:    blr
entry:
  ; The explicit 16-byte alignment matches the size of the i128 operand.
  %0 = atomicrmw nand i128* %a, i128 %x seq_cst, align 16
  ret i128 %0
}

;; CmpXchg
; Weak 128-bit compare-and-exchange with acquire ordering on both the success
; and failure paths; returns only the loaded value from the cmpxchg result.
; pwr8 run: expects an inline lqarx loop that compares both 64-bit halves with
; xor/xor/or., stores %new via stqcx. on a match (branching back to the loop
; head if that store fails), and on a mismatch issues a stqcx. of the value it
; just loaded. No barrier precedes the loop; an lwsync follows it.
; pwr7 run: expects a call to __sync_val_compare_and_swap_16 followed by an
; lwsync, with no barrier before the call.
define i128 @cas_weak_acquire_acquire(i128* %a, i128 %cmp, i128 %new) {
; CHECK-LABEL: cas_weak_acquire_acquire:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:  .LBB7_1: # %entry
; CHECK-NEXT:    #
; CHECK-NEXT:    lqarx r8, 0, r3
; CHECK-NEXT:    xor r11, r9, r5
; CHECK-NEXT:    xor r10, r8, r4
; CHECK-NEXT:    or. r11, r11, r10
; CHECK-NEXT:    bne cr0, .LBB7_3
; CHECK-NEXT:  # %bb.2: # %entry
; CHECK-NEXT:    #
; CHECK-NEXT:    mr r11, r7
; CHECK-NEXT:    mr r10, r6
; CHECK-NEXT:    stqcx. r10, 0, r3
; CHECK-NEXT:    bne cr0, .LBB7_1
; CHECK-NEXT:    b .LBB7_4
; CHECK-NEXT:  .LBB7_3: # %entry
; CHECK-NEXT:    stqcx. r8, 0, r3
; CHECK-NEXT:  .LBB7_4: # %entry
; CHECK-NEXT:    lwsync
; CHECK-NEXT:    mr r3, r8
; CHECK-NEXT:    mr r4, r9
; CHECK-NEXT:    blr
;
; PWR7-LABEL: cas_weak_acquire_acquire:
; PWR7:       # %bb.0: # %entry
; PWR7-NEXT:    mflr r0
; PWR7-NEXT:    std r0, 16(r1)
; PWR7-NEXT:    stdu r1, -112(r1)
; PWR7-NEXT:    .cfi_def_cfa_offset 112
; PWR7-NEXT:    .cfi_offset lr, 16
; PWR7-NEXT:    bl __sync_val_compare_and_swap_16
; PWR7-NEXT:    nop
; PWR7-NEXT:    lwsync
; PWR7-NEXT:    addi r1, r1, 112
; PWR7-NEXT:    ld r0, 16(r1)
; PWR7-NEXT:    mtlr r0
; PWR7-NEXT:    blr
entry:
  %0 = cmpxchg weak i128* %a, i128 %cmp, i128 %new acquire acquire
  ; Field 0 of the { i128, i1 } result is the loaded value; the i1 success flag
  ; (field 1) is not used by this test.
  %1 = extractvalue { i128, i1 } %0, 0
  ret i128 %1
}

; Weak 128-bit compare-and-exchange with release ordering on success and
; monotonic ordering on failure; returns only the loaded value from the
; cmpxchg result.
; pwr8 run: expects an lwsync before an inline lqarx loop that compares both
; 64-bit halves with xor/xor/or., stores %new via stqcx. on a match (branching
; back to the loop head if that store fails), and on a mismatch issues a
; stqcx. of the value it just loaded. No barrier follows the loop.
; pwr7 run: expects an lwsync before a call to __sync_val_compare_and_swap_16,
; with no barrier after the call.
define i128 @cas_weak_release_monotonic(i128* %a, i128 %cmp, i128 %new) {
; CHECK-LABEL: cas_weak_release_monotonic:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lwsync
; CHECK-NEXT:  .LBB8_1: # %entry
; CHECK-NEXT:    #
; CHECK-NEXT:    lqarx r8, 0, r3
; CHECK-NEXT:    xor r11, r9, r5
; CHECK-NEXT:    xor r10, r8, r4
; CHECK-NEXT:    or. r11, r11, r10
; CHECK-NEXT:    bne cr0, .LBB8_3
; CHECK-NEXT:  # %bb.2: # %entry
; CHECK-NEXT:    #
; CHECK-NEXT:    mr r11, r7
; CHECK-NEXT:    mr r10, r6
; CHECK-NEXT:    stqcx. r10, 0, r3
; CHECK-NEXT:    bne cr0, .LBB8_1
; CHECK-NEXT:    b .LBB8_4
; CHECK-NEXT:  .LBB8_3: # %entry
; CHECK-NEXT:    stqcx. r8, 0, r3
; CHECK-NEXT:  .LBB8_4: # %entry
; CHECK-NEXT:    mr r3, r8
; CHECK-NEXT:    mr r4, r9
; CHECK-NEXT:    blr
;
; PWR7-LABEL: cas_weak_release_monotonic:
; PWR7:       # %bb.0: # %entry
; PWR7-NEXT:    mflr r0
; PWR7-NEXT:    std r0, 16(r1)
; PWR7-NEXT:    stdu r1, -112(r1)
; PWR7-NEXT:    .cfi_def_cfa_offset 112
; PWR7-NEXT:    .cfi_offset lr, 16
; PWR7-NEXT:    lwsync
; PWR7-NEXT:    bl __sync_val_compare_and_swap_16
; PWR7-NEXT:    nop
; PWR7-NEXT:    addi r1, r1, 112
; PWR7-NEXT:    ld r0, 16(r1)
; PWR7-NEXT:    mtlr r0
; PWR7-NEXT:    blr
entry:
  %0 = cmpxchg weak i128* %a, i128 %cmp, i128 %new release monotonic
  ; Field 0 of the { i128, i1 } result is the loaded value; the i1 success flag
  ; (field 1) is not used by this test.
  %1 = extractvalue { i128, i1 } %0, 0
  ret i128 %1
}

; Strong (non-weak) 128-bit compare-and-exchange with seq_cst ordering on both
; the success and failure paths; returns only the loaded value from the
; cmpxchg result.
; pwr8 run: expects a sync before an inline lqarx loop that compares both
; 64-bit halves with xor/xor/or., stores %new via stqcx. on a match (branching
; back to the loop head if that store fails), and on a mismatch issues a
; stqcx. of the value it just loaded. An lwsync follows the loop.
; pwr7 run: expects a call to __sync_val_compare_and_swap_16 bracketed by the
; same sync/lwsync pair.
define i128 @cas_sc_sc(i128* %a, i128 %cmp, i128 %new) {
; CHECK-LABEL: cas_sc_sc:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    sync
; CHECK-NEXT:  .LBB9_1: # %entry
; CHECK-NEXT:    #
; CHECK-NEXT:    lqarx r8, 0, r3
; CHECK-NEXT:    xor r11, r9, r5
; CHECK-NEXT:    xor r10, r8, r4
; CHECK-NEXT:    or. r11, r11, r10
; CHECK-NEXT:    bne cr0, .LBB9_3
; CHECK-NEXT:  # %bb.2: # %entry
; CHECK-NEXT:    #
; CHECK-NEXT:    mr r11, r7
; CHECK-NEXT:    mr r10, r6
; CHECK-NEXT:    stqcx. r10, 0, r3
; CHECK-NEXT:    bne cr0, .LBB9_1
; CHECK-NEXT:    b .LBB9_4
; CHECK-NEXT:  .LBB9_3: # %entry
; CHECK-NEXT:    stqcx. r8, 0, r3
; CHECK-NEXT:  .LBB9_4: # %entry
; CHECK-NEXT:    lwsync
; CHECK-NEXT:    mr r3, r8
; CHECK-NEXT:    mr r4, r9
; CHECK-NEXT:    blr
;
; PWR7-LABEL: cas_sc_sc:
; PWR7:       # %bb.0: # %entry
; PWR7-NEXT:    mflr r0
; PWR7-NEXT:    std r0, 16(r1)
; PWR7-NEXT:    stdu r1, -112(r1)
; PWR7-NEXT:    .cfi_def_cfa_offset 112
; PWR7-NEXT:    .cfi_offset lr, 16
; PWR7-NEXT:    sync
; PWR7-NEXT:    bl __sync_val_compare_and_swap_16
; PWR7-NEXT:    nop
; PWR7-NEXT:    lwsync
; PWR7-NEXT:    addi r1, r1, 112
; PWR7-NEXT:    ld r0, 16(r1)
; PWR7-NEXT:    mtlr r0
; PWR7-NEXT:    blr
entry:
  %0 = cmpxchg i128* %a, i128 %cmp, i128 %new seq_cst seq_cst
  ; Field 0 of the { i128, i1 } result is the loaded value; the i1 success flag
  ; (field 1) is not used by this test.
  %1 = extractvalue { i128, i1 } %0, 0
  ret i128 %1
}

; Strong (non-weak) 128-bit compare-and-exchange with acq_rel ordering on
; success and acquire ordering on failure; returns only the loaded value from
; the cmpxchg result.
; pwr8 run: expects an lwsync before an inline lqarx loop that compares both
; 64-bit halves with xor/xor/or., stores %new via stqcx. on a match (branching
; back to the loop head if that store fails), and on a mismatch issues a
; stqcx. of the value it just loaded. A second lwsync follows the loop.
; pwr7 run: expects a call to __sync_val_compare_and_swap_16 with an lwsync
; both before and after it.
define i128 @cas_acqrel_acquire(i128* %a, i128 %cmp, i128 %new) {
; CHECK-LABEL: cas_acqrel_acquire:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lwsync
; CHECK-NEXT:  .LBB10_1: # %entry
; CHECK-NEXT:    #
; CHECK-NEXT:    lqarx r8, 0, r3
; CHECK-NEXT:    xor r11, r9, r5
; CHECK-NEXT:    xor r10, r8, r4
; CHECK-NEXT:    or. r11, r11, r10
; CHECK-NEXT:    bne cr0, .LBB10_3
; CHECK-NEXT:  # %bb.2: # %entry
; CHECK-NEXT:    #
; CHECK-NEXT:    mr r11, r7
; CHECK-NEXT:    mr r10, r6
; CHECK-NEXT:    stqcx. r10, 0, r3
; CHECK-NEXT:    bne cr0, .LBB10_1
; CHECK-NEXT:    b .LBB10_4
; CHECK-NEXT:  .LBB10_3: # %entry
; CHECK-NEXT:    stqcx. r8, 0, r3
; CHECK-NEXT:  .LBB10_4: # %entry
; CHECK-NEXT:    lwsync
; CHECK-NEXT:    mr r3, r8
; CHECK-NEXT:    mr r4, r9
; CHECK-NEXT:    blr
;
; PWR7-LABEL: cas_acqrel_acquire:
; PWR7:       # %bb.0: # %entry
; PWR7-NEXT:    mflr r0
; PWR7-NEXT:    std r0, 16(r1)
; PWR7-NEXT:    stdu r1, -112(r1)
; PWR7-NEXT:    .cfi_def_cfa_offset 112
; PWR7-NEXT:    .cfi_offset lr, 16
; PWR7-NEXT:    lwsync
; PWR7-NEXT:    bl __sync_val_compare_and_swap_16
; PWR7-NEXT:    nop
; PWR7-NEXT:    lwsync
; PWR7-NEXT:    addi r1, r1, 112
; PWR7-NEXT:    ld r0, 16(r1)
; PWR7-NEXT:    mtlr r0
; PWR7-NEXT:    blr
entry:
  %0 = cmpxchg i128* %a, i128 %cmp, i128 %new acq_rel acquire
  ; Field 0 of the { i128, i1 } result is the loaded value; the i1 success flag
  ; (field 1) is not used by this test.
  %1 = extractvalue { i128, i1 } %0, 0
  ret i128 %1
}
