cpu     x64
bits    64

;// Reader (chunk, size, loops);


global  Reader
extern data

; Note:
; The System V AMD64 ABI passes integer/pointer parameters in these registers, in this order:
;       rdi, rsi, rdx, rcx, r8, r9

        section .text
;------------------------------------------------------------------------------
; Name:         has_sse2
;
;has_sse2:
;_has_sse2:
;        push    rbx
;        push    rcx
;        push    rdx
;        mov     rax, 1
;        cpuid
;        test    rdx, 0x4000000
;        setnz   al
;        pop     rdx
;        pop     rcx
;        pop     rbx
;        ret

;------------------------------------------------------------------------------
; Name:         Reader
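; Purpose:      Reads through an area of memory sequentially, `loops` times,
;               then times two reads inside that area with RDTSC.
; Params:       rdi = ptr to memory area
;               rsi = length in bytes
;               rdx = loops (number of passes over the area)
;               rcx = byte offset into the area of the first timed read
;                     (the second timed read is 8 bytes further on)
;               r8  = ptr to 64-bit result: overhead-corrected difference of
;                     the low RDTSC halves (EAX)
;               r9  = ptr to 64-bit result: overhead-corrected difference of
;                     the high RDTSC halves (EDX)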
;rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi, r8, r9, r10, r11, r12, r13, r14, and r15.
;Reader(data, (uint64_t)set_m[num1], (uint64_t)loop_t[num1], (uint64_t )eleven, &low, &hi);
;------------------------------------------------------------------------------


;-----------------------------
;
;
;
;
;
;
;-----------------------------
; NOTE: superseded draft of Reader, excluded from assembly with %if 0.
;
; A second, complete `Reader` is defined further down in this file.  NASM
; rejects a label that is defined twice, so only one of the two definitions
; may be assembled.  This draft is the one that is disabled because it is not
; safe to call as written:
;   * it pushes 11 registers but pops only 7, and not in reverse order, so
;     `ret` would take the saved rcx value as the return address;
;   * it overwrites rbx (callee-saved in the System V AMD64 ABI) via
;     `mov rbx, [r10]` and `cpuid` without saving it;
;   * both RDTSC results are discarded - nothing is stored or returned.
; The text below is kept unchanged, for reference only.
%if 0
Reader:
	push rdi ; mem region
	push rsi ; length
	push rdx ; loops
	push rcx ; 
	push r8  ; 
	push r9  ; 
	push r10 ;
	push r11 ;
	push r12 ;
	push r13 ;
	push r14 ;
	add rsi, rdi
.L1:
	mov r10, rdi
	
.L2:
	
	mov rbx, [r10] ; read every 64-bit number
	add r10, 1
	cmp r10, rsi
	jb  .L2

	dec rdx
	jnz .L1

	cpuid
	rdtsc
	mov r11, rax
	mov r12, rdx
	; work
	rdtsc
	

	pop rdi
	pop rsi
	pop rdx
	pop rcx
	pop r8
	pop r9
	pop r10
	ret
%endif

;------------------------------------------------------------------------------
; Name:         Reader
; Purpose:      Walks a memory area `loops` times, then uses RDTSC to time two
;               16-bit reads at a caller-chosen offset from its start.
; ABI:          System V AMD64; leaf function (makes no calls).
; In:           rdi = ptr to memory area
;               rsi = length of the area in bytes
;               rdx = number of passes over the area
;               rcx = byte offset of the first timed read; the second timed
;                     read is at offset rcx + 8.  Both reads are performed
;                     unconditionally, so the caller must make sure they lie
;                     in readable memory.
;               r8  = ptr to 64-bit slot receiving the low-half result
;               r9  = ptr to 64-bit slot receiving the high-half result
; Out:          [r8] = (EAX after - EAX before) - calibration low half
;               [r9] = (EDX after - EDX before) - calibration high half
;               The two halves are subtracted independently as 64-bit values,
;               so [r8] is negative whenever the low TSC half wraps during
;               the measurement.  The elapsed count is therefore
;               [r8] + ([r9] << 32), evaluated with 64-bit wraparound.
;               rax also holds the low-half result on return.
; Clobbers:     rax, rcx, rdx, rsi, r10, r11, flags.
;               rbx, r12 and r13 are used but saved and restored.
; Notes:        * A zero pass count or a zero length skips the walk entirely.
;               * The walk reads one byte per address so that it never touches
;                 memory beyond rdi + rsi.
;------------------------------------------------------------------------------
Reader:
        push    rbx                     ; callee-saved; clobbered by cpuid and the timed loads
        push    r12                     ; callee-saved; holds calibration low half
        push    r13                     ; callee-saved; holds calibration high half

        mov     r10, rcx                ; r10 = probe offset (cpuid overwrites rcx)
        mov     r11, rdx                ; r11 = passes remaining (cpuid/rdtsc overwrite rdx)
        add     rsi, rdi                ; rsi = one past the end of the area

        ; --- Calibration: cost of a back-to-back RDTSC pair ------------------
        ; The two register copies between the RDTSCs mirror the two copies in
        ; the timed section below, so their cost is part of the overhead.
        xor     eax, eax                ; fixed cpuid leaf 0, so the fence costs the same each time
        cpuid                           ; serialize before reading the TSC
        rdtsc
        mov     r12, rax
        mov     r13, rdx
        rdtsc
        sub     rax, r12
        sub     rdx, r13
        mov     r12, rax                ; r12 = overhead, low half
        mov     r13, rdx                ; r13 = overhead, high half

        ; --- Walk the area r11 times -----------------------------------------
        test    r11, r11
        jz      .walk_done              ; 0 passes: nothing to read
        cmp     rdi, rsi
        jae     .walk_done              ; empty area: nothing to read
.pass:
        mov     rcx, rdi                ; rcx = cursor, restart at the base
.next_byte:
        movzx   eax, byte [rcx]         ; value is discarded; only the access matters
        add     rcx, 1
        cmp     rcx, rsi
        jb      .next_byte

        dec     r11
        jnz     .pass
.walk_done:

        add     r10, rdi                ; r10 = address of the first timed read

        ; --- Timed section -----------------------------------------------------
        xor     eax, eax                ; same fixed leaf as in the calibration
        cpuid
        rdtsc
        mov     rsi, rax                ; rsi = start, low half  (area end no longer needed)
        mov     r11, rdx                ; r11 = start, high half (pass counter no longer needed)

        mov     bx, word [r10]          ; timed read #1
        mov     bx, word [r10 + 8]      ; timed read #2

        rdtsc
        sub     rax, rsi                ; elapsed, low half
        sub     rdx, r11                ; elapsed, high half
        sub     rax, r12                ; remove measurement overhead
        sub     rdx, r13

        mov     [r8], rax
        mov     [r9], rdx

        pop     r13                     ; restore in reverse order of the pushes
        pop     r12
        pop     rbx
        ret
