.486              ;Target processor: enable the (non-privileged) 80486 instruction set
.MODEL FLAT, C    ;Use the flat memory model. Use C calling conventions

.CODE             ;Indicates the start of a code segment.


;-----------------------------------------------------------------------
; void OrR(void *R1, const void *R2, unsigned maxX)
;
; Bitwise OR of two byte rows:  R1[i] |= R2[i]  for i = 0 .. maxX-1.
;
; In:    R1   = destination row (read and written)
;        R2   = source row (only read)
;        maxX = number of bytes to process
; Out:   nothing
; Clobb: eax, ecx, edx, flags (esi/edi are saved by the USES clause)
;
; Returns without touching memory when maxX is 0 or when either
; pointer is NULL.  Whole dwords are processed first, the remaining
; 0..3 bytes one at a time.
;-----------------------------------------------------------------------
	public	OrR
OrR proc \
        uses esi edi, \
	R1:ptr byte, \
        R2:ptr byte, \
        maxX:DWORD

	mov	edi,[R1]		; edi = destination row
	test	edi,edi
	jz	OrDone			; NULL destination
	mov	esi,[R2]		; esi = source row
	test	esi,esi
	jz	OrDone			; NULL source

	mov	edx,[maxX]		; edx = total byte count
	mov	ecx,edx
	shr	ecx,2			; ecx = number of whole dwords
	jz	OrTail			; fewer than 4 bytes in total

OrDwords:
	mov	eax,[esi]
	add	esi,4
	or	[edi],eax
	add	edi,4
	dec	ecx
	jnz	OrDwords

OrTail:
	and	edx,3			; edx = leftover bytes (0..3)
	jz	OrDone

OrBytes:
	mov	al,[esi]
	inc	esi
	or	[edi],al
	inc	edi
	dec	edx
	jnz	OrBytes

OrDone:	ret

OrR endp


;-----------------------------------------------------------------------
; void AndR(void *R1, const void *R2, unsigned maxX)
;
; Bitwise AND of two byte rows:  R1[i] &= R2[i]  for i = 0 .. maxX-1.
;
; In:    R1   = destination row (read and written)
;        R2   = source row (only read)
;        maxX = number of bytes to process
; Out:   nothing
; Clobb: eax, ecx, edx, flags (esi/edi are saved by the USES clause)
;
; Returns without touching memory when maxX is 0 or when either
; pointer is NULL.  Whole dwords are processed first, the remaining
; 0..3 bytes one at a time.
;-----------------------------------------------------------------------
	public	AndR
AndR proc \
        uses esi edi, \
	R1:ptr byte, \
        R2:ptr byte, \
        maxX:DWORD

	mov	edi,[R1]		; edi = destination row
	test	edi,edi
	jz	AndDone			; NULL destination
	mov	esi,[R2]		; esi = source row
	test	esi,esi
	jz	AndDone			; NULL source

	mov	edx,[maxX]		; edx = total byte count
	mov	ecx,edx
	shr	ecx,2			; ecx = number of whole dwords
	jz	AndTail			; fewer than 4 bytes in total

AndDwords:
	mov	eax,[esi]
	add	esi,4
	and	[edi],eax
	add	edi,4
	dec	ecx
	jnz	AndDwords

AndTail:
	and	edx,3			; edx = leftover bytes (0..3)
	jz	AndDone

AndBytes:
	mov	al,[esi]
	inc	esi
	and	[edi],al
	inc	edi
	dec	edx
	jnz	AndBytes

AndDone:ret

AndR endp


;-----------------------------------------------------------------------
; void ShrR(void *R, unsigned maxX)
;
; Shifts the maxX-byte bit string at R right by one bit.  Byte 0 is the
; most significant one: bit 0 of every byte moves into bit 7 of the
; byte that follows it.  Bit 7 of byte 0 is kept (arithmetic shift);
; the bit leaving the last byte is dropped.
;
; In:    R    = row to shift in place
;        maxX = number of bytes in the row
; Out:   nothing
; Clobb: ecx, edx, flags
;
; Does nothing when R is NULL or maxX is 0.
;-----------------------------------------------------------------------
	public	ShrR
ShrR proc \
	R:ptr byte, \
        maxX:DWORD

	mov	edx,[R]
	test	edx,edx
	jz	ShrDone			; NULL row
	mov	ecx,[maxX]
	test	ecx,ecx
	jz	ShrDone			; empty row

	sar	byte ptr [edx],1	; first byte keeps its top bit, CF = bit 0
	jmp	ShrNext			; jmp leaves CF alone

ShrCarry:
	inc	edx			; inc does not modify CF
	rcr	byte ptr [edx],1	; CF -> bit 7, bit 0 -> CF
ShrNext:
	dec	ecx			; dec does not modify CF
	jnz	ShrCarry

ShrDone:ret

ShrR endp


;-----------------------------------------------------------------------
; void ShlR(void *R, unsigned maxX)
;
; Shifts the maxX-byte bit string at R left by one bit.  Work proceeds
; from the last byte towards byte 0: bit 7 of every byte moves into
; bit 0 of the byte in front of it.  The vacated bit 0 of the last byte
; is filled with a copy of its old bit 0; the bit leaving byte 0 is
; dropped.
;
; In:    R    = row to shift in place
;        maxX = number of bytes in the row
; Out:   nothing
; Clobb: eax, ecx, edx, flags
;
; Does nothing when R is NULL or maxX is 0.
;-----------------------------------------------------------------------
	public	ShlR
ShlR proc \
	R:ptr byte, \
        maxX:DWORD

	mov	edx,[R]
	test	edx,edx
	jz	ShlDone			; NULL row
	mov	ecx,[maxX]
	test	ecx,ecx
	jz	ShlDone			; empty row

	lea	edx,[edx+ecx-1]		; edx -> last byte of the row

	mov	al,[edx]
	shr	al,1			; CF = bit 0 of the last byte
	rcl	byte ptr [edx],1	; bit 0 duplicated, CF = old bit 7
	jmp	ShlNext			; jmp leaves CF alone

ShlCarry:
	dec	edx			; dec does not modify CF
	rcl	byte ptr [edx],1	; CF -> bit 0, bit 7 -> CF
ShlNext:
	dec	ecx			; dec does not modify CF
	jnz	ShlCarry

ShlDone:ret

ShlR endp


;-----------------------------------------------------------------------
; void AddLu32u8(unsigned Size, uint32_t *Accu, uint8_t *pAdd)
;
; Adds a row of bytes to a row of 32-bit accumulators:
;     Accu[i] += pAdd[i]   for i = 0 .. Size-1
; Each byte is zero-extended before the addition.
;
; In:    count = number of elements (Size in the C prototype)
;        Accu  = 32-bit accumulators, updated in place
;        pAdd  = bytes to add
; Out:   nothing
; Clobb: eax, ecx, edx, flags (esi is saved by the USES clause)
;
; Does nothing when count is 0 or when either pointer is NULL.
;-----------------------------------------------------------------------
	public  AddLu32u8
AddLu32u8 proc \
        uses esi, \
        count:DWORD, \
        Accu: ptr byte, \
        pAdd: ptr byte

	mov	ecx,[count]
	test	ecx,ecx
	jz	AddDone			; nothing to do
	mov	edx,[Accu]		; edx walks the accumulators
	test	edx,edx
	jz	AddDone
	mov	esi,[pAdd]		; esi walks the byte row
	test	esi,esi
	jz	AddDone

AddNext:
	movzx	eax,byte ptr [esi]	; zero-extend the byte to 32 bits
	add	[edx],eax
	inc	esi
	add	edx,4
	dec	ecx
	jnz	AddNext

AddDone:
	ret

AddLu32u8 endp



;-----------------------------------------------------------------------
; void SubLu32u8(unsigned Size, uint32_t *Accu, uint8_t *pSub)
;
; Subtracts a row of bytes from a row of 32-bit accumulators:
;     Accu[i] -= pSub[i]   for i = 0 .. Size-1
; Each byte is zero-extended before the subtraction; the result wraps
; modulo 2^32.
;
; In:    count = number of elements (Size in the C prototype)
;        Accu  = 32-bit accumulators, updated in place
;        pSub  = bytes to subtract
; Out:   nothing
; Clobb: eax, ecx, edx, flags (esi is saved by the USES clause)
;
; Does nothing when count is 0 or when either pointer is NULL.
;-----------------------------------------------------------------------
	public  SubLu32u8
SubLu32u8 proc \
        uses esi, \
        count:DWORD, \
        Accu: ptr byte, \
        pSub: ptr byte

	mov	ecx,[count]
	test	ecx,ecx
	jz	SubDone			; nothing to do
	mov	edx,[Accu]		; edx walks the accumulators
	test	edx,edx
	jz	SubDone
	mov	esi,[pSub]		; esi walks the byte row
	test	esi,esi
	jz	SubDone

SubNext:
	movzx	eax,byte ptr [esi]	; zero-extend the byte to 32 bits
	sub	[edx],eax
	inc	esi
	add	edx,4
	dec	ecx
	jnz	SubNext

SubDone:
	ret

SubLu32u8 endp


;***********  FOR GAUSSIAN ***********


;-----------------------------------------------------------------------
; void AbsDiff_u32(uint32_t *Out, uint32_t *In, int SizeX)
;
; In-place unsigned absolute difference of two rows of 32-bit values:
;     Out[i] = |In[i] - Out[i]|   for i = 0 .. SizeX-1
;
; In:    R1   = Out, read and overwritten with the result
;        R2   = In, only read
;        maxX = SizeX, number of 32-bit elements
; Out:   nothing
; Clobb: eax, ecx, edx, flags (esi is saved by the USES clause)
;
; The values are unsigned, so the direction of the subtraction is
; decided by the borrow (carry flag), not by the sign of the result;
; this keeps differences of 2^31 and above correct.
; Does nothing when either pointer is NULL or SizeX <= 0.
;-----------------------------------------------------------------------
	public	AbsDiff_u32
AbsDiff_u32 proc \
	uses esi, \
	R1:ptr byte, \
	R2:ptr byte, \
	maxX:DWORD

	mov	esi,[R2]		; esi = In (source)
	test	esi,esi
	jz	AbsDone			; NULL source
	mov	edx,[R1]		; edx = Out (second operand and destination)
	test	edx,edx
	jz	AbsDone			; NULL destination
	mov	ecx,[maxX]
	test	ecx,ecx
	jle	AbsDone			; zero or negative element count

AbsNext:
	mov	eax,[esi]
	add	esi,4
	sub	eax,[edx]		; eax = In - Out, CF set when In < Out
	jae	AbsStore		; no borrow: already the absolute value
	neg	eax			; borrow: turn In - Out into Out - In
AbsStore:
	mov	[edx],eax
	add	edx,4
	dec	ecx
	jnz	AbsNext

AbsDone:ret

AbsDiff_u32 endp


;-----------------------------------------------------------------------
; void AbsDiffCopy_u32(uint32_t *Out, uint32_t *In1, uint32_t *In2, int SizeX)
;
; Unsigned absolute difference of two rows of 32-bit values, written to
; a third row:
;     Out[i] = |In2[i] - In1[i]|   for i = 0 .. SizeX-1
;
; In:    OutR = Out, destination row
;        In1  = first source row, only read
;        In2  = second source row, only read
;        maxX = SizeX, number of 32-bit elements
; Out:   nothing
; Clobb: eax, ecx, edx, flags (esi/edi are saved by the USES clause)
;
; The values are unsigned, so the direction of the subtraction is
; decided by the borrow (carry flag), not by the sign of the result;
; this keeps differences of 2^31 and above correct.
; Does nothing when any pointer is NULL or SizeX <= 0.
;-----------------------------------------------------------------------
	public	AbsDiffCopy_u32
AbsDiffCopy_u32 proc \
	uses edi esi, \
	OutR: ptr byte, \
	In1:ptr byte, \
	In2:ptr byte, \
	maxX:DWORD

	mov	edi,[OutR]		; edi = destination row
	test	edi,edi
	jz	AbsCpDone		; NULL destination
	mov	esi,[In2]		; esi = minuend row
	test	esi,esi
	jz	AbsCpDone		; NULL source
	mov	edx,[In1]		; edx = subtrahend row
	test	edx,edx
	jz	AbsCpDone		; NULL source
	mov	ecx,[maxX]
	test	ecx,ecx
	jle	AbsCpDone		; zero or negative element count

AbsCpNext:
	mov	eax,[esi]
	add	esi,4
	sub	eax,[edx]		; eax = In2 - In1, CF set when In2 < In1
	jae	AbsCpStore		; no borrow: already the absolute value
	neg	eax			; borrow: turn In2 - In1 into In1 - In2
AbsCpStore:
	mov	[edi],eax
	add	edi,4
	add	edx,4
	dec	ecx
	jnz	AbsCpNext

AbsCpDone:
	ret

AbsDiffCopy_u32 endp


	end

