mirror of https://github.com/nodejs/node.git
477 lines
9.0 KiB
NASM
477 lines
9.0 KiB
NASM
; ---------------------------------------------------------------------
; MASM source for 32-bit x86, flat memory model.  The TITLE path and the
; ASCII banner stored after the procedure identify this module as the
; x86 Montgomery multiplication routine (_bn_mul_mont).
;
; NOTE(review): the lone `|` lines found throughout this copy look like
; extraction residue rather than MASM syntax -- confirm and strip them
; before assembling.
; ---------------------------------------------------------------------
TITLE ../openssl/crypto/bn/asm/x86-mont.asm
|
|
; Assemblers older than version 8.00 only get a build-time notice.
IF @Version LT 800
|
|
ECHO MASM version 8.00 or later is strongly recommended.
|
|
ENDIF
|
|
; Enable the Pentium Pro (P6) instruction set ...
.686
|
|
; ... and the SSE/MMX register and instruction names used by the
; pmuludq-based path of the procedure.
.XMM
|
|
; For assemblers older than 8.00, define XMMWORD by hand as a 16-byte
; aligned structure of two quadwords (presumably because those versions
; lack the built-in type -- TODO confirm).
IF @Version LT 800
|
|
XMMWORD STRUCT 16
|
|
DQ 2 dup (?)
|
|
XMMWORD ENDS
|
|
ENDIF
|
|
|
|
.MODEL FLAT
|
|
; DOTNAME permits identifiers that start with a dot, which the
; `.text$` segment name below relies on.
OPTION DOTNAME
|
|
; Open the code segment: page alignment for old assemblers, explicit
; 64-byte alignment otherwise.
IF @Version LT 800
|
|
.text$ SEGMENT PAGE 'CODE'
|
|
ELSE
|
|
.text$ SEGMENT ALIGN(64) 'CODE'
|
|
ENDIF
|
|
; The external declaration is disabled; _OPENSSL_ia32cap_P is declared
; as a communal (COMM) variable in the .bss segment at the end of the
; file instead.
;EXTERN _OPENSSL_ia32cap_P:NEAR
|
|
; Start the procedure that follows on a 16-byte boundary.
ALIGN 16
|
|
; ---------------------------------------------------------------------
; _bn_mul_mont -- Montgomery multiplication entry point (32-bit x86).
;
; Six dword arguments are read from the stack; the procedure ends in a
; plain `ret`, so the caller removes them.  Judging by how each slot is
; used, they are presumably
;     (rp, ap, bp, np, n0, num)
; i.e. result vector, two operand vectors, modulus vector, pointer to
; the per-modulus constant, and the length in 32-bit words -- TODO
; confirm against the C prototype.
;
; Returns eax = 0 immediately when num < 4 (signed compare); otherwise
; returns eax = 1 after writing num words to rp.
; ebp/ebx/esi/edi are saved and restored; eax/ecx/edx are clobbered, and
; the MMX registers are used (followed by emms) on the SSE2 path.
;
; Private frame built at the re-aligned esp:
;     [esp+ 0]  scratch (loop bound on the squaring path)
;     [esp+ 4]  argument 0 (rp)
;     [esp+ 8]  argument 1 (ap)
;     [esp+12]  argument 2 (bp); later reused as running pointer/index
;     [esp+16]  argument 3 (np)
;     [esp+20]  first dword stored at argument 4 (*n0)
;     [esp+24]  esp as it was after the four pushes, restored on exit
;     [esp+28]  scratch (end-of-bp pointer on the generic path)
;     [esp+32]  tp[0 .. num+1], the temporary accumulator vector
; ---------------------------------------------------------------------
_bn_mul_mont PROC PUBLIC
|
|
$L_bn_mul_mont_begin::
|
|
; Save the registers this routine must preserve.
push ebp
|
|
push ebx
|
|
push esi
|
|
push edi
|
|
; Default return value 0, used if we bail out below.
xor eax,eax
|
|
; Four pushes plus the return address = 20 bytes, so arguments start at
; [esp+20]; [esp+40] is the sixth one (num).
mov edi,DWORD PTR 40[esp]
|
|
; Inputs shorter than 4 words are not handled here.
cmp edi,4
|
|
jl $L000just_leave
|
|
; esi -> argument 0 slot, edx -> argument 1 slot (addresses of the
; stack slots, not the pointers stored in them).
lea esi,DWORD PTR 20[esp]
|
|
lea edx,DWORD PTR 24[esp]
|
|
; Remember the current esp; it is written to [esp+24] of the new frame.
mov ebp,esp
|
|
; Allocate 32 bytes of frame plus (num+2) dwords of temporary vector:
; esp -= 4*(num+2) + 32.  edi is negated only for the lea and restored.
add edi,2
|
|
neg edi
|
|
lea esp,DWORD PTR [edi*4+esp-32]
|
|
neg edi
|
|
; Lower esp until it is congruent to edx modulo 2048 ...
mov eax,esp
|
|
sub eax,edx
|
|
and eax,2047
|
|
sub esp,eax
|
|
; ... then, if bit 11 of esp equals bit 11 of edx, drop esp by another
; 2048 so that the two addresses differ in that bit.
; NOTE(review): the intent appears to be keeping the frame and the
; argument area from aliasing in the cache -- confirm with upstream.
xor edx,esp
|
|
and edx,2048
|
|
xor edx,2048
|
|
sub esp,edx
|
|
; Round the frame base down to a 64-byte boundary.
and esp,-64
|
|
; Fetch arguments 0..3 and the pointer in argument 4 ...
mov eax,DWORD PTR [esi]
|
|
mov ebx,DWORD PTR 4[esi]
|
|
mov ecx,DWORD PTR 8[esi]
|
|
mov edx,DWORD PTR 12[esi]
|
|
mov esi,DWORD PTR 16[esi]
|
|
; ... and dereference argument 4 to get the constant itself.
mov esi,DWORD PTR [esi]
|
|
; Copy everything into the private frame (layout in the header above).
mov DWORD PTR 4[esp],eax
|
|
mov DWORD PTR 8[esp],ebx
|
|
mov DWORD PTR 12[esp],ecx
|
|
mov DWORD PTR 16[esp],edx
|
|
mov DWORD PTR 20[esp],esi
|
|
; edi still holds num+2, so ebx = num-1: the index of the last word,
; used as the loop bound by every path below.
lea ebx,DWORD PTR [edi-3]
|
|
mov DWORD PTR 24[esp],ebp
|
|
; Test bit 26 of the first dword of _OPENSSL_ia32cap_P; when it is clear
; take the integer-only path.  (The target label name indicates this
; bit is the SSE2 capability flag.)
lea eax,DWORD PTR _OPENSSL_ia32cap_P
|
|
bt DWORD PTR [eax],26
|
|
jnc $L001non_sse2
|
|
; ---------------------------------------------------------------------
; SSE2 path: 32x32->64 multiplies are done with pmuludq on MMX registers.
; Register roles (ebx = num-1 on entry):
;   esi = [esp+8] (ap), edi = [esp+12] (bp), ebp = [esp+16] (np)
;   edx = outer index i, ecx = inner index j
;   mm4 = bp[i]
;   mm5 = per-pass reduction multiplier (only its low dword is used by
;         pmuludq)
;   mm2 = running sum of the ap[j]*bp[i] chain (high dword = carry)
;   mm3 = running sum of the np[j]*mm5 chain   (high dword = carry)
;   mm7 = 0x00000000FFFFFFFF, mask selecting the low dword
; tp[] lives at [esp+32]; each pass stores its words one slot lower than
; it read them, which performs the one-word right shift of the
; reduction.
; ---------------------------------------------------------------------
mov eax,-1
|
|
movd mm7,eax
|
|
mov esi,DWORD PTR 8[esp]
|
|
mov edi,DWORD PTR 12[esp]
|
|
mov ebp,DWORD PTR 16[esp]
|
|
; i = 0, j = 0
xor edx,edx
|
|
xor ecx,ecx
|
|
; ---- pass 0 head: tp[] has no contents yet, so nothing is read from it
movd mm4,DWORD PTR [edi]
|
|
movd mm5,DWORD PTR [esi]
|
|
movd mm3,DWORD PTR [ebp]
|
|
; mm5 = ap[0]*bp[0]; keep a full copy in mm2 and the low dword in mm0.
pmuludq mm5,mm4
|
|
movq mm2,mm5
|
|
movq mm0,mm5
|
|
pand mm0,mm7
|
|
; Multiply by the constant saved at [esp+20].  The load is 64 bits wide
; but pmuludq only uses the low dword of each operand, so the adjacent
; frame slot is ignored.
pmuludq mm5,QWORD PTR 20[esp]
|
|
; mm3 = np[0]*mm5 + low dword of ap[0]*bp[0]; only its carry (high
; dword) is kept after the shift below.
pmuludq mm3,mm5
|
|
paddq mm3,mm0
|
|
; Preload np[1] and ap[1] for the first loop iteration.
movd mm1,DWORD PTR 4[ebp]
|
|
movd mm0,DWORD PTR 4[esi]
|
|
; Reduce both chains to their carries.
psrlq mm2,32
|
|
psrlq mm3,32
|
|
inc ecx
|
|
ALIGN 16
|
|
; ---- pass 0 loop: j = 1 .. num-2, writes tp[j-1]
$L0021st:
|
|
pmuludq mm0,mm4
|
|
pmuludq mm1,mm5
|
|
paddq mm2,mm0
|
|
paddq mm3,mm1
|
|
movq mm0,mm2
|
|
pand mm0,mm7
|
|
; Loads for the next iteration are interleaved with the arithmetic.
movd mm1,DWORD PTR 4[ecx*4+ebp]
|
|
paddq mm3,mm0
|
|
movd mm0,DWORD PTR 4[ecx*4+esi]
|
|
psrlq mm2,32
|
|
; 28 + 4*j = 32 + 4*(j-1): store lands in tp[j-1].
movd DWORD PTR 28[ecx*4+esp],mm3
|
|
psrlq mm3,32
|
|
lea ecx,DWORD PTR 1[ecx]
|
|
cmp ecx,ebx
|
|
jl $L0021st
|
|
; ---- pass 0 tail: last word (j = num-1), operands already loaded
pmuludq mm0,mm4
|
|
pmuludq mm1,mm5
|
|
paddq mm2,mm0
|
|
paddq mm3,mm1
|
|
movq mm0,mm2
|
|
pand mm0,mm7
|
|
paddq mm3,mm0
|
|
; tp[num-2]
movd DWORD PTR 28[ecx*4+esp],mm3
|
|
psrlq mm2,32
|
|
psrlq mm3,32
|
|
; Sum of the two final carries is stored as a 64-bit value covering
; tp[num-1] and tp[num].
paddq mm3,mm2
|
|
movq QWORD PTR 32[ebx*4+esp],mm3
|
|
; i = 1
inc edx
|
|
; ---- passes 1 .. num-1: same structure, but tp[] is now accumulated
$L003outer:
|
|
xor ecx,ecx
|
|
; mm4 = bp[i], mm5 = ap[0], mm6 = tp[0], mm3 = np[0]
movd mm4,DWORD PTR [edx*4+edi]
|
|
movd mm5,DWORD PTR [esi]
|
|
movd mm6,DWORD PTR 32[esp]
|
|
movd mm3,DWORD PTR [ebp]
|
|
; mm5 = ap[0]*bp[i] + tp[0]
pmuludq mm5,mm4
|
|
paddq mm5,mm6
|
|
movq mm0,mm5
|
|
movq mm2,mm5
|
|
pand mm0,mm7
|
|
; New reduction multiplier: low dword of the sum times the constant at
; [esp+20] (again only low dwords take part).
pmuludq mm5,QWORD PTR 20[esp]
|
|
pmuludq mm3,mm5
|
|
paddq mm3,mm0
|
|
; mm6 = tp[1], to be added to the ap-chain carry below.
movd mm6,DWORD PTR 36[esp]
|
|
movd mm1,DWORD PTR 4[ebp]
|
|
movd mm0,DWORD PTR 4[esi]
|
|
psrlq mm2,32
|
|
psrlq mm3,32
|
|
paddq mm2,mm6
|
|
inc ecx
|
|
; ebx doubles as the down-counter of the inner loop: num-2 iterations.
dec ebx
|
|
; ---- inner loop: j = 1 .. num-2, reads tp[j+1], writes tp[j-1]
$L004inner:
|
|
pmuludq mm0,mm4
|
|
pmuludq mm1,mm5
|
|
paddq mm2,mm0
|
|
paddq mm3,mm1
|
|
movq mm0,mm2
|
|
; 36 + 4*j = 32 + 4*(j+1): next accumulator word.
movd mm6,DWORD PTR 36[ecx*4+esp]
|
|
pand mm0,mm7
|
|
movd mm1,DWORD PTR 4[ecx*4+ebp]
|
|
paddq mm3,mm0
|
|
movd mm0,DWORD PTR 4[ecx*4+esi]
|
|
psrlq mm2,32
|
|
movd DWORD PTR 28[ecx*4+esp],mm3
|
|
psrlq mm3,32
|
|
paddq mm2,mm6
|
|
; lea does not alter flags, so jnz still tests the result of dec ebx.
dec ebx
|
|
lea ecx,DWORD PTR 1[ecx]
|
|
jnz $L004inner
|
|
; ecx is num-1 here; restore ebx to that value.
mov ebx,ecx
|
|
; ---- pass tail: last word (j = num-1)
pmuludq mm0,mm4
|
|
pmuludq mm1,mm5
|
|
paddq mm2,mm0
|
|
paddq mm3,mm1
|
|
movq mm0,mm2
|
|
pand mm0,mm7
|
|
paddq mm3,mm0
|
|
; tp[num-2]
movd DWORD PTR 28[ecx*4+esp],mm3
|
|
psrlq mm2,32
|
|
psrlq mm3,32
|
|
; Add both carries and the previous tp[num]; the 64-bit result becomes
; the new tp[num-1] / tp[num] pair.
movd mm6,DWORD PTR 36[ebx*4+esp]
|
|
paddq mm3,mm2
|
|
paddq mm3,mm6
|
|
movq QWORD PTR 32[ebx*4+esp],mm3
|
|
; Next i; continue while i <= num-1.
lea edx,DWORD PTR 1[edx]
|
|
cmp edx,ebx
|
|
jle $L003outer
|
|
; Leave MMX state before returning to code that may use the x87 FPU.
emms
|
|
; ebx = num-1, as the common tail expects.
jmp $L005common_tail
|
|
ALIGN 16
|
|
; ---------------------------------------------------------------------
; Integer-only path, using mul/adc on general-purpose registers.
; On entry ebx = num-1 and the frame at esp is set up.
; The path first decides between general multiplication and the
; dedicated squaring code, then alternates two loops per word of bp:
;   multiply-accumulate  (tp += ap * bp[i])            $L007mull / $L0091stmadd
;   reduce-and-shift     (tp = (tp + np * m) >> 32)    $L0082ndmadd
; where m = tp[0] * (constant at [esp+20]) truncated to 32 bits.
; edx carries the high word from one mul to the next throughout.
; ---------------------------------------------------------------------
$L001non_sse2:
|
|
mov esi,DWORD PTR 8[esp]
|
|
; ebp = num
lea ebp,DWORD PTR 1[ebx]
|
|
mov edi,DWORD PTR 12[esp]
|
|
; j = 0
xor ecx,ecx
|
|
mov edx,esi
|
|
; ebp = (num & 1) | (ap - bp): zero only when num is even and both
; operand pointers are identical.
and ebp,1
|
|
sub edx,edi
|
|
; eax = bp + 4*num, the address just past the last word of bp.
lea eax,DWORD PTR 4[ebx*4+edi]
|
|
or ebp,edx
|
|
; edi = bp[0]; mov leaves the flags from `or` intact for the jz.
mov edi,DWORD PTR [edi]
|
|
jz $L006bn_sqr_mont
|
|
; Save the end-of-bp pointer; it terminates the outer loop.
mov DWORD PTR 28[esp],eax
|
|
; eax = ap[0], carry = 0
mov eax,DWORD PTR [esi]
|
|
xor edx,edx
|
|
ALIGN 16
|
|
; ---- first pass: tp[j] = low(ap[j]*bp[0] + carry), j = 0 .. num-2
$L007mull:
|
|
mov ebp,edx
|
|
mul edi
|
|
add ebp,eax
|
|
; lea is used for the increment so the carry from `add` survives.
lea ecx,DWORD PTR 1[ecx]
|
|
adc edx,0
|
|
mov eax,DWORD PTR [ecx*4+esi]
|
|
cmp ecx,ebx
|
|
; ecx was already incremented: 28 + 4*(j+1) addresses tp[j].
mov DWORD PTR 28[ecx*4+esp],ebp
|
|
jl $L007mull
|
|
; Last word of the first pass (j = num-1), interleaved with the setup of
; the reduction: edi = constant, esi = np.
mov ebp,edx
|
|
mul edi
|
|
mov edi,DWORD PTR 20[esp]
|
|
add eax,ebp
|
|
mov esi,DWORD PTR 16[esp]
|
|
adc edx,0
|
|
; edi = m = low 32 bits of constant * tp[0]
imul edi,DWORD PTR 32[esp]
|
|
; tp[num-1] = low word, tp[num] = high word, tp[num+1] = 0
mov DWORD PTR 32[ebx*4+esp],eax
|
|
xor ecx,ecx
|
|
mov DWORD PTR 36[ebx*4+esp],edx
|
|
mov DWORD PTR 40[ebx*4+esp],ecx
|
|
; np[0]*m + tp[0]: the low word is discarded (eax is reloaded next);
; only the carry into edx is kept.
mov eax,DWORD PTR [esi]
|
|
mul edi
|
|
add eax,DWORD PTR 32[esp]
|
|
mov eax,DWORD PTR 4[esi]
|
|
adc edx,0
|
|
; j = 1
inc ecx
|
|
jmp $L0082ndmadd
|
|
ALIGN 16
|
|
; ---- multiply-accumulate: tp[j] = low(ap[j]*bp[i] + tp[j] + carry),
;      j = 0 .. num-2
$L0091stmadd:
|
|
mov ebp,edx
|
|
mul edi
|
|
add ebp,DWORD PTR 32[ecx*4+esp]
|
|
lea ecx,DWORD PTR 1[ecx]
|
|
adc edx,0
|
|
add ebp,eax
|
|
mov eax,DWORD PTR [ecx*4+esi]
|
|
adc edx,0
|
|
cmp ecx,ebx
|
|
mov DWORD PTR 28[ecx*4+esp],ebp
|
|
jl $L0091stmadd
|
|
; Last word (j = num-1), interleaved with the setup of the reduction.
mov ebp,edx
|
|
mul edi
|
|
add eax,DWORD PTR 32[ebx*4+esp]
|
|
mov edi,DWORD PTR 20[esp]
|
|
adc edx,0
|
|
mov esi,DWORD PTR 16[esp]
|
|
add ebp,eax
|
|
adc edx,0
|
|
; edi = m for this pass
imul edi,DWORD PTR 32[esp]
|
|
; xor clears ecx and CF; the following add/adc pair then propagates the
; carry out of tp[num] into ecx.
xor ecx,ecx
|
|
add edx,DWORD PTR 36[ebx*4+esp]
|
|
mov DWORD PTR 32[ebx*4+esp],ebp
|
|
adc ecx,0
|
|
mov eax,DWORD PTR [esi]
|
|
; tp[num] = high word + old tp[num], tp[num+1] = carry out
mov DWORD PTR 36[ebx*4+esp],edx
|
|
mov DWORD PTR 40[ebx*4+esp],ecx
|
|
; np[0]*m + tp[0]: low word discarded, carry kept in edx.
mul edi
|
|
add eax,DWORD PTR 32[esp]
|
|
mov eax,DWORD PTR 4[esi]
|
|
adc edx,0
|
|
; j = 1
mov ecx,1
|
|
ALIGN 16
|
|
; ---- reduce-and-shift: tp[j-1] = low(np[j]*m + tp[j] + carry),
;      j = 1 .. num-2
$L0082ndmadd:
|
|
mov ebp,edx
|
|
mul edi
|
|
add ebp,DWORD PTR 32[ecx*4+esp]
|
|
lea ecx,DWORD PTR 1[ecx]
|
|
adc edx,0
|
|
add ebp,eax
|
|
mov eax,DWORD PTR [ecx*4+esi]
|
|
adc edx,0
|
|
cmp ecx,ebx
|
|
; ecx was already incremented: 24 + 4*(j+1) addresses tp[j-1].
mov DWORD PTR 24[ecx*4+esp],ebp
|
|
jl $L0082ndmadd
|
|
; Last word of the reduction (j = num-1) -> tp[num-2]
mov ebp,edx
|
|
mul edi
|
|
add ebp,DWORD PTR 32[ebx*4+esp]
|
|
adc edx,0
|
|
add ebp,eax
|
|
adc edx,0
|
|
mov DWORD PTR 28[ebx*4+esp],ebp
|
|
; Fold the final carry into the two top words, shifted down one slot:
; tp[num-1] = carry + tp[num], tp[num] = tp[num+1] + carry out.
; Interleaved with that: advance the bp pointer kept in [esp+12].
xor eax,eax
|
|
mov ecx,DWORD PTR 12[esp]
|
|
add edx,DWORD PTR 36[ebx*4+esp]
|
|
adc eax,DWORD PTR 40[ebx*4+esp]
|
|
lea ecx,DWORD PTR 4[ecx]
|
|
mov DWORD PTR 32[ebx*4+esp],edx
|
|
; All words of bp consumed?
cmp ecx,DWORD PTR 28[esp]
|
|
mov DWORD PTR 36[ebx*4+esp],eax
|
|
je $L005common_tail
|
|
; Otherwise set up the next pass: edi = next word of bp, esi = ap,
; j = 0, carry = 0, eax = ap[0].
mov edi,DWORD PTR [ecx]
|
|
mov esi,DWORD PTR 8[esp]
|
|
mov DWORD PTR 12[esp],ecx
|
|
xor ecx,ecx
|
|
xor edx,edx
|
|
mov eax,DWORD PTR [esi]
|
|
jmp $L0091stmadd
|
|
ALIGN 16
|
|
; ---------------------------------------------------------------------
; Squaring path, reached from the integer path when num is even and both
; operand pointers are equal.  Each cross product ap[i]*ap[j] (j > i) is
; computed once and doubled; the squares ap[i]^2 are added separately.
; On entry: esi = ap, edi = ap[0], ecx = 0, ebx = num-1.
; Frame slots reused here:
;   [esp]     = num-1 (loop bound, because ebx is needed as a scratch
;               register in the doubling loops)
;   [esp+12]  = outer index i
; Doubling scheme: ebx carries the bit shifted out of one doubled word
; into the next, and an incoming high word is pre-split into
; edx = hi >> 1 and ebx = hi & 1 so that 2*(x + edx) + ebx = 2*x + hi.
; ---------------------------------------------------------------------
$L006bn_sqr_mont:
|
|
mov DWORD PTR [esp],ebx
|
|
; i = 0
mov DWORD PTR 12[esp],ecx
|
|
; edx:eax = ap[0]^2; tp[0] = low word
mov eax,edi
|
|
mul edi
|
|
mov DWORD PTR 32[esp],eax
|
|
; Split the high word as described in the header.
mov ebx,edx
|
|
shr edx,1
|
|
and ebx,1
|
|
; j = 1
inc ecx
|
|
ALIGN 16
|
|
; ---- first pass: tp[j] = low(2*(ap[j]*ap[0] + carry) + ebx),
;      j = 1 .. num-2
$L010sqr:
|
|
mov eax,DWORD PTR [ecx*4+esi]
|
|
mov ebp,edx
|
|
mul edi
|
|
add eax,ebp
|
|
; lea keeps the carry from `add` alive for the adc.
lea ecx,DWORD PTR 1[ecx]
|
|
adc edx,0
|
|
; Double the word and bring in the bit carried from the previous one;
; the bit shifted out (eax >> 31) goes to ebx for the next word.
lea ebp,DWORD PTR [eax*2+ebx]
|
|
shr eax,31
|
|
cmp ecx,DWORD PTR [esp]
|
|
mov ebx,eax
|
|
mov DWORD PTR 28[ecx*4+esp],ebp
|
|
jl $L010sqr
|
|
; Last word of the first pass (j = num-1), interleaved with the setup of
; the reduction: edi = m = constant * tp[0], esi = np.
mov eax,DWORD PTR [ecx*4+esi]
|
|
mov ebp,edx
|
|
mul edi
|
|
add eax,ebp
|
|
mov edi,DWORD PTR 20[esp]
|
|
adc edx,0
|
|
mov esi,DWORD PTR 16[esp]
|
|
lea ebp,DWORD PTR [eax*2+ebx]
|
|
imul edi,DWORD PTR 32[esp]
|
|
shr eax,31
|
|
; tp[num-1] = doubled low word
mov DWORD PTR 32[ecx*4+esp],ebp
|
|
; tp[num] = doubled high word plus carried bit, tp[num+1] = its top bit
lea ebp,DWORD PTR [edx*2+eax]
|
|
mov eax,DWORD PTR [esi]
|
|
shr edx,31
|
|
mov DWORD PTR 36[ecx*4+esp],ebp
|
|
mov DWORD PTR 40[ecx*4+esp],edx
|
|
; np[0]*m + tp[0]: low word discarded, carry kept in edx.
mul edi
|
|
add eax,DWORD PTR 32[esp]
|
|
; ebx = num-1 again (mov does not disturb the carry for the adc).
mov ebx,ecx
|
|
adc edx,0
|
|
mov eax,DWORD PTR 4[esi]
|
|
; j = 1
mov ecx,1
|
|
ALIGN 16
|
|
; ---- reduce-and-shift, two words per iteration:
;        tp[j-1] = low(np[j]  *m + tp[j]   + carry)
;        tp[j]   = low(np[j+1]*m + tp[j+1] + carry)
;      num is even on this path, so stepping j by 2 from 1 lands exactly
;      on num-1.
$L0113rdmadd:
|
|
mov ebp,edx
|
|
mul edi
|
|
add ebp,DWORD PTR 32[ecx*4+esp]
|
|
adc edx,0
|
|
add ebp,eax
|
|
mov eax,DWORD PTR 4[ecx*4+esi]
|
|
adc edx,0
|
|
mov DWORD PTR 28[ecx*4+esp],ebp
|
|
mov ebp,edx
|
|
mul edi
|
|
add ebp,DWORD PTR 36[ecx*4+esp]
|
|
lea ecx,DWORD PTR 2[ecx]
|
|
adc edx,0
|
|
add ebp,eax
|
|
mov eax,DWORD PTR [ecx*4+esi]
|
|
adc edx,0
|
|
cmp ecx,ebx
|
|
mov DWORD PTR 24[ecx*4+esp],ebp
|
|
jl $L0113rdmadd
|
|
; Last word of the reduction (j = num-1) -> tp[num-2]
mov ebp,edx
|
|
mul edi
|
|
add ebp,DWORD PTR 32[ebx*4+esp]
|
|
adc edx,0
|
|
add ebp,eax
|
|
adc edx,0
|
|
mov DWORD PTR 28[ebx*4+esp],ebp
|
|
; Fold the final carry into the top words, shifted down one slot, while
; reloading i (ecx) and ap (esi).
mov ecx,DWORD PTR 12[esp]
|
|
xor eax,eax
|
|
mov esi,DWORD PTR 8[esp]
|
|
add edx,DWORD PTR 36[ebx*4+esp]
|
|
adc eax,DWORD PTR 40[ebx*4+esp]
|
|
mov DWORD PTR 32[ebx*4+esp],edx
|
|
; Finished once i has reached num-1.
cmp ecx,ebx
|
|
mov DWORD PTR 36[ebx*4+esp],eax
|
|
je $L005common_tail
|
|
; ---- next pass: i = i+1, edi = ap[i]
mov edi,DWORD PTR 4[ecx*4+esi]
|
|
lea ecx,DWORD PTR 1[ecx]
|
|
mov eax,edi
|
|
mov DWORD PTR 12[esp],ecx
|
|
; tp[i] += ap[i]^2 (low word stored, high word plus carry stays in edx)
mul edi
|
|
add eax,DWORD PTR 32[ecx*4+esp]
|
|
adc edx,0
|
|
mov DWORD PTR 32[ecx*4+esp],eax
|
|
xor ebp,ebp
|
|
; If i == num-1 there are no cross products left.  The lea advances
; ecx to i+1 without changing the flags set by cmp.
cmp ecx,ebx
|
|
lea ecx,DWORD PTR 1[ecx]
|
|
je $L012sqrlast
|
|
; Split the high word for the doubling loop (see header).
mov ebx,edx
|
|
shr edx,1
|
|
and ebx,1
|
|
ALIGN 16
|
|
; ---- doubled cross products:
;        tp[j] = low(2*(ap[j]*ap[i] + carry) + tp[j] + ebx),
;        j = i+1 .. num-1; ebx receives the overflow for the next word.
$L013sqradd:
|
|
mov eax,DWORD PTR [ecx*4+esi]
|
|
mov ebp,edx
|
|
mul edi
|
|
add eax,ebp
|
|
; ebp = 2*eax via lea so that the carry from `add` reaches the adc.
lea ebp,DWORD PTR [eax*1+eax]
|
|
adc edx,0
|
|
shr eax,31
|
|
add ebp,DWORD PTR 32[ecx*4+esp]
|
|
lea ecx,DWORD PTR 1[ecx]
|
|
adc eax,0
|
|
add ebp,ebx
|
|
adc eax,0
|
|
cmp ecx,DWORD PTR [esp]
|
|
mov DWORD PTR 28[ecx*4+esp],ebp
|
|
mov ebx,eax
|
|
jle $L013sqradd
|
|
; Double the remaining high word and add the pending overflow:
; edx = low word of 2*edx + ebx, ebp = carry out of that sum.
mov ebp,edx
|
|
add edx,edx
|
|
shr ebp,31
|
|
add edx,ebx
|
|
adc ebp,0
|
|
; ---- close the pass and start the reduction.
;      ecx = num here on both incoming paths; edx:ebp hold the words
;      still to be added at the top of tp[].
$L012sqrlast:
|
|
; edi = m = constant * tp[0], esi = np
mov edi,DWORD PTR 20[esp]
|
|
mov esi,DWORD PTR 16[esp]
|
|
imul edi,DWORD PTR 32[esp]
|
|
; tp[num] += edx, tp[num+1] = ebp + carry
add edx,DWORD PTR 32[ecx*4+esp]
|
|
mov eax,DWORD PTR [esi]
|
|
adc ebp,0
|
|
mov DWORD PTR 32[ecx*4+esp],edx
|
|
mov DWORD PTR 36[ecx*4+esp],ebp
|
|
; np[0]*m + tp[0]: low word discarded, carry kept in edx.
mul edi
|
|
add eax,DWORD PTR 32[esp]
|
|
; Restore ebx = num-1 (it was used as scratch above).
lea ebx,DWORD PTR [ecx-1]
|
|
adc edx,0
|
|
; j = 1, eax = np[1]
mov ecx,1
|
|
mov eax,DWORD PTR 4[esi]
|
|
jmp $L0113rdmadd
|
|
ALIGN 16
|
|
; ---------------------------------------------------------------------
; Common tail, shared by all three paths.  On entry ebx = num-1 and
; tp[0 .. num] at [esp+32] holds the unreduced result.
;   1. rp[] = tp[] - np[] with borrow propagation ($L014sub)
;   2. pick tp[] if the subtraction borrowed, otherwise keep rp[]
;   3. copy the chosen vector to rp[] and overwrite tp[] ($L015copy)
; then restore the caller's stack and return 1.
; ---------------------------------------------------------------------
$L005common_tail:
|
|
; ebp = np, edi = rp, esi = &tp[0], eax = tp[0]
mov ebp,DWORD PTR 16[esp]
|
|
mov edi,DWORD PTR 4[esp]
|
|
lea esi,DWORD PTR 32[esp]
|
|
mov eax,DWORD PTR [esi]
|
|
; ecx = num-1 (down-counter); xor zeroes the index and clears CF, so
; the first sbb starts without a borrow.
mov ecx,ebx
|
|
xor edx,edx
|
|
ALIGN 16
|
|
; rp[k] = tp[k] - np[k] - borrow, k = 0 .. num-1
$L014sub:
|
|
sbb eax,DWORD PTR [edx*4+ebp]
|
|
mov DWORD PTR [edx*4+edi],eax
|
|
; dec does not modify CF, and mov/lea modify no flags at all, so the
; borrow survives to the next sbb while jge tests the result of dec.
dec ecx
|
|
mov eax,DWORD PTR 4[edx*4+esi]
|
|
lea edx,DWORD PTR 1[edx]
|
|
jge $L014sub
|
|
; eax = tp[num] - final borrow.  It is used as a select mask below.
; NOTE(review): this assumes the value is either 0 or all ones here --
; TODO confirm the bound on tp[num].
sbb eax,0
|
|
; esi = (tp & mask) | (rp & ~mask): source is tp when mask is all ones,
; rp (already holding tp - np) when mask is zero.
; NOTE(review): the selection is branch-free, but the address the copy
; loop reads from still depends on the comparison result.
and esi,eax
|
|
not eax
|
|
mov ebp,edi
|
|
and ebp,eax
|
|
or esi,ebp
|
|
ALIGN 16
|
|
; Copy the selected vector into rp[], from index num-1 down to 0, and
; overwrite each temporary word with ecx (which is -1 after the
; subtraction loop) so no intermediate values remain in the frame.
$L015copy:
|
|
mov eax,DWORD PTR [ebx*4+esi]
|
|
mov DWORD PTR [ebx*4+edi],eax
|
|
mov DWORD PTR 32[ebx*4+esp],ecx
|
|
dec ebx
|
|
jge $L015copy
|
|
; Drop the private frame: reload the esp saved during setup.
mov esp,DWORD PTR 24[esp]
|
|
; Return value 1: the multiplication was performed.
mov eax,1
|
|
; Also the early-exit target, reached with eax = 0 when num < 4.
$L000just_leave:
|
|
pop edi
|
|
pop esi
|
|
pop ebx
|
|
pop ebp
|
|
ret
|
|
_bn_mul_mont ENDP
|
|
; NUL-terminated ASCII banner stored in the code segment:
; "Montgomery Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>"
DB 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
|
|
DB 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
|
|
DB 54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
|
|
DB 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
|
|
DB 111,114,103,62,0
|
|
.text$ ENDS
|
|
; Uninitialised data segment.
.bss SEGMENT 'BSS'
|
|
; Communal declaration of the capability vector as four DWORDs.  The
; procedure tests bit 26 of its first dword to choose a code path.
COMM _OPENSSL_ia32cap_P:DWORD:4
|
|
.bss ENDS
|
|
END
|