# Baillie-PSW primality test # Thanks to some long calculations, we know this works for all 64-bit # numbers. This code stops seeking quadratic non-residues when the # number of composites is low. .file "isPrime8.s" .text .set NUM,%rdi # the parameter to isPrime8 comes here ########## # macros # ########## .macro addmod fromreg,toreg add \fromreg,\toreg jnc 1f sub NUM,\toreg jmp 2f 1: sub NUM,\toreg jnc 2f add NUM,\toreg 2: .endm .macro submod fromreg,toreg sub \fromreg,\toreg jnc 1f add NUM,\toreg 1: .endm ############### # subroutines # ############### Coprime: # set Z flag iff GCD(RAX,NUM) == 1 # uses RAX,RCX,RDX mov NUM,%rdx 10: sub %rax,%rdx je 12f jnc 11f neg %rdx sub %rdx,%rax 11: bsf %rdx,%ecx shr %cl,%rdx jmp 10b 12: cmp $1,%rax ret ########################################################### # isPrime8: determines whether an 8-byte integer is prime, # returns a bool. # bool isPrime8(uint8 num); ########################################################### .globl isPrime8 .type isPrime8,@function isPrime8: # First step: test numbers below 64 with a bitmask. cmp $64,NUM jae 100f xor %edx,%edx # %rdx := 1 << NUM inc %dl mov %dil,%cl shl %cl,%rdx .set Bm1,(1<<2)|(1<<3)|(1<<5)|(1<<7)|(1<<11)|(1<<13) .set Bm2,(1<<17)|(1<<19)|(1<<23)|(1<<29)|(1<<31)|(1<<37) .set Bm3,(1<<41)|(1<<43)|(1<<47)|(1<<53)|(1<<59)|(1<<61) movabs $Bm1|Bm2|Bm3,%rax test %rax,%rdx jnz ReturnTrueA ReturnFalseA: xor %eax,%eax ret ReturnTrueA: xor %eax,%eax orb $1,%al ret 100: # Second step: trial division up to 5. # There are eight mod-30 classes which might be prime. mov NUM,%rax # compute N mod 30 xor %edx,%edx mov $30,%ecx div %rcx xor %eax,%eax # compute 1 << (N mod 30) inc %al mov %dl,%cl shl %cl,%eax .set Bm1,(1<<1)|(1<<11)|(1<<19)|(1<<29) .set Bm2,(1<<7)|(1<<13)|(1<<17)|(1<<23) test $Bm1|Bm2,%eax jz ReturnFalseA # In the fifth step, we'll want to know whether # NUM is one of (7,13,17,23) mod 30. test $Bm2,%eax setnz %r9b # Third step: trial division up to 101. # It's faster to do GCDs with products of primes. .set N1,7*11*13*17*19*23*29*31*37*41*43*47*53 .set N2,59*61*67*71*73*79*83*89*97*101 movabs $N1,%rax call Coprime jnz ReturnFalseA # Because NUM is >= 64, it can't be a # prime factor of the N1 product. # Now, the number has no factor below 59. # It is either prime, or is >= 59*59. cmp $59*59,NUM jc ReturnTrueA # in case it's an N2 factor movabs $N2,%rax call Coprime jnz ReturnFalseA # Fourth step: strong primality test with base 2. # This exposes almost all composites. .set NUMM1,%r8 # compute NUM-1 mov NUM,NUMM1 dec NUMM1 # factor NUM-1 into an odd part and a power of two .set ODD,%rsi mov NUMM1,ODD bsf ODD,%rax # exponent of twopower # shift the odd part to the left end bsr ODD,%rcx xor $63,%cl # compute shift amount shl %cl,ODD .set EXPO,%cl # put twopower here mov %al,EXPO # compute Base^ODD modulo NUM mov $2,%edx .set POWER,%rdx # use remainder register add ODD,ODD # already did first bit jz 401f 400: mov POWER,%rax # square POWER mul POWER div NUM # remainder goes to %rdx=POWER add ODD,ODD # check next bit jnc 400b addmod POWER,POWER # multiply by Base=2 test ODD,ODD # loop jnz 400b 401: cmp $1,POWER je 403f # POWER==1: maybe prime cmp NUMM1,POWER je 403f # POWER==NUM-1: maybe prime # square EXPO-1 times dec EXPO jz ReturnFalseA 402: mov POWER,%rax mul POWER div NUM cmp POWER,NUMM1 # POWER==NUM-1: maybe prime je 403f cmp $1,POWER # POWER==1: composite loopnz 402b jmp ReturnFalseA # loop finished: composite 403: # There are only 31_894_014 composite numbers (below 2^64) which pass # the strong primality test. Of those, some have a small factor: # 31_712_357 remain, from 42799 to 18446744066047760377. Of course, the # insidious Wieferich-prime squares (1093^2 and 3511^2) are there. # Fifth step: Lucas test with the right discriminant. # We need a 4n+1 discriminant which is a quadratic non-residue modulo # NUM. (Thus the Wieferich squares are insidious: for those NUMs, there # is no such discriminant.) # Exhaustive testing of the 32M show that one such test with the least # good discriminant discriminates all composites except for those two. # To find such a discriminant, use the Jacobi symbol, precomputed in the # NonQRvectors, below. # This works even when NUM is composite because we seek non-residues. # One needn't seek a discriminant until the bitter end. # Try the primes from 5 to 89. # If none of those work, only thirteen composites remain to be checked # individually, in the sixth step. # Long ago, we tested whether NUM == (7,13,17,23) mod 30. # If so, 5 is the right discriminant. .set NonQRptr,%r8 lea NonQRvectors(%rip),NonQRptr test %r9b,%r9b jnz 560f # Five is a wrong discriminant: find a good one. .set NonQRsize,13 551: add $NonQRsize,NonQRptr movsxb (NonQRptr),%rcx test %cl,%cl # zero means we ran out jz SixthStep jns 552f neg %rcx # absolute value 552: mov NUM,%rax # check bit-vector xor %rdx,%rdx div %rcx mov %dl,%cl # bit-vector index shr $3,%rdx # byte index and $7,%cl # bit index lea 1(NonQRptr,%rdx),%rax # byte address mov (%rax),%al # fetch bit shr %cl,%al # move bit to carry flag ror %al jnc 551b 560: # make Lucas sequence coefficients: P=1, Q=(discr-P)/4 .set LUCQ,%rsi movsxb (NonQRptr),LUCQ # Q = (discr-P)/4 dec LUCQ sar $2,LUCQ jns 561f add NUM,LUCQ # make Q positive mod NUM 561: # Compute U[NUM],U[NUM+1] mod NUM # (We need only U[NUM+1], but must compute both to get either.) .set NUMBIT,%r8 bsr NUM,%rcx # put highest set bit into NUMBIT xor NUMBIT,NUMBIT inc NUMBIT shl %cl,NUMBIT shr NUMBIT # first bit is always 1: don't test it .set U2Np1,%r9 .set LUCUlo,%r10 .set LUCUhi,%r11 # make sequence values U[1] U[2] # (not U[0],U[1]: we ignore the first bit) xor LUCUlo,LUCUlo # U[1]=1 inc LUCUlo mov LUCUlo,LUCUhi # U[2]=1 570: # for each remaining bit in NUM # double index: U[x],U[x+1] -> U[2x],U[2x+1] mov LUCUlo,%rax # U[2n+1] = U[n+1]^2 + Q*U[n]^2 mul LUCUlo div NUM mov %rdx,%rax mul LUCQ div NUM mov %rdx,U2Np1 mov LUCUhi,%rax mul LUCUhi div NUM addmod %rdx,U2Np1 mov LUCUhi,%rax # U[2n] = U[n]*(2*U[n+1]-U[n]) addmod %rax,%rax submod LUCUlo,%rax mul LUCUlo div NUM mov %rdx,LUCUlo mov U2Np1,LUCUhi test NUMBIT,NUM # check next bit jz 580f 571: # increment index: U[x],U[x+1] -> U[x+1],U[x+2] mov LUCUlo,%rax # U[n+2] = U[n+1] + Q*U[n] mul LUCQ add LUCUhi,%rax adc $0,%rdx div NUM mov LUCUhi,LUCUlo mov %rdx,LUCUhi 580: # end loop shr NUMBIT jnz 570b test LUCUhi,LUCUhi # composite if non-zero jnz ReturnFalseB ReturnTrueB: xor %eax,%eax orb $1,%al ret ReturnFalseB: xor %eax,%eax ret # A row of this table has P (a 3-mod-4 prime) and a bit-vector of P # values. The K'th bit-value indicates whether an _odd_ number congruent # to K mod P is a quadratic non-residue mod P. NonQRvectors: .byte 5,0x0c # but the bit-vector part isn't used .skip 11 .byte -7,0x68 .skip 11 .byte -11,0xc4,0x05 .skip 10 .byte 13,0xe4,0x09 .skip 10 .byte 17,0xe8,0x5c,0x00 .skip 9 .byte -19,0x0c,0xf5,0x04 .skip 9 .byte -23,0xa0,0xcc,0x7a .skip 9 .byte 29,0x0c,0xdd,0x2e,0x0c .skip 8 .byte -31,0x48,0xb8,0xe2,0x6d .skip 8 .byte 37,0x64,0xe1,0xde,0xa1,0x09 .skip 7 .byte 41,0xc8,0xf8,0x4a,0x7d,0x4c,0x00 .skip 6 .byte -43,0xac,0x11,0x5c,0x7c,0xa7,0x04 .skip 6 .byte -47,0x20,0xac,0xd8,0xe4,0xca,0x7b .skip 6 .byte 53,0x2c,0x51,0xfc,0xcc,0x8f,0x22,0x0d .skip 5 .byte -59,0x44,0x6d,0x84,0xc1,0xe7,0x9d,0xd4,0x05 .skip 4 .byte 61,0xc4,0x0d,0xa6,0xf5,0x6b,0x19,0xec,0x08 .skip 4 .byte -67,0xac,0x39,0x14,0xd8,0x45,0x7e,0x3d,0xa6,0x04 .skip 3 .byte -71,0x80,0x68,0xe2,0x94,0x8e,0xd6,0xb8,0xe9,0x7e .skip 3 .byte 73,0xa0,0xec,0x72,0xf4,0x86,0xbd,0x38,0xdd,0x14,0x00 .skip 2 .byte -79,0xc8,0xd0,0x02,0x79,0xae,0x8a,0x61,0xbf,0xf4,0x6c .skip 2 .byte -83,0x64,0xe1,0x5c,0x01,0x8d,0xec,0xf4,0x57,0x8c,0x97,0x05 .skip 1 .byte 89,0xc8,0xf0,0x88,0xfd,0x6a,0x4a,0x59,0xfd,0x46,0x3c,0x4c,0x00 .byte 0 # end # Sixth step: check for the 13 composites which aren't exposed in the # other steps. ## # ## 1194649 1093*1093 ## 12327121 3511*3511 ## 172683484287699841 239919073*719757217 ## # ## 395009109077493751 314248751*1256995001 ## 693958851755989121 480956981*1442870941 ## 1329453744705764701 208534901*6375209801 ## 2735096497083370501 1045963001*2614907501 ## 6092870717272192309 281908189*21612961081 ## 6414442613241233671 895435831*7163486641 ## 9921760091402570401 671557217*14774258753 ## 11708069165918597341 57267541*204445117801 ## 15263286937772200441 1251185569*12199059289 ## 15398785001660949001 1801*12277801*696389401 SixthStep: lea SixthTable(%rip),%rax cmp 8*5(%rax),NUM ja 602f je ReturnFalseB cmp 8*2(%rax),NUM ja 601f je ReturnFalseB cmp 8*0(%rax),NUM jb ReturnTrueB je ReturnFalseB cmp 8*1(%rax),NUM jne ReturnTrueB jmp ReturnFalseB 601: cmp 8*3(%rax),NUM jb ReturnTrueB je ReturnFalseB cmp 8*4(%rax),NUM jne ReturnTrueB jmp ReturnFalseB 602: cmp 8*9(%rax),NUM ja 604f je ReturnFalseB cmp 8*7(%rax),NUM ja 603f je ReturnFalseB cmp 8*6(%rax),NUM jne ReturnTrueB jmp ReturnFalseB 603: cmp 8*8(%rax),NUM jne ReturnTrueB jmp ReturnFalseB 604: cmp 8*11(%rax),NUM ja 605f je ReturnFalseB cmp 8*10(%rax),NUM jne ReturnTrueB jmp ReturnFalseB 605: cmp 8*12(%rax),NUM jne ReturnTrueB xor %eax,%eax # return false ret .align 8,0 SixthTable: .quad 1194649 .quad 12327121 .quad 172683484287699841 .quad 395009109077493751 .quad 693958851755989121 .quad 1329453744705764701 .quad 2735096497083370501 .quad 6092870717272192309 .quad 6414442613241233671 .quad 9921760091402570401 .quad 11708069165918597341 .quad 15263286937772200441 .quad 15398785001660949001