#!/usr/bin/env perl

# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# SHA1 block procedure for PA-RISC.

# June 2009.
#
# On PA-7100LC performance is >30% better than gcc 3.2 generated code
# for aligned input and >50% better for unaligned. Compared to vendor
# compiler on PA-8600 it's almost 60% faster in 64-bit build and just
# a few percent faster in 32-bit one (this for aligned input, data for
# unaligned input is not available).
#
# Special thanks to polarhome.com for providing an HP-UX account.

# Command line: <flavour> [<output file>]
#   flavour - build flavour; a value containing "64" selects the 64-bit
#             parameter set below, anything else the 32-bit one.
#   output  - file that receives the generated assembly. When it is
#             omitted the assembly is written to the inherited STDOUT.
$flavour = shift;
$output  = shift;

# Three-argument open keeps the file name from being parsed as a mode
# string, and a failed open is fatal instead of silently discarding the
# generated code.
if (defined($output) && $output ne "") {
    open(STDOUT, ">", $output) or die "can't open $output: $!";
}

# Per-flavour parameters interpolated into the assembly templates.
if ($flavour =~ /64/) {
    $LEVEL        = "2.0W";    # operand of the .LEVEL directive
    $SIZE_T       = 8;         # bytes per saved-register slot
    $FRAME_MARKER = 80;
    $SAVED_RP     = 16;        # %r2 is stored at -$SAVED_RP(%sp) on entry
    $PUSH         = "std";
    $PUSHMA       = "std,ma";
    $POP          = "ldd";
    $POPMB        = "ldd,mb";
} else {
    $LEVEL        = "1.0";
    $SIZE_T       = 4;
    $FRAME_MARKER = 48;
    $SAVED_RP     = 20;
    $PUSH         = "stw";
    $PUSHMA       = "stwm";
    $POP          = "ldw";
    $POPMB        = "ldwm";
}

$FRAME = 14*$SIZE_T + $FRAME_MARKER;    # 14 saved regs + frame marker
                                        #                 [+ argument transfer]
$ctx = "%r26";    # arg0
$inp = "%r25";    # arg1
$num = "%r24";    # arg2

$t0 = "%r28";
$t1 = "%r29";
$K  = "%r31";

# Registers holding the 16 message words. The 17th entry aliases $t0 and
# receives the extra input word that is loaded when the input is unaligned.
@X = ("%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", "%r8",
      "%r9", "%r10","%r11","%r12","%r13","%r14","%r15","%r16",$t0);

# The five working registers. @V is rotated after every generated round,
# while $A..$E keep naming the registers in their initial order.
@V = ($A,$B,$C,$D,$E) = ("%r19","%r20","%r21","%r22","%r23");

# Append the assembly for one round of the first round group (0..19)
# to the global $code.
#
#   $i             - round number; it is emitted as a trailing comment on
#                    the first instruction and selects the message word.
#   $a,$b,$c,$d,$e - names of the registers that hold the five working
#                    variables in this round.
#
# Every round adds $K, the message word X[i], $a rotated with "shd ...,27"
# and the value (b AND c) OR (d AND NOT b) to $e, and rotates $b in place
# with "shd ...,2". For $i >= 15 the instructions that refresh the message
# word in slot ($i+1)%16 (xor with the slots 2, 8 and 13 positions ahead,
# then "shd ...,31") are interleaved with the round itself.
#
# Reads the globals $K, $t0, $t1 and @X. Instruction lines start with a tab
# so that the assembler does not take them for labels.
sub BODY_00_19 {
    my ($i,$a,$b,$c,$d,$e)=@_;
    my $j=$i+1;    # the schedule slot updated ahead of the next round

    if ($i<15) {
        $code.=<<___;
	addl	$K,$e,$e	; $i
	shd	$a,$a,27,$t1
	addl	@X[$i],$e,$e
	and	$c,$b,$t0
	addl	$t1,$e,$e
	andcm	$d,$b,$t1
	shd	$b,$b,2,$b
	or	$t1,$t0,$t0
	addl	$t0,$e,$e
___
    }
    else {    # with forward Xupdate
        $code.=<<___;
	addl	$K,$e,$e	; $i
	shd	$a,$a,27,$t1
	xor	@X[($j+2)%16],@X[$j%16],@X[$j%16]
	addl	@X[$i%16],$e,$e
	and	$c,$b,$t0
	xor	@X[($j+8)%16],@X[$j%16],@X[$j%16]
	addl	$t1,$e,$e
	andcm	$d,$b,$t1
	shd	$b,$b,2,$b
	or	$t1,$t0,$t0
	xor	@X[($j+13)%16],@X[$j%16],@X[$j%16]
	add	$t0,$e,$e
	shd	@X[$j%16],@X[$j%16],31,@X[$j%16]
___
    }
    return;
}

# Append the assembly for one round of the parity round groups to the
# global $code. The driver uses this generator for rounds 20..39 and again
# for rounds 60..79.
#
#   $i             - round number; it is emitted as a trailing comment on
#                    the first instruction and selects the message word.
#   $a,$b,$c,$d,$e - names of the registers that hold the five working
#                    variables in this round.
#
# Every round adds $K, the message word X[i%16], $a rotated with
# "shd ...,27" and b XOR c XOR d to $e, and rotates $b in place with
# "shd ...,2". For $i < 79 the refresh of the message word in slot
# ($i+1)%16 is interleaved with the round. Round 79 needs no further
# message word, so those slots are used to load the five context words
# from $ctx into X[0]..X[4] instead. Nothing is emitted for $i > 79.
#
# Reads the globals $K, $t0, $t1, $ctx and @X. Instruction lines start with
# a tab so that the assembler does not take them for labels.
sub BODY_20_39 {
    my ($i,$a,$b,$c,$d,$e)=@_;
    my $j=$i+1;    # the schedule slot updated ahead of the next round

    if ($i<79) {
        $code.=<<___;
	xor	@X[($j+2)%16],@X[$j%16],@X[$j%16]	; $i
	addl	$K,$e,$e
	shd	$a,$a,27,$t1
	xor	@X[($j+8)%16],@X[$j%16],@X[$j%16]
	addl	@X[$i%16],$e,$e
	xor	$b,$c,$t0
	xor	@X[($j+13)%16],@X[$j%16],@X[$j%16]
	addl	$t1,$e,$e
	shd	$b,$b,2,$b
	xor	$d,$t0,$t0
	shd	@X[$j%16],@X[$j%16],31,@X[$j%16]
	addl	$t0,$e,$e
___
    }
    elsif ($i==79) {    # with context load
        $code.=<<___;
	ldw	0($ctx),@X[0]	; $i
	addl	$K,$e,$e
	shd	$a,$a,27,$t1
	ldw	4($ctx),@X[1]
	addl	@X[$i%16],$e,$e
	xor	$b,$c,$t0
	ldw	8($ctx),@X[2]
	addl	$t1,$e,$e
	shd	$b,$b,2,$b
	xor	$d,$t0,$t0
	ldw	12($ctx),@X[3]
	addl	$t0,$e,$e
	ldw	16($ctx),@X[4]
___
    }
    return;
}

# Append the assembly for one round of the third round group (40..59) to
# the global $code.
#
#   $i             - round number; it is emitted as a trailing comment on
#                    the first instruction and selects the message word.
#   $a,$b,$c,$d,$e - names of the registers that hold the five working
#                    variables in this round.
#
# Every round adds $K, the message word X[i%16] and $a rotated with
# "shd ...,27" to $e. The round function is added in two parts:
# first b AND (c XOR d), then c AND d. $b is rotated in place with
# "shd ...,2", and the refresh of the message word in slot ($i+1)%16 is
# interleaved with the round.
#
# Reads the globals $K, $t0, $t1 and @X. Instruction lines start with a tab
# so that the assembler does not take them for labels.
sub BODY_40_59 {
    my ($i,$a,$b,$c,$d,$e)=@_;
    my $j=$i+1;    # the schedule slot updated ahead of the next round

    $code.=<<___;
	shd	$a,$a,27,$t1	; $i
	addl	$K,$e,$e
	xor	@X[($j+2)%16],@X[$j%16],@X[$j%16]
	xor	$d,$c,$t0
	addl	@X[$i%16],$e,$e
	xor	@X[($j+8)%16],@X[$j%16],@X[$j%16]
	and	$b,$t0,$t0
	addl	$t1,$e,$e
	shd	$b,$b,2,$b
	xor	@X[($j+13)%16],@X[$j%16],@X[$j%16]
	addl	$t0,$e,$e
	and	$d,$c,$t1
	shd	@X[$j%16],@X[$j%16],31,@X[$j%16]
	addl	$t1,$e,$e
___
    return;
}

# Start of the generated routine sha1_block_data_order(ctx, inp, num):
# assembler directives, the prologue that saves %r2..%r16, the load of the
# five context words into $A..$E, and the set-up of the shift-amount
# register from the low two bits of $inp.
#
# Labels (the routine name, L$oop, L$aligned) start in column 0 and every
# instruction starts with a tab. Expressions in backticks are evaluated,
# and the ",*" condition markers are removed for 32-bit builds, by the
# post-processing at the end of the script.
$code=<<___;
	.LEVEL	$LEVEL
	.SPACE	\$TEXT\$
	.SUBSPA	\$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY

	.EXPORT	sha1_block_data_order,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR
sha1_block_data_order
	.PROC
	.CALLINFO	FRAME=`$FRAME-14*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=16
	.ENTRY
	$PUSH	%r2,-$SAVED_RP(%sp)	; standard prologue
	$PUSHMA	%r3,$FRAME(%sp)
	$PUSH	%r4,`-$FRAME+1*$SIZE_T`(%sp)
	$PUSH	%r5,`-$FRAME+2*$SIZE_T`(%sp)
	$PUSH	%r6,`-$FRAME+3*$SIZE_T`(%sp)
	$PUSH	%r7,`-$FRAME+4*$SIZE_T`(%sp)
	$PUSH	%r8,`-$FRAME+5*$SIZE_T`(%sp)
	$PUSH	%r9,`-$FRAME+6*$SIZE_T`(%sp)
	$PUSH	%r10,`-$FRAME+7*$SIZE_T`(%sp)
	$PUSH	%r11,`-$FRAME+8*$SIZE_T`(%sp)
	$PUSH	%r12,`-$FRAME+9*$SIZE_T`(%sp)
	$PUSH	%r13,`-$FRAME+10*$SIZE_T`(%sp)
	$PUSH	%r14,`-$FRAME+11*$SIZE_T`(%sp)
	$PUSH	%r15,`-$FRAME+12*$SIZE_T`(%sp)
	$PUSH	%r16,`-$FRAME+13*$SIZE_T`(%sp)

	ldw	0($ctx),$A
	ldw	4($ctx),$B
	ldw	8($ctx),$C
	ldw	12($ctx),$D
	ldw	16($ctx),$E

	extru	$inp,31,2,$t0		; t0=inp&3;
	sh3addl	$t0,%r0,$t0		; t0*=8;
	subi	32,$t0,$t0		; t0=32-t0;
	mtctl	$t0,%cr11		; %sar=t0;

L\$oop
	ldi	3,$t0
	andcm	$inp,$t0,$t0		; 64-bit neutral
___

# Load input block: words 0..14 are read from the word-aligned address
# held in $t0.
for my $word (0..14) {
    $code .= sprintf("\tldw\t%d(%s),%s\n", 4*$word, $t0, $X[$word]);
}

# Word 15 is loaded right after the compare-and-branch and the branch to
# L$aligned is taken when $inp equals its aligned copy (presumably the load
# sits in the branch delay slot and so runs on both paths - confirm against
# the PA-RISC manual). On the fall-through path a 17th word is loaded into
# X[16], which is $t0 itself, so it has to be the last load through $t0.
$code.=<<___;
	cmpb,*=	$inp,$t0,L\$aligned
	ldw	60($t0),@X[15]
	ldw	64($t0),@X[16]
___

# Align input: each message word is rebuilt from itself and its successor
# by a variable shift that uses the shift amount set up before the loop.
for my $word (0..15) {
    $code .= sprintf("\tvshd\t%s,%s,%s\n", $X[$word], $X[$word+1], $X[$word]);
}

$code.=<<___;
L\$aligned
	ldil	L'0x5a827000,$K		; K_00_19
	ldo	0x999($K),$K
___
# Generate the 80 rounds. After each round @V is rotated right by one
# position, so the register that played "e" becomes "a" of the next round
# and no register-to-register moves are emitted. Each group's constant is
# built in $K with an ldil/ldo pair before the group's first round; the
# constant for rounds 0..19 has already been emitted at this point.
# Instruction lines start with a tab so that they are not taken for labels.
for my $round (0..19) { BODY_00_19($round,@V); unshift(@V,pop(@V)); }

$code.=<<___;
	ldil	L'0x6ed9e000,$K		; K_20_39
	ldo	0xba1($K),$K
___

for my $round (20..39) { BODY_20_39($round,@V); unshift(@V,pop(@V)); }

$code.=<<___;
	ldil	L'0x8f1bb000,$K		; K_40_59
	ldo	0xcdc($K),$K
___

for my $round (40..59) { BODY_40_59($round,@V); unshift(@V,pop(@V)); }

$code.=<<___;
	ldil	L'0xca62c000,$K		; K_60_79
	ldo	0x1d6($K),$K
___

# Rounds 60..79 reuse the generator of rounds 20..39; its round-79 variant
# also loads the context words into X[0]..X[4].
for my $round (60..79) { BODY_20_39($round,@V); unshift(@V,pop(@V)); }

# Tail of the generated loop and the routine's epilogue. The context words
# that round 79 loaded into X[0]..X[4] are added to $A..$E and stored back
# to $ctx. @V has been rotated 80 times by now, a multiple of its length,
# so $A..$E name the working registers in their original roles again.
# "addib" decrements $num and loops while it is non-zero, and the "ldo"
# after it advances $inp by one 64-byte block. The epilogue restores %r2
# and %r4..%r16, and %r3 is restored by the instruction that follows "bv".
# Instruction lines start with a tab so that they are not taken for labels.
$code.=<<___;
	addl	@X[0],$A,$A
	addl	@X[1],$B,$B
	addl	@X[2],$C,$C
	addl	@X[3],$D,$D
	addl	@X[4],$E,$E
	stw	$A,0($ctx)
	stw	$B,4($ctx)
	stw	$C,8($ctx)
	stw	$D,12($ctx)
	stw	$E,16($ctx)
	addib,*<>	-1,$num,L\$oop
	ldo	64($inp),$inp

	$POP	`-$FRAME-$SAVED_RP`(%sp),%r2	; standard epilogue
	$POP	`-$FRAME+1*$SIZE_T`(%sp),%r4
	$POP	`-$FRAME+2*$SIZE_T`(%sp),%r5
	$POP	`-$FRAME+3*$SIZE_T`(%sp),%r6
	$POP	`-$FRAME+4*$SIZE_T`(%sp),%r7
	$POP	`-$FRAME+5*$SIZE_T`(%sp),%r8
	$POP	`-$FRAME+6*$SIZE_T`(%sp),%r9
	$POP	`-$FRAME+7*$SIZE_T`(%sp),%r10
	$POP	`-$FRAME+8*$SIZE_T`(%sp),%r11
	$POP	`-$FRAME+9*$SIZE_T`(%sp),%r12
	$POP	`-$FRAME+10*$SIZE_T`(%sp),%r13
	$POP	`-$FRAME+11*$SIZE_T`(%sp),%r14
	$POP	`-$FRAME+12*$SIZE_T`(%sp),%r15
	$POP	`-$FRAME+13*$SIZE_T`(%sp),%r16
	bv	(%r2)
	.EXIT
	$POPMB	-$FRAME(%sp),%r3
	.PROCEND
	.STRINGZ "SHA1 block transform for PA-RISC, CRYPTOGAMS by <appro\@openssl.org>"
___

# Post-process the template:
#  - evaluate every `expression` and substitute its value. The expressions
#    are arithmetic produced by this script itself, not external input;
#  - 32-bit builds: remove the "*" from ",*" condition completers;
#  - 64-bit builds: use "bve" in place of "bv".
$code =~ s/\`([^\`]*)\`/eval $1/gem;
$code =~ s/,\*/,/gm if ($SIZE_T==4);
$code =~ s/\bbv\b/bve/gm if ($SIZE_T==8);

# Write errors on a buffered handle may only surface at close, so both the
# print and the close are checked to avoid leaving a truncated assembly
# file behind unnoticed.
print STDOUT $code or die "error writing generated code: $!";
close(STDOUT) or die "error closing STDOUT: $!";