	ARM: 7626/1: arm/crypto: Make asm SHA-1 and AES code Thumb-2 compatible
This patch fixes aes-armv4.S and sha1-armv4-large.S to work
natively in Thumb.  This allows ARM/Thumb interworking workarounds
to be removed.
I also take the opportunity to convert some explicit assembler
directives for exported functions to the standard
ENTRY()/ENDPROC().
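For reference, the ENTRY()/ENDPROC() macros in <linux/linkage.h>
bundle up the directives this patch deletes by hand.  Roughly
(eliding the alignment and per-architecture details the real
macros also handle):

	ENTRY(func)    =>	.globl	func
				func:
	ENDPROC(func)  =>	.type	func, %function
				.size	func, .-func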
For the code itself:
  * In sha1_block_data_order, use of TEQ with sp is deprecated in
    ARMv7 and not supported in Thumb.  For the branches back to
    .L_00_15 and .L_40_59, the TEQ is converted to a CMP, under the
    assumption that clobbering the C flag here will not cause
    incorrect behaviour.
    For the first branch back to .L_20_39_or_60_79 the C flag is
    important, so sp is moved temporarily into another register so
    that TEQ can be used for the comparison (see the first sketch
    after this list).
  * In the AES code, most forms of register-indexed addressing with
    shifts and rotates are not permitted for loads and stores in
    Thumb, so the address calculation is done using a separate
    instruction for the Thumb case (see the second sketch after
    this list).
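Concretely, both comparison fixes, taken from the
sha1-armv4-large.S hunks below (CMP rewrites all flags including
C, while TEQ with an unshifted register operand leaves C alone):

	@ Only Z is consumed: a one-instruction swap.
	cmp	r14,sp
	bne	.L_00_15

	@ C is consumed by the following bcs: stage sp through r11
	@ so TEQ remains usable.
 ARM(	teq	r14,sp		)	@ preserve carry
 THUMB(	mov	r11,sp		)
 THUMB(	teq	r14,r11		)	@ preserve carry
	bne	.L_20_39_or_60_79
	bcs	.L_done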
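The AES rewrite uses the same ARM()/THUMB() selection.  Thumb-2
load/store encodings accept only an index shifted by LSL #0-3, so
the LSR #24 table lookups are split in two (one instance from the
_armv4_AES_decrypt hunks below):

 ARM(	ldrb	r1,[r10,r1,lsr#24]  )	@ Td4[s1>>24]
 THUMB(	add	r1,r10,r1,lsr#24    )	@ Td4[s1>>24]
 THUMB(	ldrb	r1,[r1]		    )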
The resulting code is unlikely to be optimally scheduled, but it
should not have a large impact given the overall size of the code.
I haven't run any benchmarks.
Signed-off-by: Dave Martin <dave.martin@linaro.org>
Tested-by: David McCullough <ucdevel@gmail.com> (ARM only)
Acked-by: David McCullough <ucdevel@gmail.com>
Acked-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
			
			
commit 638591cd7b
parent 9931faca02
--- a/arch/arm/crypto/aes-armv4.S
+++ b/arch/arm/crypto/aes-armv4.S
@@ -34,8 +34,9 @@
 @ A little glue here to select the correct code below for the ARM CPU
 @ that is being targetted.
 
+#include <linux/linkage.h>
+
 .text
-.code	32
 
 .type	AES_Te,%object
 .align	5
@@ -145,10 +146,8 @@ AES_Te:
 
 @ void AES_encrypt(const unsigned char *in, unsigned char *out,
 @ 		 const AES_KEY *key) {
-.global AES_encrypt
-.type   AES_encrypt,%function
 .align	5
-AES_encrypt:
+ENTRY(AES_encrypt)
 	sub	r3,pc,#8		@ AES_encrypt
 	stmdb   sp!,{r1,r4-r12,lr}
 	mov	r12,r0		@ inp
@@ -239,15 +238,8 @@ AES_encrypt:
 	strb	r6,[r12,#14]
 	strb	r3,[r12,#15]
 #endif
-#if __ARM_ARCH__>=5
 	ldmia	sp!,{r4-r12,pc}
-#else
-	ldmia   sp!,{r4-r12,lr}
-	tst	lr,#1
-	moveq	pc,lr			@ be binary compatible with V4, yet
-	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
-#endif
-.size	AES_encrypt,.-AES_encrypt
+ENDPROC(AES_encrypt)
 
 .type   _armv4_AES_encrypt,%function
 .align	2
@@ -386,10 +378,8 @@ _armv4_AES_encrypt:
 	ldr	pc,[sp],#4		@ pop and return
 .size	_armv4_AES_encrypt,.-_armv4_AES_encrypt
 
-.global private_AES_set_encrypt_key
-.type   private_AES_set_encrypt_key,%function
 .align	5
-private_AES_set_encrypt_key:
+ENTRY(private_AES_set_encrypt_key)
 _armv4_AES_set_encrypt_key:
 	sub	r3,pc,#8		@ AES_set_encrypt_key
 	teq	r0,#0
@@ -658,15 +648,11 @@ _armv4_AES_set_encrypt_key:
 
 .Ldone:	mov	r0,#0
 	ldmia   sp!,{r4-r12,lr}
-.Labrt:	tst	lr,#1
-	moveq	pc,lr			@ be binary compatible with V4, yet
-	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
-.size	private_AES_set_encrypt_key,.-private_AES_set_encrypt_key
+.Labrt:	mov	pc,lr
+ENDPROC(private_AES_set_encrypt_key)
 
-.global private_AES_set_decrypt_key
-.type   private_AES_set_decrypt_key,%function
 .align	5
-private_AES_set_decrypt_key:
+ENTRY(private_AES_set_decrypt_key)
 	str	lr,[sp,#-4]!            @ push lr
 #if 0
 	@ kernel does both of these in setkey so optimise this bit out by
@@ -748,15 +734,8 @@ private_AES_set_decrypt_key:
 	bne	.Lmix
 
 	mov	r0,#0
-#if __ARM_ARCH__>=5
 	ldmia	sp!,{r4-r12,pc}
-#else
-	ldmia   sp!,{r4-r12,lr}
-	tst	lr,#1
-	moveq	pc,lr			@ be binary compatible with V4, yet
-	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
-#endif
-.size	private_AES_set_decrypt_key,.-private_AES_set_decrypt_key
+ENDPROC(private_AES_set_decrypt_key)
 
 .type	AES_Td,%object
 .align	5
@@ -862,10 +841,8 @@ AES_Td:
 
 @ void AES_decrypt(const unsigned char *in, unsigned char *out,
 @ 		 const AES_KEY *key) {
-.global AES_decrypt
-.type   AES_decrypt,%function
 .align	5
-AES_decrypt:
+ENTRY(AES_decrypt)
 	sub	r3,pc,#8		@ AES_decrypt
 	stmdb   sp!,{r1,r4-r12,lr}
 	mov	r12,r0		@ inp
@@ -956,15 +933,8 @@ AES_decrypt:
 	strb	r6,[r12,#14]
 	strb	r3,[r12,#15]
 #endif
-#if __ARM_ARCH__>=5
 	ldmia	sp!,{r4-r12,pc}
-#else
-	ldmia   sp!,{r4-r12,lr}
-	tst	lr,#1
-	moveq	pc,lr			@ be binary compatible with V4, yet
-	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
-#endif
-.size	AES_decrypt,.-AES_decrypt
+ENDPROC(AES_decrypt)
 
 .type   _armv4_AES_decrypt,%function
 .align	2
@@ -1064,7 +1034,9 @@ _armv4_AES_decrypt:
 	and	r9,lr,r1,lsr#8
 
 	ldrb	r7,[r10,r7]		@ Td4[s1>>0]
-	ldrb	r1,[r10,r1,lsr#24]	@ Td4[s1>>24]
+ ARM(	ldrb	r1,[r10,r1,lsr#24]  )	@ Td4[s1>>24]
+ THUMB(	add	r1,r10,r1,lsr#24    )	@ Td4[s1>>24]
+ THUMB(	ldrb	r1,[r1]		    )
 	ldrb	r8,[r10,r8]		@ Td4[s1>>16]
 	eor	r0,r7,r0,lsl#24
 	ldrb	r9,[r10,r9]		@ Td4[s1>>8]
@@ -1077,7 +1049,9 @@ _armv4_AES_decrypt:
 	ldrb	r8,[r10,r8]		@ Td4[s2>>0]
 	and	r9,lr,r2,lsr#16
 
-	ldrb	r2,[r10,r2,lsr#24]	@ Td4[s2>>24]
+ ARM(	ldrb	r2,[r10,r2,lsr#24]  )	@ Td4[s2>>24]
+ THUMB(	add	r2,r10,r2,lsr#24    )	@ Td4[s2>>24]
+ THUMB(	ldrb	r2,[r2]		    )
 	eor	r0,r0,r7,lsl#8
 	ldrb	r9,[r10,r9]		@ Td4[s2>>16]
 	eor	r1,r8,r1,lsl#16
@@ -1090,7 +1064,9 @@ _armv4_AES_decrypt:
 	and	r9,lr,r3		@ i2
 
 	ldrb	r9,[r10,r9]		@ Td4[s3>>0]
-	ldrb	r3,[r10,r3,lsr#24]	@ Td4[s3>>24]
+ ARM(	ldrb	r3,[r10,r3,lsr#24]  )	@ Td4[s3>>24]
+ THUMB(	add	r3,r10,r3,lsr#24    )	@ Td4[s3>>24]
+ THUMB(	ldrb	r3,[r3]		    )
 	eor	r0,r0,r7,lsl#16
 	ldr	r7,[r11,#0]
 	eor	r1,r1,r8,lsl#8
--- a/arch/arm/crypto/sha1-armv4-large.S
+++ b/arch/arm/crypto/sha1-armv4-large.S
@@ -51,13 +51,12 @@
 @ Profiler-assisted and platform-specific optimization resulted in 10%
 @ improvement on Cortex A8 core and 12.2 cycles per byte.
 
+#include <linux/linkage.h>
+
 .text
 
-.global	sha1_block_data_order
-.type	sha1_block_data_order,%function
-
 .align	2
-sha1_block_data_order:
+ENTRY(sha1_block_data_order)
 	stmdb	sp!,{r4-r12,lr}
 	add	r2,r1,r2,lsl#6	@ r2 to point at the end of r1
 	ldmia	r0,{r3,r4,r5,r6,r7}
@@ -194,7 +193,7 @@ sha1_block_data_order:
 	eor	r10,r10,r7,ror#2		@ F_00_19(B,C,D)
 	str	r9,[r14,#-4]!
 	add	r3,r3,r10			@ E+=F_00_19(B,C,D)
-	teq	r14,sp
+	cmp	r14,sp
 	bne	.L_00_15		@ [((11+4)*5+2)*3]
 #if __ARM_ARCH__<7
 	ldrb	r10,[r1,#2]
@@ -374,7 +373,9 @@ sha1_block_data_order:
 						@ F_xx_xx
 	add	r3,r3,r9			@ E+=X[i]
 	add	r3,r3,r10			@ E+=F_20_39(B,C,D)
-	teq	r14,sp			@ preserve carry
+ ARM(	teq	r14,sp		)	@ preserve carry
+ THUMB(	mov	r11,sp		)
+ THUMB(	teq	r14,r11		)	@ preserve carry
 	bne	.L_20_39_or_60_79	@ [+((12+3)*5+2)*4]
 	bcs	.L_done			@ [+((12+3)*5+2)*4], spare 300 bytes
 
@@ -466,7 +467,7 @@ sha1_block_data_order:
 	add	r3,r3,r9			@ E+=X[i]
 	add	r3,r3,r10			@ E+=F_40_59(B,C,D)
 	add	r3,r3,r11,ror#2
-	teq	r14,sp
+	cmp	r14,sp
 	bne	.L_40_59		@ [+((12+5)*5+2)*4]
 
 	ldr	r8,.LK_60_79
@@ -485,19 +486,12 @@ sha1_block_data_order:
 	teq	r1,r2
 	bne	.Lloop			@ [+18], total 1307
 
-#if __ARM_ARCH__>=5
 	ldmia	sp!,{r4-r12,pc}
-#else
-	ldmia	sp!,{r4-r12,lr}
-	tst	lr,#1
-	moveq	pc,lr			@ be binary compatible with V4, yet
-	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
-#endif
 .align	2
 .LK_00_19:	.word	0x5a827999
 .LK_20_39:	.word	0x6ed9eba1
 .LK_40_59:	.word	0x8f1bbcdc
 .LK_60_79:	.word	0xca62c1d6
-.size	sha1_block_data_order,.-sha1_block_data_order
+ENDPROC(sha1_block_data_order)
 .asciz	"SHA1 block transform for ARMv4, CRYPTOGAMS by <appro@openssl.org>"
 .align	2