Posted: Mon Jun 04, 2012 6:48 pm
Mirroring should have negligible overhead. Here are versions of the copy function that write the data in reverse order. Use the first one if you're using 16-bit color, and the second one if you're using 8-bit color.
What all does your draw function actually do?
Code: Select all
! void _word_8byte_copy_wordreverse(short *dst, short *src, int count)
!
! Copies count groups of 8 bytes (four 16-bit words per group) from src to
! dst, writing the words in reverse order: the first word read from src is
! stored in the last word of the count*8-byte destination area, and so on.
!
! In:    r4 = dst, r5 = src, r6 = count (number of 8-byte groups)
! Out:   nothing
! Clobb: r0-r3, r4-r6, T
! Note:  count <= 0 returns immediately without touching memory. Without
!        this guard, dt would wrap past zero and the loop would run away.
        .align  4
        .global _word_8byte_copy_wordreverse
_word_8byte_copy_wordreverse:
        cmp/pl  r6              ! T = 1 only when count > 0
        bf      2f              ! nothing to copy for count <= 0
        mov     r6,r0           ! adjust the dst pointer so we start at the end
        shll2   r0
        shll    r0              ! r0 = count * 8
        add     r0,r4           ! dst = dst + count * 8 (bytes)
1:      mov.w   @r5+,r0
        dt      r6              ! --count, T = (count == 0); nothing below touches T
        mov.w   @r5+,r1
        mov.w   @r5+,r2
        mov.w   @r5+,r3
        mov.w   r0,@-r4         ! pre-decrement stores walk dst backwards
        mov.w   r1,@-r4
        mov.w   r2,@-r4
        bf/s    1b              ! loop while count != 0
        mov.w   r3,@-r4         ! delay slot: executed on every pass
2:      rts
        nop                     ! delay slot
! void _word_8byte_copy_bytereverse(short *dst, short *src, int count)
!
! Copies count groups of 8 bytes (four 16-bit words per group) from src to
! dst in reverse order, also swapping the two bytes inside every word, so
! the count*8 bytes end up fully reversed at byte granularity.
!
! In:    r4 = dst, r5 = src, r6 = count (number of 8-byte groups)
! Out:   nothing
! Clobb: r0-r3, r4-r6, T
! Note:  count <= 0 returns immediately without touching memory. Without
!        this guard, dt would wrap past zero and the loop would run away.
        .align  4
        .global _word_8byte_copy_bytereverse
_word_8byte_copy_bytereverse:
        cmp/pl  r6              ! T = 1 only when count > 0
        bf      2f              ! nothing to copy for count <= 0
        mov     r6,r0           ! adjust the dst pointer so we start at the end
        shll2   r0
        shll    r0              ! r0 = count * 8
        add     r0,r4           ! dst = dst + count * 8 (bytes)
1:      mov.w   @r5+,r0
        dt      r6              ! --count, T = (count == 0); nothing below touches T
        mov.w   @r5+,r1
        mov.w   @r5+,r2
        mov.w   @r5+,r3
        swap.b  r0,r0           ! exchange the low two bytes before storing
        mov.w   r0,@-r4         ! pre-decrement stores walk dst backwards
        swap.b  r1,r1
        mov.w   r1,@-r4
        swap.b  r2,r2
        mov.w   r2,@-r4
        swap.b  r3,r3
        bf/s    1b              ! loop while count != 0
        mov.w   r3,@-r4         ! delay slot: executed on every pass
2:      rts
        nop                     ! delay slot