;-----------------------------------------------------------------------
; rgbMul(rInOut)
;   Scales the three 8-bit colour channels of one packed 00RRGGBB pixel
;   by the factor c held in r30 and writes the repacked pixel back to
;   rInOut. Each channel becomes (channel * c) >> 8.
;
;   Reads:  rInOut, r30 (factor c), r4 and r5 (masks set up by the
;           caller; the code below loads r4 with $ff and builds r5 from
;           the halves $ffff / $00ff)
;   Writes: rInOut, r10, r11, r12, hold/last
;
;   NOTE(review): `hold` looks like a throw-away destination whose value
;   is picked up by the following instruction through `last` - confirm.
;   NOTE(review): fead / mul_fead / mul look like "feed operands" /
;   "collect previous product and feed next operands" / "collect
;   product" for a pipelined multiplier - confirm against the ISA docs.
;-----------------------------------------------------------------------
.macro rgbMul(rInOut)
    ;split the pixel into its channels
    and hold,rInOut,r5      ;mask with r5 (presumably $00ffffff, dropping the top byte)
    lsr r10,last,16         ;r10 = 0000 00RR
    and r12,rInOut,r4       ;r12 = 0000 00BB
    lsr hold,rInOut,8       ;hold = 0000 RRGG
    and r11,last,$ff        ;r11 = 0000 00GG
    ;process: multiply every channel by c (r30)
    fead r10,r30            ;feed r, c
    mul_fead r10,r11,r30    ;r10 = r * c, feed g, c
    mul_fead r11,r12,r30    ;r11 = g * c, feed b, c
    mul r12                 ;r12 = b * c
    ;bring each product back down: product >> 8
    lsr r10,r10,8
    lsr r11,r11,8
    lsr r12,r12,8
    ;assemble the three channels into one pixel again
    lsl r10,r10,16          ;00RR 0000
    lsl hold,r11,8          ;0000 GG00
    or hold,last,r12        ;0000 GGBB
    or rInOut,last,r10      ;00RR GGBB
.endmacro

;-----------------------------------------------------------------------
; Per-core program.
;   Every core processes its own slice of the picture: it reads pixels
;   starting at testPic + offset, scales them with rgbMul by the factor
;   in r30 and stores them at frame + offset, 16 pixels per loop
;   iteration. After a full pass r30 becomes (r30 + 1) & $ff and the
;   pass is started again; the program never leaves this cycle.
;
;   Register roles:
;     r0       source address, advanced by 32 per iteration
;     r1       destination address, advanced by 32 per iteration
;     r2       iterations per pass (pixelCount)
;     r3       iteration counter, starts at 1
;     r4       $ff mask used by rgbMul
;     r5       mask built from $ffff (low half) / $00ff (high half)
;     r10-r12  scratch used by rgbMul
;     r13-r28  the 16 pixels of the current iteration
;     r30      scale factor c, 0..$ff
;     r31      this core's offset into source and destination
;-----------------------------------------------------------------------
org hcoreStartPc
;loop iterations per core: 720*450 pixels / 5 cores / 16 pixels per iteration
;NOTE(review): an earlier comment said 740*450, which does not give 4050 - confirm the picture width
.def pixelCount 4050
;address span of one core's slice: 4050 iterations * 32 per iteration
.def pixelOffset 129600
    nop
    nop
    ;r0 = pixelOffset (constant loaded as low half, then high half)
    move hold,pixelOffset
    moveh hold,last,>>pixelOffset
    or r0,last,0
    ;r31 = core number * pixelOffset
    gssr hold,0             ;get core number
    fead last,r0
    nop                     ;NOTE(review): purpose not evident here; rgbMul collects right after fead
    mul r31                 ;offset = pixelOffset * core number
    move r30,0              ;scale factor starts at 0
doAgain
    ;set up one pass over this core's slice
    move hold,testPic
    moveh hold,last,>>testPic
    add r0,last,r31         ;source+offset
    move hold,frame
    moveh hold,last,>>frame
    add r1,last,r31         ;dest+offset
    move hold,pixelCount
    moveh hold,last,>>pixelCount
    or r2,last,0            ;count
    move hold,$ffff
    moveh hold,last,$00ff
    or r5,last,0            ;r5 = mask for rgbMul
    move r4,$ff             ;r4 = blue-channel mask for rgbMul
    move r3,1               ;first iteration
    ;request the first 16 pixels (offsets 0..30, step 2)
    ;NOTE(review): rqld presumably queues a load that a later `ld` collects in request order - confirm
    rqld r0,0
    rqld r0,2
    rqld r0,4
    rqld r0,6
    rqld r0,8
    rqld r0,10
    rqld r0,12
    rqld r0,14
    rqld r0,16
    rqld r0,18
    rqld r0,20
    rqld r0,22
    rqld r0,24
    rqld r0,26
    rqld r0,28
    rqld r0,30
loop
    ;collect the 16 pixels requested earlier
    ld r13                  ;00RRGGBB
    ld r14                  ;00RRGGBB
    ld r15                  ;00RRGGBB
    ld r16                  ;00RRGGBB
    ld r17                  ;00RRGGBB
    ld r18                  ;00RRGGBB
    ld r19                  ;00RRGGBB
    ld r20                  ;00RRGGBB
    ld r21                  ;00RRGGBB
    ld r22                  ;00RRGGBB
    ld r23                  ;00RRGGBB
    ld r24                  ;00RRGGBB
    ld r25                  ;00RRGGBB
    ld r26                  ;00RRGGBB
    ld r27                  ;00RRGGBB
    ld r28                  ;00RRGGBB
    ;request the following 16 pixels (offsets 32..62) unless this is the last iteration
    ;NOTE(review): .tc presumably means "only when the compare result is false" - confirm
    cmpeq r3,r2
    rqld.tc r0,32           ;request next
    rqld.tc r0,34           ;request next
    rqld.tc r0,36           ;request next
    rqld.tc r0,38           ;request next
    rqld.tc r0,40           ;request next
    rqld.tc r0,42           ;request next
    rqld.tc r0,44           ;request next
    rqld.tc r0,46           ;request next
    rqld.tc r0,48           ;request next
    rqld.tc r0,50           ;request next
    rqld.tc r0,52           ;request next
    rqld.tc r0,54           ;request next
    rqld.tc r0,56           ;request next
    rqld.tc r0,58           ;request next
    rqld.tc r0,60           ;request next
    rqld.tc r0,62           ;request next
    ;scale all 16 pixels by r30
    .execm rgbMul(r13)
    .execm rgbMul(r14)
    .execm rgbMul(r15)
    .execm rgbMul(r16)
    .execm rgbMul(r17)
    .execm rgbMul(r18)
    .execm rgbMul(r19)
    .execm rgbMul(r20)
    .execm rgbMul(r21)
    .execm rgbMul(r22)
    .execm rgbMul(r23)
    .execm rgbMul(r24)
    .execm rgbMul(r25)
    .execm rgbMul(r26)
    .execm rgbMul(r27)
    .execm rgbMul(r28)
    ;store the 16 results at the same offsets in the destination
    st r13,r1,0
    st r14,r1,2
    st r15,r1,4
    st r16,r1,6
    st r17,r1,8
    st r18,r1,10
    st r19,r1,12
    st r20,r1,14
    st r21,r1,16
    st r22,r1,18
    st r23,r1,20
    st r24,r1,22
    st r25,r1,24
    st r26,r1,26
    st r27,r1,28
    st r28,r1,30
    ;next iteration while r3 has not reached r2
    cmpeq r3,r2
    br.tc loop
    add r3,r3,1             ;delay slot: next iteration number
    add r0,r0,32            ;delay slot: advance source
    add r1,r1,32            ;delay slot: advance destination
    nop                     ;delay slot
    ;pass finished: r30 = (r30 + 1) & $ff, then start over
    add hold,r30,1
    and r30,last,$ff
    br doAgain
    nop                     ;delay slot
    nop                     ;delay slot
    nop                     ;delay slot
    nop                     ;delay slot