diff options
author | Tom Rondeau | 2012-04-13 18:36:53 -0400 |
---|---|---|
committer | Tom Rondeau | 2012-04-13 18:36:53 -0400 |
commit | f919f9dcbb54a08e6e26d6c229ce92fb784fa1b2 (patch) | |
tree | 7e846386b9eb1676f9a93fc4a1e55916b9accc97 /volk/spu_lib | |
parent | 6a1e9783fec6ed827f49db27c171591d30f32933 (diff) | |
download | gnuradio-f919f9dcbb54a08e6e26d6c229ce92fb784fa1b2.tar.gz gnuradio-f919f9dcbb54a08e6e26d6c229ce92fb784fa1b2.tar.bz2 gnuradio-f919f9dcbb54a08e6e26d6c229ce92fb784fa1b2.zip |
Removed whitespace and added dtools/bin/remove-whitespace as a tool to do this in the future.
The sed script was provided by Moritz Fischer.
Diffstat (limited to 'volk/spu_lib')
-rw-r--r-- | volk/spu_lib/gc_spu_macs.h | 34 | ||||
-rw-r--r-- | volk/spu_lib/spu_16s_cmpgt_unaligned.c | 66 | ||||
-rw-r--r-- | volk/spu_lib/spu_16s_vector_subtract_unaligned.c | 68 | ||||
-rw-r--r-- | volk/spu_lib/spu_16s_vector_sum_unaligned.c | 68 | ||||
-rw-r--r-- | volk/spu_lib/spu_32fc_pointwise_multiply_unaligned.c | 94 | ||||
-rw-r--r-- | volk/spu_lib/spu_memcpy_unaligned.c | 122 | ||||
-rw-r--r-- | volk/spu_lib/spu_memset_unaligned.S | 44 |
7 files changed, 248 insertions, 248 deletions
diff --git a/volk/spu_lib/gc_spu_macs.h b/volk/spu_lib/gc_spu_macs.h index 8e3e3f2a6..e86dce3f5 100644 --- a/volk/spu_lib/gc_spu_macs.h +++ b/volk/spu_lib/gc_spu_macs.h @@ -1,19 +1,19 @@ /* -*- asm -*- */ /* * Copyright 2008 Free Software Foundation, Inc. - * + * * This file is part of GNU Radio - * + * * GNU Radio is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3, or (at your option) * any later version. - * + * * GNU Radio is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * + * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. @@ -279,7 +279,7 @@ name: iluh _gc_t0, 4*(s)*0x0101 + 0x0001; \ iohl _gc_t0, 4*(s)*0x0101 + 0x0203; \ shufb rt, ra, ra, _gc_t0; - + // replicate double from slot s [0,1] #define VSPLTD(rt, ra, s) \ /* sp is always 16-byte aligned */ \ @@ -301,13 +301,13 @@ name: #define MIN_SELB(rt, ra, rb, rc) selb rt, ra, rb, rc; #define MAX_SELB(rt, ra, rb, rc) selb rt, rb, ra, rc; - + // words #define MIN(rt, ra, rb) \ cgt _gc_t0, ra, rb; \ MIN_SELB(rt, ra, rb, _gc_t0) - + #define MAX(rt, ra, rb) \ cgt _gc_t0, ra, rb; \ MAX_SELB(rt, ra, rb, _gc_t0) @@ -315,17 +315,17 @@ name: #define UMIN(rt, ra, rb) \ clgt _gc_t0, ra, rb; \ MIN_SELB(rt, ra, rb, _gc_t0) - + #define UMAX(rt, ra, rb) \ clgt _gc_t0, ra, rb; \ MAX_SELB(rt, ra, rb, _gc_t0) // bytes - + #define MINB(rt, ra, rb) \ cgtb _gc_t0, ra, rb; \ MIN_SELB(rt, ra, rb, _gc_t0) - + #define MAXB(rt, ra, rb) \ cgtb _gc_t0, ra, rb; \ MAX_SELB(rt, ra, rb, _gc_t0) @@ -333,17 +333,17 @@ name: #define UMINB(rt, ra, rb) \ clgtb _gc_t0, ra, rb; \ MIN_SELB(rt, ra, rb, _gc_t0) - + #define UMAXB(rt, ra, rb) \ clgtb _gc_t0, ra, rb; \ MAX_SELB(rt, ra, rb, _gc_t0) // halfwords - + #define MINH(rt, ra, rb) \ cgth _gc_t0, ra, rb; \ MIN_SELB(rt, ra, rb, _gc_t0) - + #define MAXH(rt, ra, rb) \ cgth _gc_t0, ra, rb; \ MAX_SELB(rt, ra, rb, _gc_t0) @@ -351,17 +351,17 @@ name: #define UMINH(rt, ra, rb) \ clgth _gc_t0, ra, rb; \ MIN_SELB(rt, ra, rb, _gc_t0) - + #define UMAXH(rt, ra, rb) \ clgth _gc_t0, ra, rb; \ MAX_SELB(rt, ra, rb, _gc_t0) // floats - + #define FMIN(rt, ra, rb) \ fcgt _gc_t0, ra, rb; \ MIN_SELB(rt, ra, rb, _gc_t0) - + #define FMAX(rt, ra, rb) \ fcgt _gc_t0, ra, rb; \ MAX_SELB(rt, ra, rb, _gc_t0) @@ -370,7 +370,7 @@ name: #define FMINMAG(rt, ra, rb) \ fcmgt _gc_t0, ra, rb; \ MIN_SELB(rt, ra, rb, _gc_t0) - + // Ignoring the sign, select the values with the maximum magnitude #define FMAXMAG(rt, ra, rb) \ fcmgt _gc_t0, ra, rb; \ diff --git a/volk/spu_lib/spu_16s_cmpgt_unaligned.c b/volk/spu_lib/spu_16s_cmpgt_unaligned.c index 765cacd9a..8811e6801 100644 --- a/volk/spu_lib/spu_16s_cmpgt_unaligned.c +++ b/volk/spu_lib/spu_16s_cmpgt_unaligned.c @@ -4,14 +4,14 @@ void* libvector_16s_cmpgt_unaligned(void* target, void* src, signed short val, u //loop iterator i int i = 0; void* retval = target; - + //put the target and source addresses into qwords vector unsigned int address_counter_tgt = {(unsigned int)target, 0, 0, 0}; vector unsigned int address_counter_src = {(unsigned int)src, 0, 0 ,0}; - + //create shuffle masks - + //shuffle mask building blocks: //all from the first vector vector unsigned char oneup = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, @@ -19,9 +19,9 @@ void* libvector_16s_cmpgt_unaligned(void* target, void* src, signed short val, u //all from the second vector vector unsigned char second_oneup = {0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f}; - - + + //gamma: second half of the second, first half of the first, break at (unsigned int)src%16 vector unsigned char src_cmp = spu_splats((unsigned char)((unsigned int)src%16)); vector unsigned char gt_res = spu_cmpgt(oneup, src_cmp); @@ -29,16 +29,16 @@ void* libvector_16s_cmpgt_unaligned(void* target, void* src, signed short val, u vector unsigned char cmp_res = spu_or(gt_res, eq_res); vector unsigned char sixteen_uchar = spu_splats((unsigned char)16); vector unsigned char phase_change = spu_and(sixteen_uchar, cmp_res); - vector unsigned int shuffle_mask_gamma = spu_add((vector unsigned int)phase_change, + vector unsigned int shuffle_mask_gamma = spu_add((vector unsigned int)phase_change, (vector unsigned int)oneup); shuffle_mask_gamma = spu_rlqwbyte(shuffle_mask_gamma, (unsigned int)src%16); - - + + vector unsigned char tgt_second = spu_rlqwbyte(second_oneup, -((unsigned int)target%16)); vector unsigned char tgt_first = spu_rlqwbyte(oneup, -((unsigned int)target%16)); - + //alpha: first half of first, second half of second, break at (unsigned int)target%16 src_cmp = spu_splats((unsigned char)((unsigned int)target%16)); gt_res = spu_cmpgt(oneup, src_cmp); @@ -47,13 +47,13 @@ void* libvector_16s_cmpgt_unaligned(void* target, void* src, signed short val, u phase_change = spu_and(sixteen_uchar, cmp_res); vector unsigned int shuffle_mask_alpha = spu_add((vector unsigned int)phase_change, (vector unsigned int)oneup); - + //delta: first half of first, first half of second, break at (unsigned int)target%16 vector unsigned char shuffle_mask_delta = spu_shuffle(oneup, tgt_second, (vector unsigned char)shuffle_mask_alpha); //epsilon: second half of second, second half of first, break at (unsigned int)target%16 vector unsigned char shuffle_mask_epsilon = spu_shuffle(tgt_second, oneup, (vector unsigned char)shuffle_mask_alpha); //zeta: second half of second, first half of first, break at 16 - (unsigned int)target%16 - vector unsigned int shuffle_mask_zeta = spu_rlqwbyte(shuffle_mask_alpha, (unsigned int)target%16); + vector unsigned int shuffle_mask_zeta = spu_rlqwbyte(shuffle_mask_alpha, (unsigned int)target%16); //beta: first half of first, second half of second, break at num_bytes%16 src_cmp = spu_splats((unsigned char)(num_bytes%16)); @@ -63,17 +63,17 @@ void* libvector_16s_cmpgt_unaligned(void* target, void* src, signed short val, u phase_change = spu_and(sixteen_uchar, cmp_res); vector unsigned int shuffle_mask_beta = spu_add((vector unsigned int)phase_change, (vector unsigned int)oneup); - - - - + + + + qword src_past; qword src_present; qword tgt_past; qword tgt_present; - + qword in_temp; qword out_temp0; qword out_temp1; @@ -85,53 +85,53 @@ void* libvector_16s_cmpgt_unaligned(void* target, void* src, signed short val, u vector unsigned short compare; vector unsigned short ones = {1, 1, 1, 1, 1, 1, 1, 1}; vector unsigned short after_and; - + for(i = 0; i < num_bytes/16; ++i) { - + src_present = si_lqd((qword)address_counter_src, 16); tgt_present = si_lqd((qword)address_counter_tgt, 16); - + in_temp = spu_shuffle(src_present, src_past, (vector unsigned char)shuffle_mask_gamma); compare = spu_cmpgt((vector signed short) in_temp, vec_val); after_and = spu_and(compare, ones); - - + + out_temp0 = spu_shuffle(tgt_past, (qword)after_and, shuffle_mask_delta); out_temp1 = spu_shuffle(tgt_present, (qword)after_and, shuffle_mask_epsilon); si_stqd(out_temp0, (qword)address_counter_tgt, 0); si_stqd(out_temp1, (qword)address_counter_tgt, 16); - + tgt_past = out_temp1; src_past = src_present; address_counter_src = spu_add(address_counter_src, 16); address_counter_tgt = spu_add(address_counter_tgt, 16); - + } - + src_present = si_lqd((qword)address_counter_src, 16); tgt_present = si_lqd((qword)address_counter_tgt, 16); - - + + in_temp = spu_shuffle(src_present, src_past,(vector unsigned char) shuffle_mask_gamma); - + compare = spu_cmpgt((vector signed short) in_temp, vec_val); after_and = spu_and(compare, ones); - + qword target_temp = spu_shuffle(tgt_present, tgt_past, (vector unsigned char) shuffle_mask_zeta); qword meld = spu_shuffle((qword)after_and, target_temp, (vector unsigned char)shuffle_mask_beta); - - + + out_temp0 = spu_shuffle(tgt_past, meld, shuffle_mask_delta); out_temp1 = spu_shuffle(tgt_present, meld, shuffle_mask_epsilon); - + si_stqd(out_temp0, (qword)address_counter_tgt, 0); si_stqd(out_temp1, (qword)address_counter_tgt, 16); - + return retval; } @@ -156,5 +156,5 @@ int main(){ } printf("\n"); } -*/ +*/ diff --git a/volk/spu_lib/spu_16s_vector_subtract_unaligned.c b/volk/spu_lib/spu_16s_vector_subtract_unaligned.c index a3ce6c2fe..ea110c8d2 100644 --- a/volk/spu_lib/spu_16s_vector_subtract_unaligned.c +++ b/volk/spu_lib/spu_16s_vector_subtract_unaligned.c @@ -4,15 +4,15 @@ void* libvector_16s_vector_subtract_unaligned(void* target, void* src0, void* s //loop iterator i int i = 0; void* retval = target; - + //put the target and source addresses into qwords vector unsigned int address_counter_tgt = {(unsigned int)target, 0, 0, 0}; vector unsigned int address_counter_src0 = {(unsigned int)src0, 0, 0 ,0}; vector unsigned int address_counter_src1 = {(unsigned int)src1, 0, 0, 0}; - + //create shuffle masks - + //shuffle mask building blocks: //all from the first vector vector unsigned char oneup = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, @@ -20,9 +20,9 @@ void* libvector_16s_vector_subtract_unaligned(void* target, void* src0, void* s //all from the second vector vector unsigned char second_oneup = {0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f}; - - + + //gamma: second half of the second, first half of the first, break at (unsigned int)src0%16 vector unsigned char src_cmp = spu_splats((unsigned char)((unsigned int)src0%16)); vector unsigned char gt_res = spu_cmpgt(oneup, src_cmp); @@ -30,7 +30,7 @@ void* libvector_16s_vector_subtract_unaligned(void* target, void* src0, void* s vector unsigned char cmp_res = spu_or(gt_res, eq_res); vector unsigned char sixteen_uchar = spu_splats((unsigned char)16); vector unsigned char phase_change = spu_and(sixteen_uchar, cmp_res); - vector unsigned int shuffle_mask_gamma = spu_add((vector unsigned int)phase_change, + vector unsigned int shuffle_mask_gamma = spu_add((vector unsigned int)phase_change, (vector unsigned int)oneup); shuffle_mask_gamma = spu_rlqwbyte(shuffle_mask_gamma, (unsigned int)src0%16); @@ -41,17 +41,17 @@ void* libvector_16s_vector_subtract_unaligned(void* target, void* src0, void* s cmp_res = spu_or(gt_res, eq_res); sixteen_uchar = spu_splats((unsigned char)16); phase_change = spu_and(sixteen_uchar, cmp_res); - vector unsigned int shuffle_mask_eta = spu_add((vector unsigned int)phase_change, + vector unsigned int shuffle_mask_eta = spu_add((vector unsigned int)phase_change, (vector unsigned int)oneup); shuffle_mask_eta = spu_rlqwbyte(shuffle_mask_eta, (unsigned int)src1%16); - - - + + + vector unsigned char tgt_second = spu_rlqwbyte(second_oneup, -((unsigned int)target%16)); vector unsigned char tgt_first = spu_rlqwbyte(oneup, -((unsigned int)target%16)); - + //alpha: first half of first, second half of second, break at (unsigned int)target%16 src_cmp = spu_splats((unsigned char)((unsigned int)target%16)); gt_res = spu_cmpgt(oneup, src_cmp); @@ -60,13 +60,13 @@ void* libvector_16s_vector_subtract_unaligned(void* target, void* src0, void* s phase_change = spu_and(sixteen_uchar, cmp_res); vector unsigned int shuffle_mask_alpha = spu_add((vector unsigned int)phase_change, (vector unsigned int)oneup); - + //delta: first half of first, first half of second, break at (unsigned int)target%16 vector unsigned char shuffle_mask_delta = spu_shuffle(oneup, tgt_second, (vector unsigned char)shuffle_mask_alpha); //epsilon: second half of second, second half of first, break at (unsigned int)target%16 vector unsigned char shuffle_mask_epsilon = spu_shuffle(tgt_second, oneup, (vector unsigned char)shuffle_mask_alpha); //zeta: second half of second, first half of first, break at 16 - (unsigned int)target%16 - vector unsigned int shuffle_mask_zeta = spu_rlqwbyte(shuffle_mask_alpha, (unsigned int)target%16); + vector unsigned int shuffle_mask_zeta = spu_rlqwbyte(shuffle_mask_alpha, (unsigned int)target%16); //beta: first half of first, second half of second, break at num_bytes%16 src_cmp = spu_splats((unsigned char)(num_bytes%16)); @@ -76,19 +76,19 @@ void* libvector_16s_vector_subtract_unaligned(void* target, void* src0, void* s phase_change = spu_and(sixteen_uchar, cmp_res); vector unsigned int shuffle_mask_beta = spu_add((vector unsigned int)phase_change, (vector unsigned int)oneup); - - - - + + + + qword src0_past; qword src0_present; qword src1_past; qword src1_present; qword tgt_past; qword tgt_present; - + qword in_temp0; qword in_temp1; qword out_temp0; @@ -99,54 +99,54 @@ void* libvector_16s_vector_subtract_unaligned(void* target, void* src0, void* s src0_past = si_lqd((qword)address_counter_src0, 0); src1_past = si_lqd((qword)address_counter_src1, 0); tgt_past = si_lqd((qword)address_counter_tgt, 0); - + for(i = 0; i < num_bytes/16; ++i) { - + src0_present = si_lqd((qword)address_counter_src0, 16); src1_present = si_lqd((qword)address_counter_src1, 16); tgt_present = si_lqd((qword)address_counter_tgt, 16); - + in_temp0 = spu_shuffle(src0_present, src0_past, (vector unsigned char)shuffle_mask_gamma); in_temp1 = spu_shuffle(src1_present, src1_past, (vector unsigned char)shuffle_mask_eta); - + sum = spu_sub((vector signed short)in_temp0, (vector signed short)in_temp1); - + out_temp0 = spu_shuffle(tgt_past, (qword)sum, shuffle_mask_delta); out_temp1 = spu_shuffle(tgt_present, (qword)sum, shuffle_mask_epsilon); - + si_stqd(out_temp0, (qword)address_counter_tgt, 0); si_stqd(out_temp1, (qword)address_counter_tgt, 16); - + tgt_past = out_temp1; src0_past = src0_present; src1_past = src1_present; address_counter_src0 = spu_add(address_counter_src0, 16); address_counter_src1 = spu_add(address_counter_src1, 16); address_counter_tgt = spu_add(address_counter_tgt, 16); - - + + } - + src0_present = si_lqd((qword)address_counter_src0, 16); src1_present = si_lqd((qword)address_counter_src1, 16); tgt_present = si_lqd((qword)address_counter_tgt, 16); - - + + in_temp0 = spu_shuffle(src0_present, src0_past, (vector unsigned char) shuffle_mask_gamma); in_temp1 = spu_shuffle(src1_present, src1_past, (vector unsigned char) shuffle_mask_eta); sum = spu_sub((vector signed short)in_temp0, (vector signed short)in_temp1); qword target_temp = spu_shuffle(tgt_present, tgt_past, (vector unsigned char) shuffle_mask_zeta); qword meld = spu_shuffle((qword)sum, target_temp, (vector unsigned char)shuffle_mask_beta); - - + + out_temp0 = spu_shuffle(tgt_past, meld, shuffle_mask_delta); out_temp1 = spu_shuffle(tgt_present, meld, shuffle_mask_epsilon); - + si_stqd(out_temp0, (qword)address_counter_tgt, 0); si_stqd(out_temp1, (qword)address_counter_tgt, 16); - + return retval; } diff --git a/volk/spu_lib/spu_16s_vector_sum_unaligned.c b/volk/spu_lib/spu_16s_vector_sum_unaligned.c index 5a1cb9aaf..0097b4f56 100644 --- a/volk/spu_lib/spu_16s_vector_sum_unaligned.c +++ b/volk/spu_lib/spu_16s_vector_sum_unaligned.c @@ -4,15 +4,15 @@ void* libvector_16s_vector_sum_unaligned(void* target, void* src0, void* src1, //loop iterator i int i = 0; void* retval = target; - + //put the target and source addresses into qwords vector unsigned int address_counter_tgt = {(unsigned int)target, 0, 0, 0}; vector unsigned int address_counter_src0 = {(unsigned int)src0, 0, 0 ,0}; vector unsigned int address_counter_src1 = {(unsigned int)src1, 0, 0, 0}; - + //create shuffle masks - + //shuffle mask building blocks: //all from the first vector vector unsigned char oneup = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, @@ -20,9 +20,9 @@ void* libvector_16s_vector_sum_unaligned(void* target, void* src0, void* src1, //all from the second vector vector unsigned char second_oneup = {0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f}; - - + + //gamma: second half of the second, first half of the first, break at (unsigned int)src0%16 vector unsigned char src_cmp = spu_splats((unsigned char)((unsigned int)src0%16)); vector unsigned char gt_res = spu_cmpgt(oneup, src_cmp); @@ -30,7 +30,7 @@ void* libvector_16s_vector_sum_unaligned(void* target, void* src0, void* src1, vector unsigned char cmp_res = spu_or(gt_res, eq_res); vector unsigned char sixteen_uchar = spu_splats((unsigned char)16); vector unsigned char phase_change = spu_and(sixteen_uchar, cmp_res); - vector unsigned int shuffle_mask_gamma = spu_add((vector unsigned int)phase_change, + vector unsigned int shuffle_mask_gamma = spu_add((vector unsigned int)phase_change, (vector unsigned int)oneup); shuffle_mask_gamma = spu_rlqwbyte(shuffle_mask_gamma, (unsigned int)src0%16); @@ -41,17 +41,17 @@ void* libvector_16s_vector_sum_unaligned(void* target, void* src0, void* src1, cmp_res = spu_or(gt_res, eq_res); sixteen_uchar = spu_splats((unsigned char)16); phase_change = spu_and(sixteen_uchar, cmp_res); - vector unsigned int shuffle_mask_eta = spu_add((vector unsigned int)phase_change, + vector unsigned int shuffle_mask_eta = spu_add((vector unsigned int)phase_change, (vector unsigned int)oneup); shuffle_mask_eta = spu_rlqwbyte(shuffle_mask_eta, (unsigned int)src1%16); - - - + + + vector unsigned char tgt_second = spu_rlqwbyte(second_oneup, -((unsigned int)target%16)); vector unsigned char tgt_first = spu_rlqwbyte(oneup, -((unsigned int)target%16)); - + //alpha: first half of first, second half of second, break at (unsigned int)target%16 src_cmp = spu_splats((unsigned char)((unsigned int)target%16)); gt_res = spu_cmpgt(oneup, src_cmp); @@ -60,13 +60,13 @@ void* libvector_16s_vector_sum_unaligned(void* target, void* src0, void* src1, phase_change = spu_and(sixteen_uchar, cmp_res); vector unsigned int shuffle_mask_alpha = spu_add((vector unsigned int)phase_change, (vector unsigned int)oneup); - + //delta: first half of first, first half of second, break at (unsigned int)target%16 vector unsigned char shuffle_mask_delta = spu_shuffle(oneup, tgt_second, (vector unsigned char)shuffle_mask_alpha); //epsilon: second half of second, second half of first, break at (unsigned int)target%16 vector unsigned char shuffle_mask_epsilon = spu_shuffle(tgt_second, oneup, (vector unsigned char)shuffle_mask_alpha); //zeta: second half of second, first half of first, break at 16 - (unsigned int)target%16 - vector unsigned int shuffle_mask_zeta = spu_rlqwbyte(shuffle_mask_alpha, (unsigned int)target%16); + vector unsigned int shuffle_mask_zeta = spu_rlqwbyte(shuffle_mask_alpha, (unsigned int)target%16); //beta: first half of first, second half of second, break at num_bytes%16 src_cmp = spu_splats((unsigned char)(num_bytes%16)); @@ -76,19 +76,19 @@ void* libvector_16s_vector_sum_unaligned(void* target, void* src0, void* src1, phase_change = spu_and(sixteen_uchar, cmp_res); vector unsigned int shuffle_mask_beta = spu_add((vector unsigned int)phase_change, (vector unsigned int)oneup); - - - - + + + + qword src0_past; qword src0_present; qword src1_past; qword src1_present; qword tgt_past; qword tgt_present; - + qword in_temp0; qword in_temp1; qword out_temp0; @@ -99,54 +99,54 @@ void* libvector_16s_vector_sum_unaligned(void* target, void* src0, void* src1, src0_past = si_lqd((qword)address_counter_src0, 0); src1_past = si_lqd((qword)address_counter_src1, 0); tgt_past = si_lqd((qword)address_counter_tgt, 0); - + for(i = 0; i < num_bytes/16; ++i) { - + src0_present = si_lqd((qword)address_counter_src0, 16); src1_present = si_lqd((qword)address_counter_src1, 16); tgt_present = si_lqd((qword)address_counter_tgt, 16); - + in_temp0 = spu_shuffle(src0_present, src0_past, (vector unsigned char)shuffle_mask_gamma); in_temp1 = spu_shuffle(src1_present, src1_past, (vector unsigned char)shuffle_mask_eta); - + sum = spu_add((vector signed int)in_temp0, (vector signed int)in_temp1); - + out_temp0 = spu_shuffle(tgt_past, (qword)sum, shuffle_mask_delta); out_temp1 = spu_shuffle(tgt_present, (qword)sum, shuffle_mask_epsilon); - + si_stqd(out_temp0, (qword)address_counter_tgt, 0); si_stqd(out_temp1, (qword)address_counter_tgt, 16); - + tgt_past = out_temp1; src0_past = src0_present; src1_past = src1_present; address_counter_src0 = spu_add(address_counter_src0, 16); address_counter_src1 = spu_add(address_counter_src1, 16); address_counter_tgt = spu_add(address_counter_tgt, 16); - - + + } - + src0_present = si_lqd((qword)address_counter_src0, 16); src1_present = si_lqd((qword)address_counter_src1, 16); tgt_present = si_lqd((qword)address_counter_tgt, 16); - - + + in_temp0 = spu_shuffle(src0_present, src0_past, (vector unsigned char) shuffle_mask_gamma); in_temp1 = spu_shuffle(src1_present, src1_past, (vector unsigned char) shuffle_mask_eta); sum = spu_add((vector signed int)in_temp0, (vector signed int)in_temp1); qword target_temp = spu_shuffle(tgt_present, tgt_past, (vector unsigned char) shuffle_mask_zeta); qword meld = spu_shuffle((qword)sum, target_temp, (vector unsigned char)shuffle_mask_beta); - - + + out_temp0 = spu_shuffle(tgt_past, meld, shuffle_mask_delta); out_temp1 = spu_shuffle(tgt_present, meld, shuffle_mask_epsilon); - + si_stqd(out_temp0, (qword)address_counter_tgt, 0); si_stqd(out_temp1, (qword)address_counter_tgt, 16); - + return retval; } diff --git a/volk/spu_lib/spu_32fc_pointwise_multiply_unaligned.c b/volk/spu_lib/spu_32fc_pointwise_multiply_unaligned.c index 58fd4aa0c..d1c960488 100644 --- a/volk/spu_lib/spu_32fc_pointwise_multiply_unaligned.c +++ b/volk/spu_lib/spu_32fc_pointwise_multiply_unaligned.c @@ -7,15 +7,15 @@ void* libvector_pointwise_multiply_32fc_unaligned(void* target, void* src0, voi //loop iterator i int i = 0; void* retval = target; - + //put the target and source addresses into qwords vector unsigned int address_counter_tgt = {(unsigned int)target, 0, 0, 0}; vector unsigned int address_counter_src0 = {(unsigned int)src0, 0, 0 ,0}; vector unsigned int address_counter_src1 = {(unsigned int)src1, 0, 0, 0}; - + //create shuffle masks - + //shuffle mask building blocks: //all from the first vector vector unsigned char oneup = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, @@ -23,9 +23,9 @@ void* libvector_pointwise_multiply_32fc_unaligned(void* target, void* src0, voi //all from the second vector vector unsigned char second_oneup = {0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f}; - - + + //gamma: second half of the second, first half of the first, break at (unsigned int)src0%16 vector unsigned char src_cmp = spu_splats((unsigned char)((unsigned int)src0%16)); vector unsigned char gt_res = spu_cmpgt(oneup, src_cmp); @@ -33,7 +33,7 @@ void* libvector_pointwise_multiply_32fc_unaligned(void* target, void* src0, voi vector unsigned char cmp_res = spu_or(gt_res, eq_res); vector unsigned char sixteen_uchar = spu_splats((unsigned char)16); vector unsigned char phase_change = spu_and(sixteen_uchar, cmp_res); - vector unsigned int shuffle_mask_gamma = spu_add((vector unsigned int)phase_change, + vector unsigned int shuffle_mask_gamma = spu_add((vector unsigned int)phase_change, (vector unsigned int)oneup); shuffle_mask_gamma = spu_rlqwbyte(shuffle_mask_gamma, (unsigned int)src0%16); @@ -44,17 +44,17 @@ void* libvector_pointwise_multiply_32fc_unaligned(void* target, void* src0, voi cmp_res = spu_or(gt_res, eq_res); sixteen_uchar = spu_splats((unsigned char)16); phase_change = spu_and(sixteen_uchar, cmp_res); - vector unsigned int shuffle_mask_eta = spu_add((vector unsigned int)phase_change, + vector unsigned int shuffle_mask_eta = spu_add((vector unsigned int)phase_change, (vector unsigned int)oneup); shuffle_mask_eta = spu_rlqwbyte(shuffle_mask_eta, (unsigned int)src1%16); - - - + + + vector unsigned char tgt_second = spu_rlqwbyte(second_oneup, -((unsigned int)target%16)); vector unsigned char tgt_first = spu_rlqwbyte(oneup, -((unsigned int)target%16)); - + //alpha: first half of first, second half of second, break at (unsigned int)target%16 src_cmp = spu_splats((unsigned char)((unsigned int)target%16)); gt_res = spu_cmpgt(oneup, src_cmp); @@ -63,13 +63,13 @@ void* libvector_pointwise_multiply_32fc_unaligned(void* target, void* src0, voi phase_change = spu_and(sixteen_uchar, cmp_res); vector unsigned int shuffle_mask_alpha = spu_add((vector unsigned int)phase_change, (vector unsigned int)oneup); - + //delta: first half of first, first half of second, break at (unsigned int)target%16 vector unsigned char shuffle_mask_delta = spu_shuffle(oneup, tgt_second, (vector unsigned char)shuffle_mask_alpha); //epsilon: second half of second, second half of first, break at (unsigned int)target%16 vector unsigned char shuffle_mask_epsilon = spu_shuffle(tgt_second, oneup, (vector unsigned char)shuffle_mask_alpha); //zeta: second half of second, first half of first, break at 16 - (unsigned int)target%16 - vector unsigned int shuffle_mask_zeta = spu_rlqwbyte(shuffle_mask_alpha, (unsigned int)target%16); + vector unsigned int shuffle_mask_zeta = spu_rlqwbyte(shuffle_mask_alpha, (unsigned int)target%16); //beta: first half of first, second half of second, break at num_bytes%16 src_cmp = spu_splats((unsigned char)(num_bytes%16)); @@ -79,19 +79,19 @@ void* libvector_pointwise_multiply_32fc_unaligned(void* target, void* src0, voi phase_change = spu_and(sixteen_uchar, cmp_res); vector unsigned int shuffle_mask_beta = spu_add((vector unsigned int)phase_change, (vector unsigned int)oneup); - - - - + + + + qword src0_past; qword src0_present; qword src1_past; qword src1_present; qword tgt_past; qword tgt_present; - + qword in_temp0; qword in_temp1; qword out_temp0; @@ -101,7 +101,7 @@ void* libvector_pointwise_multiply_32fc_unaligned(void* target, void* src0, voi src0_past = si_lqd((qword)address_counter_src0, 0); src1_past = si_lqd((qword)address_counter_src1, 0); tgt_past = si_lqd((qword)address_counter_tgt, 0); - + vector unsigned char shuffle_mask_complexprod0 = {0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03, 0x0c, 0x0d, 0x0e, 0x0f, 0x08, 0x09, 0x0a, 0x0b}; vector unsigned char shuffle_mask_complexprod1 = {0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, @@ -110,7 +110,7 @@ void* libvector_pointwise_multiply_32fc_unaligned(void* target, void* src0, voi 0x0c, 0x0d, 0x0e, 0x0f, 0x1c, 0x1d, 0x1e, 0x1f}; vector unsigned char sign_changer = {0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00}; - + vector float prod0; qword shuf0; vector float prod1; @@ -118,54 +118,54 @@ void* libvector_pointwise_multiply_32fc_unaligned(void* target, void* src0, voi qword summand0; qword summand1; vector float sum; - + for(i = 0; i < num_bytes/16; ++i) { - + src0_present = si_lqd((qword)address_counter_src0, 16); src1_present = si_lqd((qword)address_counter_src1, 16); tgt_present = si_lqd((qword)address_counter_tgt, 16); - + in_temp0 = spu_shuffle(src0_present, src0_past, (vector unsigned char)shuffle_mask_gamma); in_temp1 = spu_shuffle(src1_present, src1_past, (vector unsigned char)shuffle_mask_eta); - + prod0 = spu_mul((vector float)in_temp0, (vector float)in_temp1); shuf0 = spu_shuffle((qword)in_temp1, (qword)in_temp1, shuffle_mask_complexprod0); prod1 = spu_mul((vector float)in_temp0, (vector float)shuf0); sign_change = spu_xor(prod0, (vector float)sign_changer); - + summand0 = spu_shuffle((qword)sign_change, (qword)prod1, shuffle_mask_complexprod1); - + summand1 = spu_shuffle((qword)sign_change, (qword)prod1, shuffle_mask_complexprod2); - + sum = spu_add((vector float)summand0, (vector float)summand1); - + out_temp0 = spu_shuffle(tgt_past, (qword)sum, shuffle_mask_delta); out_temp1 = spu_shuffle(tgt_present, (qword)sum, shuffle_mask_epsilon); - + si_stqd(out_temp0, (qword)address_counter_tgt, 0); si_stqd(out_temp1, (qword)address_counter_tgt, 16); - + tgt_past = out_temp1; src0_past = src0_present; src1_past = src1_present; address_counter_src0 = spu_add(address_counter_src0, 16); address_counter_src1 = spu_add(address_counter_src1, 16); address_counter_tgt = spu_add(address_counter_tgt, 16); - - + + } - + src0_present = si_lqd((qword)address_counter_src0, 16); src1_present = si_lqd((qword)address_counter_src1, 16); tgt_present = si_lqd((qword)address_counter_tgt, 16); - - + + in_temp0 = spu_shuffle(src0_present, src0_past, (vector unsigned char) shuffle_mask_gamma); in_temp1 = spu_shuffle(src1_present, src1_past, (vector unsigned char) shuffle_mask_eta); - - + + prod0 = spu_mul((vector float)in_temp0, (vector float)in_temp1); shuf0 = spu_shuffle((qword)in_temp1, (qword)in_temp1, shuffle_mask_complexprod0); prod1 = spu_mul(prod0, (vector float)shuf0); @@ -173,20 +173,20 @@ void* libvector_pointwise_multiply_32fc_unaligned(void* target, void* src0, voi summand0 = spu_shuffle((qword)sign_change, (qword)prod1, shuffle_mask_complexprod1); summand1 = spu_shuffle((qword)sign_change, (qword)prod1, shuffle_mask_complexprod2); sum = spu_add((vector float)summand0, (vector float)summand1); - - - + + + qword target_temp = spu_shuffle(tgt_present, tgt_past, (vector unsigned char) shuffle_mask_zeta); qword meld = spu_shuffle((qword)sum, target_temp, (vector unsigned char)shuffle_mask_beta); - - - + + + out_temp0 = spu_shuffle(tgt_past, meld, shuffle_mask_delta); out_temp1 = spu_shuffle(tgt_present, meld, shuffle_mask_epsilon); - + si_stqd(out_temp0, (qword)address_counter_tgt, 0); si_stqd(out_temp1, (qword)address_counter_tgt, 16); - + return retval; } @@ -209,14 +209,14 @@ int main(){ vector_product_complex(res, pooh, bear, 48*sizeof(float)); - + for(i = 0; i < 48; ++i) { printf("%f, ", res[i]); } printf("\n"); - + } */ diff --git a/volk/spu_lib/spu_memcpy_unaligned.c b/volk/spu_lib/spu_memcpy_unaligned.c index 2a0dabcd7..0f15b5d80 100644 --- a/volk/spu_lib/spu_memcpy_unaligned.c +++ b/volk/spu_lib/spu_memcpy_unaligned.c @@ -5,14 +5,14 @@ void* libvector_memcpy_unaligned(void* target, void* src, unsigned int num_bytes //loop iterator i int i = 0; void* retval = target; - + //put the target and source addresses into qwords vector unsigned int address_counter_tgt = {(unsigned int)target, 0, 0, 0}; vector unsigned int address_counter_src = {(unsigned int)src, 0, 0 ,0}; - + //create shuffle masks - + //shuffle mask building blocks: //all from the first vector vector unsigned char oneup = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, @@ -20,9 +20,9 @@ void* libvector_memcpy_unaligned(void* target, void* src, unsigned int num_bytes //all from the second vector vector unsigned char second_oneup = {0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f}; - - + + //gamma: second half of the second, first half of the first, break at (unsigned int)src%16 vector unsigned char src_cmp = spu_splats((unsigned char)((unsigned int)src%16)); vector unsigned char gt_res = spu_cmpgt(oneup, src_cmp); @@ -30,16 +30,16 @@ void* libvector_memcpy_unaligned(void* target, void* src, unsigned int num_bytes vector unsigned char cmp_res = spu_or(gt_res, eq_res); vector unsigned char sixteen_uchar = spu_splats((unsigned char)16); vector unsigned char phase_change = spu_and(sixteen_uchar, cmp_res); - vector unsigned int shuffle_mask_gamma = spu_add((vector unsigned int)phase_change, + vector unsigned int shuffle_mask_gamma = spu_add((vector unsigned int)phase_change, (vector unsigned int)oneup); shuffle_mask_gamma = spu_rlqwbyte(shuffle_mask_gamma, (unsigned int)src%16); - - + + vector unsigned char tgt_second = spu_rlqwbyte(second_oneup, -((unsigned int)target%16)); vector unsigned char tgt_first = spu_rlqwbyte(oneup, -((unsigned int)target%16)); - + //alpha: first half of first, second half of second, break at (unsigned int)target%16 src_cmp = spu_splats((unsigned char)((unsigned int)target%16)); gt_res = spu_cmpgt(oneup, src_cmp); @@ -48,13 +48,13 @@ void* libvector_memcpy_unaligned(void* target, void* src, unsigned int num_bytes phase_change = spu_and(sixteen_uchar, cmp_res); vector unsigned int shuffle_mask_alpha = spu_add((vector unsigned int)phase_change, (vector unsigned int)oneup); - + //delta: first half of first, first half of second, break at (unsigned int)target%16 vector unsigned char shuffle_mask_delta = spu_shuffle(oneup, tgt_second, (vector unsigned char)shuffle_mask_alpha); //epsilon: second half of second, second half of first, break at (unsigned int)target%16 vector unsigned char shuffle_mask_epsilon = spu_shuffle(tgt_second, oneup, (vector unsigned char)shuffle_mask_alpha); //zeta: second half of second, first half of first, break at 16 - (unsigned int)target%16 - vector unsigned int shuffle_mask_zeta = spu_rlqwbyte(shuffle_mask_alpha, (unsigned int)target%16); + vector unsigned int shuffle_mask_zeta = spu_rlqwbyte(shuffle_mask_alpha, (unsigned int)target%16); //beta: first half of first, second half of second, break at num_bytes%16 src_cmp = spu_splats((unsigned char)(num_bytes%16)); @@ -64,61 +64,61 @@ void* libvector_memcpy_unaligned(void* target, void* src, unsigned int num_bytes phase_change = spu_and(sixteen_uchar, cmp_res); vector unsigned int shuffle_mask_beta = spu_add((vector unsigned int)phase_change, (vector unsigned int)oneup); - - - - + + + + qword src_past; qword src_present; qword tgt_past; qword tgt_present; - + qword in_temp; qword out_temp0; qword out_temp1; src_past = si_lqd((qword)address_counter_src, 0); tgt_past = si_lqd((qword)address_counter_tgt, 0); - + for(i = 0; i < num_bytes/16; ++i) { - + src_present = si_lqd((qword)address_counter_src, 16); tgt_present = si_lqd((qword)address_counter_tgt, 16); - + in_temp = spu_shuffle(src_present, src_past, (vector unsigned char)shuffle_mask_gamma); - + out_temp0 = spu_shuffle(tgt_past, in_temp, shuffle_mask_delta); out_temp1 = spu_shuffle(tgt_present, in_temp, shuffle_mask_epsilon); si_stqd(out_temp0, (qword)address_counter_tgt, 0); si_stqd(out_temp1, (qword)address_counter_tgt, 16); - + tgt_past = out_temp1; src_past = src_present; address_counter_src = spu_add(address_counter_src, 16); address_counter_tgt = spu_add(address_counter_tgt, 16); - + } - + src_present = si_lqd((qword)address_counter_src, 16); tgt_present = si_lqd((qword)address_counter_tgt, 16); - - + + in_temp = spu_shuffle(src_present, src_past,(vector unsigned char) shuffle_mask_gamma); qword target_temp = spu_shuffle(tgt_present, tgt_past, (vector unsigned char) shuffle_mask_zeta); qword meld = spu_shuffle(in_temp, target_temp, (vector unsigned char)shuffle_mask_beta); - - + + out_temp0 = spu_shuffle(tgt_past, meld, shuffle_mask_delta); out_temp1 = spu_shuffle(tgt_present, meld, shuffle_mask_epsilon); - + si_stqd(out_temp0, (qword)address_counter_tgt, 0); si_stqd(out_temp1, (qword)address_counter_tgt, 16); - + return retval; } @@ -133,9 +133,9 @@ void* mcpy(void* target, void* src, size_t num_bytes){ //put the target and source addresses into qwords vector unsigned int address_counter_tgt = {(unsigned int)target, 0, 0, 0}; vector unsigned int address_counter_src = {(unsigned int)src, 0, 0 ,0}; - + //create shuffle masks - + //shuffle mask building blocks: //all from the first vector vector unsigned char oneup = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, @@ -143,9 +143,9 @@ void* mcpy(void* target, void* src, size_t num_bytes){ //all from the second vector vector unsigned char second_oneup = {0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f}; - - + + //gamma: second half of the second, first half of the first, break at src%16 vector unsigned char src_cmp = spu_splats((unsigned char)(src%16)); vector unsigned char gt_res = spu_cmpgt(oneup, src_cmp); @@ -153,16 +153,16 @@ void* mcpy(void* target, void* src, size_t num_bytes){ vector unsigned char cmp_res = spu_or(gt_res, eq_res); vector unsigned char sixteen_uchar = spu_splats((unsigned char)16); vector unsigned char phase_change = spu_and(sixteen_uchar, cmp_res); - vector unsigned int shuffle_mask_gamma = spu_add((vector unsigned int)phase_change, + vector unsigned int shuffle_mask_gamma = spu_add((vector unsigned int)phase_change, (vector unsigned int)oneup); shuffle_mask_gamma = spu_rlqwbyte(shuffle_mask_gamma, src%16); - - + + vector unsigned char tgt_second = spu_rlqwbyte(second_oneup, -(target%16)); vector unsigned char tgt_first = spu_rlqwbyte(oneup, -(target%16)); - + //alpha: first half of first, second half of second, break at target%16 src_cmp = spu_splats((unsigned char)(target%16)); gt_res = spu_cmpgt(oneup, src_cmp); @@ -171,13 +171,13 @@ void* mcpy(void* target, void* src, size_t num_bytes){ phase_change = spu_and(sixteen_uchar, cmp_res); vector unsigned int shuffle_mask_alpha = spu_add((vector unsigned int)phase_change, (vector unsigned int)oneup); - + //delta: first half of first, first half of second, break at target%16 vector unsigned char shuffle_mask_delta = spu_shuffle(oneup, tgt_second, (vector unsigned char)shuffle_mask_alpha); //epsilon: second half of second, second half of first, break at target%16 vector unsigned char shuffle_mask_epsilon = spu_shuffle(tgt_second, oneup, (vector unsigned char)shuffle_mask_alpha); //zeta: second half of second, first half of first, break at 16 - target%16 - vector unsigned int shuffle_mask_zeta = spu_rlqwbyte(shuffle_mask_alpha, target%16); + vector unsigned int shuffle_mask_zeta = spu_rlqwbyte(shuffle_mask_alpha, target%16); //beta: first half of first, second half of second, break at num_bytes%16 src_cmp = spu_splats((unsigned char)(num_bytes%16)); @@ -187,10 +187,10 @@ void* mcpy(void* target, void* src, size_t num_bytes){ phase_change = spu_and(sixteen_uchar, cmp_res); vector unsigned int shuffle_mask_beta = spu_add((vector unsigned int)phase_change, (vector unsigned int)oneup); - - + + printf("num_bytesmod16 %d\n", num_bytes%16); - printf("beta %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d\n", + printf("beta %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d\n", spu_extract((vector unsigned char) shuffle_mask_beta, 0), spu_extract((vector unsigned char) shuffle_mask_beta, 1), spu_extract((vector unsigned char) shuffle_mask_beta, 2), @@ -207,64 +207,64 @@ void* mcpy(void* target, void* src, size_t num_bytes){ spu_extract((vector unsigned char) shuffle_mask_beta, 13), spu_extract((vector unsigned char) shuffle_mask_beta, 14), spu_extract((vector unsigned char) shuffle_mask_beta, 15)); - - - - + + + + qword src_past; qword src_present; qword tgt_past; qword tgt_present; - + qword in_temp; qword out_temp0; qword out_temp1; src_past = si_lqd((qword)address_counter_src, 0); tgt_past = si_lqd((qword)address_counter_tgt, 0); - + for(i = 0; i < num_bytes/16; ++i) { - + src_present = si_lqd((qword)address_counter_src, 16); tgt_present = si_lqd((qword)address_counter_tgt, 16); - + in_temp = spu_shuffle(src_present, src_past, (vector unsigned char)shuffle_mask_gamma); - + out_temp0 = spu_shuffle(tgt_past, in_temp, shuffle_mask_delta); out_temp1 = spu_shuffle(tgt_present, in_temp, shuffle_mask_epsilon); si_stqd(out_temp0, (qword)address_counter_tgt, 0); si_stqd(out_temp1, (qword)address_counter_tgt, 16); - + tgt_past = out_temp1; src_past = src_present; address_counter_src = spu_add(address_counter_src, 16); address_counter_tgt = spu_add(address_counter_tgt, 16); - + } - + src_present = si_lqd((qword)address_counter_src, 16); tgt_present = si_lqd((qword)address_counter_tgt, 16); - - + + in_temp = spu_shuffle(src_present, src_past,(vector unsigned char) shuffle_mask_gamma); qword target_temp = spu_shuffle(tgt_present, tgt_past, (vector unsigned char) shuffle_mask_zeta); qword meld = spu_shuffle(in_temp, target_temp, (vector unsigned char)shuffle_mask_beta); - - + + out_temp0 = spu_shuffle(tgt_past, meld, shuffle_mask_delta); out_temp1 = spu_shuffle(tgt_present, meld, shuffle_mask_epsilon); - + si_stqd(out_temp0, (qword)address_counter_tgt, 0); si_stqd(out_temp1, (qword)address_counter_tgt, 16); return retval; - + } */ /* @@ -286,5 +286,5 @@ int main(){ } printf("\n"); } - + */ diff --git a/volk/spu_lib/spu_memset_unaligned.S b/volk/spu_lib/spu_memset_unaligned.S index a655c4c52..c260a125c 100644 --- a/volk/spu_lib/spu_memset_unaligned.S +++ b/volk/spu_lib/spu_memset_unaligned.S @@ -1,19 +1,19 @@ /* -*- asm -*- */ /* * Copyright 2008 Free Software Foundation, Inc. - * + * * This file is part of GNU Radio - * + * * GNU Radio is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3, or (at your option) * any later version. - * + * * GNU Radio is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * + * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. @@ -33,11 +33,11 @@ * size_t i; * for (i = 0; i < n; i++) * p[i] = c; - * + * * return pv; * } */ - + #define p_arg arg1 // we're going to clobber arg1 w/ the return value #define c arg2 // the constant we're writing #define n arg3 // how many bytes to write @@ -50,16 +50,16 @@ #define an r18 // aligned n (n rounded down to mod 16 boundary) #define next_p r19 #define cond1 r20 -#define cond2 r21 +#define cond2 r21 #define m r22 #define r r23 - + PROC_ENTRY(libvector_memset_unaligned) - + // Hint the return from do_head, in case we go that way. // There's pretty much nothing to can do to hint the branch to it. hbrr do_head_br, head_complete - + MR(p, p_arg) // leaves p, the return value, in the correct reg (r3) BRZ_RETURN(n) @@ -69,11 +69,11 @@ head_complete: /* - * preconditions: + * preconditions: * p%16 == 0, n > 0 */ hbrr middle_loop_br, middle_loop - + ROUND_DOWN(an, n, 16) // an is "aligned n" MODULO(n, n, 16) // what's left over in the last quad brz an, do_tail // no whole quad words; skip to tail @@ -96,20 +96,20 @@ middle_loop: stqd c, 4*16(p) stqd c, 5*16(p) stqd c, 6*16(p) - + MR(p, next_p) stqd c, 7*16-128(next_p) or cond2, n, an middle_loop_br: brnz cond1, middle_loop - + /* - * if an and n are both zero, return now + * if an and n are both zero, return now */ BRZ_RETURN(cond2) /* - * otherwise handle last of full quad words + * otherwise handle last of full quad words * * 0 <= an < 128, p%16 == 0 */ @@ -119,18 +119,18 @@ middle2: */ brz an, do_tail hbrr middle2_loop_br, middle2_loop - + .p2align 3 -middle2_loop: +middle2_loop: ai next_p, p, 16 stqd c, 0(p) ai an, an, -16 LMR(p, next_p) middle2_loop_br: brnz an, middle2_loop - + /* We're done with the full quadwords. */ - + /* * Handle the final partial quadword. * We'll be modifying only the left hand portion of the quad. @@ -146,7 +146,7 @@ do_tail: shlqby mask, mask, t1 selb t0, old, c, mask stqd t0, 0(p) -do_tail_ret: +do_tail_ret: RETURN() /* @@ -176,7 +176,7 @@ do_head: MR(t1, p) sf t0, m, r // t0 = r - m a p, p, m // p += m - rotqby mask, mask, t0 // rotate 0's to the right place + rotqby mask, mask, t0 // rotate 0's to the right place sf n, m, n // n -= m selb t0, c, old, mask // merge stqd t0, 0(t1) |