#include "ext.h"
#include "z_dsp.h"

void *times_class;  // class pointer filled in by setup() in main()
long fts_mode;      // copy of dsp_fts_mode taken in main(); selects the old compatibility behavior

// *~ illustrates platform-safe Altivec optimization

/*
    The idea here is that we can create source code that works on multiple
    platforms at compile time, and works on both Altivec- and non-Altivec
    PowerPC machines at runtime. We do this by isolating Altivec-specific code
    inside #ifdefs and using the routines sys_optimize and sys_altivec, which
    report whether the user can and wants to optimize, to protect non-Altivec
    machines from crashing.

    The conditions for sys_optimize() to return true:

    1. Altivec-capable machine
    2. User has checked Optimize in the DSP Status window
    3. Signal Vector size is a multiple of 4

    Another thing is that, assuming you are going to want to decide whether to
    use optimized code or not, you should only call sys_optimize() in your DSP
    method. Then you can assume that the value will not change until your DSP
    method is called again. In other words, the change of the user optimization
    state is a "triggering event" for a DSP chain recompilation.

    sys_optimize() CANNOT be called in your perform routine because it may
    allocate memory, and in any case it is SLOW, much slower than storing its
    current value in your object or, as is done in the example below, simply
    putting different functions (optimized vs. non-optimized) on the DSP call
    chain depending on what its value is in your DSP method.

    When you want to use something that is specific to the Altivec compiler,
    you need to put it within two things:

    #ifdef __ALTIVEC__

    and

    #pragma altivec_model on

    The first one is true if the compiler is capable of generating Altivec
    code. This could be true WHETHER OR NOT the actual machine running the
    compiler has Altivec instructions!

    The second tells the compiler to start generating these instructions (and
    therefore, understanding the use of the word vector etc.).

    By leaving the Altivec code generation OFF by default in the project
    settings, we eliminate the risk that in the future, Altivec optimization
    might be automatically performed by the compiler that would crash
    non-Altivec machines.

    ALSO note that the only include file you need to write Altivec code etc.
    is z_dsp.h, which includes z_altivec.h. See z_altivec.h for more
    obscurity...
*/

typedef struct _times
{
    t_pxobject x_obj;
    t_float x_val;      // scalar multiplier used when only one inlet has a signal

    // x_vecPtr is a vector that we use to store 4 floats that replicate the
    // scalar that we multiply the signal by. You can multiply 4 x 4 but
    // not 4 x 1, so this saves the time of setting this up in the perform
    // method call each time. It would be easier not to do it this way however.
#ifdef __ALTIVEC__
#pragma altivec_model on
    vector float *x_vecPtr;
#pragma altivec_model off
#endif
} t_times;

void *times_new(double val);
t_int *times_perform(t_int *w);
t_int *scale_perform(t_int *w);
void times_float(t_times *x, double f);
void times_int(t_times *x, long n);
void times_dsp(t_times *x, t_signal **sp, short *count);
void times_assist(t_times *x, void *b, long m, long a, char *s);
void times_free(t_times *x);

// these are our altivec-specific routines; their prototypes are
// only defined if the compiler can generate Altivec code
#ifdef __ALTIVEC__
void times_newAV(t_times *x, double val);
t_int *times_performAV(t_int *w);
t_int *scale_performAV(t_int *w);
void times_floatAV(t_times *x, double f);
void times_intAV(t_times *x, long n);
#endif

// Class initialization: registers the class and binds its messages.
// The float and int methods are only bound when fts_mode is off.
void main(void)
{
    setup((t_messlist **)&times_class, (method)times_new, (method)times_free,
          (short)sizeof(t_times), 0L, A_DEFFLOAT, 0);
    dsp_initclass();
    fts_mode = (long)dsp_fts_mode;
    addmess((method)times_dsp, "dsp", A_CANT, 0);
    if (!fts_mode) {
        addfloat((method)times_float);
        addint((method)times_int);
    }
    addmess((method)times_assist, "assist", A_CANT, 0);
    rescopy('STR#',3227);
}

// Instance creation: two signal inlets, one signal outlet; val is the
// initial scalar multiplier.
void *times_new(double val)
{
    t_times *x = (t_times *)newobject(times_class);

    dsp_setup((t_pxobject *)x,2);
    outlet_new((t_pxobject *)x, "signal");
    x->x_val = val;
#ifdef __ALTIVEC__
    // We want to set up space for our "scalar" vector if the
    // object is running on an Altivec machine, whether or not the
    // user wants Altivec optimization
    if (sys_altivec())
        times_newAV(x,x->x_val);
#endif
    return (x);
}

// Instance destruction: releases the DSP resources and, on Altivec
// machines, the vector allocated by times_newAV.
void times_free(t_times *x)
{
    // An MSP object with its own free routine must call dsp_free itself;
    // do it first so the object is out of the DSP chain before the memory
    // that the perform routines read is released.
    dsp_free((t_pxobject *)x);
#ifdef __ALTIVEC__
#pragma altivec_model on
    // here we need to turn the altivec model on because we want to pass
    // vector float to the compile-time sizeof operator
    if (sys_altivec())
        freebytes16((char *)x->x_vecPtr,sizeof (vector float));
#pragma altivec_model off
#endif
}

// this routine covers both inlets. It doesn't matter which one is involved
void times_float(t_times *x, double f)
{
    x->x_val = f;
#ifdef __ALTIVEC__
    // if user wants to optimize, store the new float value into our vector.
    // The test lives inside the #ifdef so that a build without Altivec does
    // not end up with an if statement that has no body.
    if (sys_optimize())
        times_floatAV(x,f);
#endif
}

// An int in either inlet is handled exactly like the equivalent float.
void times_int(t_times *x, long n)
{
    times_float(x,(double)n);
}

// here are the two non-vector-optimized perform routines

// Multiplies two signals sample by sample.
// w[1] = address of the object's disabled flag, w[2]/w[3] = inputs,
// w[4] = output, w[5] = count. times_dsp passes each buffer pointer minus one
// and the vector size plus one, which is what the pre-increment / pre-decrement
// loop below expects.
t_int *times_perform(t_int *w)
{
    t_float *in1,*in2,*out;
    int n;

    if (*(long *)(w[1]))
        return (w + 6);

    in1 = (t_float *)(w[2]);
    in2 = (t_float *)(w[3]);
    out = (t_float *)(w[4]);
    n = (int)(w[5]);

    while (--n)
        *++out = *++in1 * *++in2;

    return (w + 6);
}

// Multiplies one signal by the scalar stored in the object.
// w[1] = input, w[2] = output, w[3] = object, w[4] = count; the pointers and
// the count are offset by one in the same way as for times_perform.
t_int *scale_perform(t_int *w)
{
    t_float *in, *out;
    float val;
    int n;
    t_times *x = (t_times *)(w[3]);

    if (x->x_obj.z_disabled)
        return (w + 5);

    in = (t_float *)(w[1]);
    out = (t_float *)(w[2]);
    val = x->x_val;
    n = (int)(w[4]);

    while (--n)
        *++out = val * *++in;

    return (w + 5);
}

#ifdef __ALTIVEC__
#pragma altivec_model on    // turn AltiVec code generation on

void times_newAV(t_times *x, double val)
{
    // Notes: getbytes16 and freebytes16 are new Max support calls like getbytes and freebytes but return memory
    // allocated on 16-byte boundaries
    // vec_splat_float is an Altivec routine that copies a scalar to all four locations in a vector float

    // why dynamically allocate just one vector? because it will
    // be aligned correctly this way. Just adding vector variables
    // to your object's struct doesn't guarantee alignment.
    x->x_vecPtr = (vector float *)getbytes16(sizeof (vector float));
    vec_splat_float(*(x->x_vecPtr),val);
}

void times_floatAV(t_times *x, double f)
{
    // copy the new scalar to the vector
    vec_splat_float(*(x->x_vecPtr),f);
}

void times_intAV(t_times *x, long n)
{
    // same as times_floatAV, starting from an integer value
    vec_splat_float(*(x->x_vecPtr),(float)n);
}

// here is an Altivec-optimized routine that multiplies two signals together, producing a third signal
// vec_madd(a,b,c) computes result = (a * b) + c, so we need a zero vector that we initialize locally
// we think this doesn't take a significant amount of additional time
// The arguments are laid out as for times_perform, except that the pointers
// are offset by one whole vector and w[5] is the number of vectors plus one.
t_int *times_performAV(t_int *w)
{
    vector float *v_in1, *v_in2, *v_out, zero = (vector float) (0, 0, 0, 0);
    unsigned int n = (int)(w[5]);

    if (*(long *)(w[1]))
        return (w + 6);

    v_in1 = (vector float *)(w[2]);
    v_in2 = (vector float *)(w[3]);
    v_out = (vector float *)(w[4]);

    while (--n) {
        *++v_out = vec_madd(*++v_in1, *++v_in2, zero);
    }

    return (w + 6);
}

// here is an Altivec-optimized routine that multiplies a signal by a scalar
// vec_madd is used as above, but the val argument refers to the vector
// we stored in our object
// The arguments are laid out as for scale_perform, except that the pointers
// are offset by one whole vector and w[4] is the number of vectors plus one.
t_int *scale_performAV(t_int *w)
{
    t_times *x = (t_times *)(w[3]);
    int n = (int)(w[4]);
    vector float *v_in, *v_out, val, zero = (vector float) (0, 0, 0, 0);

    if (x->x_obj.z_disabled)
        return (w + 5);

    v_in = (vector float *)(w[1]);
    v_out = (vector float *)(w[2]);
    val = *x->x_vecPtr;

    while (--n) {
        *++v_out = vec_madd(*++v_in, val, zero);
    }

    return (w + 5);
}

#pragma altivec_model off   // turn AltiVec code generation off
#endif

// Assistance text comes from the STR# resource copied in main(); a different
// first string index is used in fts_mode.
void times_assist(t_times *x, void *b, long m, long a, char *s)
{
    assist_string(3227,m,a,fts_mode? 4 : 1,3,s);
}

// DSP method: puts the appropriate perform routine on the DSP chain.
// count[0] / count[1] tell us whether a signal is connected to the left /
// right inlet; sp[0] and sp[1] are the inputs and sp[2] is the output.
void times_dsp(t_times *x, t_signal **sp, short *count)
{
    if (fts_mode) {
        // old compatibility code
        dsp_add(times_perform, 5, &x->x_obj.z_disabled, sp[0]->s_vec-1, sp[1]->s_vec-1, sp[2]->s_vec-1, sp[0]->s_n+1);
        return;
    }

    if (sys_optimize()) {
#ifdef __ALTIVEC__
#pragma altivec_model on
        // we need to do this in case the scalar was stored with optimization off
        // and we are just now turning optimization on. See I told you using this
        // vector was tricky...
        times_floatAV(x,x->x_val);  // splat the vector

        // if optimization is on, we are going to use one of our two optimized
        // perform routines. We pick the proper one based on whether signals
        // are connected to both inlets. If they are, we use times_performAV,
        // otherwise we will be multiplying by a scalar
        if (!count[0])
            dsp_add(scale_performAV, 4, sp[1]->s_vec-vec_step(vector float), sp[2]->s_vec-vec_step(vector float), x, (sp[0]->s_n / 4)+1);
        else if (!count[1])
            dsp_add(scale_performAV, 4, sp[0]->s_vec-vec_step(vector float), sp[2]->s_vec-vec_step(vector float), x, (sp[0]->s_n / 4)+1);
        else {
            dsp_add(times_performAV, 5, &x->x_obj.z_disabled, sp[0]->s_vec-vec_step(vector float), sp[1]->s_vec-vec_step(vector float),
                sp[2]->s_vec-vec_step(vector float), (sp[0]->s_n / 4)+1);
        }
#pragma altivec_model off
        return;
#else
        // this error would be generated at runtime if the code were to be compiled on
        // a compiler or machine without ALTIVEC and for some reason sys_optimize returned true
        error("*~: no optimizations available");
        // fall through to the normal code below so the object still produces
        // output instead of adding nothing to the DSP chain
#endif
    }

    // this is the normal *~ code
    if (!count[0])
        dsp_add(scale_perform, 4, sp[1]->s_vec-1, sp[2]->s_vec-1, x, sp[0]->s_n+1);
    else if (!count[1])
        dsp_add(scale_perform, 4, sp[0]->s_vec-1, sp[2]->s_vec-1, x, sp[0]->s_n+1);
    else
        dsp_add(times_perform, 5, &x->x_obj.z_disabled, sp[0]->s_vec-1, sp[1]->s_vec-1, sp[2]->s_vec-1, sp[0]->s_n+1);
}