  | 
  
    Octopus
    
   | 
 
 
 
 
Go to the documentation of this file.
   /* 512-bit double-precision macro set: VEC_TYPE is __m512d and every VEC_*
    * macro below is a thin alias for the _mm512_*_pd intrinsic named in its
    * expansion.
    *   VEC_LD / VEC_LDU    -> _mm512_load_pd / _mm512_loadu_pd
    *   VEC_ST / VEC_STU    -> _mm512_stream_pd / _mm512_storeu_pd
    *   VEC_FMA(aa, bb, cc) -> _mm512_fmadd_pd(aa, bb, cc)
    *   VEC_SCAL(aa)        -> _mm512_set1_pd(aa)
    *   VEC_ZERO            -> _mm512_setzero_pd()
    * NOTE(review): VEC_ST maps to the "stream" store rather than
    * _mm512_store_pd; presumably a non-temporal store that needs an aligned
    * address -- confirm against the Intel intrinsics documentation and the
    * callers.
    * NOTE(review): this listing skips the lines between the end of the
    * feature-test condition and the first definition, so the directive that
    * actually guards these definitions is not visible here -- confirm against
    * the full header. */
   29#if defined(__AVX512F__) || defined(__AVX512PF__) || \ 
   30    defined(__AVX512BW__) || defined(__AVX512ER__) || \ 
   31    defined(__AVX512CD__) || defined(__AVX512DQ__) || \ 
   37#define VEC_TYPE __m512d 
   38#define VEC_LD(addr) _mm512_load_pd(addr) 
   39#define VEC_LDU(addr) _mm512_loadu_pd(addr) 
   40#define VEC_ST(addr, vec) _mm512_stream_pd(addr, vec) 
   41#define VEC_STU(addr, vec) _mm512_storeu_pd(addr, vec) 
   42#define VEC_FMA(aa, bb, cc) _mm512_fmadd_pd(aa, bb, cc) 
   43#define VEC_SCAL(aa) _mm512_set1_pd(aa) 
   44#define VEC_ZERO _mm512_setzero_pd() 
   /* FENCE expands to _mm_mfence().
    * NOTE(review): presumably paired with the streaming VEC_ST above; the use
    * sites are not in this view. */
   46#define FENCE _mm_mfence() 
   /* 256-bit double-precision macro set, selected when __AVX2__ is defined:
    * VEC_TYPE is __m256d and the VEC_* macros alias the _mm256_*_pd intrinsics
    * named in their expansions (VEC_ST uses _mm256_stream_pd, VEC_STU uses
    * _mm256_storeu_pd).
    * NOTE(review): the lines that follow the __FMA4__/__FMA__ test are missing
    * from this listing, so what that test controls is not visible here. */
   50#elif defined(__AVX2__) 
   54#if defined(__FMA4__) || defined(__FMA__) 
   58#define VEC_TYPE __m256d 
   59#define VEC_LD(addr) _mm256_load_pd(addr) 
   60#define VEC_LDU(addr) _mm256_loadu_pd(addr) 
   61#define VEC_ST(addr, vec) _mm256_stream_pd(addr, vec) 
   62#define VEC_STU(addr, vec) _mm256_storeu_pd(addr, vec) 
   /* Three alternative definitions of VEC_FMA(aa, bb, cc), all computing
    * aa * bb + cc: _mm256_macc_pd, _mm256_fmadd_pd, and a separate
    * multiply-then-add fallback.
    * NOTE(review): the preprocessor lines that pick exactly one of them are
    * missing from this listing; presumably keyed on __FMA4__ / __FMA__ --
    * confirm against the full header. */
   64#define VEC_FMA(aa, bb, cc) _mm256_macc_pd(aa, bb, cc) 
   66#define VEC_FMA(aa, bb, cc) _mm256_fmadd_pd(aa, bb, cc) 
   68#define VEC_FMA(aa, bb, cc) _mm256_add_pd(cc, _mm256_mul_pd(aa, bb)) 
   /* Broadcast of one scalar to all lanes, and the all-zero vector. */
   70#define VEC_SCAL(aa) _mm256_set1_pd(aa) 
   71#define VEC_ZERO _mm256_setzero_pd() 
   /* FENCE expands to _mm_mfence(), as in the other x86 macro sets. */
   73#define FENCE _mm_mfence() 
   /* 128-bit double-precision macro set: VEC_TYPE is __m128d and the VEC_*
    * macros alias the _mm_*_pd intrinsics named in their expansions (VEC_ST
    * uses _mm_stream_pd, VEC_STU uses _mm_storeu_pd).
    * NOTE(review): the directive that selects this branch, and the lines that
    * follow the __FMA4__/__FMA__ test, are missing from this listing --
    * confirm against the full header. */
   81#if defined(__FMA4__) || defined(__FMA__) 
   85#define VEC_TYPE __m128d 
   86#define VEC_LD(addr) _mm_load_pd(addr) 
   87#define VEC_LDU(addr) _mm_loadu_pd(addr) 
   88#define VEC_ST(addr, vec) _mm_stream_pd(addr, vec) 
   89#define VEC_STU(addr, vec) _mm_storeu_pd(addr, vec) 
   /* Three alternative definitions of VEC_FMA(aa, bb, cc), all computing
    * aa * bb + cc: _mm_macc_pd, _mm_fmadd_pd, and a separate
    * multiply-then-add fallback.
    * NOTE(review): the preprocessor lines that pick exactly one of them are
    * missing from this listing; presumably keyed on __FMA4__ / __FMA__ --
    * confirm against the full header. */
   91#define VEC_FMA(aa, bb, cc) _mm_macc_pd(aa, bb, cc) 
   93#define VEC_FMA(aa, bb, cc) _mm_fmadd_pd(aa, bb, cc) 
   95#define VEC_FMA(aa, bb, cc) _mm_add_pd(cc, _mm_mul_pd(aa, bb)) 
   /* Broadcast of one scalar to both lanes, the all-zero vector, and the
    * memory fence (_mm_mfence). */
   97#define VEC_SCAL(aa) _mm_set1_pd(aa) 
   98#define VEC_ZERO _mm_setzero_pd() 
   99#define FENCE _mm_mfence() 
  /* Four-lane double-precision macro set built on the vector4double type and
   * the vec_* built-ins (these look like the IBM XL QPX built-ins -- confirm).
   *
   *   VEC_LD(addr)        vec_ld(0, addr): load through the built-in.
   *   VEC_LDU(addr)       element-wise load of addr[0..3] into a compound
   *                       literal; `addr` is evaluated four times.
   *   VEC_ST(addr, vec)   vec_st(vec, 0, addr): store through the built-in.
   *   VEC_STU(addr, vec)  element-wise store of the four lanes to addr[0..3];
   *                       `addr` and `vec` are each evaluated four times.
   *   VEC_FMA(aa, bb, cc) vec_madd(aa, bb, cc).
   *   VEC_SCAL(aa)        vector with `aa` in all four lanes (`aa` is
   *                       evaluated four times).
   *   VEC_SCAL_LD(addr)   vec_lds(0, addr).
   *   VEC_ZERO            vector of four 0.0 values.
   *
   * VEC_STU is wrapped in do { } while (0) so that it behaves as a single
   * statement: as a bare list of four statements, `if (c) VEC_STU(a, v);`
   * would guard only the first store and a following `else` would not
   * compile. Use it as a statement, terminated by `;`. */
  #define VEC_TYPE vector4double
  #define VEC_LD(addr) vec_ld(0, (double *)(addr))
  #define VEC_LDU(addr)                                                        \
    ((vector4double){(addr)[0], (addr)[1], (addr)[2], (addr)[3]})
  #define VEC_ST(addr, vec) vec_st(vec, 0, (double *)(addr))
  #define VEC_STU(addr, vec)                                                   \
    do {                                                                       \
      (addr)[0] = vec_extract(vec, 0);                                         \
      (addr)[1] = vec_extract(vec, 1);                                         \
      (addr)[2] = vec_extract(vec, 2);                                         \
      (addr)[3] = vec_extract(vec, 3);                                         \
    } while (0)
  #define VEC_FMA(aa, bb, cc) vec_madd(aa, bb, cc)
  #define VEC_SCAL(aa) ((vector4double){(aa), (aa), (aa), (aa)})
  #define VEC_SCAL_LD(addr) vec_lds(0, (double *)(addr))
  #define VEC_ZERO ((vector4double){0.0, 0.0, 0.0, 0.0})
  /* Two-lane macro set that packs a pair of doubles into a `double _Complex`:
   * lane 0 is the real part and lane 1 the imaginary part (see VEC_LDU and
   * VEC_STU). The __lfpd / __stfpd / __fpmadd / __cmplx / __creal / __cimag
   * names look like IBM XL compiler built-ins -- confirm.
   *
   *   VEC_LD(addr)        __lfpd(addr).
   *   VEC_LDU(addr)       __cmplx(addr[0], addr[1]); `addr` evaluated twice.
   *   VEC_ST(addr, vec)   __stfpd(addr, vec).
   *   VEC_STU(addr, vec)  addr[0] = real part, addr[1] = imaginary part;
   *                       `addr` and `vec` are each evaluated twice.
   *   VEC_FMA(aa, bb, cc) __fpmadd(cc, aa, bb). NOTE(review): the addend is
   *                       passed first, unlike the other macro sets --
   *                       presumably the built-in's own argument order;
   *                       confirm against its documentation.
   *   VEC_SCAL(aa)        __cmplx(aa, aa): `aa` in both lanes.
   *   VEC_ZERO            __cmplx(0.0, 0.0).
   *
   * VEC_STU is wrapped in do { } while (0) so that it behaves as a single
   * statement: as a bare pair of statements, `if (c) VEC_STU(a, v);` would
   * guard only the first store and a following `else` would not compile.
   * Use it as a statement, terminated by `;`. */
  #define VEC_TYPE double _Complex
  #define VEC_LD(addr) __lfpd(addr)
  #define VEC_LDU(addr) __cmplx((addr)[0], (addr)[1])
  #define VEC_ST(addr, vec) __stfpd(addr, vec)
  #define VEC_STU(addr, vec)                                                   \
    do {                                                                       \
      (addr)[0] = __creal(vec);                                                \
      (addr)[1] = __cimag(vec);                                                \
    } while (0)
  #define VEC_FMA(aa, bb, cc) __fpmadd(cc, aa, bb)
  #define VEC_SCAL(aa) __cmplx((aa), (aa))
  #define VEC_ZERO __cmplx(0.0, 0.0)
  /* Scalar fallback: a "vector" is a single double, so every VEC_* macro
   * reduces to plain C on one element.
   *
   *   VEC_LD(addr) / VEC_LDU(addr)           read addr[0] (still an lvalue).
   *   VEC_ST(addr, vec) / VEC_STU(addr, vec) assign vec to addr[0].
   *   VEC_FMA(aa, bb, cc)                    aa * bb + cc.
   *   VEC_SCAL(aa)                           aa itself.
   *
   * Every argument and every expansion is parenthesised so the macros keep
   * their meaning when an argument is a compound expression or when the
   * result is used inside a larger expression. Without the parentheses,
   * VEC_FMA(a + b, c, d) would expand to a + b * c + d, and
   * 2 * VEC_FMA(a, b, c) to 2 * a * b + c. */
  #define VEC_TYPE double
  #define VEC_LD(addr) ((addr)[0])
  #define VEC_LDU(addr) VEC_LD(addr)
  #define VEC_ST(addr, vec) ((addr)[0] = (vec))
  #define VEC_STU(addr, vec) VEC_ST(addr, vec)
  #define VEC_FMA(aa, bb, cc) ((aa) * (bb) + (cc))
  #define VEC_SCAL(aa) (aa)
  /* max1(x): yields x when x is greater than zero and 1 otherwise, i.e. the
   * result is never below 1 for integer arguments. `x` is evaluated twice when
   * it is positive and once otherwise, so do not pass an expression with side
   * effects. */
  #define max1(x) (!((x) > 0) ? 1 : (x))