symbian-qemu-0.9.1-12/qemu-symbian-svp/target-arm/neon_helper.c
changeset 1 2fb8b9db1c86
equal deleted inserted replaced
0:ffa851df0825 1:2fb8b9db1c86
       
     1 /*
       
     2  * ARM NEON vector operations.
       
     3  *
       
     4  * Copyright (c) 2007, 2008 CodeSourcery.
       
     5  * Written by Paul Brook
       
     6  *
       
     7  * This code is licenced under the GNU GPL v2.
       
     8  */
       
     9 #include <stdlib.h>
       
    10 #include <stdio.h>
       
    11 
       
    12 #include "cpu.h"
       
    13 #include "exec-all.h"
       
    14 #include "helpers.h"
       
    15 
       
    16 #define SIGNBIT (uint32_t)0x80000000
       
    17 #define SIGNBIT64 ((uint64_t)1 << 63)
       
    18 
       
    19 #define SET_QC() env->vfp.xregs[ARM_VFP_FPSCR] = CPSR_Q
       
    20 
       
    21 static float_status neon_float_status;
       
    22 #define NFS &neon_float_status
       
    23 
       
    24 /* Helper routines to perform bitwise copies between float and int.  */
       
    25 static inline float32 vfp_itos(uint32_t i)
       
    26 {
       
    27     union {
       
    28         uint32_t i;
       
    29         float32 s;
       
    30     } v;
       
    31 
       
    32     v.i = i;
       
    33     return v.s;
       
    34 }
       
    35 
       
    36 static inline uint32_t vfp_stoi(float32 s)
       
    37 {
       
    38     union {
       
    39         uint32_t i;
       
    40         float32 s;
       
    41     } v;
       
    42 
       
    43     v.s = s;
       
    44     return v.i;
       
    45 }
       
    46 
       
    47 #define NEON_TYPE1(name, type) \
       
    48 typedef struct \
       
    49 { \
       
    50     type v1; \
       
    51 } neon_##name;
       
    52 #ifdef WORDS_BIGENDIAN
       
    53 #define NEON_TYPE2(name, type) \
       
    54 typedef struct \
       
    55 { \
       
    56     type v2; \
       
    57     type v1; \
       
    58 } neon_##name;
       
    59 #define NEON_TYPE4(name, type) \
       
    60 typedef struct \
       
    61 { \
       
    62     type v4; \
       
    63     type v3; \
       
    64     type v2; \
       
    65     type v1; \
       
    66 } neon_##name;
       
    67 #else
       
    68 #define NEON_TYPE2(name, type) \
       
    69 typedef struct \
       
    70 { \
       
    71     type v1; \
       
    72     type v2; \
       
    73 } neon_##name;
       
    74 #define NEON_TYPE4(name, type) \
       
    75 typedef struct \
       
    76 { \
       
    77     type v1; \
       
    78     type v2; \
       
    79     type v3; \
       
    80     type v4; \
       
    81 } neon_##name;
       
    82 #endif
       
    83 
       
    84 NEON_TYPE4(s8, int8_t)
       
    85 NEON_TYPE4(u8, uint8_t)
       
    86 NEON_TYPE2(s16, int16_t)
       
    87 NEON_TYPE2(u16, uint16_t)
       
    88 NEON_TYPE1(s32, int32_t)
       
    89 NEON_TYPE1(u32, uint32_t)
       
    90 #undef NEON_TYPE4
       
    91 #undef NEON_TYPE2
       
    92 #undef NEON_TYPE1
       
    93 
       
    94 /* Copy from a uint32_t to a vector structure type.  */
       
    95 #define NEON_UNPACK(vtype, dest, val) do { \
       
    96     union { \
       
    97         vtype v; \
       
    98         uint32_t i; \
       
    99     } conv_u; \
       
   100     conv_u.i = (val); \
       
   101     dest = conv_u.v; \
       
   102     } while(0)
       
   103 
       
   104 /* Copy from a vector structure type to a uint32_t.  */
       
   105 #define NEON_PACK(vtype, dest, val) do { \
       
   106     union { \
       
   107         vtype v; \
       
   108         uint32_t i; \
       
   109     } conv_u; \
       
   110     conv_u.v = (val); \
       
   111     dest = conv_u.i; \
       
   112     } while(0)
       
   113 
       
   114 #define NEON_DO1 \
       
   115     NEON_FN(vdest.v1, vsrc1.v1, vsrc2.v1);
       
   116 #define NEON_DO2 \
       
   117     NEON_FN(vdest.v1, vsrc1.v1, vsrc2.v1); \
       
   118     NEON_FN(vdest.v2, vsrc1.v2, vsrc2.v2);
       
   119 #define NEON_DO4 \
       
   120     NEON_FN(vdest.v1, vsrc1.v1, vsrc2.v1); \
       
   121     NEON_FN(vdest.v2, vsrc1.v2, vsrc2.v2); \
       
   122     NEON_FN(vdest.v3, vsrc1.v3, vsrc2.v3); \
       
   123     NEON_FN(vdest.v4, vsrc1.v4, vsrc2.v4);
       
   124 
       
   125 #define NEON_VOP_BODY(vtype, n) \
       
   126 { \
       
   127     uint32_t res; \
       
   128     vtype vsrc1; \
       
   129     vtype vsrc2; \
       
   130     vtype vdest; \
       
   131     NEON_UNPACK(vtype, vsrc1, arg1); \
       
   132     NEON_UNPACK(vtype, vsrc2, arg2); \
       
   133     NEON_DO##n; \
       
   134     NEON_PACK(vtype, res, vdest); \
       
   135     return res; \
       
   136 }
       
   137 
       
   138 #define NEON_VOP(name, vtype, n) \
       
   139 uint32_t HELPER(glue(neon_,name))(uint32_t arg1, uint32_t arg2) \
       
   140 NEON_VOP_BODY(vtype, n)
       
   141 
       
   142 #define NEON_VOP_ENV(name, vtype, n) \
       
   143 uint32_t HELPER(glue(neon_,name))(CPUState *env, uint32_t arg1, uint32_t arg2) \
       
   144 NEON_VOP_BODY(vtype, n)
       
   145 
       
   146 /* Pairwise operations.  */
       
   147 /* For 32-bit elements each segment only contains a single element, so
       
   148    the elementwise and pairwise operations are the same.  */
       
   149 #define NEON_PDO2 \
       
   150     NEON_FN(vdest.v1, vsrc1.v1, vsrc1.v2); \
       
   151     NEON_FN(vdest.v2, vsrc2.v1, vsrc2.v2);
       
   152 #define NEON_PDO4 \
       
   153     NEON_FN(vdest.v1, vsrc1.v1, vsrc1.v2); \
       
   154     NEON_FN(vdest.v2, vsrc1.v3, vsrc1.v4); \
       
   155     NEON_FN(vdest.v3, vsrc2.v1, vsrc2.v2); \
       
   156     NEON_FN(vdest.v4, vsrc2.v3, vsrc2.v4); \
       
   157 
       
   158 #define NEON_POP(name, vtype, n) \
       
   159 uint32_t HELPER(glue(neon_,name))(uint32_t arg1, uint32_t arg2) \
       
   160 { \
       
   161     uint32_t res; \
       
   162     vtype vsrc1; \
       
   163     vtype vsrc2; \
       
   164     vtype vdest; \
       
   165     NEON_UNPACK(vtype, vsrc1, arg1); \
       
   166     NEON_UNPACK(vtype, vsrc2, arg2); \
       
   167     NEON_PDO##n; \
       
   168     NEON_PACK(vtype, res, vdest); \
       
   169     return res; \
       
   170 }
       
   171 
       
   172 /* Unary operators.  */
       
   173 #define NEON_VOP1(name, vtype, n) \
       
   174 uint32_t HELPER(glue(neon_,name))(uint32_t arg) \
       
   175 { \
       
   176     vtype vsrc1; \
       
   177     vtype vdest; \
       
   178     NEON_UNPACK(vtype, vsrc1, arg); \
       
   179     NEON_DO##n; \
       
   180     NEON_PACK(vtype, arg, vdest); \
       
   181     return arg; \
       
   182 }
       
   183 
       
   184 
       
   185 #define NEON_USAT(dest, src1, src2, type) do { \
       
   186     uint32_t tmp = (uint32_t)src1 + (uint32_t)src2; \
       
   187     if (tmp != (type)tmp) { \
       
   188         SET_QC(); \
       
   189         dest = ~0; \
       
   190     } else { \
       
   191         dest = tmp; \
       
   192     }} while(0)
       
   193 #define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint8_t)
       
   194 NEON_VOP_ENV(qadd_u8, neon_u8, 4)
       
   195 #undef NEON_FN
       
   196 #define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint16_t)
       
   197 NEON_VOP_ENV(qadd_u16, neon_u16, 2)
       
   198 #undef NEON_FN
       
   199 #undef NEON_USAT
       
   200 
       
   201 #define NEON_SSAT(dest, src1, src2, type) do { \
       
   202     int32_t tmp = (uint32_t)src1 + (uint32_t)src2; \
       
   203     if (tmp != (type)tmp) { \
       
   204         SET_QC(); \
       
   205         if (src2 > 0) { \
       
   206             tmp = (1 << (sizeof(type) * 8 - 1)) - 1; \
       
   207         } else { \
       
   208             tmp = 1 << (sizeof(type) * 8 - 1); \
       
   209         } \
       
   210     } \
       
   211     dest = tmp; \
       
   212     } while(0)
       
   213 #define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int8_t)
       
   214 NEON_VOP_ENV(qadd_s8, neon_s8, 4)
       
   215 #undef NEON_FN
       
   216 #define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int16_t)
       
   217 NEON_VOP_ENV(qadd_s16, neon_s16, 2)
       
   218 #undef NEON_FN
       
   219 #undef NEON_SSAT
       
   220 
       
   221 #define NEON_USAT(dest, src1, src2, type) do { \
       
   222     uint32_t tmp = (uint32_t)src1 - (uint32_t)src2; \
       
   223     if (tmp != (type)tmp) { \
       
   224         SET_QC(); \
       
   225         dest = 0; \
       
   226     } else { \
       
   227         dest = tmp; \
       
   228     }} while(0)
       
   229 #define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint8_t)
       
   230 NEON_VOP_ENV(qsub_u8, neon_u8, 4)
       
   231 #undef NEON_FN
       
   232 #define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint16_t)
       
   233 NEON_VOP_ENV(qsub_u16, neon_u16, 2)
       
   234 #undef NEON_FN
       
   235 #undef NEON_USAT
       
   236 
       
   237 #define NEON_SSAT(dest, src1, src2, type) do { \
       
   238     int32_t tmp = (uint32_t)src1 - (uint32_t)src2; \
       
   239     if (tmp != (type)tmp) { \
       
   240         SET_QC(); \
       
   241         if (src2 < 0) { \
       
   242             tmp = (1 << (sizeof(type) * 8 - 1)) - 1; \
       
   243         } else { \
       
   244             tmp = 1 << (sizeof(type) * 8 - 1); \
       
   245         } \
       
   246     } \
       
   247     dest = tmp; \
       
   248     } while(0)
       
   249 #define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int8_t)
       
   250 NEON_VOP_ENV(qsub_s8, neon_s8, 4)
       
   251 #undef NEON_FN
       
   252 #define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int16_t)
       
   253 NEON_VOP_ENV(qsub_s16, neon_s16, 2)
       
   254 #undef NEON_FN
       
   255 #undef NEON_SSAT
       
   256 
       
   257 #define NEON_FN(dest, src1, src2) dest = (src1 + src2) >> 1
       
   258 NEON_VOP(hadd_s8, neon_s8, 4)
       
   259 NEON_VOP(hadd_u8, neon_u8, 4)
       
   260 NEON_VOP(hadd_s16, neon_s16, 2)
       
   261 NEON_VOP(hadd_u16, neon_u16, 2)
       
   262 #undef NEON_FN
       
   263 
       
   264 int32_t HELPER(neon_hadd_s32)(int32_t src1, int32_t src2)
       
   265 {
       
   266     int32_t dest;
       
   267 
       
   268     dest = (src1 >> 1) + (src2 >> 1);
       
   269     if (src1 & src2 & 1)
       
   270         dest++;
       
   271     return dest;
       
   272 }
       
   273 
       
   274 uint32_t HELPER(neon_hadd_u32)(uint32_t src1, uint32_t src2)
       
   275 {
       
   276     uint32_t dest;
       
   277 
       
   278     dest = (src1 >> 1) + (src2 >> 1);
       
   279     if (src1 & src2 & 1)
       
   280         dest++;
       
   281     return dest;
       
   282 }
       
   283 
       
   284 #define NEON_FN(dest, src1, src2) dest = (src1 + src2 + 1) >> 1
       
   285 NEON_VOP(rhadd_s8, neon_s8, 4)
       
   286 NEON_VOP(rhadd_u8, neon_u8, 4)
       
   287 NEON_VOP(rhadd_s16, neon_s16, 2)
       
   288 NEON_VOP(rhadd_u16, neon_u16, 2)
       
   289 #undef NEON_FN
       
   290 
       
   291 int32_t HELPER(neon_rhadd_s32)(int32_t src1, int32_t src2)
       
   292 {
       
   293     int32_t dest;
       
   294 
       
   295     dest = (src1 >> 1) + (src2 >> 1);
       
   296     if ((src1 | src2) & 1)
       
   297         dest++;
       
   298     return dest;
       
   299 }
       
   300 
       
   301 uint32_t HELPER(neon_rhadd_u32)(uint32_t src1, uint32_t src2)
       
   302 {
       
   303     uint32_t dest;
       
   304 
       
   305     dest = (src1 >> 1) + (src2 >> 1);
       
   306     if ((src1 | src2) & 1)
       
   307         dest++;
       
   308     return dest;
       
   309 }
       
   310 
       
   311 #define NEON_FN(dest, src1, src2) dest = (src1 - src2) >> 1
       
   312 NEON_VOP(hsub_s8, neon_s8, 4)
       
   313 NEON_VOP(hsub_u8, neon_u8, 4)
       
   314 NEON_VOP(hsub_s16, neon_s16, 2)
       
   315 NEON_VOP(hsub_u16, neon_u16, 2)
       
   316 #undef NEON_FN
       
   317 
       
   318 int32_t HELPER(neon_hsub_s32)(int32_t src1, int32_t src2)
       
   319 {
       
   320     int32_t dest;
       
   321 
       
   322     dest = (src1 >> 1) - (src2 >> 1);
       
   323     if ((~src1) & src2 & 1)
       
   324         dest--;
       
   325     return dest;
       
   326 }
       
   327 
       
   328 uint32_t HELPER(neon_hsub_u32)(uint32_t src1, uint32_t src2)
       
   329 {
       
   330     uint32_t dest;
       
   331 
       
   332     dest = (src1 >> 1) - (src2 >> 1);
       
   333     if ((~src1) & src2 & 1)
       
   334         dest--;
       
   335     return dest;
       
   336 }
       
   337 
       
   338 #define NEON_FN(dest, src1, src2) dest = (src1 > src2) ? ~0 : 0
       
   339 NEON_VOP(cgt_s8, neon_s8, 4)
       
   340 NEON_VOP(cgt_u8, neon_u8, 4)
       
   341 NEON_VOP(cgt_s16, neon_s16, 2)
       
   342 NEON_VOP(cgt_u16, neon_u16, 2)
       
   343 NEON_VOP(cgt_s32, neon_s32, 1)
       
   344 NEON_VOP(cgt_u32, neon_u32, 1)
       
   345 #undef NEON_FN
       
   346 
       
   347 #define NEON_FN(dest, src1, src2) dest = (src1 >= src2) ? ~0 : 0
       
   348 NEON_VOP(cge_s8, neon_s8, 4)
       
   349 NEON_VOP(cge_u8, neon_u8, 4)
       
   350 NEON_VOP(cge_s16, neon_s16, 2)
       
   351 NEON_VOP(cge_u16, neon_u16, 2)
       
   352 NEON_VOP(cge_s32, neon_s32, 1)
       
   353 NEON_VOP(cge_u32, neon_u32, 1)
       
   354 #undef NEON_FN
       
   355 
       
   356 #define NEON_FN(dest, src1, src2) dest = (src1 < src2) ? src1 : src2
       
   357 NEON_VOP(min_s8, neon_s8, 4)
       
   358 NEON_VOP(min_u8, neon_u8, 4)
       
   359 NEON_VOP(min_s16, neon_s16, 2)
       
   360 NEON_VOP(min_u16, neon_u16, 2)
       
   361 NEON_VOP(min_s32, neon_s32, 1)
       
   362 NEON_VOP(min_u32, neon_u32, 1)
       
   363 NEON_POP(pmin_s8, neon_s8, 4)
       
   364 NEON_POP(pmin_u8, neon_u8, 4)
       
   365 NEON_POP(pmin_s16, neon_s16, 2)
       
   366 NEON_POP(pmin_u16, neon_u16, 2)
       
   367 #undef NEON_FN
       
   368 
       
   369 #define NEON_FN(dest, src1, src2) dest = (src1 > src2) ? src1 : src2
       
   370 NEON_VOP(max_s8, neon_s8, 4)
       
   371 NEON_VOP(max_u8, neon_u8, 4)
       
   372 NEON_VOP(max_s16, neon_s16, 2)
       
   373 NEON_VOP(max_u16, neon_u16, 2)
       
   374 NEON_VOP(max_s32, neon_s32, 1)
       
   375 NEON_VOP(max_u32, neon_u32, 1)
       
   376 NEON_POP(pmax_s8, neon_s8, 4)
       
   377 NEON_POP(pmax_u8, neon_u8, 4)
       
   378 NEON_POP(pmax_s16, neon_s16, 2)
       
   379 NEON_POP(pmax_u16, neon_u16, 2)
       
   380 #undef NEON_FN
       
   381 
       
   382 #define NEON_FN(dest, src1, src2) \
       
   383     dest = (src1 > src2) ? (src1 - src2) : (src2 - src1)
       
   384 NEON_VOP(abd_s8, neon_s8, 4)
       
   385 NEON_VOP(abd_u8, neon_u8, 4)
       
   386 NEON_VOP(abd_s16, neon_s16, 2)
       
   387 NEON_VOP(abd_u16, neon_u16, 2)
       
   388 NEON_VOP(abd_s32, neon_s32, 1)
       
   389 NEON_VOP(abd_u32, neon_u32, 1)
       
   390 #undef NEON_FN
       
   391 
       
   392 #define NEON_FN(dest, src1, src2) do { \
       
   393     int8_t tmp; \
       
   394     tmp = (int8_t)src2; \
       
   395     if (abs(tmp) >= sizeof(src1) * 8) { \
       
   396         dest = 0; \
       
   397     } else if (tmp < 0) { \
       
   398         dest = src1 >> -tmp; \
       
   399     } else { \
       
   400         dest = src1 << tmp; \
       
   401     }} while (0)
       
   402 NEON_VOP(shl_u8, neon_u8, 4)
       
   403 NEON_VOP(shl_u16, neon_u16, 2)
       
   404 NEON_VOP(shl_u32, neon_u32, 1)
       
   405 #undef NEON_FN
       
   406 
       
   407 uint64_t HELPER(neon_shl_u64)(uint64_t val, uint64_t shiftop)
       
   408 {
       
   409     int8_t shift = (int8_t)shiftop;
       
   410     if (shift >= 64 || shift <= -64) {
       
   411         val = 0;
       
   412     } else if (shift < 0) {
       
   413         val >>= -shift;
       
   414     } else {
       
   415         val <<= shift;
       
   416     }
       
   417     return val;
       
   418 }
       
   419 
       
   420 #define NEON_FN(dest, src1, src2) do { \
       
   421     int8_t tmp; \
       
   422     tmp = (int8_t)src2; \
       
   423     if (tmp >= sizeof(src1) * 8) { \
       
   424         dest = 0; \
       
   425     } else if (tmp <= -(int) (sizeof(src1) * 8)) { \
       
   426         dest = src1 >> (sizeof(src1) * 8 - 1); \
       
   427     } else if (tmp < 0) { \
       
   428         dest = src1 >> -tmp; \
       
   429     } else { \
       
   430         dest = src1 << tmp; \
       
   431     }} while (0)
       
   432 NEON_VOP(shl_s8, neon_s8, 4)
       
   433 NEON_VOP(shl_s16, neon_s16, 2)
       
   434 NEON_VOP(shl_s32, neon_s32, 1)
       
   435 #undef NEON_FN
       
   436 
       
   437 uint64_t HELPER(neon_shl_s64)(uint64_t valop, uint64_t shiftop)
       
   438 {
       
   439     int8_t shift = (int8_t)shiftop;
       
   440     int64_t val = valop;
       
   441     if (shift >= 64) {
       
   442         val = 0;
       
   443     } else if (shift <= -64) {
       
   444         val >>= 63;
       
   445     } else if (shift < 0) {
       
   446         val >>= -shift;
       
   447     } else {
       
   448         val <<= shift;
       
   449     }
       
   450     return val;
       
   451 }
       
   452 
       
   453 #define NEON_FN(dest, src1, src2) do { \
       
   454     int8_t tmp; \
       
   455     tmp = (int8_t)src2; \
       
   456     if (tmp >= sizeof(src1) * 8) { \
       
   457         dest = 0; \
       
   458     } else if (tmp < -(int) (sizeof(src1) * 8)) { \
       
   459         dest = src1 >> (sizeof(src1) * 8 - 1); \
       
   460     } else if (tmp == -(int) (sizeof(src1) * 8)) { \
       
   461         dest = src1 >> (tmp - 1); \
       
   462         dest++; \
       
   463         src2 >>= 1; \
       
   464     } else if (tmp < 0) { \
       
   465         dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \
       
   466     } else { \
       
   467         dest = src1 << tmp; \
       
   468     }} while (0)
       
   469 NEON_VOP(rshl_s8, neon_s8, 4)
       
   470 NEON_VOP(rshl_s16, neon_s16, 2)
       
   471 NEON_VOP(rshl_s32, neon_s32, 1)
       
   472 #undef NEON_FN
       
   473 
       
   474 uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop)
       
   475 {
       
   476     int8_t shift = (int8_t)shiftop;
       
   477     int64_t val = valop;
       
   478     if (shift >= 64) {
       
   479         val = 0;
       
   480     } else if (shift < -64) {
       
   481         val >>= 63;
       
   482     } else if (shift == -63) {
       
   483         val >>= 63;
       
   484         val++;
       
   485         val >>= 1;
       
   486     } else if (shift < 0) {
       
   487         val = (val + ((int64_t)1 << (-1 - shift))) >> -shift;
       
   488     } else {
       
   489         val <<= shift;
       
   490     }
       
   491     return val;
       
   492 }
       
   493 
       
   494 #define NEON_FN(dest, src1, src2) do { \
       
   495     int8_t tmp; \
       
   496     tmp = (int8_t)src2; \
       
   497     if (abs(tmp) >= sizeof(src1) * 8) { \
       
   498         dest = 0; \
       
   499     } else if (tmp == -(int) (sizeof(src1) * 8)) { \
       
   500         dest = src1 >> (tmp - 1); \
       
   501     } else if (tmp < 0) { \
       
   502         dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \
       
   503     } else { \
       
   504         dest = src1 << tmp; \
       
   505     }} while (0)
       
   506 NEON_VOP(rshl_u8, neon_u8, 4)
       
   507 NEON_VOP(rshl_u16, neon_u16, 2)
       
   508 NEON_VOP(rshl_u32, neon_u32, 1)
       
   509 #undef NEON_FN
       
   510 
       
   511 uint64_t HELPER(neon_rshl_u64)(uint64_t val, uint64_t shiftop)
       
   512 {
       
   513     int8_t shift = (uint8_t)shiftop;
       
   514     if (shift >= 64 || shift < 64) {
       
   515         val = 0;
       
   516     } else if (shift == -64) {
       
   517         /* Rounding a 1-bit result just preserves that bit.  */
       
   518         val >>= 63;
       
   519     } if (shift < 0) {
       
   520         val = (val + ((uint64_t)1 << (-1 - shift))) >> -shift;
       
   521         val >>= -shift;
       
   522     } else {
       
   523         val <<= shift;
       
   524     }
       
   525     return val;
       
   526 }
       
   527 
       
   528 #define NEON_FN(dest, src1, src2) do { \
       
   529     int8_t tmp; \
       
   530     tmp = (int8_t)src2; \
       
   531     if (tmp >= sizeof(src1) * 8) { \
       
   532         if (src1) { \
       
   533             SET_QC(); \
       
   534             dest = ~0; \
       
   535         } else { \
       
   536             dest = 0; \
       
   537         } \
       
   538     } else if (tmp <= -(int) (sizeof(src1) * 8)) { \
       
   539         dest = 0; \
       
   540     } else if (tmp < 0) { \
       
   541         dest = src1 >> -tmp; \
       
   542     } else { \
       
   543         dest = src1 << tmp; \
       
   544         if ((dest >> tmp) != src1) { \
       
   545             SET_QC(); \
       
   546             dest = ~0; \
       
   547         } \
       
   548     }} while (0)
       
   549 NEON_VOP_ENV(qshl_u8, neon_u8, 4)
       
   550 NEON_VOP_ENV(qshl_u16, neon_u16, 2)
       
   551 NEON_VOP_ENV(qshl_u32, neon_u32, 1)
       
   552 #undef NEON_FN
       
   553 
       
   554 uint64_t HELPER(neon_qshl_u64)(CPUState *env, uint64_t val, uint64_t shiftop)
       
   555 {
       
   556     int8_t shift = (int8_t)shiftop;
       
   557     if (shift >= 64) {
       
   558         if (val) {
       
   559             val = ~(uint64_t)0;
       
   560             SET_QC();
       
   561         } else {
       
   562             val = 0;
       
   563         }
       
   564     } else if (shift <= -64) {
       
   565         val = 0;
       
   566     } else if (shift < 0) {
       
   567         val >>= -shift;
       
   568     } else {
       
   569         uint64_t tmp = val;
       
   570         val <<= shift;
       
   571         if ((val >> shift) != tmp) {
       
   572             SET_QC();
       
   573             val = ~(uint64_t)0;
       
   574         }
       
   575     }
       
   576     return val;
       
   577 }
       
   578 
       
   579 #define NEON_FN(dest, src1, src2) do { \
       
   580     int8_t tmp; \
       
   581     tmp = (int8_t)src2; \
       
   582     if (tmp >= sizeof(src1) * 8) { \
       
   583         if (src1) \
       
   584             SET_QC(); \
       
   585         dest = src1 >> 31; \
       
   586     } else if (tmp <= -(int) (sizeof(src1) * 8)) { \
       
   587         dest = src1 >> 31; \
       
   588     } else if (tmp < 0) { \
       
   589         dest = src1 >> -tmp; \
       
   590     } else { \
       
   591         dest = src1 << tmp; \
       
   592         if ((dest >> tmp) != src1) { \
       
   593             SET_QC(); \
       
   594             dest = src2 >> 31; \
       
   595         } \
       
   596     }} while (0)
       
   597 NEON_VOP_ENV(qshl_s8, neon_s8, 4)
       
   598 NEON_VOP_ENV(qshl_s16, neon_s16, 2)
       
   599 NEON_VOP_ENV(qshl_s32, neon_s32, 1)
       
   600 #undef NEON_FN
       
   601 
       
   602 uint64_t HELPER(neon_qshl_s64)(CPUState *env, uint64_t valop, uint64_t shiftop)
       
   603 {
       
   604     int8_t shift = (uint8_t)shiftop;
       
   605     int64_t val = valop;
       
   606     if (shift >= 64) {
       
   607         if (val) {
       
   608             SET_QC();
       
   609             val = (val >> 63) & ~SIGNBIT64;
       
   610         }
       
   611     } else if (shift <= 64) {
       
   612         val >>= 63;
       
   613     } else if (shift < 0) {
       
   614         val >>= -shift;
       
   615     } else {
       
   616         int64_t tmp = val;
       
   617         val <<= shift;
       
   618         if ((val >> shift) != tmp) {
       
   619             SET_QC();
       
   620             val = (tmp >> 63) ^ ~SIGNBIT64;
       
   621         }
       
   622     }
       
   623     return val;
       
   624 }
       
   625 
       
   626 
       
   627 /* FIXME: This is wrong.  */
       
   628 #define NEON_FN(dest, src1, src2) do { \
       
   629     int8_t tmp; \
       
   630     tmp = (int8_t)src2; \
       
   631     if (tmp < 0) { \
       
   632         dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \
       
   633     } else { \
       
   634         dest = src1 << tmp; \
       
   635         if ((dest >> tmp) != src1) { \
       
   636             SET_QC(); \
       
   637             dest = ~0; \
       
   638         } \
       
   639     }} while (0)
       
   640 NEON_VOP_ENV(qrshl_u8, neon_u8, 4)
       
   641 NEON_VOP_ENV(qrshl_u16, neon_u16, 2)
       
   642 NEON_VOP_ENV(qrshl_u32, neon_u32, 1)
       
   643 #undef NEON_FN
       
   644 
       
   645 uint64_t HELPER(neon_qrshl_u64)(CPUState *env, uint64_t val, uint64_t shiftop)
       
   646 {
       
   647     int8_t shift = (int8_t)shiftop;
       
   648     if (shift < 0) {
       
   649         val = (val + (1 << (-1 - shift))) >> -shift;
       
   650     } else { \
       
   651         uint64_t tmp = val;
       
   652         val <<= shift;
       
   653         if ((val >> shift) != tmp) {
       
   654             SET_QC();
       
   655             val = ~0;
       
   656         }
       
   657     }
       
   658     return val;
       
   659 }
       
   660 
       
   661 #define NEON_FN(dest, src1, src2) do { \
       
   662     int8_t tmp; \
       
   663     tmp = (int8_t)src2; \
       
   664     if (tmp < 0) { \
       
   665         dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \
       
   666     } else { \
       
   667         dest = src1 << tmp; \
       
   668         if ((dest >> tmp) != src1) { \
       
   669             SET_QC(); \
       
   670             dest = src1 >> 31; \
       
   671         } \
       
   672     }} while (0)
       
   673 NEON_VOP_ENV(qrshl_s8, neon_s8, 4)
       
   674 NEON_VOP_ENV(qrshl_s16, neon_s16, 2)
       
   675 NEON_VOP_ENV(qrshl_s32, neon_s32, 1)
       
   676 #undef NEON_FN
       
   677 
       
   678 uint64_t HELPER(neon_qrshl_s64)(CPUState *env, uint64_t valop, uint64_t shiftop)
       
   679 {
       
   680     int8_t shift = (uint8_t)shiftop;
       
   681     int64_t val = valop;
       
   682 
       
   683     if (shift < 0) {
       
   684         val = (val + (1 << (-1 - shift))) >> -shift;
       
   685     } else {
       
   686         int64_t tmp = val;;
       
   687         val <<= shift;
       
   688         if ((val >> shift) != tmp) {
       
   689             SET_QC();
       
   690             val = tmp >> 31;
       
   691         }
       
   692     }
       
   693     return val;
       
   694 }
       
   695 
       
   696 uint32_t HELPER(neon_add_u8)(uint32_t a, uint32_t b)
       
   697 {
       
   698     uint32_t mask;
       
   699     mask = (a ^ b) & 0x80808080u;
       
   700     a &= ~0x80808080u;
       
   701     b &= ~0x80808080u;
       
   702     return (a + b) ^ mask;
       
   703 }
       
   704 
       
   705 uint32_t HELPER(neon_add_u16)(uint32_t a, uint32_t b)
       
   706 {
       
   707     uint32_t mask;
       
   708     mask = (a ^ b) & 0x80008000u;
       
   709     a &= ~0x80008000u;
       
   710     b &= ~0x80008000u;
       
   711     return (a + b) ^ mask;
       
   712 }
       
   713 
       
   714 #define NEON_FN(dest, src1, src2) dest = src1 + src2
       
   715 NEON_POP(padd_u8, neon_u8, 4)
       
   716 NEON_POP(padd_u16, neon_u16, 2)
       
   717 #undef NEON_FN
       
   718 
       
   719 #define NEON_FN(dest, src1, src2) dest = src1 - src2
       
   720 NEON_VOP(sub_u8, neon_u8, 4)
       
   721 NEON_VOP(sub_u16, neon_u16, 2)
       
   722 #undef NEON_FN
       
   723 
       
   724 #define NEON_FN(dest, src1, src2) dest = src1 * src2
       
   725 NEON_VOP(mul_u8, neon_u8, 4)
       
   726 NEON_VOP(mul_u16, neon_u16, 2)
       
   727 #undef NEON_FN
       
   728 
       
   729 /* Polynomial multiplication is like integer multiplication except the
       
   730    partial products are XORed, not added.  */
       
   731 uint32_t HELPER(neon_mul_p8)(uint32_t op1, uint32_t op2)
       
   732 {
       
   733     uint32_t mask;
       
   734     uint32_t result;
       
   735     result = 0;
       
   736     while (op1) {
       
   737         mask = 0;
       
   738         if (op1 & 1)
       
   739             mask |= 0xff;
       
   740         if (op1 & (1 << 8))
       
   741             mask |= (0xff << 8);
       
   742         if (op1 & (1 << 16))
       
   743             mask |= (0xff << 16);
       
   744         if (op1 & (1 << 24))
       
   745             mask |= (0xff << 24);
       
   746         result ^= op2 & mask;
       
   747         op1 = (op1 >> 1) & 0x7f7f7f7f;
       
   748         op2 = (op2 << 1) & 0xfefefefe;
       
   749     }
       
   750     return result;
       
   751 }
       
   752 
       
   753 #define NEON_FN(dest, src1, src2) dest = (src1 & src2) ? -1 : 0
       
   754 NEON_VOP(tst_u8, neon_u8, 4)
       
   755 NEON_VOP(tst_u16, neon_u16, 2)
       
   756 NEON_VOP(tst_u32, neon_u32, 1)
       
   757 #undef NEON_FN
       
   758 
       
   759 #define NEON_FN(dest, src1, src2) dest = (src1 == src2) ? -1 : 0
       
   760 NEON_VOP(ceq_u8, neon_u8, 4)
       
   761 NEON_VOP(ceq_u16, neon_u16, 2)
       
   762 NEON_VOP(ceq_u32, neon_u32, 1)
       
   763 #undef NEON_FN
       
   764 
       
   765 #define NEON_FN(dest, src, dummy) dest = (src < 0) ? -src : src
       
   766 NEON_VOP1(abs_s8, neon_s8, 4)
       
   767 NEON_VOP1(abs_s16, neon_s16, 2)
       
   768 #undef NEON_FN
       
   769 
       
   770 /* Count Leading Sign/Zero Bits.  */
       
   771 static inline int do_clz8(uint8_t x)
       
   772 {
       
   773     int n;
       
   774     for (n = 8; x; n--)
       
   775         x >>= 1;
       
   776     return n;
       
   777 }
       
   778 
       
   779 static inline int do_clz16(uint16_t x)
       
   780 {
       
   781     int n;
       
   782     for (n = 16; x; n--)
       
   783         x >>= 1;
       
   784     return n;
       
   785 }
       
   786 
       
   787 #define NEON_FN(dest, src, dummy) dest = do_clz8(src)
       
   788 NEON_VOP1(clz_u8, neon_u8, 4)
       
   789 #undef NEON_FN
       
   790 
       
   791 #define NEON_FN(dest, src, dummy) dest = do_clz16(src)
       
   792 NEON_VOP1(clz_u16, neon_u16, 2)
       
   793 #undef NEON_FN
       
   794 
       
   795 #define NEON_FN(dest, src, dummy) dest = do_clz8((src < 0) ? ~src : src) - 1
       
   796 NEON_VOP1(cls_s8, neon_s8, 4)
       
   797 #undef NEON_FN
       
   798 
       
   799 #define NEON_FN(dest, src, dummy) dest = do_clz16((src < 0) ? ~src : src) - 1
       
   800 NEON_VOP1(cls_s16, neon_s16, 2)
       
   801 #undef NEON_FN
       
   802 
       
   803 uint32_t HELPER(neon_cls_s32)(uint32_t x)
       
   804 {
       
   805     int count;
       
   806     if ((int32_t)x < 0)
       
   807         x = ~x;
       
   808     for (count = 32; x; count--)
       
   809         x = x >> 1;
       
   810     return count - 1;
       
   811 }
       
   812 
       
   813 /* Bit count.  */
       
   814 uint32_t HELPER(neon_cnt_u8)(uint32_t x)
       
   815 {
       
   816     x = (x & 0x55555555) + ((x >>  1) & 0x55555555);
       
   817     x = (x & 0x33333333) + ((x >>  2) & 0x33333333);
       
   818     x = (x & 0x0f0f0f0f) + ((x >>  4) & 0x0f0f0f0f);
       
   819     return x;
       
   820 }
       
   821 
       
   822 #define NEON_QDMULH16(dest, src1, src2, round) do { \
       
   823     uint32_t tmp = (int32_t)(int16_t) src1 * (int16_t) src2; \
       
   824     if ((tmp ^ (tmp << 1)) & SIGNBIT) { \
       
   825         SET_QC(); \
       
   826         tmp = (tmp >> 31) ^ ~SIGNBIT; \
       
   827     } \
       
   828     tmp <<= 1; \
       
   829     if (round) { \
       
   830         int32_t old = tmp; \
       
   831         tmp += 1 << 15; \
       
   832         if ((int32_t)tmp < old) { \
       
   833             SET_QC(); \
       
   834             tmp = SIGNBIT - 1; \
       
   835         } \
       
   836     } \
       
   837     dest = tmp >> 16; \
       
   838     } while(0)
       
   839 #define NEON_FN(dest, src1, src2) NEON_QDMULH16(dest, src1, src2, 0)
       
   840 NEON_VOP_ENV(qdmulh_s16, neon_s16, 2)
       
   841 #undef NEON_FN
       
   842 #define NEON_FN(dest, src1, src2) NEON_QDMULH16(dest, src1, src2, 1)
       
   843 NEON_VOP_ENV(qrdmulh_s16, neon_s16, 2)
       
   844 #undef NEON_FN
       
   845 #undef NEON_QDMULH16
       
   846 
       
   847 #define NEON_QDMULH32(dest, src1, src2, round) do { \
       
   848     uint64_t tmp = (int64_t)(int32_t) src1 * (int32_t) src2; \
       
   849     if ((tmp ^ (tmp << 1)) & SIGNBIT64) { \
       
   850         SET_QC(); \
       
   851         tmp = (tmp >> 63) ^ ~SIGNBIT64; \
       
   852     } else { \
       
   853         tmp <<= 1; \
       
   854     } \
       
   855     if (round) { \
       
   856         int64_t old = tmp; \
       
   857         tmp += (int64_t)1 << 31; \
       
   858         if ((int64_t)tmp < old) { \
       
   859             SET_QC(); \
       
   860             tmp = SIGNBIT64 - 1; \
       
   861         } \
       
   862     } \
       
   863     dest = tmp >> 32; \
       
   864     } while(0)
       
   865 #define NEON_FN(dest, src1, src2) NEON_QDMULH32(dest, src1, src2, 0)
       
   866 NEON_VOP_ENV(qdmulh_s32, neon_s32, 1)
       
   867 #undef NEON_FN
       
   868 #define NEON_FN(dest, src1, src2) NEON_QDMULH32(dest, src1, src2, 1)
       
   869 NEON_VOP_ENV(qrdmulh_s32, neon_s32, 1)
       
   870 #undef NEON_FN
       
   871 #undef NEON_QDMULH32
       
   872 
       
   873 uint32_t HELPER(neon_narrow_u8)(uint64_t x)
       
   874 {
       
   875     return (x & 0xffu) | ((x >> 8) & 0xff00u) | ((x >> 16) & 0xff0000u)
       
   876            | ((x >> 24) & 0xff000000u);
       
   877 }
       
   878 
       
   879 uint32_t HELPER(neon_narrow_u16)(uint64_t x)
       
   880 {
       
   881     return (x & 0xffffu) | ((x >> 16) & 0xffff0000u);
       
   882 }
       
   883 
       
   884 uint32_t HELPER(neon_narrow_high_u8)(uint64_t x)
       
   885 {
       
   886     return ((x >> 8) & 0xff) | ((x >> 16) & 0xff00)
       
   887             | ((x >> 24) & 0xff0000) | ((x >> 32) & 0xff000000);
       
   888 }
       
   889 
       
   890 uint32_t HELPER(neon_narrow_high_u16)(uint64_t x)
       
   891 {
       
   892     return ((x >> 16) & 0xffff) | ((x >> 32) & 0xffff0000);
       
   893 }
       
   894 
       
   895 uint32_t HELPER(neon_narrow_round_high_u8)(uint64_t x)
       
   896 {
       
   897     x &= 0xff80ff80ff80ff80ull;
       
   898     x += 0x0080008000800080ull;
       
   899     return ((x >> 8) & 0xff) | ((x >> 16) & 0xff00)
       
   900             | ((x >> 24) & 0xff0000) | ((x >> 32) & 0xff000000);
       
   901 }
       
   902 
       
   903 uint32_t HELPER(neon_narrow_round_high_u16)(uint64_t x)
       
   904 {
       
   905     x &= 0xffff8000ffff8000ull;
       
   906     x += 0x0000800000008000ull;
       
   907     return ((x >> 16) & 0xffff) | ((x >> 32) & 0xffff0000);
       
   908 }
       
   909 
       
   910 uint32_t HELPER(neon_narrow_sat_u8)(CPUState *env, uint64_t x)
       
   911 {
       
   912     uint16_t s;
       
   913     uint8_t d;
       
   914     uint32_t res = 0;
       
   915 #define SAT8(n) \
       
   916     s = x >> n; \
       
   917     if (s > 0xff) { \
       
   918         d = 0xff; \
       
   919         SET_QC(); \
       
   920     } else  { \
       
   921         d = s; \
       
   922     } \
       
   923     res |= (uint32_t)d << (n / 2);
       
   924 
       
   925     SAT8(0);
       
   926     SAT8(16);
       
   927     SAT8(32);
       
   928     SAT8(48);
       
   929 #undef SAT8
       
   930     return res;
       
   931 }
       
   932 
       
   933 uint32_t HELPER(neon_narrow_sat_s8)(CPUState *env, uint64_t x)
       
   934 {
       
   935     int16_t s;
       
   936     uint8_t d;
       
   937     uint32_t res = 0;
       
   938 #define SAT8(n) \
       
   939     s = x >> n; \
       
   940     if (s != (int8_t)s) { \
       
   941         d = (s >> 15) ^ 0x7f; \
       
   942         SET_QC(); \
       
   943     } else  { \
       
   944         d = s; \
       
   945     } \
       
   946     res |= (uint32_t)d << (n / 2);
       
   947 
       
   948     SAT8(0);
       
   949     SAT8(16);
       
   950     SAT8(32);
       
   951     SAT8(48);
       
   952 #undef SAT8
       
   953     return res;
       
   954 }
       
   955 
       
   956 uint32_t HELPER(neon_narrow_sat_u16)(CPUState *env, uint64_t x)
       
   957 {
       
   958     uint32_t high;
       
   959     uint32_t low;
       
   960     low = x;
       
   961     if (low > 0xffff) {
       
   962         low = 0xffff;
       
   963         SET_QC();
       
   964     }
       
   965     high = x >> 32;
       
   966     if (high > 0xffff) {
       
   967         high = 0xffff;
       
   968         SET_QC();
       
   969     }
       
   970     return low | (high << 16);
       
   971 }
       
   972 
       
   973 uint32_t HELPER(neon_narrow_sat_s16)(CPUState *env, uint64_t x)
       
   974 {
       
   975     int32_t low;
       
   976     int32_t high;
       
   977     low = x;
       
   978     if (low != (int16_t)low) {
       
   979         low = (low >> 31) ^ 0x7fff;
       
   980         SET_QC();
       
   981     }
       
   982     high = x >> 32;
       
   983     if (high != (int16_t)high) {
       
   984         high = (high >> 31) ^ 0x7fff;
       
   985         SET_QC();
       
   986     }
       
   987     return (uint16_t)low | (high << 16);
       
   988 }
       
   989 
       
   990 uint32_t HELPER(neon_narrow_sat_u32)(CPUState *env, uint64_t x)
       
   991 {
       
   992     if (x > 0xffffffffu) {
       
   993         SET_QC();
       
   994         return 0xffffffffu;
       
   995     }
       
   996     return x;
       
   997 }
       
   998 
       
   999 uint32_t HELPER(neon_narrow_sat_s32)(CPUState *env, uint64_t x)
       
  1000 {
       
  1001     if ((int64_t)x != (int32_t)x) {
       
  1002         SET_QC();
       
  1003         return (x >> 63) ^ 0x7fffffff;
       
  1004     }
       
  1005     return x;
       
  1006 }
       
  1007 
       
  1008 uint64_t HELPER(neon_widen_u8)(uint32_t x)
       
  1009 {
       
  1010     uint64_t tmp;
       
  1011     uint64_t ret;
       
  1012     ret = (uint8_t)x;
       
  1013     tmp = (uint8_t)(x >> 8);
       
  1014     ret |= tmp << 16;
       
  1015     tmp = (uint8_t)(x >> 16);
       
  1016     ret |= tmp << 32;
       
  1017     tmp = (uint8_t)(x >> 24);
       
  1018     ret |= tmp << 48;
       
  1019     return ret;
       
  1020 }
       
  1021 
       
  1022 uint64_t HELPER(neon_widen_s8)(uint32_t x)
       
  1023 {
       
  1024     uint64_t tmp;
       
  1025     uint64_t ret;
       
  1026     ret = (uint16_t)(int8_t)x;
       
  1027     tmp = (uint16_t)(int8_t)(x >> 8);
       
  1028     ret |= tmp << 16;
       
  1029     tmp = (uint16_t)(int8_t)(x >> 16);
       
  1030     ret |= tmp << 32;
       
  1031     tmp = (uint16_t)(int8_t)(x >> 24);
       
  1032     ret |= tmp << 48;
       
  1033     return ret;
       
  1034 }
       
  1035 
       
  1036 uint64_t HELPER(neon_widen_u16)(uint32_t x)
       
  1037 {
       
  1038     uint64_t high = (uint16_t)(x >> 16);
       
  1039     return ((uint16_t)x) | (high << 32);
       
  1040 }
       
  1041 
       
  1042 uint64_t HELPER(neon_widen_s16)(uint32_t x)
       
  1043 {
       
  1044     uint64_t high = (int16_t)(x >> 16);
       
  1045     return ((uint32_t)(int16_t)x) | (high << 32);
       
  1046 }
       
  1047 
       
  1048 uint64_t HELPER(neon_addl_u16)(uint64_t a, uint64_t b)
       
  1049 {
       
  1050     uint64_t mask;
       
  1051     mask = (a ^ b) & 0x8000800080008000ull;
       
  1052     a &= ~0x8000800080008000ull;
       
  1053     b &= ~0x8000800080008000ull;
       
  1054     return (a + b) ^ mask;
       
  1055 }
       
  1056 
       
  1057 uint64_t HELPER(neon_addl_u32)(uint64_t a, uint64_t b)
       
  1058 {
       
  1059     uint64_t mask;
       
  1060     mask = (a ^ b) & 0x8000000080000000ull;
       
  1061     a &= ~0x8000000080000000ull;
       
  1062     b &= ~0x8000000080000000ull;
       
  1063     return (a + b) ^ mask;
       
  1064 }
       
  1065 
       
  1066 uint64_t HELPER(neon_paddl_u16)(uint64_t a, uint64_t b)
       
  1067 {
       
  1068     uint64_t tmp;
       
  1069     uint64_t tmp2;
       
  1070 
       
  1071     tmp = a & 0x0000ffff0000ffffull;
       
  1072     tmp += (a >> 16) & 0x0000ffff0000ffffull;
       
  1073     tmp2 = b & 0xffff0000ffff0000ull;
       
  1074     tmp2 += (b << 16) & 0xffff0000ffff0000ull;
       
  1075     return    ( tmp         & 0xffff)
       
  1076             | ((tmp  >> 16) & 0xffff0000ull)
       
  1077             | ((tmp2 << 16) & 0xffff00000000ull)
       
  1078             | ( tmp2        & 0xffff000000000000ull);
       
  1079 }
       
  1080 
       
  1081 uint64_t HELPER(neon_paddl_u32)(uint64_t a, uint64_t b)
       
  1082 {
       
  1083     uint32_t low = a + (a >> 32);
       
  1084     uint32_t high = b + (b >> 32);
       
  1085     return low + ((uint64_t)high << 32);
       
  1086 }
       
  1087 
       
  1088 uint64_t HELPER(neon_subl_u16)(uint64_t a, uint64_t b)
       
  1089 {
       
  1090     uint64_t mask;
       
  1091     mask = (a ^ ~b) & 0x8000800080008000ull;
       
  1092     a |= 0x8000800080008000ull;
       
  1093     b &= ~0x8000800080008000ull;
       
  1094     return (a - b) ^ mask;
       
  1095 }
       
  1096 
       
  1097 uint64_t HELPER(neon_subl_u32)(uint64_t a, uint64_t b)
       
  1098 {
       
  1099     uint64_t mask;
       
  1100     mask = (a ^ ~b) & 0x8000000080000000ull;
       
  1101     a |= 0x8000000080000000ull;
       
  1102     b &= ~0x8000000080000000ull;
       
  1103     return (a - b) ^ mask;
       
  1104 }
       
  1105 
       
  1106 uint64_t HELPER(neon_addl_saturate_s32)(CPUState *env, uint64_t a, uint64_t b)
       
  1107 {
       
  1108     uint32_t x, y;
       
  1109     uint32_t low, high;
       
  1110 
       
  1111     x = a;
       
  1112     y = b;
       
  1113     low = x + y;
       
  1114     if (((low ^ x) & SIGNBIT) && !((x ^ y) & SIGNBIT)) {
       
  1115         SET_QC();
       
  1116         low = ((int32_t)x >> 31) ^ ~SIGNBIT;
       
  1117     }
       
  1118     x = a >> 32;
       
  1119     y = b >> 32;
       
  1120     high = x + y;
       
  1121     if (((high ^ x) & SIGNBIT) && !((x ^ y) & SIGNBIT)) {
       
  1122         SET_QC();
       
  1123         high = ((int32_t)x >> 31) ^ ~SIGNBIT;
       
  1124     }
       
  1125     return low | ((uint64_t)high << 32);
       
  1126 }
       
  1127 
       
  1128 uint64_t HELPER(neon_addl_saturate_s64)(CPUState *env, uint64_t a, uint64_t b)
       
  1129 {
       
  1130     uint64_t result;
       
  1131 
       
  1132     result = a + b;
       
  1133     if (((result ^ a) & SIGNBIT64) && !((a ^ b) & SIGNBIT64)) {
       
  1134         SET_QC();
       
  1135         result = ((int64_t)a >> 63) ^ ~SIGNBIT64;
       
  1136     }
       
  1137     return result;
       
  1138 }
       
  1139 
       
  1140 #define DO_ABD(dest, x, y, type) do { \
       
  1141     type tmp_x = x; \
       
  1142     type tmp_y = y; \
       
  1143     dest = ((tmp_x > tmp_y) ? tmp_x - tmp_y : tmp_y - tmp_x); \
       
  1144     } while(0)
       
  1145 
       
  1146 uint64_t HELPER(neon_abdl_u16)(uint32_t a, uint32_t b)
       
  1147 {
       
  1148     uint64_t tmp;
       
  1149     uint64_t result;
       
  1150     DO_ABD(result, a, b, uint8_t);
       
  1151     DO_ABD(tmp, a >> 8, b >> 8, uint8_t);
       
  1152     result |= tmp << 16;
       
  1153     DO_ABD(tmp, a >> 16, b >> 16, uint8_t);
       
  1154     result |= tmp << 32;
       
  1155     DO_ABD(tmp, a >> 24, b >> 24, uint8_t);
       
  1156     result |= tmp << 48;
       
  1157     return result;
       
  1158 }
       
  1159 
       
  1160 uint64_t HELPER(neon_abdl_s16)(uint32_t a, uint32_t b)
       
  1161 {
       
  1162     uint64_t tmp;
       
  1163     uint64_t result;
       
  1164     DO_ABD(result, a, b, int8_t);
       
  1165     DO_ABD(tmp, a >> 8, b >> 8, int8_t);
       
  1166     result |= tmp << 16;
       
  1167     DO_ABD(tmp, a >> 16, b >> 16, int8_t);
       
  1168     result |= tmp << 32;
       
  1169     DO_ABD(tmp, a >> 24, b >> 24, int8_t);
       
  1170     result |= tmp << 48;
       
  1171     return result;
       
  1172 }
       
  1173 
       
  1174 uint64_t HELPER(neon_abdl_u32)(uint32_t a, uint32_t b)
       
  1175 {
       
  1176     uint64_t tmp;
       
  1177     uint64_t result;
       
  1178     DO_ABD(result, a, b, uint16_t);
       
  1179     DO_ABD(tmp, a >> 16, b >> 16, uint16_t);
       
  1180     return result | (tmp << 32);
       
  1181 }
       
  1182 
       
  1183 uint64_t HELPER(neon_abdl_s32)(uint32_t a, uint32_t b)
       
  1184 {
       
  1185     uint64_t tmp;
       
  1186     uint64_t result;
       
  1187     DO_ABD(result, a, b, int16_t);
       
  1188     DO_ABD(tmp, a >> 16, b >> 16, int16_t);
       
  1189     return result | (tmp << 32);
       
  1190 }
       
  1191 
       
  1192 uint64_t HELPER(neon_abdl_u64)(uint32_t a, uint32_t b)
       
  1193 {
       
  1194     uint64_t result;
       
  1195     DO_ABD(result, a, b, uint32_t);
       
  1196     return result;
       
  1197 }
       
  1198 
       
  1199 uint64_t HELPER(neon_abdl_s64)(uint32_t a, uint32_t b)
       
  1200 {
       
  1201     uint64_t result;
       
  1202     DO_ABD(result, a, b, int32_t);
       
  1203     return result;
       
  1204 }
       
  1205 #undef DO_ABD
       
  1206 
       
  1207 /* Widening multiply. Named type is the source type.  */
       
  1208 #define DO_MULL(dest, x, y, type1, type2) do { \
       
  1209     type1 tmp_x = x; \
       
  1210     type1 tmp_y = y; \
       
  1211     dest = (type2)((type2)tmp_x * (type2)tmp_y); \
       
  1212     } while(0)
       
  1213 
       
  1214 uint64_t HELPER(neon_mull_u8)(uint32_t a, uint32_t b)
       
  1215 {
       
  1216     uint64_t tmp;
       
  1217     uint64_t result;
       
  1218 
       
  1219     DO_MULL(result, a, b, uint8_t, uint16_t);
       
  1220     DO_MULL(tmp, a >> 8, b >> 8, uint8_t, uint16_t);
       
  1221     result |= tmp << 16;
       
  1222     DO_MULL(tmp, a >> 16, b >> 16, uint8_t, uint16_t);
       
  1223     result |= tmp << 32;
       
  1224     DO_MULL(tmp, a >> 24, b >> 24, uint8_t, uint16_t);
       
  1225     result |= tmp << 48;
       
  1226     return result;
       
  1227 }
       
  1228 
       
  1229 uint64_t HELPER(neon_mull_s8)(uint32_t a, uint32_t b)
       
  1230 {
       
  1231     uint64_t tmp;
       
  1232     uint64_t result;
       
  1233 
       
  1234     DO_MULL(result, a, b, int8_t, uint16_t);
       
  1235     DO_MULL(tmp, a >> 8, b >> 8, int8_t, uint16_t);
       
  1236     result |= tmp << 16;
       
  1237     DO_MULL(tmp, a >> 16, b >> 16, int8_t, uint16_t);
       
  1238     result |= tmp << 32;
       
  1239     DO_MULL(tmp, a >> 24, b >> 24, int8_t, uint16_t);
       
  1240     result |= tmp << 48;
       
  1241     return result;
       
  1242 }
       
  1243 
       
  1244 uint64_t HELPER(neon_mull_u16)(uint32_t a, uint32_t b)
       
  1245 {
       
  1246     uint64_t tmp;
       
  1247     uint64_t result;
       
  1248 
       
  1249     DO_MULL(result, a, b, uint16_t, uint32_t);
       
  1250     DO_MULL(tmp, a >> 16, b >> 16, uint16_t, uint32_t);
       
  1251     return result | (tmp << 32);
       
  1252 }
       
  1253 
       
  1254 uint64_t HELPER(neon_mull_s16)(uint32_t a, uint32_t b)
       
  1255 {
       
  1256     uint64_t tmp;
       
  1257     uint64_t result;
       
  1258 
       
  1259     DO_MULL(result, a, b, int16_t, uint32_t);
       
  1260     DO_MULL(tmp, a >> 16, b >> 16, int16_t, uint32_t);
       
  1261     return result | (tmp << 32);
       
  1262 }
       
  1263 
       
  1264 uint64_t HELPER(neon_negl_u16)(uint64_t x)
       
  1265 {
       
  1266     uint16_t tmp;
       
  1267     uint64_t result;
       
  1268     result = (uint16_t)-x;
       
  1269     tmp = -(x >> 16);
       
  1270     result |= (uint64_t)tmp << 16;
       
  1271     tmp = -(x >> 32);
       
  1272     result |= (uint64_t)tmp << 32;
       
  1273     tmp = -(x >> 48);
       
  1274     result |= (uint64_t)tmp << 48;
       
  1275     return result;
       
  1276 }
       
  1277 
       
  1278 #include <stdio.h>
       
  1279 uint64_t HELPER(neon_negl_u32)(uint64_t x)
       
  1280 {
       
  1281     uint32_t low = -x;
       
  1282     uint32_t high = -(x >> 32);
       
  1283     return low | ((uint64_t)high << 32);
       
  1284 }
       
  1285 
       
  1286 /* FIXME:  There should be a native op for this.  */
       
  1287 uint64_t HELPER(neon_negl_u64)(uint64_t x)
       
  1288 {
       
  1289     return -x;
       
  1290 }
       
  1291 
       
/* Saturating sign manipulation.  */
       
  1293 /* ??? Make these use NEON_VOP1 */
       
  1294 #define DO_QABS8(x) do { \
       
  1295     if (x == (int8_t)0x80) { \
       
  1296         x = 0x7f; \
       
  1297         SET_QC(); \
       
  1298     } else if (x < 0) { \
       
  1299         x = -x; \
       
  1300     }} while (0)
       
  1301 uint32_t HELPER(neon_qabs_s8)(CPUState *env, uint32_t x)
       
  1302 {
       
  1303     neon_s8 vec;
       
  1304     NEON_UNPACK(neon_s8, vec, x);
       
  1305     DO_QABS8(vec.v1);
       
  1306     DO_QABS8(vec.v2);
       
  1307     DO_QABS8(vec.v3);
       
  1308     DO_QABS8(vec.v4);
       
  1309     NEON_PACK(neon_s8, x, vec);
       
  1310     return x;
       
  1311 }
       
  1312 #undef DO_QABS8
       
  1313 
       
  1314 #define DO_QNEG8(x) do { \
       
  1315     if (x == (int8_t)0x80) { \
       
  1316         x = 0x7f; \
       
  1317         SET_QC(); \
       
  1318     } else { \
       
  1319         x = -x; \
       
  1320     }} while (0)
       
  1321 uint32_t HELPER(neon_qneg_s8)(CPUState *env, uint32_t x)
       
  1322 {
       
  1323     neon_s8 vec;
       
  1324     NEON_UNPACK(neon_s8, vec, x);
       
  1325     DO_QNEG8(vec.v1);
       
  1326     DO_QNEG8(vec.v2);
       
  1327     DO_QNEG8(vec.v3);
       
  1328     DO_QNEG8(vec.v4);
       
  1329     NEON_PACK(neon_s8, x, vec);
       
  1330     return x;
       
  1331 }
       
  1332 #undef DO_QNEG8
       
  1333 
       
  1334 #define DO_QABS16(x) do { \
       
  1335     if (x == (int16_t)0x8000) { \
       
  1336         x = 0x7fff; \
       
  1337         SET_QC(); \
       
  1338     } else if (x < 0) { \
       
  1339         x = -x; \
       
  1340     }} while (0)
       
  1341 uint32_t HELPER(neon_qabs_s16)(CPUState *env, uint32_t x)
       
  1342 {
       
  1343     neon_s16 vec;
       
  1344     NEON_UNPACK(neon_s16, vec, x);
       
  1345     DO_QABS16(vec.v1);
       
  1346     DO_QABS16(vec.v2);
       
  1347     NEON_PACK(neon_s16, x, vec);
       
  1348     return x;
       
  1349 }
       
  1350 #undef DO_QABS16
       
  1351 
       
  1352 #define DO_QNEG16(x) do { \
       
  1353     if (x == (int16_t)0x8000) { \
       
  1354         x = 0x7fff; \
       
  1355         SET_QC(); \
       
  1356     } else { \
       
  1357         x = -x; \
       
  1358     }} while (0)
       
  1359 uint32_t HELPER(neon_qneg_s16)(CPUState *env, uint32_t x)
       
  1360 {
       
  1361     neon_s16 vec;
       
  1362     NEON_UNPACK(neon_s16, vec, x);
       
  1363     DO_QNEG16(vec.v1);
       
  1364     DO_QNEG16(vec.v2);
       
  1365     NEON_PACK(neon_s16, x, vec);
       
  1366     return x;
       
  1367 }
       
  1368 #undef DO_QNEG16
       
  1369 
       
  1370 uint32_t HELPER(neon_qabs_s32)(CPUState *env, uint32_t x)
       
  1371 {
       
  1372     if (x == SIGNBIT) {
       
  1373         SET_QC();
       
  1374         x = ~SIGNBIT;
       
  1375     } else if ((int32_t)x < 0) {
       
  1376         x = -x;
       
  1377     }
       
  1378     return x;
       
  1379 }
       
  1380 
       
  1381 uint32_t HELPER(neon_qneg_s32)(CPUState *env, uint32_t x)
       
  1382 {
       
  1383     if (x == SIGNBIT) {
       
  1384         SET_QC();
       
  1385         x = ~SIGNBIT;
       
  1386     } else {
       
  1387         x = -x;
       
  1388     }
       
  1389     return x;
       
  1390 }
       
  1391 
       
  1392 /* NEON Float helpers.  */
       
  1393 uint32_t HELPER(neon_min_f32)(uint32_t a, uint32_t b)
       
  1394 {
       
  1395     float32 f0 = vfp_itos(a);
       
  1396     float32 f1 = vfp_itos(b);
       
  1397     return (float32_compare_quiet(f0, f1, NFS) == -1) ? a : b;
       
  1398 }
       
  1399 
       
  1400 uint32_t HELPER(neon_max_f32)(uint32_t a, uint32_t b)
       
  1401 {
       
  1402     float32 f0 = vfp_itos(a);
       
  1403     float32 f1 = vfp_itos(b);
       
  1404     return (float32_compare_quiet(f0, f1, NFS) == 1) ? a : b;
       
  1405 }
       
  1406 
       
  1407 uint32_t HELPER(neon_abd_f32)(uint32_t a, uint32_t b)
       
  1408 {
       
  1409     float32 f0 = vfp_itos(a);
       
  1410     float32 f1 = vfp_itos(b);
       
  1411     return vfp_stoi((float32_compare_quiet(f0, f1, NFS) == 1)
       
  1412                     ? float32_sub(f0, f1, NFS)
       
  1413                     : float32_sub(f1, f0, NFS));
       
  1414 }
       
  1415 
       
  1416 uint32_t HELPER(neon_add_f32)(uint32_t a, uint32_t b)
       
  1417 {
       
  1418     return vfp_stoi(float32_add(vfp_itos(a), vfp_itos(b), NFS));
       
  1419 }
       
  1420 
       
  1421 uint32_t HELPER(neon_sub_f32)(uint32_t a, uint32_t b)
       
  1422 {
       
  1423     return vfp_stoi(float32_sub(vfp_itos(a), vfp_itos(b), NFS));
       
  1424 }
       
  1425 
       
  1426 uint32_t HELPER(neon_mul_f32)(uint32_t a, uint32_t b)
       
  1427 {
       
  1428     return vfp_stoi(float32_mul(vfp_itos(a), vfp_itos(b), NFS));
       
  1429 }
       
  1430 
       
  1431 /* Floating point comparisons produce an integer result.  */
       
  1432 #define NEON_VOP_FCMP(name, cmp) \
       
  1433 uint32_t HELPER(neon_##name)(uint32_t a, uint32_t b) \
       
  1434 { \
       
  1435     if (float32_compare_quiet(vfp_itos(a), vfp_itos(b), NFS) cmp 0) \
       
  1436         return ~0; \
       
  1437     else \
       
  1438         return 0; \
       
  1439 }
       
  1440 
       
  1441 NEON_VOP_FCMP(ceq_f32, ==)
       
  1442 NEON_VOP_FCMP(cge_f32, >=)
       
  1443 NEON_VOP_FCMP(cgt_f32, >)
       
  1444 
       
  1445 uint32_t HELPER(neon_acge_f32)(uint32_t a, uint32_t b)
       
  1446 {
       
  1447     float32 f0 = float32_abs(vfp_itos(a));
       
  1448     float32 f1 = float32_abs(vfp_itos(b));
       
  1449     return (float32_compare_quiet(f0, f1,NFS) >= 0) ? ~0 : 0;
       
  1450 }
       
  1451 
       
  1452 uint32_t HELPER(neon_acgt_f32)(uint32_t a, uint32_t b)
       
  1453 {
       
  1454     float32 f0 = float32_abs(vfp_itos(a));
       
  1455     float32 f1 = float32_abs(vfp_itos(b));
       
  1456     return (float32_compare_quiet(f0, f1, NFS) > 0) ? ~0 : 0;
       
  1457 }