83 const uint16x4_t alpha16_low = vdup_lane_u16(vget_low_u16(src16), 3); |
86 const uint16x4_t alpha16_low = vdup_lane_u16(vget_low_u16(src16), 3); |
84 |
87 |
85 const uint16x8_t alpha16 = vsubq_u16(full, vcombine_u16(alpha16_low, alpha16_high)); |
88 const uint16x8_t alpha16 = vsubq_u16(full, vcombine_u16(alpha16_low, alpha16_high)); |
86 |
89 |
87 return vaddq_u16(src16, qvbyte_mul_u16(dst16, alpha16, half)); |
90 return vaddq_u16(src16, qvbyte_mul_u16(dst16, alpha16, half)); |
|
91 } |
|
92 |
|
93 extern "C" void |
|
94 pixman_composite_over_8888_0565_asm_neon (int32_t w, |
|
95 int32_t h, |
|
96 uint16_t *dst, |
|
97 int32_t dst_stride, |
|
98 uint32_t *src, |
|
99 int32_t src_stride); |
|
100 |
|
101 extern "C" void |
|
102 pixman_composite_over_8888_8888_asm_neon (int32_t w, |
|
103 int32_t h, |
|
104 uint32_t *dst, |
|
105 int32_t dst_stride, |
|
106 uint32_t *src, |
|
107 int32_t src_stride); |
|
108 |
|
109 extern "C" void |
|
110 pixman_composite_src_0565_8888_asm_neon (int32_t w, |
|
111 int32_t h, |
|
112 uint32_t *dst, |
|
113 int32_t dst_stride, |
|
114 uint16_t *src, |
|
115 int32_t src_stride); |
|
116 |
|
117 extern "C" void |
|
118 pixman_composite_over_n_8_0565_asm_neon (int32_t w, |
|
119 int32_t h, |
|
120 uint16_t *dst, |
|
121 int32_t dst_stride, |
|
122 uint32_t src, |
|
123 int32_t unused, |
|
124 uint8_t *mask, |
|
125 int32_t mask_stride); |
|
126 |
|
127 extern "C" void |
|
128 pixman_composite_scanline_over_asm_neon (int32_t w, |
|
129 const uint32_t *dst, |
|
130 const uint32_t *src); |
|
131 |
|
132 // qblendfunctions.cpp |
|
133 void qt_blend_argb32_on_rgb16_const_alpha(uchar *destPixels, int dbpl, |
|
134 const uchar *srcPixels, int sbpl, |
|
135 int w, int h, |
|
136 int const_alpha); |
|
137 |
|
138 void qt_blend_rgb16_on_argb32_neon(uchar *destPixels, int dbpl, |
|
139 const uchar *srcPixels, int sbpl, |
|
140 int w, int h, |
|
141 int const_alpha) |
|
142 { |
|
143 dbpl /= 4; |
|
144 sbpl /= 2; |
|
145 |
|
146 quint32 *dst = (quint32 *) destPixels; |
|
147 quint16 *src = (quint16 *) srcPixels; |
|
148 |
|
149 if (const_alpha != 256) { |
|
150 quint8 a = (255 * const_alpha) >> 8; |
|
151 quint8 ia = 255 - a; |
|
152 |
|
153 while (h--) { |
|
154 for (int x=0; x<w; ++x) |
|
155 dst[x] = INTERPOLATE_PIXEL_255(qt_colorConvert(src[x], dst[x]), a, dst[x], ia); |
|
156 dst += dbpl; |
|
157 src += sbpl; |
|
158 } |
|
159 return; |
|
160 } |
|
161 |
|
162 pixman_composite_src_0565_8888_asm_neon(w, h, dst, dbpl, src, sbpl); |
|
163 } |
|
164 |
|
165 extern "C" void blend_8_pixels_argb32_on_rgb16_neon(quint16 *dst, const quint32 *src, int const_alpha); |
|
166 |
|
167 void qt_blend_argb32_on_rgb16_neon(uchar *destPixels, int dbpl, |
|
168 const uchar *srcPixels, int sbpl, |
|
169 int w, int h, |
|
170 int const_alpha) |
|
171 { |
|
172 quint16 *dst = (quint16 *) destPixels; |
|
173 quint32 *src = (quint32 *) srcPixels; |
|
174 |
|
175 if (const_alpha != 256) { |
|
176 for (int y=0; y<h; ++y) { |
|
177 int i = 0; |
|
178 for (; i < w-7; i += 8) |
|
179 blend_8_pixels_argb32_on_rgb16_neon(&dst[i], &src[i], const_alpha); |
|
180 |
|
181 if (i < w) { |
|
182 int tail = w - i; |
|
183 |
|
184 quint16 dstBuffer[8]; |
|
185 quint32 srcBuffer[8]; |
|
186 |
|
187 for (int j = 0; j < tail; ++j) { |
|
188 dstBuffer[j] = dst[i + j]; |
|
189 srcBuffer[j] = src[i + j]; |
|
190 } |
|
191 |
|
192 blend_8_pixels_argb32_on_rgb16_neon(dstBuffer, srcBuffer, const_alpha); |
|
193 |
|
194 for (int j = 0; j < tail; ++j) { |
|
195 dst[i + j] = dstBuffer[j]; |
|
196 src[i + j] = srcBuffer[j]; |
|
197 } |
|
198 } |
|
199 |
|
200 dst = (quint16 *)(((uchar *) dst) + dbpl); |
|
201 src = (quint32 *)(((uchar *) src) + sbpl); |
|
202 } |
|
203 return; |
|
204 } |
|
205 |
|
206 pixman_composite_over_8888_0565_asm_neon(w, h, dst, dbpl / 2, src, sbpl / 4); |
|
207 } |
|
208 |
|
209 void qt_blend_argb32_on_argb32_scanline_neon(uint *dest, const uint *src, int length, uint const_alpha) |
|
210 { |
|
211 if (const_alpha == 255) { |
|
212 pixman_composite_scanline_over_asm_neon(length, dest, src); |
|
213 } else { |
|
214 qt_blend_argb32_on_argb32_neon((uchar *)dest, 4 * length, (uchar *)src, 4 * length, length, 1, (const_alpha * 256) / 255); |
|
215 } |
88 } |
216 } |
89 |
217 |
90 void qt_blend_argb32_on_argb32_neon(uchar *destPixels, int dbpl, |
218 void qt_blend_argb32_on_argb32_neon(uchar *destPixels, int dbpl, |
91 const uchar *srcPixels, int sbpl, |
219 const uchar *srcPixels, int sbpl, |
92 int w, int h, |
220 int w, int h, |
95 const uint *src = (const uint *) srcPixels; |
223 const uint *src = (const uint *) srcPixels; |
96 uint *dst = (uint *) destPixels; |
224 uint *dst = (uint *) destPixels; |
97 uint16x8_t half = vdupq_n_u16(0x80); |
225 uint16x8_t half = vdupq_n_u16(0x80); |
98 uint16x8_t full = vdupq_n_u16(0xff); |
226 uint16x8_t full = vdupq_n_u16(0xff); |
99 if (const_alpha == 256) { |
227 if (const_alpha == 256) { |
100 for (int y = 0; y < h; ++y) { |
228 pixman_composite_over_8888_8888_asm_neon(w, h, (uint32_t *)destPixels, dbpl / 4, (uint32_t *)srcPixels, sbpl / 4); |
101 int x = 0; |
|
102 for (; x < w-3; x += 4) { |
|
103 uint32x4_t src32 = vld1q_u32((uint32_t *)&src[x]); |
|
104 if ((src[x] & src[x+1] & src[x+2] & src[x+3]) >= 0xff000000) { |
|
105 // all opaque |
|
106 vst1q_u32((uint32_t *)&dst[x], src32); |
|
107 } else if (src[x] | src[x+1] | src[x+2] | src[x+3]) { |
|
108 uint32x4_t dst32 = vld1q_u32((uint32_t *)&dst[x]); |
|
109 |
|
110 const uint8x16_t src8 = vreinterpretq_u8_u32(src32); |
|
111 const uint8x16_t dst8 = vreinterpretq_u8_u32(dst32); |
|
112 |
|
113 const uint8x8_t src8_low = vget_low_u8(src8); |
|
114 const uint8x8_t dst8_low = vget_low_u8(dst8); |
|
115 |
|
116 const uint8x8_t src8_high = vget_high_u8(src8); |
|
117 const uint8x8_t dst8_high = vget_high_u8(dst8); |
|
118 |
|
119 const uint16x8_t src16_low = vmovl_u8(src8_low); |
|
120 const uint16x8_t dst16_low = vmovl_u8(dst8_low); |
|
121 |
|
122 const uint16x8_t src16_high = vmovl_u8(src8_high); |
|
123 const uint16x8_t dst16_high = vmovl_u8(dst8_high); |
|
124 |
|
125 const uint16x8_t result16_low = qvsource_over_u16(src16_low, dst16_low, half, full); |
|
126 const uint16x8_t result16_high = qvsource_over_u16(src16_high, dst16_high, half, full); |
|
127 |
|
128 const uint32x2_t result32_low = vreinterpret_u32_u8(vmovn_u16(result16_low)); |
|
129 const uint32x2_t result32_high = vreinterpret_u32_u8(vmovn_u16(result16_high)); |
|
130 |
|
131 vst1q_u32((uint32_t *)&dst[x], vcombine_u32(result32_low, result32_high)); |
|
132 } |
|
133 } |
|
134 for (; x<w; ++x) { |
|
135 uint s = src[x]; |
|
136 if (s >= 0xff000000) |
|
137 dst[x] = s; |
|
138 else if (s != 0) |
|
139 dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); |
|
140 } |
|
141 dst = (quint32 *)(((uchar *) dst) + dbpl); |
|
142 src = (const quint32 *)(((const uchar *) src) + sbpl); |
|
143 } |
|
144 } else if (const_alpha != 0) { |
229 } else if (const_alpha != 0) { |
145 const_alpha = (const_alpha * 255) >> 8; |
230 const_alpha = (const_alpha * 255) >> 8; |
146 uint16x8_t const_alpha16 = vdupq_n_u16(const_alpha); |
231 uint16x8_t const_alpha16 = vdupq_n_u16(const_alpha); |
147 for (int y = 0; y < h; ++y) { |
232 for (int y = 0; y < h; ++y) { |
148 int x = 0; |
233 int x = 0; |
252 } else { |
337 } else { |
253 qt_blend_rgb32_on_rgb32(destPixels, dbpl, srcPixels, sbpl, w, h, const_alpha); |
338 qt_blend_rgb32_on_rgb32(destPixels, dbpl, srcPixels, sbpl, w, h, const_alpha); |
254 } |
339 } |
255 } |
340 } |
256 |
341 |
|
342 void qt_alphamapblit_quint16_neon(QRasterBuffer *rasterBuffer, |
|
343 int x, int y, quint32 color, |
|
344 const uchar *bitmap, |
|
345 int mapWidth, int mapHeight, int mapStride, |
|
346 const QClipData *) |
|
347 { |
|
348 quint16 *dest = reinterpret_cast<quint16*>(rasterBuffer->scanLine(y)) + x; |
|
349 const int destStride = rasterBuffer->bytesPerLine() / sizeof(quint16); |
|
350 |
|
351 uchar *mask = const_cast<uchar *>(bitmap); |
|
352 |
|
353 pixman_composite_over_n_8_0565_asm_neon(mapWidth, mapHeight, dest, destStride, color, 0, mask, mapStride); |
|
354 } |
|
355 |
|
356 extern "C" void blend_8_pixels_rgb16_on_rgb16_neon(quint16 *dst, const quint16 *src, int const_alpha); |
|
357 |
|
358 template <typename SRC, typename BlendFunc> |
|
359 struct Blend_on_RGB16_SourceAndConstAlpha_Neon { |
|
360 Blend_on_RGB16_SourceAndConstAlpha_Neon(BlendFunc blender, int const_alpha) |
|
361 : m_index(0) |
|
362 , m_blender(blender) |
|
363 , m_const_alpha(const_alpha) |
|
364 { |
|
365 } |
|
366 |
|
367 inline void write(quint16 *dst, quint32 src) |
|
368 { |
|
369 srcBuffer[m_index++] = src; |
|
370 |
|
371 if (m_index == 8) { |
|
372 m_blender(dst - 7, srcBuffer, m_const_alpha); |
|
373 m_index = 0; |
|
374 } |
|
375 } |
|
376 |
|
377 inline void flush(quint16 *dst) |
|
378 { |
|
379 if (m_index > 0) { |
|
380 quint16 dstBuffer[8]; |
|
381 for (int i = 0; i < m_index; ++i) |
|
382 dstBuffer[i] = dst[i - m_index]; |
|
383 |
|
384 m_blender(dstBuffer, srcBuffer, m_const_alpha); |
|
385 |
|
386 for (int i = 0; i < m_index; ++i) |
|
387 dst[i - m_index] = dstBuffer[i]; |
|
388 |
|
389 m_index = 0; |
|
390 } |
|
391 } |
|
392 |
|
393 SRC srcBuffer[8]; |
|
394 |
|
395 int m_index; |
|
396 BlendFunc m_blender; |
|
397 int m_const_alpha; |
|
398 }; |
|
399 |
|
400 template <typename SRC, typename BlendFunc> |
|
401 Blend_on_RGB16_SourceAndConstAlpha_Neon<SRC, BlendFunc> |
|
402 Blend_on_RGB16_SourceAndConstAlpha_Neon_create(BlendFunc blender, int const_alpha) |
|
403 { |
|
404 return Blend_on_RGB16_SourceAndConstAlpha_Neon<SRC, BlendFunc>(blender, const_alpha); |
|
405 } |
|
406 |
|
407 void qt_scale_image_argb32_on_rgb16_neon(uchar *destPixels, int dbpl, |
|
408 const uchar *srcPixels, int sbpl, |
|
409 const QRectF &targetRect, |
|
410 const QRectF &sourceRect, |
|
411 const QRect &clip, |
|
412 int const_alpha) |
|
413 { |
|
414 if (const_alpha == 0) |
|
415 return; |
|
416 |
|
417 qt_scale_image_16bit<quint32>(destPixels, dbpl, srcPixels, sbpl, targetRect, sourceRect, clip, |
|
418 Blend_on_RGB16_SourceAndConstAlpha_Neon_create<quint32>(blend_8_pixels_argb32_on_rgb16_neon, const_alpha)); |
|
419 } |
|
420 |
|
421 void qt_scale_image_rgb16_on_rgb16(uchar *destPixels, int dbpl, |
|
422 const uchar *srcPixels, int sbpl, |
|
423 const QRectF &targetRect, |
|
424 const QRectF &sourceRect, |
|
425 const QRect &clip, |
|
426 int const_alpha); |
|
427 |
|
428 void qt_scale_image_rgb16_on_rgb16_neon(uchar *destPixels, int dbpl, |
|
429 const uchar *srcPixels, int sbpl, |
|
430 const QRectF &targetRect, |
|
431 const QRectF &sourceRect, |
|
432 const QRect &clip, |
|
433 int const_alpha) |
|
434 { |
|
435 if (const_alpha == 0) |
|
436 return; |
|
437 |
|
438 if (const_alpha == 256) { |
|
439 qt_scale_image_rgb16_on_rgb16(destPixels, dbpl, srcPixels, sbpl, targetRect, sourceRect, clip, const_alpha); |
|
440 return; |
|
441 } |
|
442 |
|
443 qt_scale_image_16bit<quint16>(destPixels, dbpl, srcPixels, sbpl, targetRect, sourceRect, clip, |
|
444 Blend_on_RGB16_SourceAndConstAlpha_Neon_create<quint16>(blend_8_pixels_rgb16_on_rgb16_neon, const_alpha)); |
|
445 } |
|
446 |
|
447 extern void qt_transform_image_rgb16_on_rgb16(uchar *destPixels, int dbpl, |
|
448 const uchar *srcPixels, int sbpl, |
|
449 const QRectF &targetRect, |
|
450 const QRectF &sourceRect, |
|
451 const QRect &clip, |
|
452 const QTransform &targetRectTransform, |
|
453 int const_alpha); |
|
454 |
|
455 void qt_transform_image_rgb16_on_rgb16_neon(uchar *destPixels, int dbpl, |
|
456 const uchar *srcPixels, int sbpl, |
|
457 const QRectF &targetRect, |
|
458 const QRectF &sourceRect, |
|
459 const QRect &clip, |
|
460 const QTransform &targetRectTransform, |
|
461 int const_alpha) |
|
462 { |
|
463 if (const_alpha == 0) |
|
464 return; |
|
465 |
|
466 if (const_alpha == 256) { |
|
467 qt_transform_image_rgb16_on_rgb16(destPixels, dbpl, srcPixels, sbpl, targetRect, sourceRect, clip, targetRectTransform, const_alpha); |
|
468 return; |
|
469 } |
|
470 |
|
471 qt_transform_image(reinterpret_cast<quint16 *>(destPixels), dbpl, |
|
472 reinterpret_cast<const quint16 *>(srcPixels), sbpl, targetRect, sourceRect, clip, targetRectTransform, |
|
473 Blend_on_RGB16_SourceAndConstAlpha_Neon_create<quint16>(blend_8_pixels_rgb16_on_rgb16_neon, const_alpha)); |
|
474 } |
|
475 |
|
476 void qt_transform_image_argb32_on_rgb16_neon(uchar *destPixels, int dbpl, |
|
477 const uchar *srcPixels, int sbpl, |
|
478 const QRectF &targetRect, |
|
479 const QRectF &sourceRect, |
|
480 const QRect &clip, |
|
481 const QTransform &targetRectTransform, |
|
482 int const_alpha) |
|
483 { |
|
484 if (const_alpha == 0) |
|
485 return; |
|
486 |
|
487 qt_transform_image(reinterpret_cast<quint16 *>(destPixels), dbpl, |
|
488 reinterpret_cast<const quint32 *>(srcPixels), sbpl, targetRect, sourceRect, clip, targetRectTransform, |
|
489 Blend_on_RGB16_SourceAndConstAlpha_Neon_create<quint32>(blend_8_pixels_argb32_on_rgb16_neon, const_alpha)); |
|
490 } |
|
491 |
|
492 static inline void convert_8_pixels_rgb16_to_argb32(quint32 *dst, const quint16 *src) |
|
493 { |
|
494 asm volatile ( |
|
495 "vld1.16 { d0, d1 }, [%[SRC]]\n\t" |
|
496 |
|
497 /* convert 8 r5g6b5 pixel data from {d0, d1} to planar 8-bit format |
|
498 and put data into d4 - red, d3 - green, d2 - blue */ |
|
499 "vshrn.u16 d4, q0, #8\n\t" |
|
500 "vshrn.u16 d3, q0, #3\n\t" |
|
501 "vsli.u16 q0, q0, #5\n\t" |
|
502 "vsri.u8 d4, d4, #5\n\t" |
|
503 "vsri.u8 d3, d3, #6\n\t" |
|
504 "vshrn.u16 d2, q0, #2\n\t" |
|
505 |
|
506 /* fill d5 - alpha with 0xff */ |
|
507 "mov r2, #255\n\t" |
|
508 "vdup.8 d5, r2\n\t" |
|
509 |
|
510 "vst4.8 { d2, d3, d4, d5 }, [%[DST]]" |
|
511 : : [DST]"r" (dst), [SRC]"r" (src) |
|
512 : "memory", "r2", "d0", "d1", "d2", "d3", "d4", "d5" |
|
513 ); |
|
514 } |
|
515 |
|
516 uint * QT_FASTCALL qt_destFetchRGB16_neon(uint *buffer, QRasterBuffer *rasterBuffer, int x, int y, int length) |
|
517 { |
|
518 const ushort *data = (const ushort *)rasterBuffer->scanLine(y) + x; |
|
519 |
|
520 int i = 0; |
|
521 for (; i < length - 7; i += 8) |
|
522 convert_8_pixels_rgb16_to_argb32(&buffer[i], &data[i]); |
|
523 |
|
524 if (i < length) { |
|
525 quint16 srcBuffer[8]; |
|
526 quint32 dstBuffer[8]; |
|
527 |
|
528 int tail = length - i; |
|
529 for (int j = 0; j < tail; ++j) |
|
530 srcBuffer[j] = data[i + j]; |
|
531 |
|
532 convert_8_pixels_rgb16_to_argb32(dstBuffer, srcBuffer); |
|
533 |
|
534 for (int j = 0; j < tail; ++j) |
|
535 buffer[i + j] = dstBuffer[j]; |
|
536 } |
|
537 |
|
538 return buffer; |
|
539 } |
|
540 |
|
541 static inline void convert_8_pixels_argb32_to_rgb16(quint16 *dst, const quint32 *src) |
|
542 { |
|
543 asm volatile ( |
|
544 "vld4.8 { d0, d1, d2, d3 }, [%[SRC]]\n\t" |
|
545 |
|
546 /* convert to r5g6b5 and store it into {d28, d29} */ |
|
547 "vshll.u8 q14, d2, #8\n\t" |
|
548 "vshll.u8 q8, d1, #8\n\t" |
|
549 "vshll.u8 q9, d0, #8\n\t" |
|
550 "vsri.u16 q14, q8, #5\n\t" |
|
551 "vsri.u16 q14, q9, #11\n\t" |
|
552 |
|
553 "vst1.16 { d28, d29 }, [%[DST]]" |
|
554 : : [DST]"r" (dst), [SRC]"r" (src) |
|
555 : "memory", "d0", "d1", "d2", "d3", "d16", "d17", "d18", "d19", "d28", "d29" |
|
556 ); |
|
557 } |
|
558 |
|
559 void QT_FASTCALL qt_destStoreRGB16_neon(QRasterBuffer *rasterBuffer, int x, int y, const uint *buffer, int length) |
|
560 { |
|
561 quint16 *data = (quint16*)rasterBuffer->scanLine(y) + x; |
|
562 |
|
563 int i = 0; |
|
564 for (; i < length - 7; i += 8) |
|
565 convert_8_pixels_argb32_to_rgb16(&data[i], &buffer[i]); |
|
566 |
|
567 if (i < length) { |
|
568 quint32 srcBuffer[8]; |
|
569 quint16 dstBuffer[8]; |
|
570 |
|
571 int tail = length - i; |
|
572 for (int j = 0; j < tail; ++j) |
|
573 srcBuffer[j] = buffer[i + j]; |
|
574 |
|
575 convert_8_pixels_argb32_to_rgb16(dstBuffer, srcBuffer); |
|
576 |
|
577 for (int j = 0; j < tail; ++j) |
|
578 data[i + j] = dstBuffer[j]; |
|
579 } |
|
580 } |
|
581 |
257 QT_END_NAMESPACE |
582 QT_END_NAMESPACE |
258 |
583 |
259 #endif // QT_HAVE_NEON |
584 #endif // QT_HAVE_NEON |
260 |
585 |