src/gui/painting/qdrawhelper_neon_asm.S
author Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
Tue, 06 Jul 2010 15:10:48 +0300
changeset 30 5dc02b23752f
child 33 3e2da88830cd
permissions -rw-r--r--
Revision: 201025 Kit: 2010127

/****************************************************************************
**
** Copyright (C) 2010 Nokia Corporation and/or its subsidiary(-ies).
** All rights reserved.
** Contact: Nokia Corporation (qt-info@nokia.com)
**
** This file is part of the QtGui module of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:LGPL$
** No Commercial Usage
** This file contains pre-release code and may not be distributed.
** You may use this file in accordance with the terms and conditions
** contained in the Technology Preview License Agreement accompanying
** this package.
**
** GNU Lesser General Public License Usage
** Alternatively, this file may be used under the terms of the GNU Lesser
** General Public License version 2.1 as published by the Free Software
** Foundation and appearing in the file LICENSE.LGPL included in the
** packaging of this file.  Please review the following information to
** ensure the GNU Lesser General Public License version 2.1 requirements
** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
**
** In addition, as a special exception, Nokia gives you certain additional
** rights.  These rights are described in the Nokia Qt LGPL Exception
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
**
** If you have questions regarding the use of this file, please contact
** Nokia at qt-info@nokia.com.
**
**
**
**
**
**
**
**
** $QT_END_LICENSE$
**
****************************************************************************/

/* Prevent the stack from becoming executable for no reason... */
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif

.text
.fpu neon
.arch armv7a
.altmacro

/* void blend_8_pixels_argb32_on_rgb16_neon(quint16 *dst, const quint32 *src, int const_alpha) */

    .func blend_8_pixels_argb32_on_rgb16_neon
    .global blend_8_pixels_argb32_on_rgb16_neon
    /* For ELF format also set function visibility to hidden */
#ifdef __ELF__
    .hidden blend_8_pixels_argb32_on_rgb16_neon
    .type blend_8_pixels_argb32_on_rgb16_neon, %function
#endif
blend_8_pixels_argb32_on_rgb16_neon:
    vld4.8      { d0, d1, d2, d3 }, [r1]
    vld1.16     { d4, d5 }, [r0]

    cmp         r2, #256
    beq         .blend_32_inner

    vdup.8      d6, r2

    /* multiply by const_alpha */
    vmull.u8    q8,   d6, d0
    vmull.u8    q9,   d6, d1
    vmull.u8    q10,  d6, d2
    vmull.u8    q11,  d6, d3

    vshrn.u16   d0,  q8, #8
    vshrn.u16   d1,  q9, #8
    vshrn.u16   d2, q10, #8
    vshrn.u16   d3, q11, #8

.blend_32_inner:
    /* convert 8 r5g6b5 pixel data from {d4, d5} to planar 8-bit format
       and put data into d6 - red, d7 - green, d30 - blue */
    vshrn.u16   d6, q2, #8
    vshrn.u16   d7, q2, #3
    vsli.u16    q2, q2, #5
    vsri.u8     d6, d6, #5
    vmvn.8      d3, d3
    vsri.u8     d7, d7, #6
    vshrn.u16   d30, q2, #2

    pld [r0, #128]

    /* now do alpha blending, storing results in 8-bit planar format
       into d16 - red, d19 - green, d18 - blue */
    vmull.u8    q10, d3, d6
    vmull.u8    q11, d3, d7
    vmull.u8    q12, d3, d30
    vrshr.u16   q13, q10, #8
    vrshr.u16   q3,  q11, #8
    vrshr.u16   q15, q12, #8
    vraddhn.u16 d20, q10, q13
    vraddhn.u16 d23, q11, q3
    vraddhn.u16 d22, q12, q15
    vqadd.u8    d16, d2, d20
    vqadd.u8    q9, q0, q11
    /* convert the result to r5g6b5 and store it into {d28, d29} */
    vshll.u8    q14, d16, #8
    vshll.u8    q8, d19, #8
    vshll.u8    q9, d18, #8
    vsri.u16    q14, q8, #5
    vsri.u16    q14, q9, #11

    vst1.16     { d28, d29 }, [r0]

    bx          lr

    .endfunc

/* void blend_8_pixels_rgb16_on_rgb16_neon(quint16 *dst, const quint16 *src, int const_alpha) */

    .func blend_8_pixels_rgb16_on_rgb16_neon
    .global blend_8_pixels_rgb16_on_rgb16_neon
    /* For ELF format also set function visibility to hidden */
#ifdef __ELF__
    .hidden blend_8_pixels_rgb16_on_rgb16_neon
    .type blend_8_pixels_rgb16_on_rgb16_neon, %function
#endif
blend_8_pixels_rgb16_on_rgb16_neon:
    vld1.16     { d0, d1 }, [r0]
    vld1.16     { d2, d3 }, [r1]

    rsb         r3, r2, #256
    vdup.8      d4, r2
    vdup.8      d5, r3

    /* convert 8 r5g6b5 pixel data from {d0, d1} to planar 8-bit format
       and put data into d6 - red, d7 - green, d30 - blue */
    vshrn.u16   d6,  q0,  #8
    vshrn.u16   d7,  q0,  #3
    vsli.u16    q0,  q0,  #5
    vsri.u8     d6,  d6,  #5
    vsri.u8     d7,  d7,  #6
    vshrn.u16   d30, q0,  #2

    /* same from {d2, d3} into {d26, d27, d28} */
    vshrn.u16   d26, q1,  #8
    vshrn.u16   d27, q1,  #3
    vsli.u16    q1,  q1,  #5
    vsri.u8     d26, d26, #5
    vsri.u8     d27, d27, #6
    vshrn.u16   d28, q1,  #2

    /* multiply dst by inv const_alpha */
    vmull.u8    q10, d5,  d6
    vmull.u8    q11, d5,  d7
    vmull.u8    q12, d5,  d30

    vshrn.u16   d6,  q10, #8
    vshrn.u16   d7,  q11, #8
    vshrn.u16   d30, q12, #8

    /* multiply src by const_alpha */
    vmull.u8    q10,  d4, d26
    vmull.u8    q11,  d4, d27
    vmull.u8    q12,  d4, d28

    vshrn.u16   d26, q10, #8
    vshrn.u16   d27, q11, #8
    vshrn.u16   d28, q12, #8

    /* preload dst + 128 */
    pld [r0, #128]

    /* add components, storing results in 8-bit planar format
       into d16 - red, d19 - green, d18 - blue */
    vadd.u8     d16, d26, d6
    vadd.u8     d19, d27, d7
    vadd.u8     d18, d28, d30

    /* convert the result to r5g6b5 and store it into {d28, d29} */
    vshll.u8    q14, d16, #8
    vshll.u8    q8,  d19, #8
    vshll.u8    q9,  d18, #8
    vsri.u16    q14,  q8, #5
    vsri.u16    q14,  q9, #11

    vst1.16     { d28, d29 }, [r0]

    bx          lr

    .endfunc