/* Metrowerks x86 Runtime Support Library 
 * Copyright  1995-2003 Metrowerks Corporation.  All rights reserved.
 *
 * $Date: 2005/02/25 15:03:06 $
 * $Revision: 1.1.2.1 $
 */

/*
	TODO:
--	check the Intel compiler docs so we don't miss any macros
--	allow the old (instruction-named) intrinsics via macros
--	_m_from_int?
*/

/*
 *	MMX intrinsics API support.  
 *
 *	Culled from Intel architecture docs, MSDN, and other web references.
 *
 *	Note, in Codewarrior, inline asm is as good as intrinsics
 *	so we implement the majority of functions as inlines.
 */
 
#ifndef _MMINTRIN_H
#define _MMINTRIN_H

#include "x86_prefix.h"

#if __cplusplus
extern "C" {
#endif

/*
	__m64: the 64-bit, 8-byte-aligned value type taken and returned by the
	routines in this header.  The union members overlay the same 8 bytes
	as one 64-bit integer, two 32-bit, four 16-bit or eight 8-bit integers
	(signed and unsigned variants), or two floats.

	Direct use of the members is HIGHLY DISCOURAGED since it forces
	objects to memory (otherwise they will be allocated to MM registers)

	NOTE(review): __declspec(intrin_type) is compiler-specific; presumably
	it is what lets the compiler keep __m64 objects in MM registers --
	confirm against the CodeWarrior documentation.
*/
typedef __declspec(intrin_type) union  __declspec(align(8)) __m64
{
    unsigned __int64    m64_u64;
    float               m64_f32[2];
    __int8              m64_i8[8];
    __int16             m64_i16[4];
    __int32             m64_i32[2];    
    __int64             m64_i64;
    unsigned __int8     m64_u8[8];
    unsigned __int16    m64_u16[4];
    unsigned __int32    m64_u32[2];
} __m64;

/*
 *	NOTE(review): presumably tells the compiler that the inline asm in the
 *	functions below need not be treated as volatile, so it is free to
 *	optimise it; paired with "#pragma volatile_asm reset" near the end of
 *	this header -- confirm against the CodeWarrior pragma reference.
 */
#pragma volatile_asm off

// actual intrinsics
// (declared only -- no definitions appear in this header)

/*
 *	Builds an __m64 from two 32-bit ints.  The composites in this header
 *	pass the more significant pair of elements as i and the less
 *	significant pair as j, so presumably i is the high dword and j the
 *	low dword -- TODO confirm.
 */
extern __m64 _mm_set_pi32 (int i, int j);

/*
 *	NOTE(review): presumably replicate the argument into every 32-, 16- or
 *	8-bit element of the result, as the Intel intrinsics of the same
 *	names do -- TODO confirm.
 */
extern __m64 _mm_set1_pi32 (int i);
extern __m64 _mm_set1_pi16 (short w);
extern __m64 _mm_set1_pi8 (char b);

// "intrinsics"

/*
 *	Executes EMMS, which marks the MMX/x87 register state as empty.
 *	Use after a run of MMX code and before any x87 floating-point code.
 *
 *	Declared with (void) so that C callers get a real prototype, matching
 *	_mm_setzero_si64(void) elsewhere in this header.
 */
__inline void _mm_empty(void)
{
	__asm EMMS;
}

/*
 *	Byte-masked store (MASKMOVQ, an SSE extension to MMX).
 *
 *	a - the eight data bytes
 *	b - mask; a byte of a is written only if the matching byte of b has
 *	    its top bit set
 *	c - destination address
 *
 *	Returns nothing; the only effect is the store through c.
 */
__inline void _mm_maskmove_si64(register __m64 a, register __m64 b, register char* c)
{
	/* MASKMOVQ takes its destination address implicitly from EDI */
	__asm mov edi, c
	__asm MASKMOVQ a,b;
}

/*
 *	Moves the 32-bit integer a into an __m64 with MOVD.  MOVD to an MM
 *	register places the value in the low dword and zeroes the high dword.
 */
__inline __m64 _mm_cvtsi32_si64(register int a)
{
	register __m64 r;
	__asm MOVD r,a;
	return r;
}

/*
 *	Returns the low 32 bits of a as an int, using MOVD from the MM
 *	operand to an integer destination.  The high dword of a is ignored.
 */
__inline int _mm_cvtsi64_si32(register __m64 a)
{
	register int r;
	__asm MOVD r,a;
	return r;
}

/*
 *	PACKSSWB: narrows the four signed words of a and the four signed
 *	words of b to eight signed bytes with signed saturation.  The bytes
 *	from a fill the low half of the result, those from b the high half.
 */
__inline __m64 _mm_packs_pi16(register __m64 a, register __m64 b)
{
	__asm PACKSSWB a,b;
	return a;
}

/*
 *	PACKSSDW: narrows the two signed dwords of a and the two signed
 *	dwords of b to four signed words with signed saturation.  The words
 *	from a fill the low half of the result, those from b the high half.
 */
__inline __m64 _mm_packs_pi32(register __m64 a, register __m64 b)
{
	__asm PACKSSDW a,b;
	return a;
}

/*
 *	PACKUSWB: narrows the four signed words of a and the four signed
 *	words of b to eight unsigned bytes with unsigned saturation (values
 *	clamp to 0..255).  a supplies the low half of the result, b the high.
 */
__inline __m64 _mm_packus_pi16(register __m64 a, register __m64 b)
{
	__asm PACKUSWB a,b;
	return a;
}

/*
 *	PADDB: adds the eight bytes of b to the eight bytes of a, element by
 *	element, with wraparound.  Returns the sums.
 */
__inline __m64 _mm_add_pi8(register __m64 a, register __m64 b)
{
	__asm PADDB a,b;
	return a;
}

/*
 *	PADDW: adds the four words of b to the four words of a, element by
 *	element, with wraparound.  Returns the sums.
 */
__inline __m64 _mm_add_pi16(register __m64 a, register __m64 b)
{
	__asm PADDW a,b;
	return a;
}

/*
 *	PADDD: adds the two dwords of b to the two dwords of a, element by
 *	element, with wraparound.  Returns the sums.
 */
__inline __m64 _mm_add_pi32(register __m64 a, register __m64 b)
{
	__asm PADDD a,b;
	return a;
}

/*
 *	PADDQ: adds b to a as single 64-bit integers, with wraparound.
 *	The MM-register form of PADDQ was introduced with SSE2.
 */
__inline __m64 _mm_add_pi64(register __m64 a, register __m64 b)
{
	__asm PADDQ a,b;
	return a;
}

/*
 *	PADDSB: adds the eight signed bytes of b to those of a with signed
 *	saturation (results clamp to -128..127).
 */
__inline __m64 _mm_adds_pi8(register __m64 a, register __m64 b)
{
	__asm PADDSB a,b;
	return a;
}

/*
 *	PADDSW: adds the four signed words of b to those of a with signed
 *	saturation (results clamp to -32768..32767).
 */
__inline __m64 _mm_adds_pi16(register __m64 a, register __m64 b)
{
	__asm PADDSW a,b;
	return a;
}

/*
 *	PADDUSB: adds the eight unsigned bytes of b to those of a with
 *	unsigned saturation (results clamp to 255).
 *
 *	NOTE(review): Intel's headers spell this intrinsic _mm_adds_pu8; this
 *	header only provides the _mm_add_pu8 spelling -- confirm whether an
 *	alias is wanted.
 */
__inline __m64 _mm_add_pu8(register __m64 a, register __m64 b)
{
	__asm PADDUSB a,b;
	return a;
}

/*
 *	PADDUSW: adds the four unsigned words of b to those of a with
 *	unsigned saturation (results clamp to 65535).
 *
 *	NOTE(review): Intel's headers spell this intrinsic _mm_adds_pu16; this
 *	header only provides the _mm_add_pu16 spelling -- confirm whether an
 *	alias is wanted.
 */
__inline __m64 _mm_add_pu16(register __m64 a, register __m64 b)
{
	__asm PADDUSW a,b;
	return a;
}

/*
 *	PAND: returns the bitwise AND of the 64 bits of a and b.
 */
__inline __m64 _mm_and_si64(register __m64 a, register __m64 b)
{
	__asm PAND a,b;
	return a;
}

/*
 *	PANDN: returns (~a) & b.  Note that it is the FIRST argument that is
 *	complemented, since PANDN inverts its destination operand.
 */
__inline __m64 _mm_andnot_si64(register __m64 a, register __m64 b)
{
	__asm PANDN a,b;
	return a;
}

/*
 *	PAVGB (SSE extension to MMX): rounded average of the eight unsigned
 *	bytes of a and b, i.e. (a + b + 1) >> 1 per element.
 */
__inline __m64 _mm_avg_pu8(register __m64 a, register __m64 b)
{
	__asm PAVGB a,b;
	return a;
}

/*
 *	PAVGW (SSE extension to MMX): rounded average of the four unsigned
 *	words of a and b, i.e. (a + b + 1) >> 1 per element.
 */
__inline __m64 _mm_avg_pu16(register __m64 a, register __m64 b)
{
	__asm PAVGW a,b;
	return a;
}

/*
 *	PCMPEQB: compares the eight bytes of a and b for equality.  Each
 *	result byte is 0xFF where the elements are equal and 0 otherwise.
 */
__inline __m64 _mm_cmpeq_pi8(register __m64 a, register __m64 b)
{
	__asm PCMPEQB a,b;
	return a;
}

/*
 *	PCMPEQW: compares the four words of a and b for equality.  Each
 *	result word is 0xFFFF where the elements are equal and 0 otherwise.
 */
__inline __m64 _mm_cmpeq_pi16(register __m64 a, register __m64 b)
{
	__asm PCMPEQW a,b;
	return a;
}

/*
 *	PCMPEQD: compares the two dwords of a and b for equality.  Each
 *	result dword is all ones where the elements are equal, 0 otherwise.
 */
__inline __m64 _mm_cmpeq_pi32(register __m64 a, register __m64 b)
{
	__asm PCMPEQD a,b;
	return a;
}

/*
 *	PCMPGTB: signed compare of the eight bytes.  Each result byte is
 *	0xFF where the element of a is greater than that of b, 0 otherwise.
 */
__inline __m64 _mm_cmpgt_pi8(register __m64 a, register __m64 b)
{
	__asm PCMPGTB a,b;
	return a;
}

/*
 *	PCMPGTW: signed compare of the four words.  Each result word is
 *	0xFFFF where the element of a is greater than that of b, 0 otherwise.
 */
__inline __m64 _mm_cmpgt_pi16(register __m64 a, register __m64 b)
{
	__asm PCMPGTW a,b;
	return a;
}

/*
 *	PCMPGTD: signed compare of the two dwords.  Each result dword is all
 *	ones where the element of a is greater than that of b, 0 otherwise.
 */
__inline __m64 _mm_cmpgt_pi32(register __m64 a, register __m64 b)
{
	__asm PCMPGTD a,b;
	return a;
}

/*
 *	PEXTRW (SSE extension to MMX): returns the word of a selected by b,
 *	zero-extended to an int.
 *
 *	b is used as the instruction's immediate operand, so presumably it
 *	must be a compile-time constant at the call site -- confirm how the
 *	compiler treats a non-constant b.
 */
__inline int _mm_extract_pi16(register __m64 a, register int b)
{
	register int r;
	__asm PEXTRW r,a,b;
	return r;
}

/*
 *	PINSRW (SSE extension to MMX): returns a with the word at position c
 *	replaced by the low 16 bits of b.  The other words are unchanged.
 *
 *	c is used as the instruction's immediate operand, so presumably it
 *	must be a compile-time constant at the call site -- confirm how the
 *	compiler treats a non-constant c.
 */
__inline __m64 _mm_insert_pi16(register __m64 a, register int b, register int c)
{
	__asm PINSRW a,b,c;
	return a;
}

/*
 *	PMADDWD: multiplies the four signed words of a by those of b, then
 *	adds adjacent pairs of the 32-bit products, giving two signed dwords.
 */
__inline __m64 _mm_madd_pi16(register __m64 a, register __m64 b)
{
	__asm PMADDWD a,b;
	return a;
}

/*
 *	PMAXSW (SSE extension to MMX): per-element maximum of the four
 *	signed words of a and b.
 */
__inline __m64 _mm_max_pi16(register __m64 a, register __m64 b)
{
	__asm PMAXSW a,b;
	return a;
}

/*
 *	PMAXUB (SSE extension to MMX): per-element maximum of the eight
 *	unsigned bytes of a and b.
 */
__inline __m64 _mm_max_pu8(register __m64 a, register __m64 b)
{
	__asm PMAXUB a,b;
	return a;
}

/*
 *	PMINSW (SSE extension to MMX): per-element minimum of the four
 *	signed words of a and b.
 */
__inline __m64 _mm_min_pi16(register __m64 a, register __m64 b)
{
	__asm PMINSW a,b;
	return a;
}

/*
 *	PMINUB (SSE extension to MMX): per-element minimum of the eight
 *	unsigned bytes of a and b.
 */
__inline __m64 _mm_min_pu8(register __m64 a, register __m64 b)
{
	__asm PMINUB a,b;
	return a;
}

/*
 *	PMOVMSKB (SSE extension to MMX): gathers the top bit of each of the
 *	eight bytes of a into the low 8 bits of the returned int (bit i comes
 *	from byte i); the remaining bits are zero.
 */
__inline int _mm_movemask_pi8(register __m64 a)
{
	register int r;
	__asm PMOVMSKB r,a;
	return r;
}

/*
 *	PMULHUW (SSE extension to MMX): multiplies the four unsigned words of
 *	a by those of b and returns the high 16 bits of each 32-bit product.
 */
__inline __m64 _mm_mulhi_pu16(register __m64 a, register __m64 b)
{
	__asm PMULHUW a,b;
	return a;
}

/*
 *	PMULHW: multiplies the four signed words of a by those of b and
 *	returns the high 16 bits of each 32-bit product.
 */
__inline __m64 _mm_mulhi_pi16(register __m64 a, register __m64 b)
{
	__asm PMULHW a,b;
	return a;
}

/*
 *	PMULLW: multiplies the four words of a by those of b and returns the
 *	low 16 bits of each 32-bit product.
 */
__inline __m64 _mm_mullo_pi16(register __m64 a, register __m64 b)
{
	__asm PMULLW a,b;
	return a;
}

/*
 *	PMULUDQ: multiplies the low unsigned dwords of a and b and returns
 *	the full 64-bit unsigned product.  The high dwords of the inputs are
 *	ignored.  The MM-register form of PMULUDQ was introduced with SSE2.
 */
__inline __m64 _mm_mul_su32(register __m64 a, register __m64 b)
{
	__asm PMULUDQ a,b;
	return a;
}

/*
 *	POR: returns the bitwise OR of the 64 bits of a and b.
 */
__inline __m64 _mm_or_si64(register __m64 a, register __m64 b)
{
	__asm POR a,b;
	return a;
}

/*
 *	PSADBW (SSE extension to MMX): sums the absolute differences of the
 *	eight unsigned bytes of a and b.  The sum is returned in the low word
 *	of the result; the upper three words are zero.
 */
__inline __m64 _mm_sad_pu8(register __m64 a, register __m64 b)
{
	__asm PSADBW a,b;
	return a;
}

__inline __m64 _mm_shuffle_pi16(register __m64 a, register int b)
{
	__asm PSHUFW a,b;
	return a;
}

/*
 *	PSLLW: shifts each of the four words of a left, shifting in zeros.
 *	The shift count is the whole 64-bit value of b; a count above 15
 *	gives an all-zero result.
 */
__inline __m64 _mm_sll_pi16(register __m64 a, register __m64 b)
{
	__asm PSLLW a,b;
	return a;
}

/*
 *	PSLLW with an integer count: shifts each of the four words of a left
 *	by b bits, shifting in zeros.  A count above 15 gives all zeros.
 */
__inline __m64 _mm_slli_pi16(register __m64 a, register int b)
{
	/* widen to 64 bits: the count operand of the shift is a 64-bit
	   quantity (a negative b therefore becomes a huge count) */
	long long b0 = b;
	__asm PSLLW a,b0;
	return a;
}

/*
 *	PSLLD: shifts each of the two dwords of a left, shifting in zeros.
 *	The shift count is the whole 64-bit value of b; a count above 31
 *	gives an all-zero result.
 */
__inline __m64 _mm_sll_pi32(register __m64 a, register __m64 b)
{
	__asm PSLLD a,b;
	return a;
}

/*
 *	PSLLD with an integer count: shifts each of the two dwords of a left
 *	by b bits, shifting in zeros.  A count above 31 gives all zeros.
 */
__inline __m64 _mm_slli_pi32(register __m64 a, register int b)
{
	/* widen to 64 bits: the count operand of the shift is a 64-bit
	   quantity (a negative b therefore becomes a huge count) */
	long long b0 = b;
	__asm PSLLD a,b0;
	return a;
}

/*
 *	PSLLQ: shifts a left as a single 64-bit value, shifting in zeros.
 *	The shift count is the whole 64-bit value of b; a count above 63
 *	gives an all-zero result.
 */
__inline __m64 _mm_sll_si64(register __m64 a, register __m64 b)
{
	__asm PSLLQ a,b;
	return a;
}

/*
 *	PSLLQ with an integer count: shifts a left by b bits as a single
 *	64-bit value, shifting in zeros.  A count above 63 gives all zeros.
 */
__inline __m64 _mm_slli_si64(register __m64 a, register int b)
{
	/* widen to 64 bits: the count operand of the shift is a 64-bit
	   quantity (a negative b therefore becomes a huge count) */
	long long b0 = b;
	__asm PSLLQ a,b0;
	return a;
}

/*
 *	PSRAW: arithmetic right shift of each of the four words of a (the
 *	sign bit is replicated).  The count is the whole 64-bit value of b;
 *	a count above 15 fills each word with its sign bit.
 */
__inline __m64 _mm_sra_pi16(register __m64 a, register __m64 b)
{
	__asm PSRAW a,b;
	return a;
}

/*
 *	PSRAW with an integer count: arithmetic right shift of each of the
 *	four words of a by b bits (the sign bit is replicated).  A count
 *	above 15 fills each word with its sign bit.
 */
__inline __m64 _mm_srai_pi16(register __m64 a, register int b)
{
	/* widen to 64 bits: the count operand of the shift is a 64-bit
	   quantity (a negative b therefore becomes a huge count) */
	long long b0 = b;
	__asm PSRAW a,b0;
	return a;
}

/*
 *	PSRAD: arithmetic right shift of each of the two dwords of a (the
 *	sign bit is replicated).  The count is the whole 64-bit value of b;
 *	a count above 31 fills each dword with its sign bit.
 */
__inline __m64 _mm_sra_pi32(register __m64 a, register __m64 b)
{
	__asm PSRAD a,b;
	return a;
}

/*
 *	PSRAD with an integer count: arithmetic right shift of each of the
 *	two dwords of a by b bits (the sign bit is replicated).  A count
 *	above 31 fills each dword with its sign bit.
 */
__inline __m64 _mm_srai_pi32(register __m64 a, register int b)
{
	/* widen to 64 bits: the count operand of the shift is a 64-bit
	   quantity (a negative b therefore becomes a huge count) */
	long long b0 = b;
	__asm PSRAD a,b0;
	return a;
}

/*
 *	PSRLW: logical right shift of each of the four words of a, shifting
 *	in zeros.  The count is the whole 64-bit value of b; a count above
 *	15 gives an all-zero result.
 */
__inline __m64 _mm_srl_pi16(register __m64 a, register __m64 b)
{
	__asm PSRLW a,b;
	return a;
}

/*
 *	PSRLW with an integer count: logical right shift of each of the four
 *	words of a by b bits, shifting in zeros.  A count above 15 gives all
 *	zeros.
 */
__inline __m64 _mm_srli_pi16(register __m64 a, register int b)
{
	/* widen to 64 bits: the count operand of the shift is a 64-bit
	   quantity (a negative b therefore becomes a huge count) */
	long long b0 = b;
	__asm PSRLW a,b0;
	return a;
}

/*
 *	PSRLD: logical right shift of each of the two dwords of a, shifting
 *	in zeros.  The count is the whole 64-bit value of b; a count above
 *	31 gives an all-zero result.
 */
__inline __m64 _mm_srl_pi32(register __m64 a, register __m64 b)
{
	__asm PSRLD a,b;
	return a;
}

/*
 *	PSRLD with an integer count: logical right shift of each of the two
 *	dwords of a by b bits, shifting in zeros.  A count above 31 gives all
 *	zeros.
 */
__inline __m64 _mm_srli_pi32(register __m64 a, register int b)
{
	/* widen to 64 bits: the count operand of the shift is a 64-bit
	   quantity (a negative b therefore becomes a huge count) */
	long long b0 = b;
	__asm PSRLD a,b0;
	return a;
}

/*
 *	PSRLQ: logical right shift of a as a single 64-bit value, shifting in
 *	zeros.  The count is the whole 64-bit value of b; a count above 63
 *	gives an all-zero result.
 */
__inline __m64 _mm_srl_si64(register __m64 a, register __m64 b)
{
	__asm PSRLQ a,b;
	return a;
}

/*
 *	PSRLQ with an integer count: logical right shift of a by b bits as a
 *	single 64-bit value, shifting in zeros.  A count above 63 gives all
 *	zeros.
 */
__inline __m64 _mm_srli_si64(register __m64 a, register int b)
{
	/* widen to 64 bits: the count operand of the shift is a 64-bit
	   quantity (a negative b therefore becomes a huge count) */
	long long b0 = b;
	__asm PSRLQ a,b0;
	return a;
}

/*
 *	PSUBB: subtracts the eight bytes of b from those of a (a - b),
 *	element by element, with wraparound.
 */
__inline __m64 _mm_sub_pi8(register __m64 a, register __m64 b)
{
	__asm PSUBB a,b;
	return a;
}

/*
 *	PSUBW: subtracts the four words of b from those of a (a - b),
 *	element by element, with wraparound.
 */
__inline __m64 _mm_sub_pi16(register __m64 a, register __m64 b)
{
	__asm PSUBW a,b;
	return a;
}

/*
 *	PSUBD: subtracts the two dwords of b from those of a (a - b),
 *	element by element, with wraparound.
 */
__inline __m64 _mm_sub_pi32(register __m64 a, register __m64 b)
{
	__asm PSUBD a,b;
	return a;
}

/*
 *	PSUBQ: subtracts b from a (a - b) as single 64-bit integers, with
 *	wraparound.  The MM-register form of PSUBQ was introduced with SSE2.
 */
__inline __m64 _mm_sub_pi64(register __m64 a, register __m64 b)
{
	__asm PSUBQ a,b;
	return a;
}

/*
 *	PSUBSB: subtracts the eight signed bytes of b from those of a (a - b)
 *	with signed saturation (results clamp to -128..127).
 */
__inline __m64 _mm_subs_pi8(register __m64 a, register __m64 b)
{
	__asm PSUBSB a,b;
	return a;
}

/*
 *	PSUBSW: subtracts the four signed words of b from those of a (a - b)
 *	with signed saturation (results clamp to -32768..32767).
 */
__inline __m64 _mm_subs_pi16(register __m64 a, register __m64 b)
{
	__asm PSUBSW a,b;
	return a;
}

/*
 *	PSUBUSB: subtracts the eight unsigned bytes of b from those of a
 *	(a - b) with unsigned saturation (results clamp to 0).
 *
 *	NOTE(review): Intel's headers spell this intrinsic _mm_subs_pu8; this
 *	header only provides the _mm_sub_pu8 spelling -- confirm whether an
 *	alias is wanted.
 */
__inline __m64 _mm_sub_pu8(register __m64 a, register __m64 b)
{
	__asm PSUBUSB a,b;
	return a;
}

/*
 *	PSUBUSW: subtracts the four unsigned words of b from those of a
 *	(a - b) with unsigned saturation (results clamp to 0).
 *
 *	NOTE(review): Intel's headers spell this intrinsic _mm_subs_pu16; this
 *	header only provides the _mm_sub_pu16 spelling -- confirm whether an
 *	alias is wanted.
 */
__inline __m64 _mm_sub_pu16(register __m64 a, register __m64 b)
{
	__asm PSUBUSW a,b;
	return a;
}

/*
 *	PUNPCKHBW: interleaves the four high-order bytes of a with the four
 *	high-order bytes of b.  In each pair the byte from a takes the lower
 *	position and the byte from b the higher one.
 */
__inline __m64 _mm_unpackhi_pi8(register __m64 a, register __m64 b)
{
	__asm PUNPCKHBW a,b;
	return a;
}

/*
 *	PUNPCKHWD: interleaves the two high-order words of a with the two
 *	high-order words of b.  In each pair the word from a takes the lower
 *	position and the word from b the higher one.
 */
__inline __m64 _mm_unpackhi_pi16(register __m64 a, register __m64 b)
{
	__asm PUNPCKHWD a,b;
	return a;
}

/*
 *	PUNPCKHDQ: returns the high dword of a in the low half of the result
 *	and the high dword of b in the high half.
 */
__inline __m64 _mm_unpackhi_pi32(register __m64 a, register __m64 b)
{
	__asm PUNPCKHDQ a,b;
	return a;
}

/*
 *	PUNPCKLBW: interleaves the four low-order bytes of a with the four
 *	low-order bytes of b.  In each pair the byte from a takes the lower
 *	position and the byte from b the higher one.
 */
__inline __m64 _mm_unpacklo_pi8(register __m64 a, register __m64 b)
{
	__asm PUNPCKLBW a,b;
	return a;
}

/*
 *	PUNPCKLWD: interleaves the two low-order words of a with the two
 *	low-order words of b.  In each pair the word from a takes the lower
 *	position and the word from b the higher one.
 */
__inline __m64 _mm_unpacklo_pi16(register __m64 a, register __m64 b)
{
	__asm PUNPCKLWD a,b;
	return a;
}

/*
 *	PUNPCKLDQ: returns the low dword of a in the low half of the result
 *	and the low dword of b in the high half.
 */
__inline __m64 _mm_unpacklo_pi32(register __m64 a, register __m64 b)
{
	__asm PUNPCKLDQ a,b;
	return a;
}

/*
 *	PXOR: returns the bitwise exclusive OR of the 64 bits of a and b.
 */
__inline __m64 _mm_xor_si64(register __m64 a, register __m64 b)
{
	__asm PXOR a,b;
	return a;
}

#if 0
#pragma mark -
#endif

/*	Composites */

/*
 *	Returns an __m64 with all 64 bits clear.
 */
__inline __m64 _mm_setzero_si64(void)
{
	register __m64 r;
	/* XOR of a register with itself is zero whatever it held before,
	   so r does not need to be initialised first */
	__asm pxor r,r
	return r;
}

/*
 *	Builds an __m64 from four 16-bit values.  w3 and w2 are combined into
 *	the first argument of _mm_set_pi32 (w3 in the upper 16 bits), w1 and
 *	w0 into the second (w1 in the upper 16 bits).
 *
 *	Each value is reduced to its low 16 bits before merging.  A short
 *	promotes to int with sign extension, so OR-ing in an unmasked
 *	negative w2 or w0 would overwrite w3 or w1 with ones.  The shifts are
 *	done on unsigned values so a negative w3 or w1 is never left-shifted
 *	as a signed int.
 */
__inline __m64 _mm_set_pi16 (short w3, short w2, short w1, short w0)
{
	unsigned int hi = ((unsigned int)(unsigned short)w3 << 16) | (unsigned short)w2;
	unsigned int lo = ((unsigned int)(unsigned short)w1 << 16) | (unsigned short)w0;

	return _mm_set_pi32((int)hi, (int)lo);
}

/*
 *	Builds an __m64 from eight 8-bit values.  b7..b4 form the first
 *	argument of _mm_set_pi32 (b7 in the top byte), b3..b0 the second
 *	(b3 in the top byte) -- the same layout the original obtained by
 *	going through _mm_set_pi16.
 *
 *	Each value is reduced to its low 8 bits before merging.  Where char
 *	is signed, a negative byte promotes to int with sign extension, and
 *	OR-ing it in unmasked would overwrite the bytes above it with ones.
 *	The two dwords are assembled here and handed straight to
 *	_mm_set_pi32 so the result does not depend on how any intermediate
 *	helper treats negative halves.
 */
__inline __m64 _mm_set_pi8 (char b7, char b6, char b5, char b4,
							char b3, char b2, char b1, char b0)
{
	unsigned int hi = ((unsigned int)(unsigned char)b7 << 24)
					| ((unsigned int)(unsigned char)b6 << 16)
					| ((unsigned int)(unsigned char)b5 << 8)
					|  (unsigned int)(unsigned char)b4;
	unsigned int lo = ((unsigned int)(unsigned char)b3 << 24)
					| ((unsigned int)(unsigned char)b2 << 16)
					| ((unsigned int)(unsigned char)b1 << 8)
					|  (unsigned int)(unsigned char)b0;

	return _mm_set_pi32((int)hi, (int)lo);
}

/*
 *	Reversed-argument form of _mm_set_pi32: takes its two 32-bit values
 *	as (i0, i1) and forwards them as (i1, i0).
 */
__inline __m64 _mm_setr_pi32 (int i0, int i1)
{
	register __m64 reversed = _mm_set_pi32(i1, i0);

	return reversed;
}

/*
 *	Reversed-argument form of _mm_set_pi16: takes its four 16-bit values
 *	as (w0, w1, w2, w3) and forwards them as (w3, w2, w1, w0).
 */
__inline __m64 _mm_setr_pi16 (short w0, short w1, short w2, short w3)
{
	register __m64 reversed = _mm_set_pi16(w3, w2, w1, w0);

	return reversed;
}

/*
 *	Reversed-argument form of _mm_set_pi8: takes its eight 8-bit values
 *	as (b0 .. b7) and forwards them as (b7 .. b0).
 */
__inline __m64 _mm_setr_pi8 (char b0, char b1, char b2, char b3,
							char b4, char b5, char b6, char b7)
{
	register __m64 reversed = _mm_set_pi8(b7, b6, b5, b4,
										  b3, b2, b1, b0);

	return reversed;
}

#pragma volatile_asm reset

#if __cplusplus
}
#endif


#endif	// _MMINTRIN_H

/*
 * Change Log:
 * 030329 EJS	Initial checkin
 * 030605 EJS 	Use struct decls from original header instead of new keywords
 * 030619 EJS	Move __declspec(intrin_type) so this works in c++
*/
 