author | Matt Plumtree <matt.plumtree@nokia.com> |
Mon, 15 Nov 2010 09:56:25 +0000 | |
branch | bug235_bringup_0 |
changeset 77 | b0395290e61f |
parent 69 | 3f914c77c2e9 |
permissions | -rw-r--r-- |
24 | 1 |
/*------------------------------------------------------------------------ |
2 |
* |
|
3 |
* OpenVG 1.1 Reference Implementation |
|
4 |
* ----------------------------------- |
|
5 |
* |
|
6 |
* Copyright (c) 2007 The Khronos Group Inc. |
|
7 |
* Portions copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies). |
|
8 |
* |
|
9 |
* Permission is hereby granted, free of charge, to any person obtaining a |
|
10 |
* copy of this software and /or associated documentation files |
|
11 |
* (the "Materials "), to deal in the Materials without restriction, |
|
12 |
* including without limitation the rights to use, copy, modify, merge, |
|
13 |
* publish, distribute, sublicense, and/or sell copies of the Materials, |
|
14 |
* and to permit persons to whom the Materials are furnished to do so, |
|
15 |
* subject to the following conditions: |
|
16 |
* |
|
17 |
* The above copyright notice and this permission notice shall be included |
|
18 |
* in all copies or substantial portions of the Materials. |
|
19 |
* |
|
20 |
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
|
21 |
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
|
22 |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
|
23 |
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, |
|
24 |
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
|
25 |
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE MATERIALS OR |
|
26 |
* THE USE OR OTHER DEALINGS IN THE MATERIALS. |
|
27 |
* |
|
28 |
*//** |
|
29 |
* \file |
|
30 |
* \brief Implementation of polygon rasterizer. |
|
31 |
* \note |
|
32 |
*//*-------------------------------------------------------------------*/ |
|
33 |
||
34 |
#include "riRasterizer.h" |
|
35 |
||
69
3f914c77c2e9
Host OpenVG building using GCC, without LLVM integration.
Matt Plumtree <matt.plumtree@nokia.com>
parents:
53
diff
changeset
|
36 |
#if defined(RI_COMPILE_LLVM_BYTECODE) |
24 | 37 |
// TEMP! |
38 |
#ifndef __SFCOMPILER_H |
|
39 |
# include "sfCompiler.h" |
|
40 |
#endif |
|
69
3f914c77c2e9
Host OpenVG building using GCC, without LLVM integration.
Matt Plumtree <matt.plumtree@nokia.com>
parents:
53
diff
changeset
|
41 |
#endif |
24 | 42 |
|
43 |
||
44 |
namespace OpenVGRI |
|
45 |
{ |
|
46 |
||
47 |
/*-------------------------------------------------------------------*//*! |
|
48 |
* \brief Rasterizer constructor. |
|
49 |
* \param |
|
50 |
* \return |
|
51 |
* \note |
|
52 |
*//*-------------------------------------------------------------------*/ |
|
53 |
||
54 |
// Builds a rasterizer in its idle state: no coverage buffer, empty edge
// tables, scissoring disabled, anti-aliasing enabled, a zero-sized viewport
// at (0,0), the even-odd fill rule, no pixel pipe and no recorded spans.
Rasterizer::Rasterizer()
    : m_covBuffer(NULL)
    , m_covBufferSz(0)
    , m_edges()
    , m_scissorEdges()
    , m_scissor(false)
    , m_aa(true)
    , m_vpx(0)
    , m_vpy(0)
    , m_vpwidth(0)
    , m_vpheight(0)
    , m_fillRule(VG_EVEN_ODD)
    , m_pixelPipe(NULL)
    , m_nSpans(0)
{
}
|
69 |
||
70 |
/*-------------------------------------------------------------------*//*! |
|
71 |
* \brief Rasterizer destructor. |
|
72 |
* \param |
|
73 |
* \return |
|
74 |
* \note |
|
75 |
*//*-------------------------------------------------------------------*/ |
|
76 |
||
77 |
Rasterizer::~Rasterizer()
{
    // Nothing to release unless a coverage buffer is currently held.
    if (m_covBuffer)
    {
        RI_DELETE_ARRAY(m_covBuffer);
    }
}
|
82 |
||
83 |
/*-------------------------------------------------------------------*//*! |
|
84 |
* \brief Removes all appended edges. |
|
85 |
* \param |
|
86 |
* \return |
|
87 |
* \note |
|
88 |
*//*-------------------------------------------------------------------*/ |
|
89 |
||
90 |
#define EDGE_TERMINATOR 0xFFFFFFFFu |
|
91 |
||
92 |
void Rasterizer::clear() |
|
93 |
{ |
|
94 |
//m_edges.clear(); |
|
95 |
for (int i = 0; i < m_edges.size(); i++) |
|
96 |
m_edges[i] = EDGE_TERMINATOR; |
|
97 |
||
98 |
m_edgePool.clear(); |
|
99 |
||
100 |
m_edgeMin.set(0x7fffffffu, 0x7fffffffu); |
|
101 |
m_edgeMax.set(0x80000000, 0x80000000); |
|
102 |
} |
|
103 |
||
104 |
/*-------------------------------------------------------------------*//*! |
|
105 |
* \brief Appends an edge to the rasterizer. |
|
106 |
* \param |
|
107 |
* \return |
|
108 |
* \note |
|
109 |
*//*-------------------------------------------------------------------*/ |
|
110 |
||
111 |
void Rasterizer::addBBox(const IVector2& v) |
|
112 |
{ |
|
113 |
if(v.x < m_edgeMin.x) m_edgeMin.x = v.x; |
|
114 |
if(v.y < m_edgeMin.y) m_edgeMin.y = v.y; |
|
115 |
if(v.x > m_edgeMax.x) m_edgeMax.x = v.x; |
|
116 |
if(v.y > m_edgeMax.y) m_edgeMax.y = v.y; |
|
117 |
} |
|
118 |
||
119 |
void Rasterizer::pushEdge(const Edge& edge) |
|
120 |
{ |
|
121 |
addBBox(edge.v0); |
|
122 |
addBBox(edge.v1); |
|
123 |
||
124 |
// Only add processed edges. |
|
125 |
||
126 |
RI_ASSERT(edge.v0.y >= 0); |
|
127 |
RI_ASSERT(edge.v0.y < edge.v1.y); //horizontal edges should have been dropped already |
|
128 |
||
129 |
ActiveEdge ae; |
|
130 |
ae.direction = edge.direction; |
|
131 |
||
132 |
// \todo Adjust for non-AA cases |
|
133 |
// \todo verySteep is temporary. Either clip to right edge also, or validate that a proper slope can be |
|
134 |
// calculated here. |
|
135 |
const int slope = RI_SAT_SHL((edge.v1.x - edge.v0.x), RASTERIZER_BITS - X_BITS) / (edge.v1.y - edge.v0.y); |
|
136 |
//const bool verySteep = RI_INT_ABS(edge.v1.x - edge.v0.x) > (1 << (30-RASTERIZER_BITS)) ? true : false; |
|
137 |
//const int slope = verySteep ? 1 << 30 : RI_SHL((edge.v1.x - edge.v0.x), RASTERIZER_BITS - X_BITS) / (edge.v1.y - edge.v0.y); |
|
138 |
// slope: SI.(RASTERIZER_BITS - Y_BITS) |
|
139 |
const int yF = edge.v0.y & Y_MASK; |
|
140 |
// \todo See verySteep note for this hack also. (Clip to right edge?) |
|
141 |
const int xRef = RI_SAT_SHL(edge.v0.x, RASTERIZER_BITS - X_BITS) - (yF * slope); |
|
142 |
//const int xRef = edge.v0.x > (1<<(30-RASTERIZER_BITS)) ? 1<<30 : RI_SHL(edge.v0.x, RASTERIZER_BITS - X_BITS) - (yF * slope); |
|
143 |
||
144 |
RI_ASSERT(RI_INT_ABS(edge.v0.y <= 32767)); |
|
145 |
RI_ASSERT(RI_INT_ABS(edge.v1.y <= 32767)); |
|
146 |
||
147 |
ae.yStart = (RIint16)edge.v0.y; |
|
148 |
ae.yEnd = (RIint16)edge.v1.y; |
|
149 |
ae.xRef = xRef; |
|
150 |
ae.slope = slope; |
|
151 |
// Scanline range. |
|
152 |
ae.minx = xRef >> RASTERIZER_BITS; |
|
153 |
ae.maxx = (xRef + slope * (1<<Y_BITS)) >> RASTERIZER_BITS; |
|
154 |
||
155 |
if (ae.minx > ae.maxx) |
|
156 |
RI_ANY_SWAP(ActiveEdge::XCoord, ae.minx, ae.maxx); |
|
157 |
||
158 |
if (ae.maxx < 0) |
|
159 |
ae.minx = ae.maxx = LEFT_DISCARD_SHORT; |
|
160 |
||
161 |
if (m_edges[ae.yStart>>Y_BITS] == EDGE_TERMINATOR) |
|
162 |
ae.next = EDGE_TERMINATOR; |
|
163 |
else |
|
164 |
ae.next = m_edges[ae.yStart>>Y_BITS]; |
|
165 |
||
166 |
m_edgePool.push_back(ae); //throws bad_alloc |
|
167 |
||
168 |
RI_ASSERT(m_edgePool.size() > 0); |
|
169 |
m_edges[ae.yStart>>Y_BITS] = m_edgePool.size()-1; |
|
170 |
} |
|
171 |
||
172 |
/** |
|
173 |
* \brief Clips an edge and if something remains, adds it to the list of edges. |
|
174 |
* \todo Enhance precision: Currently this just uses doubles and gets away with |
|
175 |
* it in most cases. |
|
176 |
*/ |
|
177 |
void Rasterizer::clipAndAddEdge(Edge& edge) |
|
178 |
{ |
|
179 |
//if (m_edges.size() > 48) |
|
180 |
//return; |
|
181 |
// Check y-clips |
|
182 |
// \todo Reduce amount of clips. |
|
183 |
bool outLeft[2] = {(edge.v0.x < m_vpMinx), (edge.v1.x < m_vpMinx)}; |
|
184 |
bool outRight[2] = {(edge.v0.x > m_vpMaxx), (edge.v1.x > m_vpMaxx)}; |
|
185 |
bool outTop[2] = {(edge.v0.y < m_vpMiny), (edge.v1.y < m_vpMiny)}; |
|
186 |
bool outBottom[2] = {(edge.v0.y > m_vpMaxy), (edge.v1.y > m_vpMaxy)}; |
|
187 |
||
188 |
if (!(outLeft[0] || outLeft[1] || outRight[0] || outRight[1] || outTop[0] || outTop[1] || outBottom[0] || outBottom[1])) |
|
189 |
{ |
|
190 |
pushEdge(edge); |
|
191 |
return; |
|
192 |
} |
|
193 |
||
194 |
// \todo Make sure that checking out-of-right works with the scanconverter. |
|
195 |
if ((outBottom[0] && outBottom[1]) || (outTop[0] && outTop[1])) |
|
196 |
return; // Out of bounds |
|
197 |
||
198 |
// \todo Clip to right edge of screen. |
|
199 |
// \todo Make slope-calculation and signs consistent. |
|
200 |
// |
|
201 |
if (outTop[0] || outBottom[1]) |
|
202 |
{ |
|
203 |
// Clip to top/bottom. |
|
204 |
double slope = (double)(edge.v1.x - edge.v0.x)/(edge.v1.y - edge.v0.y); |
|
205 |
||
206 |
if (outTop[0]) |
|
207 |
{ |
|
208 |
RI_ASSERT(-(RIint64)edge.v0.y >= 0); |
|
209 |
RIint32 dx = RI_ROUND_TO_INT(-slope * edge.v0.y); |
|
210 |
edge.v0.y = 0; |
|
211 |
edge.v0.x += dx; |
|
212 |
} |
|
213 |
||
214 |
if (outBottom[1]) |
|
215 |
{ |
|
216 |
RIint32 dy = edge.v1.y - m_vpMaxy; |
|
217 |
RI_ASSERT(dy >= 0); |
|
218 |
RIint32 dx = -RI_ROUND_TO_INT(slope * dy); |
|
219 |
edge.v1.y = m_vpMaxy; |
|
220 |
edge.v1.x += dx; |
|
221 |
} |
|
222 |
||
223 |
} |
|
224 |
||
225 |
if (edge.v0.y >= edge.v1.y) |
|
226 |
return; |
|
227 |
||
228 |
// \todo Recheck left/right. |
|
229 |
outLeft[0] = (edge.v0.x < m_vpMinx); outLeft[1] = (edge.v1.x < m_vpMinx); |
|
230 |
outRight[1] = (edge.v0.x > m_vpMaxx); outRight[1] = (edge.v1.x > m_vpMaxx); |
|
231 |
||
232 |
if (outLeft[0] && outLeft[1]) |
|
233 |
{ |
|
234 |
edge.v0.x = m_vpMinx; |
|
235 |
edge.v1.x = m_vpMinx; |
|
236 |
pushEdge(edge); |
|
237 |
return; |
|
238 |
} |
|
239 |
if (outRight[0] && outRight[1]) |
|
240 |
{ |
|
241 |
edge.v0.x = m_vpMaxx; |
|
242 |
edge.v1.x = m_vpMaxx; |
|
243 |
pushEdge(edge); |
|
244 |
return; |
|
245 |
} |
|
246 |
||
247 |
// From outside -> screen |
|
248 |
if (outLeft[0] || outRight[1]) |
|
249 |
{ |
|
250 |
// infinite slope? |
|
251 |
double slope = (double)((RIint64)edge.v1.y - edge.v0.y)/((RIint64)edge.v1.x - edge.v0.x); |
|
252 |
||
253 |
if (outLeft[0]) |
|
254 |
{ |
|
255 |
RIint32 dx = edge.v0.x; |
|
256 |
//RI_ASSERT(dx >= 0); |
|
257 |
// Note the sign. |
|
258 |
RIint32 dy = RI_ROUND_TO_INT(-slope * dx); |
|
259 |
||
260 |
Edge vpart = edge; |
|
261 |
vpart.v1.y = edge.v0.y + dy; |
|
262 |
//vpart.v1.x = edge.v0.x; // = 0? |
|
263 |
// \note This should be flagged instead of setting the smallest possible |
|
264 |
// value because of extremely gentle slopes may cause bugs: |
|
265 |
vpart.v1.x = vpart.v0.x = -0x100000; |
|
266 |
||
267 |
if (vpart.v1.y > vpart.v0.y) |
|
268 |
pushEdge(vpart); |
|
269 |
||
270 |
edge.v0.y += dy; |
|
271 |
edge.v0.x = 0; |
|
272 |
} |
|
273 |
} |
|
274 |
// From screen -> outside |
|
275 |
if (outLeft[1] || outRight[0]) |
|
276 |
{ |
|
277 |
// infinite slope? |
|
278 |
double slope = (double)((RIint64)edge.v1.y - edge.v0.y)/((RIint64)edge.v1.x - edge.v0.x); |
|
279 |
||
280 |
if (outLeft[1]) |
|
281 |
{ |
|
282 |
RIint32 dx = edge.v0.x; |
|
283 |
RI_ASSERT(dx >= 0); |
|
284 |
RIint32 dy = RI_ROUND_TO_INT(-slope * dx); |
|
285 |
||
286 |
Edge vpart = edge; |
|
287 |
vpart.v0.y = edge.v0.y + dy; |
|
288 |
vpart.v1.x = vpart.v0.x = LEFT_DISCARD; |
|
289 |
||
290 |
if (vpart.v1.y > vpart.v0.y) |
|
291 |
pushEdge(vpart); |
|
292 |
||
293 |
edge.v1.y = edge.v0.y + dy; |
|
294 |
edge.v1.x = 0; |
|
295 |
} |
|
296 |
} |
|
297 |
||
298 |
if (edge.v0.y >= edge.v1.y) |
|
299 |
return; |
|
300 |
||
301 |
// Finally, add the edge: |
|
302 |
pushEdge(edge); |
|
303 |
} |
|
304 |
||
305 |
void Rasterizer::addEdge(const Vector2& v0, const Vector2& v1) |
|
306 |
{ |
|
307 |
if( m_edges.size() >= RI_MAX_EDGES ) |
|
308 |
throw std::bad_alloc(); //throw an out of memory error if there are too many edges |
|
309 |
||
310 |
Edge e; |
|
311 |
||
312 |
{ |
|
313 |
IVector2 i0(RI_ROUND_TO_INT(v0.x * (1<<X_BITS)), RI_ROUND_TO_INT(v0.y * (1<<Y_BITS))); |
|
314 |
IVector2 i1(RI_ROUND_TO_INT(v1.x * (1<<X_BITS)), RI_ROUND_TO_INT(v1.y * (1<<Y_BITS))); |
|
315 |
||
316 |
if(i0.y == i1.y) |
|
317 |
return; //skip horizontal edges (they don't affect rasterization since we scan horizontally) |
|
318 |
||
319 |
if (i0.y < i1.y) |
|
320 |
{ |
|
321 |
// Edge is going upward |
|
322 |
e.v0 = i0; |
|
323 |
e.v1 = i1; |
|
324 |
e.direction = 1; |
|
325 |
} |
|
326 |
else |
|
327 |
{ |
|
328 |
// Edge is going downward |
|
329 |
e.v0 = i1; |
|
330 |
e.v1 = i0; |
|
331 |
e.direction = -1; |
|
332 |
} |
|
333 |
} |
|
334 |
||
335 |
// Clip and insert. |
|
336 |
||
337 |
clipAndAddEdge(e); |
|
338 |
} |
|
339 |
||
340 |
/*-------------------------------------------------------------------*//*! |
|
341 |
* \brief Set up rasterizer |
|
342 |
* \param |
|
343 |
* \return |
|
344 |
* \note |
|
345 |
*//*-------------------------------------------------------------------*/ |
|
346 |
||
347 |
void Rasterizer::setup(int vpx, int vpy, int vpwidth, int vpheight, VGFillRule fillRule, const PixelPipe* pixelPipe) |
|
348 |
{ |
|
349 |
RI_ASSERT(vpwidth >= 0 && vpheight >= 0); |
|
350 |
RI_ASSERT(vpx + vpwidth >= vpx && vpy + vpheight >= vpy); |
|
351 |
RI_ASSERT(fillRule == VG_EVEN_ODD || fillRule == VG_NON_ZERO); |
|
352 |
RI_ASSERT(pixelPipe); |
|
353 |
||
354 |
clear(); |
|
355 |
||
356 |
m_vpx = vpx; |
|
357 |
m_vpy = vpy; |
|
358 |
m_vpwidth = vpwidth; |
|
359 |
m_vpheight = vpheight; |
|
360 |
||
361 |
if (m_vpheight > m_edges.size()) |
|
362 |
{ |
|
363 |
int os = m_edges.size(); |
|
364 |
m_edges.resize(m_vpheight); |
|
365 |
for (int i = os; i < m_edges.size(); i++) |
|
366 |
m_edges[i] = EDGE_TERMINATOR; |
|
367 |
} |
|
368 |
||
369 |
m_vpMinx = RI_SHL(vpx, X_BITS); |
|
370 |
m_vpMiny = RI_SHL(vpy, Y_BITS); |
|
371 |
m_vpMaxx = RI_SHL(vpx + vpwidth, X_BITS); |
|
372 |
m_vpMaxy = RI_SHL(vpy + vpheight, Y_BITS); |
|
373 |
||
374 |
m_fillRule = fillRule; |
|
375 |
||
376 |
RIuint32 fillRuleMask = fillRule == VG_NON_ZERO ? 0xffffffffu : 1; |
|
377 |
m_fillRuleMask = fillRuleMask; |
|
378 |
||
379 |
m_pixelPipe = pixelPipe; |
|
380 |
m_covMinx = vpx+vpwidth; |
|
381 |
m_covMiny = vpy+vpheight; |
|
382 |
m_covMaxx = vpx; |
|
383 |
m_covMaxy = vpy; |
|
384 |
} |
|
385 |
||
386 |
/*-------------------------------------------------------------------*//*! |
|
387 |
* \brief Sets scissor rectangles. |
|
388 |
* \param |
|
389 |
* \return |
|
390 |
* \note |
|
391 |
*//*-------------------------------------------------------------------*/ |
|
392 |
||
393 |
void Rasterizer::setScissor(const Array<Rectangle>& scissors) |
|
394 |
{ |
|
395 |
try |
|
396 |
{ |
|
397 |
m_scissorEdges.clear(); |
|
398 |
for(int i=0;i<scissors.size();i++) |
|
399 |
{ |
|
400 |
if(scissors[i].width > 0 && scissors[i].height > 0) |
|
401 |
{ |
|
402 |
ScissorEdge e; |
|
403 |
e.miny = scissors[i].y; |
|
404 |
e.maxy = RI_INT_ADDSATURATE(scissors[i].y, scissors[i].height); |
|
405 |
||
406 |
e.x = scissors[i].x; |
|
407 |
e.direction = 1; |
|
408 |
m_scissorEdges.push_back(e); //throws bad_alloc |
|
409 |
e.x = RI_INT_ADDSATURATE(scissors[i].x, scissors[i].width); |
|
410 |
e.direction = -1; |
|
411 |
m_scissorEdges.push_back(e); //throws bad_alloc |
|
412 |
} |
|
413 |
} |
|
414 |
} |
|
415 |
catch(std::bad_alloc) |
|
416 |
{ |
|
417 |
m_scissorEdges.clear(); |
|
418 |
throw; |
|
419 |
} |
|
420 |
} |
|
421 |
||
422 |
void Rasterizer::setScissoring(bool enabled) |
|
423 |
{ |
|
424 |
m_scissor = enabled; |
|
425 |
} |
|
426 |
||
427 |
// Copies n bytes from src to dst one 32-bit word at a time.
// n is a byte count and is expected to be a multiple of 4 (asserted); any
// trailing 1-3 bytes are not copied. Counting whole words instead of
// subtracting 4 from n keeps the loop from wrapping the unsigned counter
// and running past the buffers when n is not a multiple of 4.
static RI_INLINE void small_memcpy32(void* dst, const void* src, size_t n)
{
    RI_ASSERT((n & 3u) == 0);

    RIuint32 *d = (RIuint32*)dst;
    const RIuint32 *s = (const RIuint32*)src;

    for (size_t words = n >> 2; words != 0; --words)
    {
        *d++ = *s++;
    }
}
|
437 |
||
438 |
// \todo Move this to some debug file or remove. |
|
439 |
#if defined(USE_SSE2) && !defined(_WIN32)
// Debug helper: prints the two 64-bit halves of an SSE register as hex
// (low half first). Does nothing when RI_DEBUG is not defined.
RI_INLINE static void print128(__m128i ll)
{
#if !defined(RI_DEBUG)
    (void)ll;
#else
    unsigned long long halves[2];
    // Unaligned 16-byte store of the raw register contents.
    _mm_storeu_si128((__m128i*)halves, ll);
    RI_PRINTF("0x%016llx %016llx\n", halves[0], halves[1]);
#endif
}
#endif
|
451 |
||
452 |
#if defined(USE_SSE2)
/**
 * \brief   Multiplies 32-bit lanes and returns the low 32 bits of each product.
 * \param   a   First operand; only lanes 0 and 2 take part in the multiplies.
 * \param   b   Second operand; all four lanes are used.
 * \return  { a0*b0, a2*b1, a0*b2, a2*b3 } truncated to 32 bits per lane.
 * \note    This equals the lane-wise product only when the lanes of a are
 *          equal, as with the _mm_set1_epi32 operands passed by the callers
 *          in this file.
 * \note    The 64-bit products are narrowed by reinterpreting the registers
 *          (_mm_cast*), never by int<->float value conversion, which would
 *          round products whose magnitude exceeds 2^24.
 */
RI_INLINE static __m128i mm_mul4x32(const __m128i a, const __m128i b) {
    __m128i res;
#if defined(__GNUG__) || (defined(_MSC_VER) && (_MSC_VER > 1400))
    // \todo Simpler way to do this on intel?
    // m0 holds the 64-bit products a0*b0 and a2*b1, m1 holds a0*b2 and a2*b3.
    __m128i m0 = _mm_mul_epu32(a, _mm_shuffle_epi32(b, _MM_SHUFFLE(1, 1, 0, 0)));
    __m128i m1 = _mm_mul_epu32(a, _mm_shuffle_epi32(b, _MM_SHUFFLE(3, 3, 2, 2)));

    // Gather the low dword of every product. shufps is used purely as a
    // two-source dword shuffle; the casts do not touch the bits.
    res = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(m0), _mm_castsi128_ps(m1), _MM_SHUFFLE(2, 0, 2, 0)));
#else
    // Same sequence as above, written as inline assembly.
    __asm {
        movdqa xmm1, a;
        movdqa xmm2, b;
        pshufd xmm3, xmm2, 80;
        movdqa xmm0, xmm1;

        pshufd xmm2, xmm2, 250;
        pmuludq xmm0, xmm3;
        pmuludq xmm1, xmm2;

        shufps xmm0, xmm1, 136;
        movdqa res, xmm0;
    }
#endif
    return res;
}
#endif
|
484 |
||
485 |
#if defined(USE_SSE2)
// Builds two per-sample (16-bit lane) masks from pixel-relative edge x
// coordinates:
//   slWindMask - lanes whose coordinate, shifted right by RASTERIZER_BITS,
//                is zero (the edge crosses that sample row inside this pixel);
//   pxWindMask - the subset of those lanes where sampleCoords >= coords.
RI_INLINE static void mm_get_xmasks(const __m128i& coords, const __m128i& sampleCoords, __m128i& slWindMask, __m128i& pxWindMask)
{
    // Integer pixel part of every coordinate compared against zero.
    const __m128i pixelPart = _mm_srai_epi16(coords, Rasterizer::RASTERIZER_BITS);
    const __m128i inPixel = _mm_cmpeq_epi16(pixelPart, _mm_setzero_si128());

    // sample >= edge, composed from "greater than" OR "equal".
    const __m128i sampleGreater = _mm_cmpgt_epi16(sampleCoords, coords);
    const __m128i sampleEqual = _mm_cmpeq_epi16(sampleCoords, coords);
    const __m128i sampleAtOrRight = _mm_or_si128(sampleGreater, sampleEqual);

    slWindMask = inPixel;
    pxWindMask = _mm_and_si128(inPixel, sampleAtOrRight);
}
#endif
|
496 |
||
497 |
// Computes the first (py0) and last (py1) sub-scanline sample rows of
// scanline iY that an edge spanning [yStart, yEnd) touches. All y values are
// fixed point with Y_BITS fractional bits.
RI_INLINE static void getVerticalSubpixels(int iY, int yStart, int yEnd, int& py0, int& py1)
{
    const int scanlineTop = iY << Rasterizer::Y_BITS;
    const int scanlineBottom = scanlineTop + (1<<Rasterizer::Y_BITS);

    // An edge that began on an earlier scanline covers this one from row 0.
    if (scanlineTop > yStart)
        py0 = 0;
    else
        py0 = yStart & Rasterizer::Y_MASK;

    // Last covered row: one below whichever ends first, the edge or the scanline.
    const int lastCoveredY = RI_INT_MIN(yEnd, scanlineBottom) - 1;
    py1 = lastCoveredY & Rasterizer::Y_MASK;
}
|
503 |
||
504 |
// Adds the direction of an edge to the scanline winding of every sample row
// the edge covers on scanline intY, without any x test. The caller must make
// sure the edge lies to the left of everything still to be processed.
// Only implemented for SSE2 builds.
// \todo Remove duplicate code for determining the active samples
RI_INLINE static void applyLeftEdge(const Rasterizer::ActiveEdge& currAe, Rasterizer::Windings& scanline, int intY)
{
#if defined(USE_SSE2)
    int firstRow, lastRow;
    getVerticalSubpixels(intY, currAe.yStart, currAe.yEnd, firstRow, lastRow);

    // Lane i holds sample row index i.
    const __m128i rowIndex = _mm_set_epi16(7,6,5,4,3,2,1,0);

    // Row is covered when firstRow-1 < i < lastRow+1.
    const __m128i afterStart = _mm_cmpgt_epi16(rowIndex, _mm_set1_epi16(firstRow-1));
    const __m128i beforeEnd = _mm_cmplt_epi16(rowIndex, _mm_set1_epi16(lastRow+1));
    const __m128i coveredRows = _mm_and_si128(afterStart, beforeEnd);

    const __m128i delta = _mm_and_si128(coveredRows, _mm_set1_epi16(currAe.direction));
    scanline.sseWinding = _mm_add_epi16(scanline.sseWinding, delta);
#else
    RI_ASSERT(false); // Not implemented yet.
#endif
}
|
527 |
||
528 |
// Non-anti-aliased variant of applyLeftEdge: the edge direction is added to
// all winding lanes when the edge covers sample row 4 of scanline intY, and
// nothing is added otherwise. No x test is done; the caller must make sure
// the edge lies to the left. Only implemented for SSE2 builds.
// \todo Remove duplicate code for determining the active samples?
RI_INLINE static void applyLeftEdgeNoAA(const Rasterizer::ActiveEdge& currAe, Rasterizer::Windings& scanline, int intY)
{
#if defined(USE_SSE2)
    int firstRow, lastRow;
    getVerticalSubpixels(intY, currAe.yStart, currAe.yEnd, firstRow, lastRow);

    // All-ones when row 4 is inside [firstRow, lastRow], all-zeros otherwise.
    const bool coversRow4 = (firstRow <= 4) && (lastRow >= 4);
    const __m128i rowMask = coversRow4 ? _mm_set1_epi8(-1) : _mm_set1_epi8(0);

    const __m128i delta = _mm_and_si128(rowMask, _mm_set1_epi16(currAe.direction));
    scanline.sseWinding = _mm_add_epi16(scanline.sseWinding, delta);
#else
    RI_ASSERT(false); // Not implemented yet.
#endif
}
|
555 |
||
556 |
// Accumulates the winding contribution of one active edge for the pixel whose
// left border is pixelX (RASTERIZER_BITS fixed point) on scanline intY.
// For every sample row the edge covers and on which the edge crosses inside
// this pixel, the edge direction is added to 'scanline'; it is additionally
// added to 'pixel' when that row's sample x is at or to the right of the edge.
RI_INLINE void calculateAEWinding(const Rasterizer::ActiveEdge& currAe, Rasterizer::Windings& pixel, Rasterizer::Windings& scanline, int intY, int pixelX)
{
// X offset of sample column Y inside a pixel: Y sample-widths plus half a
// sample-width, in RASTERIZER_BITS fixed point.
#define QUEEN_COORD(Y) ((Y<<(Rasterizer::RASTERIZER_BITS - Rasterizer::SAMPLE_BITS)) + (1<<(Rasterizer::RASTERIZER_BITS-Rasterizer::SAMPLE_BITS-1)))

#if !defined(USE_SSE2)
    // Sample x offset for each sample row.
    static const int queenCoords[(1<<Rasterizer::SAMPLE_BITS)] = {
        QUEEN_COORD(3), QUEEN_COORD(7), QUEEN_COORD(0), QUEEN_COORD(2),
        QUEEN_COORD(5), QUEEN_COORD(1), QUEEN_COORD(6), QUEEN_COORD(4)
    };

    const int pixelColumn = pixelX >> Rasterizer::RASTERIZER_BITS;
    const int scanlineTop = intY << Rasterizer::Y_BITS;

    // First and last sample rows of this scanline covered by the edge.
    const int firstRow = scanlineTop > currAe.yStart ? 0 : currAe.yStart & Rasterizer::Y_MASK;
    const int lastRow = (RI_INT_MIN(currAe.yEnd, scanlineTop + (1<<Rasterizer::Y_BITS)) - 1) & Rasterizer::Y_MASK;

    // Edge x at the first covered row; advanced by one slope step per row.
    int edgeX = currAe.xRef + (scanlineTop + firstRow - (currAe.yStart & ~Rasterizer::Y_MASK)) * currAe.slope;

    RI_ASSERT(lastRow >= firstRow);

    for (int row = firstRow; row <= lastRow; ++row, edgeX += currAe.slope)
    {
        // Only rows on which the edge crosses inside this pixel contribute.
        if ((edgeX >> Rasterizer::RASTERIZER_BITS) != pixelColumn)
            continue;

        //compute winding number by evaluating the edge functions of edges to the left of the sampling point
        const int sampleX = pixelX + queenCoords[row];
        if (sampleX >= edgeX)
        {
            pixel.winding[row] += currAe.direction;
        }
        scanline.winding[row] += currAe.direction;
    }
#else

    // Same table as the scalar path; _mm_set_epi16 lists lane 7 first.
    const __m128i sampleCoords = _mm_set_epi16(
        QUEEN_COORD(4), QUEEN_COORD(6), QUEEN_COORD(1), QUEEN_COORD(5),
        QUEEN_COORD(2), QUEEN_COORD(0), QUEEN_COORD(7), QUEEN_COORD(3));

    RI_ASSERT(Rasterizer::RASTERIZER_BITS <= 14);

    // TEROP: Optimize conditions.
    int firstRow, lastRow;
    getVerticalSubpixels(intY, currAe.yStart, currAe.yEnd, firstRow, lastRow);

    const int scanlineTop = intY << Rasterizer::Y_BITS;

    // Row indices 0..3 and 4..7 as 32-bit lanes.
    const __m128i rowsLo = _mm_set_epi32(3,2,1,0);
    const __m128i rowsHi = _mm_set_epi32(7,6,5,4);

    // Covered rows satisfy firstRow-1 < row < lastRow+1.
    const __m128i lowerBound = _mm_set1_epi32(firstRow-1);
    const __m128i upperBound = _mm_set1_epi32(lastRow+1);
    const __m128i coveredLo = _mm_and_si128(_mm_cmpgt_epi32(rowsLo, lowerBound), _mm_cmplt_epi32(rowsLo, upperBound));
    const __m128i coveredHi = _mm_and_si128(_mm_cmpgt_epi32(rowsHi, lowerBound), _mm_cmplt_epi32(rowsHi, upperBound));

    // Edge x at the top of the scanline, relative to the pixel's left border.
    const int edgeX = currAe.xRef + (scanlineTop - (currAe.yStart & ~Rasterizer::Y_MASK)) * currAe.slope;
    const __m128i relStart = _mm_set1_epi32(edgeX - pixelX);

    // Per-row edge x: start + row * slope, then packed to 16-bit lanes.
    const __m128i slopeLanes = _mm_set1_epi32(currAe.slope);
    const __m128i relLo = _mm_add_epi32(relStart, mm_mul4x32(slopeLanes, rowsLo));
    const __m128i relHi = _mm_add_epi32(relStart, mm_mul4x32(slopeLanes, rowsHi));
    const __m128i coords = _mm_packs_epi32(relLo, relHi);

    // Direction in covered rows, zero elsewhere.
    const __m128i coveredRows = _mm_packs_epi32(coveredLo, coveredHi);
    const __m128i maskedDir = _mm_and_si128(_mm_set1_epi16(currAe.direction), coveredRows);

    __m128i scanlineMask, pixelMask;
    mm_get_xmasks(coords, sampleCoords, scanlineMask, pixelMask);

    pixel.sseWinding = _mm_add_epi16(pixel.sseWinding, _mm_and_si128(pixelMask, maskedDir));
    scanline.sseWinding = _mm_add_epi16(scanline.sseWinding, _mm_and_si128(scanlineMask, maskedDir));
#endif

#undef QUEEN_COORD

}
|
642 |
||
643 |
/** |
|
644 |
* \brief Calculate winding using one sample only. |
|
645 |
* \note This uses most of the same code as the AA-case even though it is not |
|
646 |
* necessary (one sample would be enough). |
|
647 |
*/ |
|
648 |
RI_INLINE void calculateAEWindingNoAA(const Rasterizer::ActiveEdge& currAe, Rasterizer::Windings& pixel, Rasterizer::Windings& scanline, int intY, int pixelX) |
|
649 |
{ |
|
650 |
#if defined(USE_SSE2) |
|
651 |
||
652 |
#define QUEEN_COORD(Y) ((Y<<(Rasterizer::RASTERIZER_BITS - Rasterizer::SAMPLE_BITS)) + (1<<(Rasterizer::RASTERIZER_BITS-Rasterizer::SAMPLE_BITS-1))) |
|
653 |
const int half = 1<<(Rasterizer::RASTERIZER_BITS-1); |
|
654 |
||
655 |
__m128i sampleCoords = _mm_set1_epi16(half); |
|
656 |
||
657 |
RI_ASSERT(Rasterizer::RASTERIZER_BITS <= 14); |
|
658 |
||
659 |
const int cy = intY << Rasterizer::Y_BITS; |
|
660 |
||
661 |
int py0, py1; |
|
662 |
getVerticalSubpixels(intY, currAe.yStart, currAe.yEnd, py0, py1); |
|
663 |
||
664 |
__m128i yMask; |
|
665 |
||
666 |
if (py0 <= 4 && py1 >= 4) |
|
667 |
yMask = _mm_set1_epi8(-1); |
|
668 |
else |
|
669 |
yMask = _mm_set1_epi8(0); |
|
670 |
||
671 |
const __m128i csteps0 = _mm_set_epi32(4,4,4,4); |
|
672 |
const __m128i csteps1 = _mm_set_epi32(4,4,4,4); |
|
673 |
||
674 |
const int edgeX = currAe.xRef + (cy - (currAe.yStart & ~Rasterizer::Y_MASK)) * currAe.slope; |
|
675 |
const __m128i xStart = _mm_set1_epi32(edgeX - pixelX); |
|
676 |
||
677 |
const __m128i xs0 = _mm_set1_epi32(currAe.slope); |
|
678 |
||
679 |
__m128i xAdd0 = mm_mul4x32(xs0, csteps0); |
|
680 |
__m128i xAdd1 = mm_mul4x32(xs0, csteps1); |
|
681 |
__m128i coords0 = _mm_add_epi32(xStart, xAdd0); |
|
682 |
__m128i coords1 = _mm_add_epi32(xStart, xAdd1); |
|
683 |
__m128i coords = _mm_packs_epi32(coords0, coords1); |
|
684 |
||
685 |
__m128i dir = _mm_set1_epi16(currAe.direction); |
|
686 |
__m128i mDir = _mm_and_si128(dir, yMask); |
|
687 |
//__m128i mDir = dir; |
|
688 |
||
689 |
__m128i sw, pw; |
|
690 |
mm_get_xmasks(coords, sampleCoords, sw, pw); |
|
691 |
||
692 |
pixel.sseWinding = _mm_add_epi16(pixel.sseWinding, _mm_and_si128(pw, mDir)); |
|
693 |
scanline.sseWinding = _mm_add_epi16(scanline.sseWinding, _mm_and_si128(sw, mDir)); |
|
694 |
||
695 |
#undef QUEEN_COORD |
|
696 |
||
697 |
#else |
|
698 |
RI_ASSERT(false); // Not implemented. |
|
699 |
#endif |
|
700 |
} |
|
701 |
||
702 |
#if defined(USE_SSE2)
// Counts how many of the eight 16-bit winding lanes are non-zero after being
// masked with fillRuleMask, and returns that count (low 8 bits).
RI_INLINE static int mm_winding_to_coverage(const Rasterizer::Windings& pixel, int fillRuleMask)
{
    // This version uses SSE2 counters.
    const __m128i masked = _mm_and_si128(_mm_set1_epi16(fillRuleMask), pixel.sseWinding);

    // cmpeq yields -1 for zero lanes, so 1 + cmpeq is 0 for empty samples
    // and 1 for covered ones.
    const __m128i isEmpty = _mm_cmpeq_epi16(masked, _mm_setzero_si128());
    const __m128i covered = _mm_add_epi16(_mm_set1_epi16(1), isEmpty);

    // Horizontal sum of the eight lanes into lane 0: fold dwords 3 and 2 onto
    // dwords 0 and 1, then dword 1 onto dword 0, then word 1 onto word 0.
    const __m128i sum4 = _mm_add_epi16(covered, _mm_shuffle_epi32(covered, _MM_SHUFFLE(2, 3, 2, 3)));
    const __m128i sum2 = _mm_add_epi16(sum4, _mm_shuffle_epi32(sum4, _MM_SHUFFLE(1, 1, 1, 1)));
    const __m128i sum1 = _mm_add_epi16(sum2, _mm_shufflelo_epi16(sum2, _MM_SHUFFLE(1, 1, 1, 1)));

    return _mm_cvtsi128_si32(sum1) & 0xff;
}
#endif
|
720 |
||
721 |
#define RI_DEBUG |
|
722 |
#if defined(RI_DEBUG) |
|
723 |
void maybeDumpEdges(Array<Rasterizer::ActiveEdge> &edgePool) |
|
724 |
{ |
|
725 |
return; |
|
726 |
// \note This gives an idea about the edges at the rasterization stage. |
|
727 |
// Input edges must be output at a different stage. |
|
728 |
RI_PRINTF("lines = []\n"); |
|
729 |
for (int i = 0 ; i < edgePool.size(); i++) |
|
730 |
{ |
|
731 |
const int slope = edgePool[i].slope; |
|
732 |
int x0, x1, y0, y1; |
|
733 |
y0 = edgePool[i].yStart; |
|
734 |
y1 = edgePool[i].yEnd; |
|
735 |
x0 = edgePool[i].xRef + (slope * (y0 & Rasterizer::Y_MASK)); |
|
736 |
x1 = (edgePool[i].xRef + (slope * (y1 - (y0 & ~Rasterizer::Y_MASK))))>>(Rasterizer::RASTERIZER_BITS-Rasterizer::X_BITS); |
|
737 |
RI_PRINTF("lines += [[%d, %d], [%d, %d]]\n",x0>>(Rasterizer::RASTERIZER_BITS-Rasterizer::X_BITS),y0,x1,y1); |
|
738 |
} |
|
739 |
} |
|
740 |
#endif |
|
741 |
||
742 |
/*-------------------------------------------------------------------*//*! |
|
743 |
* \brief Calls PixelPipe::pixelPipe for each pixel with coverage greater |
|
744 |
* than zero. |
|
745 |
* \param |
|
746 |
* \return |
|
747 |
* \note |
|
748 |
*//*-------------------------------------------------------------------*/ |
|
749 |
void Rasterizer::fill() |
|
750 |
{ |
|
751 |
if(m_scissor && !m_scissorEdges.size()) |
|
752 |
return; //scissoring is on, but there are no scissor rectangles => nothing is visible |
|
753 |
||
754 |
int firstAe = 0; |
|
755 |
||
756 |
//proceed scanline by scanline |
|
757 |
//keep track of edges that can intersect the pixel filters of the current scanline (Active Edge Table) |
|
758 |
//until all pixels of the scanline have been processed |
|
759 |
// for all sampling points of the current pixel |
|
760 |
// determine the winding number using edge functions |
|
761 |
// add filter weight to coverage |
|
762 |
// divide coverage by the number of samples |
|
763 |
// determine a run of pixels with constant coverage |
|
764 |
// call fill callback for each pixel of the run |
|
765 |
||
766 |
const int fillRuleMask = m_fillRuleMask; |
|
767 |
||
768 |
int bbminx = (m_edgeMin.x >> X_BITS); |
|
769 |
int bbminy = (m_edgeMin.y >> Y_BITS); |
|
770 |
int bbmaxx = (m_edgeMax.x >> X_BITS)+1; |
|
771 |
int bbmaxy = (m_edgeMax.y >> Y_BITS)+1; |
|
772 |
int sx = RI_INT_MAX(m_vpx, bbminx); |
|
773 |
int ex = RI_INT_MIN(m_vpx+m_vpwidth, bbmaxx); |
|
774 |
int sy = RI_INT_MAX(m_vpy, bbminy); |
|
775 |
int ey = RI_INT_MIN(m_vpy+m_vpheight, bbmaxy); |
|
776 |
if(sx < m_covMinx) m_covMinx = sx; |
|
777 |
if(sy < m_covMiny) m_covMiny = sy; |
|
778 |
if(ex > m_covMaxx) m_covMaxx = ex; |
|
779 |
if(ey > m_covMaxy) m_covMaxy = ey; |
|
780 |
||
781 |
#if 0 |
|
782 |
// Dump edges: |
|
783 |
static bool dump = true; |
|
784 |
if (dump) |
|
785 |
{ |
|
786 |
RI_PRINTF("lines = []\n"); |
|
787 |
for (int ie = 0; dump && ie < m_edgePool.size(); ie++) |
|
788 |
{ |
|
789 |
RI_PRINTF("lines += [[%d, %d], [%d, %d]]\n",m_edgePool[ie].v0.x, m_edgePool[ie].v0.y, m_edgePool[ie].v1.x, m_edgePool[ie].v1.y); |
|
790 |
} |
|
791 |
dump = false; |
|
792 |
} |
|
793 |
||
794 |
#endif |
|
795 |
m_aet.clear(); |
|
796 |
||
797 |
#if defined(RI_DEBUG) |
|
798 |
maybeDumpEdges(m_edgePool); |
|
799 |
#endif |
|
800 |
||
801 |
//fill the screen |
|
802 |
for(int j = sy; j < ey; j++) |
|
803 |
{ |
|
804 |
Windings scanlineWinding; |
|
805 |
const int cminy = j << Y_BITS; |
|
806 |
||
807 |
if (m_scissor) |
|
808 |
{ |
|
809 |
// Gather scissor edges intersecting this scanline |
|
810 |
// \todo Don't clear, remove unused instead! |
|
811 |
m_scissorAet.clear(); |
|
812 |
||
813 |
for(int e = 0; e < m_scissorEdges.size(); e++) |
|
814 |
{ |
|
815 |
const ScissorEdge& se = m_scissorEdges[e]; |
|
816 |
||
817 |
if(j >= se.miny && j < se.maxy) |
|
818 |
m_scissorAet.push_back(m_scissorEdges[e]); //throws bad_alloc |
|
819 |
} |
|
820 |
||
821 |
//sort scissor AET by edge x |
|
822 |
if (m_scissor) |
|
823 |
m_scissorAet.sort(); |
|
824 |
} |
|
825 |
||
826 |
// Drop unused edges, update remaining. |
|
827 |
// \todo Combine with full sweep. Use a sort-friendly edge-discard. |
|
828 |
for (int iae = firstAe; iae < m_aet.size(); iae++) |
|
829 |
{ |
|
830 |
ActiveEdge& ae = m_aet[iae]; |
|
831 |
||
832 |
if (cminy >= ae.yEnd) |
|
833 |
{ |
|
834 |
m_aet[iae] = m_aet[firstAe]; |
|
835 |
firstAe++; |
|
836 |
continue; |
|
837 |
} |
|
838 |
||
839 |
/* Update existing coordinates */ |
|
840 |
// \todo AND instead of shift. See other places also. |
|
841 |
const int y0 = (ae.yStart & ~Y_MASK); |
|
842 |
const int x = ae.xRef + ((j << Y_BITS) - y0) * ae.slope; |
|
843 |
ae.minx = x >> RASTERIZER_BITS; |
|
844 |
ae.maxx = (x + ae.slope * (1<<Y_BITS)) >> RASTERIZER_BITS; |
|
845 |
||
846 |
if (ae.minx > ae.maxx) |
|
847 |
RI_ANY_SWAP(ActiveEdge::XCoord, ae.minx, ae.maxx); |
|
848 |
||
849 |
// If the edge is not visible, "mark" it as immediately applicable |
|
850 |
// \todo Verify that this is the correct procedure. |
|
851 |
||
852 |
if (ae.maxx < 0) |
|
853 |
ae.minx = ae.maxx = LEFT_DISCARD_SHORT; |
|
854 |
} |
|
855 |
||
856 |
/* Add new edges */ |
|
857 |
||
858 |
RIuint32 aeIndex = m_edges[j]; |
|
859 |
while (aeIndex != EDGE_TERMINATOR) |
|
860 |
{ |
|
861 |
const ActiveEdge& ae = m_edgePool[aeIndex]; |
|
862 |
m_aet.push_back(ae); // \todo Just copy pointers? |
|
863 |
aeIndex = ae.next; |
|
864 |
} |
|
865 |
||
866 |
if (firstAe >= m_aet.size()) |
|
867 |
{ |
|
868 |
RI_ASSERT(firstAe == m_aet.size()); |
|
869 |
continue; //no edges on the whole scanline, skip it |
|
870 |
} |
|
871 |
||
872 |
//sort AET by edge minx |
|
873 |
m_aet.sort(firstAe, m_aet.size() - 1); |
|
874 |
||
875 |
// \todo Optimize adding and updating the edges? |
|
876 |
if (m_scissor && !m_scissorAet.size()) |
|
877 |
continue; // Scissoring is on, but there are no scissor rectangles on this scanline. |
|
878 |
||
879 |
//fill the scanline |
|
880 |
int scissorWinding = m_scissor ? 0 : 1; //if scissoring is off, winding is always 1 |
|
881 |
int scissorIndex = 0; |
|
882 |
int aes = firstAe; |
|
883 |
int aen = firstAe; |
|
884 |
||
885 |
RI_ASSERT(sx >= 0); |
|
886 |
||
887 |
#if 1 |
|
888 |
if (m_aa) |
|
889 |
{ |
|
890 |
while ((aen < m_aet.size()) && (m_aet[aen].maxx < 0)) |
|
891 |
{ |
|
892 |
applyLeftEdge(m_aet[aen], scanlineWinding, j); |
|
893 |
aen++; |
|
894 |
} |
|
895 |
} |
|
896 |
else |
|
897 |
{ |
|
898 |
while ((aen < m_aet.size()) && (m_aet[aen].maxx < 0)) |
|
899 |
{ |
|
900 |
applyLeftEdgeNoAA(m_aet[aen], scanlineWinding, j); |
|
901 |
aen++; |
|
902 |
} |
|
903 |
} |
|
904 |
||
905 |
#if defined(RI_DEBUG) |
|
906 |
for (int a = aen; a < m_aet.size(); a++) |
|
907 |
{ |
|
908 |
RI_ASSERT(m_aet[a].maxx >= 0); |
|
909 |
} |
|
910 |
#endif |
|
911 |
#endif |
|
912 |
||
913 |
// \todo Combine this with the first check or reorganize the "clipping". |
|
914 |
if (aen >= m_aet.size()) |
|
915 |
continue; // No edges within viewport. Can happen atm. when all edges are "left". |
|
916 |
||
917 |
for(int i = sx; i < ex;) |
|
918 |
{ |
|
919 |
//find edges that intersect or are to the left of the pixel antialiasing filter |
|
920 |
while(aes < m_aet.size() && (i + 1) >= m_aet[aes].minx) |
|
921 |
aes++; |
|
922 |
//edges [0,aes[ may have an effect on winding, and need to be evaluated while sampling |
|
923 |
||
924 |
// RIint8 winding[SF_SAMPLES]; |
|
925 |
Windings pixelWinding; |
|
926 |
||
927 |
pixelWinding = scanlineWinding; |
|
928 |
||
929 |
if (m_aa) |
|
930 |
{ |
|
931 |
for(int e = aen; e < aes; e++) |
|
932 |
{ |
|
933 |
const ActiveEdge& currAe = m_aet[e]; |
|
934 |
calculateAEWinding(currAe, pixelWinding, scanlineWinding, j, i << RASTERIZER_BITS); |
|
935 |
} |
|
936 |
} |
|
937 |
else |
|
938 |
{ |
|
939 |
for(int e = aen; e < aes; e++) |
|
940 |
{ |
|
941 |
const ActiveEdge& currAe = m_aet[e]; |
|
942 |
calculateAEWindingNoAA(currAe, pixelWinding, scanlineWinding, j, i << RASTERIZER_BITS); |
|
943 |
} |
|
944 |
} |
|
945 |
||
946 |
//compute coverage |
|
947 |
int coverageSamples = 0; |
|
948 |
#if !defined(USE_SSE2) |
|
949 |
||
950 |
for (int s = 0; s < SF_SAMPLES; s++) |
|
951 |
{ |
|
952 |
if(pixelWinding.winding[s]) |
|
953 |
{ |
|
954 |
coverageSamples++; |
|
955 |
} |
|
956 |
} |
|
957 |
#else |
|
958 |
coverageSamples = mm_winding_to_coverage(pixelWinding, fillRuleMask); |
|
959 |
_mm_empty(); |
|
960 |
#endif |
|
961 |
||
962 |
//constant coverage optimization: |
|
963 |
//scan AET from left to right and skip all the edges that are completely to the left of the pixel filter. |
|
964 |
//since AET is sorted by minx, the edge we stop at is the leftmost of the edges we haven't passed yet. |
|
965 |
//if that edge is to the right of this pixel, coverage is constant between this pixel and the start of the edge. |
|
966 |
while(aen < m_aet.size() && m_aet[aen].maxx < i) |
|
967 |
aen++; |
|
968 |
||
969 |
int endSpan = m_vpx + m_vpwidth; // endSpan is the first pixel NOT part of the span |
|
970 |
||
971 |
if(aen < m_aet.size()) |
|
972 |
{ |
|
973 |
endSpan = RI_INT_MAX(i+1, RI_INT_MIN(endSpan, m_aet[aen].minx)); |
|
974 |
} |
|
975 |
||
976 |
//fill a run of pixels with constant coverage |
|
977 |
if(coverageSamples) |
|
978 |
{ |
|
979 |
||
980 |
if (!m_scissor) |
|
981 |
{ |
|
982 |
int fillStartX = i; /* Inclusive */ |
|
983 |
pushSpan(fillStartX, j, (endSpan - fillStartX), coverageSamples); |
|
984 |
} |
|
985 |
else // (scissor) |
|
986 |
{ |
|
987 |
int fillStartX = i; |
|
988 |
//update scissor winding number |
|
989 |
||
990 |
/* \todo Sort the scissor edges and skip unnecessary checks when scissors are used */ |
|
991 |
while (scissorIndex < m_scissorAet.size() && m_scissorAet[scissorIndex].x <= fillStartX) |
|
992 |
{ |
|
993 |
scissorWinding += m_scissorAet[scissorIndex++].direction; |
|
994 |
} |
|
995 |
||
996 |
while (!scissorWinding && scissorIndex < m_scissorAet.size() && m_scissorAet[scissorIndex].x < endSpan) |
|
997 |
{ |
|
998 |
fillStartX = m_scissorAet[scissorIndex].x; |
|
999 |
scissorWinding += m_scissorAet[scissorIndex++].direction; |
|
1000 |
RI_ASSERT(fillStartX >= i); |
|
1001 |
} |
|
1002 |
||
1003 |
RI_ASSERT(scissorWinding >= 0); |
|
1004 |
||
1005 |
int endScissorSpan = endSpan; |
|
1006 |
||
1007 |
while (scissorWinding && fillStartX < endSpan && (scissorIndex < m_scissorAet.size())) |
|
1008 |
{ |
|
1009 |
||
1010 |
// Determine the end of renderable area: |
|
1011 |
while (scissorWinding && scissorIndex < m_scissorAet.size() && m_scissorAet[scissorIndex].x <= endSpan) |
|
1012 |
{ |
|
1013 |
endScissorSpan = m_scissorAet[scissorIndex].x; |
|
1014 |
scissorWinding += m_scissorAet[scissorIndex++].direction; |
|
1015 |
} |
|
1016 |
||
1017 |
RI_ASSERT(fillStartX >= i); |
|
1018 |
RI_ASSERT(endScissorSpan <= endSpan); |
|
1019 |
||
1020 |
pushSpan(fillStartX, j, (endScissorSpan - fillStartX), coverageSamples); |
|
1021 |
fillStartX = endScissorSpan; |
|
1022 |
endScissorSpan = endSpan; |
|
1023 |
||
1024 |
// Skip until within drawable area |
|
1025 |
while (!scissorWinding && scissorIndex < m_scissorAet.size() && m_scissorAet[scissorIndex].x < endSpan) |
|
1026 |
{ |
|
1027 |
fillStartX = m_scissorAet[scissorIndex].x; |
|
1028 |
scissorWinding += m_scissorAet[scissorIndex++].direction; |
|
1029 |
} |
|
1030 |
||
1031 |
} |
|
1032 |
} |
|
1033 |
} |
|
1034 |
i = endSpan; |
|
1035 |
} |
|
1036 |
} |
|
1037 |
commitSpans(); |
|
1038 |
#if defined(USE_SSE2) |
|
1039 |
_mm_empty(); |
|
1040 |
#endif |
|
1041 |
clear(); |
|
1042 |
} |
|
1043 |
||
1044 |
/**
 * Flush the span cache: pass every pending span to the pixel pipe in a
 * single fillSpans() call and mark the cache as empty again.
 * When nothing is pending the pixel pipe is not invoked at all.
 */
RI_INLINE void Rasterizer::commitSpans()
{
    if (m_nSpans)
    {
        // Submit the first m_nSpans entries of the cache in one batch.
        m_pixelPipe->fillSpans(m_ppVariants, m_spanCache, m_nSpans);

        // The cache is reused from the beginning for subsequent spans.
        m_nSpans = 0;
    }
}
|
1053 |
||
1054 |
/**
 * Append one horizontal span to the span cache.
 *
 * \param x         First pixel of the span (stored in Span::x0).
 * \param y         Scanline of the span.
 * \param len       Number of pixels in the span; non-positive lengths are
 *                  ignored without touching the cache.
 * \param coverage  Coverage value stored with the span.
 *
 * Once the cache holds N_CACHED_SPANS entries it is flushed through
 * commitSpans().
 */
RI_INLINE void Rasterizer::pushSpan(int x, int y, int len, int coverage)
{
    // \todo Check what causes this with scissors
    // Empty or negative spans are dropped here instead of being asserted on.
    if (len > 0)
    {
        // Claim the next free cache slot and advance the fill count.
        Span& entry = m_spanCache[m_nSpans++];

        entry.x0 = x;
        entry.y = y;
        entry.len = static_cast<RIuint16>(len);
        entry.coverage = coverage;

        // Cache is full: hand the whole batch over before accepting more.
        if (m_nSpans == N_CACHED_SPANS)
            commitSpans();
    }
}
|
1075 |
||
1076 |
//======================================================================= |
|
1077 |
||
1078 |
} //namespace OpenVGRI |