171 return -1; |
171 return -1; |
172 } |
172 } |
173 return 1; |
173 return 1; |
174 } |
174 } |
175 |
175 |
176 // Unicode case-insensitive comparison |
|
177 static int ucstrcmp(const QChar *a, int alen, const QChar *b, int blen) |
|
178 { |
|
179 if (a == b && alen == blen) |
|
180 return 0; |
|
181 int l = qMin(alen, blen); |
|
182 while (l-- && *a == *b) |
|
183 a++,b++; |
|
184 if (l == -1) |
|
185 return (alen-blen); |
|
186 return a->unicode() - b->unicode(); |
|
187 } |
|
188 |
|
189 // Unicode case-sensitive compare two same-sized strings |
176 // Unicode case-sensitive compare two same-sized strings |
190 static int ucstrncmp(const QChar *a, const QChar *b, int l) |
177 static int ucstrncmp(const QChar *a, const QChar *b, int l) |
191 { |
178 { |
192 while (l-- && *a == *b) |
179 while (l-- && *a == *b) |
193 a++,b++; |
180 a++,b++; |
194 if (l==-1) |
181 if (l==-1) |
195 return 0; |
182 return 0; |
196 return a->unicode() - b->unicode(); |
183 return a->unicode() - b->unicode(); |
197 } |
184 } |
198 |
185 |
|
186 // Unicode case-sensitive comparison |
|
187 static int ucstrcmp(const QChar *a, int alen, const QChar *b, int blen) |
|
188 { |
|
189 if (a == b && alen == blen) |
|
190 return 0; |
|
191 int l = qMin(alen, blen); |
|
192 int cmp = ucstrncmp(a, b, l); |
|
193 return cmp ? cmp : (alen-blen); |
|
194 } |
|
195 |
199 // Unicode case-insensitive compare two same-sized strings |
196 // Unicode case-insensitive compare two same-sized strings |
200 static int ucstrnicmp(const ushort *a, const ushort *b, int l) |
197 static int ucstrnicmp(const ushort *a, const ushort *b, int l) |
201 { |
198 { |
202 return ucstricmp(a, a + l, b, b + l); |
199 return ucstricmp(a, a + l, b, b + l); |
203 } |
200 } |
204 |
201 |
|
202 // Benchmarking indicates that doing memcmp is much slower than |
|
203 // executing the comparison ourselves. |
|
204 // |
|
205 // The profiling was done on a population of calls to qMemEquals, generated |
|
206 // during a run of the demo browser. The profile of the data (32-bit x86 |
|
207 // Linux) was: |
|
208 // |
|
209 // total number of comparisons: 21353 |
|
210 // longest string compared: 95 |
|
211 // average comparison length: 14.8786 |
|
212 // cache-line crosses: 5661 (13.3%) |
|
213 // alignment histogram: |
|
214 // 0xXXX0 = 512 (1.2%) strings, 0 (0.0%) of which same-aligned |
|
215 // 0xXXX2 = 15087 (35.3%) strings, 5145 (34.1%) of which same-aligned |
|
216 // 0xXXX4 = 525 (1.2%) strings, 0 (0.0%) of which same-aligned |
|
217 // 0xXXX6 = 557 (1.3%) strings, 6 (1.1%) of which same-aligned |
|
218 // 0xXXX8 = 509 (1.2%) strings, 0 (0.0%) of which same-aligned |
|
219 // 0xXXXa = 24358 (57.0%) strings, 9901 (40.6%) of which same-aligned |
|
220 // 0xXXXc = 557 (1.3%) strings, 0 (0.0%) of which same-aligned |
|
221 // 0xXXXe = 601 (1.4%) strings, 15 (2.5%) of which same-aligned |
|
222 // total = 42706 (100%) strings, 15067 (35.3%) of which same-aligned |
|
223 // |
|
224 // 92% of the strings have alignment of 2 or 10, which is due to malloc on |
|
225 // 32-bit Linux returning values aligned to 8 bytes, and offsetof(QString::Data, array) == 18. |
|
226 // |
|
227 // The profile on 64-bit will be different since offsetof(QString::Data, array) == 26. |
|
228 // |
|
229 // The benchmark results were, for a Core-i7 @ 2.67 GHz 32-bit, compiled with -O3 -funroll-loops: |
|
230 // 16-bit loads only: 872,301 CPU ticks [Qt 4.5 / memcmp] |
|
231 // 32- and 16-bit loads: 773,362 CPU ticks [Qt 4.6] |
|
232 // SSE2 "movdqu" 128-bit loads: 618,736 CPU ticks |
|
233 // SSE3 "lddqu" 128-bit loads: 619,954 CPU ticks |
|
234 // SSSE3 "palignr" corrections: 852,147 CPU ticks |
|
235 // SSE4.2 "pcmpestrm": 738,702 CPU ticks |
|
236 // |
|
237 // The same benchmark on an Atom N450 @ 1.66 GHz, is: |
|
238 // 16-bit loads only: 2,185,882 CPU ticks |
|
239 // 32- and 16-bit loads: 1,805,060 CPU ticks |
|
240 // SSE2 "movdqu" 128-bit loads: 2,529,843 CPU ticks |
|
241 // SSE3 "lddqu" 128-bit loads: 2,514,858 CPU ticks |
|
242 // SSSE3 "palignr" corrections: 2,160,325 CPU ticks |
|
243 // SSE4.2 not available |
|
244 // |
|
245 // The conclusion we reach is that the SSE2 unaligned code can gain |
|
246 // 20% improvement in performance in some systems, but suffers a penalty due |
|
247 // to the unaligned loads on others. |
|
248 |
205 static bool qMemEquals(const quint16 *a, const quint16 *b, int length) |
249 static bool qMemEquals(const quint16 *a, const quint16 *b, int length) |
206 { |
250 { |
207 // Benchmarking indicates that doing memcmp is much slower than |
|
208 // executing the comparison ourselves. |
|
209 // To make it even faster, we do a 32-bit comparison, comparing |
|
210 // twice the amount of data as a normal word-by-word comparison. |
|
211 // |
|
212 // Benchmarking results on a 2.33 GHz Core2 Duo, with a 64-QChar |
|
213 // block of data, with 4194304 iterations (per iteration): |
|
214 // operation usec cpu ticks |
|
215 // memcmp 330 710 |
|
216 // 16-bit 79 167-171 |
|
217 // 32-bit aligned 49 105-109 |
|
218 // |
|
219 // Testing also indicates that unaligned 32-bit loads are as |
|
220 // performant as 32-bit aligned. |
|
221 if (a == b || !length) |
251 if (a == b || !length) |
222 return true; |
252 return true; |
223 |
253 |
224 register union { |
254 register union { |
225 const quint16 *w; |
255 const quint16 *w; |
6643 arg = QLocale::c().d()->longLongToString(a, -1, base, fieldWidth, flags); |
6673 arg = QLocale::c().d()->longLongToString(a, -1, base, fieldWidth, flags); |
6644 |
6674 |
6645 QString locale_arg; |
6675 QString locale_arg; |
6646 if (d.locale_occurrences > 0) { |
6676 if (d.locale_occurrences > 0) { |
6647 QLocale locale; |
6677 QLocale locale; |
6648 locale_arg = locale.d()->longLongToString(a, -1, base, fieldWidth, |
6678 if (!locale.numberOptions() & QLocale::OmitGroupSeparator) |
6649 flags | QLocalePrivate::ThousandsGroup); |
6679 flags |= QLocalePrivate::ThousandsGroup; |
|
6680 locale_arg = locale.d()->longLongToString(a, -1, base, fieldWidth, flags); |
6650 } |
6681 } |
6651 |
6682 |
6652 return replaceArgEscapes(*this, d, fieldWidth, arg, locale_arg, fillChar); |
6683 return replaceArgEscapes(*this, d, fieldWidth, arg, locale_arg, fillChar); |
6653 } |
6684 } |
6654 |
6685 |
6686 arg = QLocale::c().d()->unsLongLongToString(a, -1, base, fieldWidth, flags); |
6717 arg = QLocale::c().d()->unsLongLongToString(a, -1, base, fieldWidth, flags); |
6687 |
6718 |
6688 QString locale_arg; |
6719 QString locale_arg; |
6689 if (d.locale_occurrences > 0) { |
6720 if (d.locale_occurrences > 0) { |
6690 QLocale locale; |
6721 QLocale locale; |
6691 locale_arg = locale.d()->unsLongLongToString(a, -1, base, fieldWidth, |
6722 if (!locale.numberOptions() & QLocale::OmitGroupSeparator) |
6692 flags | QLocalePrivate::ThousandsGroup); |
6723 flags |= QLocalePrivate::ThousandsGroup; |
|
6724 locale_arg = locale.d()->unsLongLongToString(a, -1, base, fieldWidth, flags); |
6693 } |
6725 } |
6694 |
6726 |
6695 return replaceArgEscapes(*this, d, fieldWidth, arg, locale_arg, fillChar); |
6727 return replaceArgEscapes(*this, d, fieldWidth, arg, locale_arg, fillChar); |
6696 } |
6728 } |
6697 |
6729 |