|
1 /* NOTE: this API is -ONLY- for use with single byte character strings. */ |
|
2 /* Do not use it with Unicode. */ |
|
3 |
|
4 #include "bytes_methods.h" |
|
5 |
|
6 #ifndef STRINGLIB_MUTABLE |
|
7 #warning "STRINGLIB_MUTABLE not defined before #include, assuming 0" |
|
8 #define STRINGLIB_MUTABLE 0 |
|
9 #endif |
|
10 |
|
/* The more complicated methods.  Parts of these should be pulled out into
   the shared code in bytes_methods.c to cut down on duplicate code bloat. */
|
13 |
|
14 PyDoc_STRVAR(expandtabs__doc__, |
|
15 "B.expandtabs([tabsize]) -> copy of B\n\ |
|
16 \n\ |
|
17 Return a copy of B where all tab characters are expanded using spaces.\n\ |
|
18 If tabsize is not given, a tab size of 8 characters is assumed."); |
|
19 |
|
20 static PyObject* |
|
21 stringlib_expandtabs(PyObject *self, PyObject *args) |
|
22 { |
|
23 const char *e, *p; |
|
24 char *q; |
|
25 Py_ssize_t i, j, old_j; |
|
26 PyObject *u; |
|
27 int tabsize = 8; |
|
28 |
|
29 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize)) |
|
30 return NULL; |
|
31 |
|
32 /* First pass: determine size of output string */ |
|
33 i = j = old_j = 0; |
|
34 e = STRINGLIB_STR(self) + STRINGLIB_LEN(self); |
|
35 for (p = STRINGLIB_STR(self); p < e; p++) |
|
36 if (*p == '\t') { |
|
37 if (tabsize > 0) { |
|
38 j += tabsize - (j % tabsize); |
|
39 /* XXX: this depends on a signed integer overflow to < 0 */ |
|
40 /* C compilers, including gcc, do -NOT- guarantee this. */ |
|
41 if (old_j > j) { |
|
42 PyErr_SetString(PyExc_OverflowError, |
|
43 "result is too long"); |
|
44 return NULL; |
|
45 } |
|
46 old_j = j; |
|
47 } |
|
48 } |
|
49 else { |
|
50 j++; |
|
51 if (*p == '\n' || *p == '\r') { |
|
52 i += j; |
|
53 old_j = j = 0; |
|
54 /* XXX: this depends on a signed integer overflow to < 0 */ |
|
55 /* C compilers, including gcc, do -NOT- guarantee this. */ |
|
56 if (i < 0) { |
|
57 PyErr_SetString(PyExc_OverflowError, |
|
58 "result is too long"); |
|
59 return NULL; |
|
60 } |
|
61 } |
|
62 } |
|
63 |
|
64 if ((i + j) < 0) { |
|
65 /* XXX: this depends on a signed integer overflow to < 0 */ |
|
66 /* C compilers, including gcc, do -NOT- guarantee this. */ |
|
67 PyErr_SetString(PyExc_OverflowError, "result is too long"); |
|
68 return NULL; |
|
69 } |
|
70 |
|
71 /* Second pass: create output string and fill it */ |
|
72 u = STRINGLIB_NEW(NULL, i + j); |
|
73 if (!u) |
|
74 return NULL; |
|
75 |
|
76 j = 0; |
|
77 q = STRINGLIB_STR(u); |
|
78 |
|
79 for (p = STRINGLIB_STR(self); p < e; p++) |
|
80 if (*p == '\t') { |
|
81 if (tabsize > 0) { |
|
82 i = tabsize - (j % tabsize); |
|
83 j += i; |
|
84 while (i--) |
|
85 *q++ = ' '; |
|
86 } |
|
87 } |
|
88 else { |
|
89 j++; |
|
90 *q++ = *p; |
|
91 if (*p == '\n' || *p == '\r') |
|
92 j = 0; |
|
93 } |
|
94 |
|
95 return u; |
|
96 } |
|
97 |
|
98 Py_LOCAL_INLINE(PyObject *) |
|
99 pad(PyObject *self, Py_ssize_t left, Py_ssize_t right, char fill) |
|
100 { |
|
101 PyObject *u; |
|
102 |
|
103 if (left < 0) |
|
104 left = 0; |
|
105 if (right < 0) |
|
106 right = 0; |
|
107 |
|
108 if (left == 0 && right == 0 && STRINGLIB_CHECK_EXACT(self)) { |
|
109 #if STRINGLIB_MUTABLE |
|
110 /* We're defined as returning a copy; If the object is mutable |
|
111 * that means we must make an identical copy. */ |
|
112 return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self)); |
|
113 #else |
|
114 Py_INCREF(self); |
|
115 return (PyObject *)self; |
|
116 #endif /* STRINGLIB_MUTABLE */ |
|
117 } |
|
118 |
|
119 u = STRINGLIB_NEW(NULL, |
|
120 left + STRINGLIB_LEN(self) + right); |
|
121 if (u) { |
|
122 if (left) |
|
123 memset(STRINGLIB_STR(u), fill, left); |
|
124 Py_MEMCPY(STRINGLIB_STR(u) + left, |
|
125 STRINGLIB_STR(self), |
|
126 STRINGLIB_LEN(self)); |
|
127 if (right) |
|
128 memset(STRINGLIB_STR(u) + left + STRINGLIB_LEN(self), |
|
129 fill, right); |
|
130 } |
|
131 |
|
132 return u; |
|
133 } |
|
134 |
|
135 PyDoc_STRVAR(ljust__doc__, |
|
136 "B.ljust(width[, fillchar]) -> copy of B\n" |
|
137 "\n" |
|
138 "Return B left justified in a string of length width. Padding is\n" |
|
139 "done using the specified fill character (default is a space)."); |
|
140 |
|
141 static PyObject * |
|
142 stringlib_ljust(PyObject *self, PyObject *args) |
|
143 { |
|
144 Py_ssize_t width; |
|
145 char fillchar = ' '; |
|
146 |
|
147 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar)) |
|
148 return NULL; |
|
149 |
|
150 if (STRINGLIB_LEN(self) >= width && STRINGLIB_CHECK_EXACT(self)) { |
|
151 #if STRINGLIB_MUTABLE |
|
152 /* We're defined as returning a copy; If the object is mutable |
|
153 * that means we must make an identical copy. */ |
|
154 return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self)); |
|
155 #else |
|
156 Py_INCREF(self); |
|
157 return (PyObject*) self; |
|
158 #endif |
|
159 } |
|
160 |
|
161 return pad(self, 0, width - STRINGLIB_LEN(self), fillchar); |
|
162 } |
|
163 |
|
164 |
|
165 PyDoc_STRVAR(rjust__doc__, |
|
166 "B.rjust(width[, fillchar]) -> copy of B\n" |
|
167 "\n" |
|
168 "Return B right justified in a string of length width. Padding is\n" |
|
169 "done using the specified fill character (default is a space)"); |
|
170 |
|
171 static PyObject * |
|
172 stringlib_rjust(PyObject *self, PyObject *args) |
|
173 { |
|
174 Py_ssize_t width; |
|
175 char fillchar = ' '; |
|
176 |
|
177 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar)) |
|
178 return NULL; |
|
179 |
|
180 if (STRINGLIB_LEN(self) >= width && STRINGLIB_CHECK_EXACT(self)) { |
|
181 #if STRINGLIB_MUTABLE |
|
182 /* We're defined as returning a copy; If the object is mutable |
|
183 * that means we must make an identical copy. */ |
|
184 return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self)); |
|
185 #else |
|
186 Py_INCREF(self); |
|
187 return (PyObject*) self; |
|
188 #endif |
|
189 } |
|
190 |
|
191 return pad(self, width - STRINGLIB_LEN(self), 0, fillchar); |
|
192 } |
|
193 |
|
194 |
|
195 PyDoc_STRVAR(center__doc__, |
|
196 "B.center(width[, fillchar]) -> copy of B\n" |
|
197 "\n" |
|
198 "Return B centered in a string of length width. Padding is\n" |
|
199 "done using the specified fill character (default is a space)."); |
|
200 |
|
201 static PyObject * |
|
202 stringlib_center(PyObject *self, PyObject *args) |
|
203 { |
|
204 Py_ssize_t marg, left; |
|
205 Py_ssize_t width; |
|
206 char fillchar = ' '; |
|
207 |
|
208 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar)) |
|
209 return NULL; |
|
210 |
|
211 if (STRINGLIB_LEN(self) >= width && STRINGLIB_CHECK_EXACT(self)) { |
|
212 #if STRINGLIB_MUTABLE |
|
213 /* We're defined as returning a copy; If the object is mutable |
|
214 * that means we must make an identical copy. */ |
|
215 return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self)); |
|
216 #else |
|
217 Py_INCREF(self); |
|
218 return (PyObject*) self; |
|
219 #endif |
|
220 } |
|
221 |
|
222 marg = width - STRINGLIB_LEN(self); |
|
223 left = marg / 2 + (marg & width & 1); |
|
224 |
|
225 return pad(self, left, marg - left, fillchar); |
|
226 } |
|
227 |
|
228 PyDoc_STRVAR(zfill__doc__, |
|
229 "B.zfill(width) -> copy of B\n" |
|
230 "\n" |
|
231 "Pad a numeric string B with zeros on the left, to fill a field\n" |
|
232 "of the specified width. B is never truncated."); |
|
233 |
|
234 static PyObject * |
|
235 stringlib_zfill(PyObject *self, PyObject *args) |
|
236 { |
|
237 Py_ssize_t fill; |
|
238 PyObject *s; |
|
239 char *p; |
|
240 Py_ssize_t width; |
|
241 |
|
242 if (!PyArg_ParseTuple(args, "n:zfill", &width)) |
|
243 return NULL; |
|
244 |
|
245 if (STRINGLIB_LEN(self) >= width) { |
|
246 if (STRINGLIB_CHECK_EXACT(self)) { |
|
247 #if STRINGLIB_MUTABLE |
|
248 /* We're defined as returning a copy; If the object is mutable |
|
249 * that means we must make an identical copy. */ |
|
250 return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self)); |
|
251 #else |
|
252 Py_INCREF(self); |
|
253 return (PyObject*) self; |
|
254 #endif |
|
255 } |
|
256 else |
|
257 return STRINGLIB_NEW( |
|
258 STRINGLIB_STR(self), |
|
259 STRINGLIB_LEN(self) |
|
260 ); |
|
261 } |
|
262 |
|
263 fill = width - STRINGLIB_LEN(self); |
|
264 |
|
265 s = pad(self, fill, 0, '0'); |
|
266 |
|
267 if (s == NULL) |
|
268 return NULL; |
|
269 |
|
270 p = STRINGLIB_STR(s); |
|
271 if (p[fill] == '+' || p[fill] == '-') { |
|
272 /* move sign to beginning of string */ |
|
273 p[0] = p[fill]; |
|
274 p[fill] = '0'; |
|
275 } |
|
276 |
|
277 return (PyObject*) s; |
|
278 } |
|
279 |
|
280 |
|
281 #define _STRINGLIB_SPLIT_APPEND(data, left, right) \ |
|
282 str = STRINGLIB_NEW((data) + (left), \ |
|
283 (right) - (left)); \ |
|
284 if (str == NULL) \ |
|
285 goto onError; \ |
|
286 if (PyList_Append(list, str)) { \ |
|
287 Py_DECREF(str); \ |
|
288 goto onError; \ |
|
289 } \ |
|
290 else \ |
|
291 Py_DECREF(str); |
|
292 |
|
293 PyDoc_STRVAR(splitlines__doc__, |
|
294 "B.splitlines([keepends]) -> list of lines\n\ |
|
295 \n\ |
|
296 Return a list of the lines in B, breaking at line boundaries.\n\ |
|
297 Line breaks are not included in the resulting list unless keepends\n\ |
|
298 is given and true."); |
|
299 |
|
300 static PyObject* |
|
301 stringlib_splitlines(PyObject *self, PyObject *args) |
|
302 { |
|
303 register Py_ssize_t i; |
|
304 register Py_ssize_t j; |
|
305 Py_ssize_t len; |
|
306 int keepends = 0; |
|
307 PyObject *list; |
|
308 PyObject *str; |
|
309 char *data; |
|
310 |
|
311 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends)) |
|
312 return NULL; |
|
313 |
|
314 data = STRINGLIB_STR(self); |
|
315 len = STRINGLIB_LEN(self); |
|
316 |
|
317 /* This does not use the preallocated list because splitlines is |
|
318 usually run with hundreds of newlines. The overhead of |
|
319 switching between PyList_SET_ITEM and append causes about a |
|
320 2-3% slowdown for that common case. A smarter implementation |
|
321 could move the if check out, so the SET_ITEMs are done first |
|
322 and the appends only done when the prealloc buffer is full. |
|
323 That's too much work for little gain.*/ |
|
324 |
|
325 list = PyList_New(0); |
|
326 if (!list) |
|
327 goto onError; |
|
328 |
|
329 for (i = j = 0; i < len; ) { |
|
330 Py_ssize_t eol; |
|
331 |
|
332 /* Find a line and append it */ |
|
333 while (i < len && data[i] != '\n' && data[i] != '\r') |
|
334 i++; |
|
335 |
|
336 /* Skip the line break reading CRLF as one line break */ |
|
337 eol = i; |
|
338 if (i < len) { |
|
339 if (data[i] == '\r' && i + 1 < len && |
|
340 data[i+1] == '\n') |
|
341 i += 2; |
|
342 else |
|
343 i++; |
|
344 if (keepends) |
|
345 eol = i; |
|
346 } |
|
347 _STRINGLIB_SPLIT_APPEND(data, j, eol); |
|
348 j = i; |
|
349 } |
|
350 if (j < len) { |
|
351 _STRINGLIB_SPLIT_APPEND(data, j, len); |
|
352 } |
|
353 |
|
354 return list; |
|
355 |
|
356 onError: |
|
357 Py_XDECREF(list); |
|
358 return NULL; |
|
359 } |
|
360 |
|
361 #undef _STRINGLIB_SPLIT_APPEND |
|
362 |