|
1 { |
|
2 pcRegExp - Perl compatible regular expressions for Virtual Pascal |
|
3 (c) 2001 Peter S. Voronov aka Chem O'Dun <petervrn@yahoo.com> |
|
4 |
|
5 Based on PCRE library interface unit for Virtual Pascal. |
|
6 (c) 2001 Alexander Tokarev <dwalin@dwalin.ru> |
|
7 |
|
8 The current PCRE version is: 3.7 |
|
9 |
|
10 This software must be distributed as Freeware. |
|
11 |
|
12 The PCRE library is written by: Philip Hazel <ph10@cam.ac.uk> |
|
13 Copyright (c) 1997-2004 University of Cambridge |
|
14 |
|
15 AngelsHolocaust 4-11-04 updated to use version v5.0 |
|
16 (INFO: this is regex-directed, NFA) |
|
17 AH: 9-11-04 - pcre_free: removed var, pcre already gives the ptr, now |
|
18 everything works as it should (no more crashes) |
|
19 -> removed CheckRegExp because pcre handles errors perfectly |
|
20 10-11-04 - added pcError (errorhandling), pcInit |
|
21 13-11-04 - removed the ErrorPos = 0 check -> always print erroroffset |
|
22 17-10-05 - support for \1-\9 backreferences in TpcRegExp.GetReplStr |
|
23 17-02-06 - added RunTimeOptions: caller can set options while searching |
|
24 19-02-06 - added SearchOfs(): let PCRE use the complete string and offset |
|
25 into the string itself |
|
26 20-12-06 - support for version 7.0 |
|
27 27.08.08 - support for v7.7 |
|
28 } |
|
29 |
|
30 {$H+} {$DEFINE PCRE_3_7} {$DEFINE PCRE_5_0} {$DEFINE PCRE_7_0} {$DEFINE PCRE_7_7} |
|
31 |
|
32 Unit pcregexp; |
|
33 |
|
34 Interface |
|
35 |
|
36 uses objects; |
|
37 |
|
38 Type |
|
39 PpcRegExp = ^TpcRegExp; |
|
40 // TpcRegExp = object |
|
41 TpcRegExp = object(TObject) |
|
42 MatchesCount: integer; |
|
43 RegExpC, RegExpExt : Pointer; |
|
44 Matches:Pointer; |
|
45 RegExp: shortstring; |
|
46 SourceLen: integer; |
|
47 PartialMatch : boolean; |
|
48 Error : boolean; |
|
49 ErrorMsg : Pchar; |
|
50 ErrorPos : integer; |
|
51 RunTimeOptions: Integer; // options which can be set by the caller |
|
52 constructor Init(const ARegExp : shortstring; AOptions : integer; ALocale : Pointer); |
|
53 function Search(AStr: Pchar; ALen : longint) : boolean; virtual; |
|
54 function SearchNext( AStr: Pchar; ALen : longint) : boolean; virtual; |
|
55 function SearchOfs ( AStr: Pchar; ALen, AOfs : longint) : boolean; virtual; |
|
56 function MatchSub(ANom: integer; var Pos, Len : longint) : boolean; virtual; |
|
57 function MatchFull(var Pos, Len : longint) : boolean; virtual; |
|
58 function GetSubStr(ANom: integer; AStr: Pchar) : string; virtual; |
|
59 function GetFullStr(AStr: Pchar) : string; virtual; |
|
60 function GetReplStr(AStr: Pchar; const ARepl: string) : string; virtual; |
|
61 function GetPreSubStr(AStr: Pchar) : string; virtual; |
|
62 function GetPostSubStr(AStr: Pchar) : string; virtual; |
|
63 function ErrorStr : string; virtual; |
|
64 destructor Done; virtual; |
|
65 end; |
|
66 |
|
67 function pcGrepMatch(WildCard, aStr: string; AOptions:integer; ALocale : Pointer): Boolean; |
|
68 function pcGrepSub(WildCard, aStr, aRepl: string; AOptions:integer; ALocale : Pointer): string; |
|
69 |
|
70 function pcFastGrepMatch(WildCard, aStr: string): Boolean; |
|
71 function pcFastGrepSub(WildCard, aStr, aRepl: string): string; |
|
72 |
|
73 {$IFDEF PCRE_5_0} |
|
74 function pcGetVersion : pchar; |
|
75 {$ENDIF} |
|
76 |
|
77 function pcError (var pRegExp : Pointer) : Boolean; |
|
78 function pcInit (const Pattern: Shortstring; CaseSens: Boolean) : Pointer; |
|
79 |
|
80 Const { Options } |
|
81 PCRE_CASELESS = $0001; |
|
82 PCRE_MULTILINE = $0002; |
|
83 PCRE_DOTALL = $0004; |
|
84 PCRE_EXTENDED = $0008; |
|
85 PCRE_ANCHORED = $0010; |
|
86 PCRE_DOLLAR_ENDONLY = $0020; |
|
87 PCRE_EXTRA = $0040; |
|
88 PCRE_NOTBOL = $0080; |
|
89 PCRE_NOTEOL = $0100; |
|
90 PCRE_UNGREEDY = $0200; |
|
91 PCRE_NOTEMPTY = $0400; |
|
92 {$IFDEF PCRE_5_0} |
|
93 PCRE_UTF8 = $0800; |
|
94 PCRE_NO_AUTO_CAPTURE = $1000; |
|
95 PCRE_NO_UTF8_CHECK = $2000; |
|
96 PCRE_AUTO_CALLOUT = $4000; |
|
97 PCRE_PARTIAL = $8000; |
|
98 {$ENDIF} |
|
99 {$IFDEF PCRE_7_0} |
|
100 PCRE_DFA_SHORTEST = $00010000; |
|
101 PCRE_DFA_RESTART = $00020000; |
|
102 PCRE_FIRSTLINE = $00040000; |
|
103 PCRE_DUPNAMES = $00080000; |
|
104 PCRE_NEWLINE_CR = $00100000; |
|
105 PCRE_NEWLINE_LF = $00200000; |
|
106 PCRE_NEWLINE_CRLF = $00300000; |
|
107 PCRE_NEWLINE_ANY = $00400000; |
|
108 PCRE_NEWLINE_ANYCRLF = $00500000; |
|
109 |
|
110 PCRE_NEWLINE_BITS = PCRE_NEWLINE_CR or PCRE_NEWLINE_LF or PCRE_NEWLINE_ANY; |
|
111 |
|
112 {$ENDIF} |
|
113 {$IFDEF PCRE_7_7} |
|
114 PCRE_BSR_ANYCRLF = $00800000; |
|
115 PCRE_BSR_UNICODE = $01000000; |
|
116 PCRE_JAVASCRIPT_COMPAT= $02000000; |
|
117 {$ENDIF} |
|
118 |
|
119 PCRE_COMPILE_ALLOWED_OPTIONS = PCRE_ANCHORED + PCRE_AUTO_CALLOUT + PCRE_CASELESS + |
|
120 PCRE_DOLLAR_ENDONLY + PCRE_DOTALL + PCRE_EXTENDED + |
|
121 PCRE_EXTRA + PCRE_MULTILINE + PCRE_NO_AUTO_CAPTURE + |
|
122 PCRE_UNGREEDY + PCRE_UTF8 + PCRE_NO_UTF8_CHECK |
|
123 {$IFDEF PCRE_7_0} |
|
124 + PCRE_DUPNAMES + PCRE_FIRSTLINE + PCRE_NEWLINE_BITS |
|
125 {$ENDIF} |
|
126 {$IFDEF PCRE_7_7} |
|
127 + PCRE_BSR_ANYCRLF + PCRE_BSR_UNICODE + PCRE_JAVASCRIPT_COMPAT |
|
128 {$ENDIF} |
|
129 ; |
|
130 |
|
131 PCRE_EXEC_ALLOWED_OPTIONS = PCRE_ANCHORED + PCRE_NOTBOL + PCRE_NOTEOL + |
|
132 PCRE_NOTEMPTY + PCRE_NO_UTF8_CHECK + PCRE_PARTIAL |
|
133 {$IFDEF PCRE_7_0} |
|
134 + PCRE_NEWLINE_BITS |
|
135 {$ENDIF} |
|
136 {$IFDEF PCRE_7_7} |
|
137 + PCRE_BSR_ANYCRLF + PCRE_BSR_UNICODE |
|
138 {$ENDIF} |
|
139 ; |
|
140 |
|
141 {$IFDEF PCRE_7_0} |
|
142 PCRE_DFA_EXEC_ALLOWED_OPTIONS = PCRE_ANCHORED + PCRE_NOTBOL + PCRE_NOTEOL + |
|
143 PCRE_NOTEMPTY + PCRE_NO_UTF8_CHECK + PCRE_PARTIAL + |
|
144 PCRE_DFA_SHORTEST + PCRE_DFA_RESTART + |
|
145 PCRE_NEWLINE_BITS |
|
146 {$IFDEF PCRE_7_7} |
|
147 + PCRE_BSR_ANYCRLF + PCRE_BSR_UNICODE |
|
148 {$ENDIF} |
|
149 ; |
|
150 {$ENDIF} |
|
151 |
|
152 { Exec-time and get/set-time error codes } |
|
153 PCRE_ERROR_NOMATCH = -1; |
|
154 PCRE_ERROR_NULL = -2; |
|
155 PCRE_ERROR_BADOPTION = -3; |
|
156 PCRE_ERROR_BADMAGIC = -4; |
|
157 PCRE_ERROR_UNKNOWN_MODE = -5; |
|
158 PCRE_ERROR_NOMEMORY = -6; |
|
159 PCRE_ERROR_NOSUBSTRING = -7; |
|
160 {$IFDEF PCRE_5_0} |
|
161 PCRE_ERROR_MATCHLIMIT = -8; |
|
162 PCRE_ERROR_CALLOUT = -9; { Never used by PCRE itself } |
|
163 PCRE_ERROR_BADUTF8 = -10; |
|
164 PCRE_ERROR_BADUTF8_OFFSET = -11; |
|
165 PCRE_ERROR_PARTIAL = -12; |
|
166 PCRE_ERROR_BADPARTIAL = -13; |
|
167 PCRE_ERROR_INTERNAL = -14; |
|
168 PCRE_ERROR_BADCOUNT = -15; |
|
169 {$ENDIF} |
|
170 {$IFDEF PCRE_7_0} |
|
171 PCRE_ERROR_DFA_UITEM = -16; |
|
172 PCRE_ERROR_DFA_UCOND = -17; |
|
173 PCRE_ERROR_DFA_UMLIMIT = -18; |
|
174 PCRE_ERROR_DFA_WSSIZE = -19; |
|
175 PCRE_ERROR_DFA_RECURSE = -20; |
|
176 PCRE_ERROR_RECURSIONLIMIT = -21; |
|
177 PCRE_ERROR_NULLWSLIMIT = -22; |
|
178 PCRE_ERROR_BADNEWLINE = -23; |
|
179 {$ENDIF} |
|
180 |
|
181 { Request types for pcre_fullinfo() } |
|
182 |
|
183 PCRE_INFO_OPTIONS = 0; |
|
184 PCRE_INFO_SIZE = 1; |
|
185 PCRE_INFO_CAPTURECOUNT = 2; |
|
186 PCRE_INFO_BACKREFMAX = 3; |
|
187 PCRE_INFO_FIRSTBYTE = 4; |
|
188 PCRE_INFO_FIRSTCHAR = 4; { For backwards compatibility } |
|
189 PCRE_INFO_FIRSTTABLE = 5; |
|
190 {$IFDEF PCRE_5_0} |
|
191 PCRE_INFO_LASTLITERAL = 6; |
|
192 PCRE_INFO_NAMEENTRYSIZE = 7; |
|
193 PCRE_INFO_NAMECOUNT = 8; |
|
194 PCRE_INFO_NAMETABLE = 9; |
|
195 PCRE_INFO_STUDYSIZE = 10; |
|
196 PCRE_INFO_DEFAULT_TABLES = 11; |
|
197 {$ENDIF PCRE_5_0} |
|
198 {$IFDEF PCRE_7_7} |
|
199 PCRE_INFO_OKPARTIAL = 12; |
|
200 PCRE_INFO_JCHANGED = 13; |
|
201 PCRE_INFO_HASCRORLF = 14; |
|
202 {$ENDIF} |
|
203 |
|
204 { Request types for pcre_config() } |
|
205 {$IFDEF PCRE_5_0} |
|
206 PCRE_CONFIG_UTF8 = 0; |
|
207 PCRE_CONFIG_NEWLINE = 1; |
|
208 PCRE_CONFIG_LINK_SIZE = 2; |
|
209 PCRE_CONFIG_POSIX_MALLOC_THRESHOLD = 3; |
|
210 PCRE_CONFIG_MATCH_LIMIT = 4; |
|
211 PCRE_CONFIG_STACKRECURSE = 5; |
|
212 PCRE_CONFIG_UNICODE_PROPERTIES = 6; |
|
213 {$ENDIF PCRE_5_0} |
|
214 {$IFDEF PCRE_7_0} |
|
215 PCRE_CONFIG_MATCH_LIMIT_RECURSION = 7; |
|
216 {$ENDIF} |
|
217 {$IFDEF PCRE_7_7} |
|
218 PCRE_CONFIG_BSR = 8; |
|
219 {$ENDIF} |
|
220 |
|
221 { Bit flags for the pcre_extra structure } |
|
222 {$IFDEF PCRE_5_0} |
|
223 PCRE_EXTRA_STUDY_DATA = $0001; |
|
224 PCRE_EXTRA_MATCH_LIMIT = $0002; |
|
225 PCRE_EXTRA_CALLOUT_DATA = $0004; |
|
226 PCRE_EXTRA_TABLES = $0008; |
|
227 {$ENDIF PCRE_5_0} |
|
228 {$IFDEF PCRE_7_0} |
|
229 PCRE_EXTRA_MATCH_LIMIT_RECURSION = $0010; |
|
230 {$ENDIF} |
|
231 |
|
232 Const |
|
233 // DefaultOptions : integer = 0; |
|
234 DefaultLocaleTable : pointer = nil; |
|
235 |
|
236 {$IFDEF PCRE_5_0} |
|
237 { The structure for passing additional data to pcre_exec(). This is defined in |
|
238 such as way as to be extensible. Always add new fields at the end, in order to |
|
239 remain compatible. } |
|
240 |
|
241 type ppcre_extra = ^tpcre_extra; |
|
242 tpcre_extra = record |
|
243 flags : longint; { Bits for which fields are set } |
|
244 study_data : pointer; { Opaque data from pcre_study() } |
|
245 match_limit : longint; { Maximum number of calls to match() } |
|
246 callout_data : pointer; { Data passed back in callouts } |
|
247 tables : pointer; { Pointer to character tables } |
|
248 match_limit_recursion: longint; { Max recursive calls to match() } |
|
249 end; |
|
250 |
|
251 type ppcre_callout_block = ^pcre_callout_block; |
|
252 pcre_callout_block = record |
|
253 version, |
|
254 (* ------------------------ Version 0 ------------------------------- *) |
|
255 callout_number : integer; |
|
256 offset_vector : pointer; |
|
257 subject : pchar; |
|
258 subject_length, start_match, current_position, capture_top, |
|
259 capture_last : integer; |
|
260 callout_data : pointer; |
|
261 (* ------------------- Added for Version 1 -------------------------- *) |
|
262 pattern_position, next_item_length : integer; |
|
263 end; |
|
264 {$ENDIF PCRE_5_0} |
|
265 |
|
266 {$OrgName+} |
|
267 {$IFDEF VIRTUALPASCAL} {&Cdecl+} {$ENDIF VIRTUALPASCAL} |
|
268 |
|
269 { local replacement of external pcre memory management functions } |
|
270 function pcre_malloc( size : integer ) : pointer; |
|
271 procedure pcre_free( {var} p : pointer ); |
|
272 {$IFDEF PCRE_5_0} |
|
273 const pcre_stack_malloc: function ( size : integer ): pointer = pcre_malloc; |
|
274 pcre_stack_free: procedure ( {var} p : pointer ) = pcre_free; |
|
275 function pcre_callout(var p : ppcre_callout_block) : integer; |
|
276 {$ENDIF PCRE_5_0} |
|
277 {$IFDEF VIRTUALPASCAL} {&Cdecl-} {$ENDIF VIRTUALPASCAL} |
|
278 |
|
279 Implementation |
|
280 |
|
281 Uses strings, collect, messages, dnapp, commands, advance0, stringsx |
|
282 {$IFDEF VIRTUALPASCAL} ,vpsyslow {$ENDIF VIRTUALPASCAL}; |
|
283 |
|
284 Const |
|
285 MAGIC_NUMBER = $50435245; { 'PCRE' } |
|
286 MAX_MATCHES = 90; { changed in 3.5 version; should be divisible by 3, was 64} |
|
287 |
|
288 Type |
|
289 PMatchArray = ^TMatchArray; |
|
290 TMatchArray = array[0..( MAX_MATCHES * 3 )] of integer; |
|
291 |
|
292 PRegExpCollection = ^TRegExpCollection; |
|
293 TRegExpCollection = object(TSortedCollection) |
|
294 MaxRegExp : integer; |
|
295 SearchRegExp : shortstring; |
|
296 CompareModeInsert : boolean; |
|
297 constructor Init(AMaxRegExp:integer); |
|
298 procedure FreeItem(P: Pointer); virtual; |
|
299 function Compare(P1, P2: Pointer): Integer; virtual; |
|
300 function Find(ARegExp:shortstring;var P: PpcRegExp):boolean; virtual; |
|
301 function CheckNew(ARegExp:shortstring):PpcRegExp;virtual; |
|
302 end; |
|
303 |
|
304 Var |
|
305 PRegExpCache : PRegExpCollection; |
|
306 |
|
307 |
|
308 {$IFDEF VIRTUALPASCAL} {&Cdecl+} {$ENDIF VIRTUALPASCAL} |
|
309 |
|
310 { imported original pcre functions } |
|
311 |
|
312 function pcre_compile( const pattern : PChar; options : integer; |
|
313 var errorptr : PChar; var erroroffset : integer; |
|
314 const tables : PChar ) : pointer {pcre}; external; |
|
315 {$IFDEF PCRE_7_0} |
|
316 function pcre_compile2( const pattern : PChar; options : integer; |
|
317 var errorcodeptr : Integer; |
|
318 var errorptr : PChar; var erroroffset : integer; |
|
319 const tables : PChar ) : pointer {pcre}; external; |
|
320 {$ENDIF} |
|
321 {$IFDEF PCRE_5_0} |
|
322 function pcre_config( what : integer; where : pointer) : integer; external; |
|
323 function pcre_copy_named_substring( const code : pointer {pcre}; |
|
324 const subject : pchar; |
|
325 var ovector : integer; |
|
326 stringcount : integer; |
|
327 const stringname : pchar; |
|
328 var buffer : pchar; |
|
329 size : integer) : integer; external; |
|
330 function pcre_copy_substring( const subject : pchar; var ovector : integer; |
|
331 stringcount, stringnumber : integer; |
|
332 var buffer : pchar; size : integer ) |
|
333 : integer; external; |
|
334 function pcre_exec( const argument_re : pointer {pcre}; |
|
335 const extra_data : pointer {pcre_extra}; |
|
336 {$ELSE} |
|
337 function pcre_exec( const external_re : pointer; |
|
338 const external_extra : pointer; |
|
339 {$ENDIF} |
|
340 const subject : PChar; |
|
341 length, start_offset, options : integer; |
|
342 offsets : pointer; |
|
343 offsetcount : integer ) : integer; external; |
|
344 {$IFDEF PCRE_7_0} |
|
345 function pcre_dfa_exec( const argument_re : pointer {pcre}; |
|
346 const extra_data : pointer {pcre_extra}; |
|
347 const subject : pchar; |
|
348 length, start_offset, options : integer; |
|
349 offsets : pointer; |
|
350 offsetcount : integer; |
|
351 workspace : pointer; |
|
352 wscount : integer ) : integer; external; |
|
353 {$ENDIF} |
|
354 {$IFDEF PCRE_5_0} |
|
355 procedure pcre_free_substring( const p : pchar ); external; |
|
356 procedure pcre_free_substring_list( var p : pchar ); external; |
|
357 function pcre_fullinfo( const argument_re : pointer {pcre}; |
|
358 const extra_data : pointer {pcre_extra}; |
|
359 what : integer; |
|
360 where : pointer ) : integer; external; |
|
361 function pcre_get_named_substring( const code : pointer {pcre}; |
|
362 const subject : pchar; |
|
363 var ovector : integer; |
|
364 stringcount : integer; |
|
365 const stringname : pchar; |
|
366 var stringptr : pchar ) : integer; external; |
|
367 function pcre_get_stringnumber( const code : pointer {pcre}; |
|
368 const stringname : pchar ) : integer; external; |
|
369 function pcre_get_stringtable_entries( const code : pointer {pcre}; |
|
370 const stringname : pchar; |
|
371 var firstptr, |
|
372 lastptr : pchar ) : integer; external; |
|
373 function pcre_get_substring( const subject : pchar; var ovector : integer; |
|
374 stringcount, stringnumber : integer; |
|
375 var stringptr : pchar ) : integer; external; |
|
376 function pcre_get_substring_list( const subject : pchar; var ovector : integer; |
|
377 stringcount : integer; |
|
378 listptr : pointer {const char ***listptr}) : integer; external; |
|
379 function pcre_info( const argument_re : pointer {pcre}; |
|
380 var optptr : integer; |
|
381 var first_byte : integer ) : integer; external; |
|
382 function pcre_maketables : pchar; external; |
|
383 {$ENDIF} |
|
384 {$IFDEF PCRE_7_0} |
|
385 function pcre_refcount( const argument_re : pointer {pcre}; |
|
386 adjust : integer ) : pchar; external; |
|
387 {$ENDIF} |
|
388 function pcre_study( const external_re : pointer {pcre}; |
|
389 options : integer; |
|
390 var errorptr : PChar ) : pointer {pcre_extra}; external; |
|
391 {$IFDEF PCRE_5_0} |
|
392 function pcre_version : pchar; external; |
|
393 {$ENDIF} |
|
394 |
|
395 function pcre_malloc( size : integer ) : pointer; |
|
396 begin |
|
397 GetMem( result, size ); |
|
398 end; |
|
399 |
|
400 procedure pcre_free( {var} p : pointer ); |
|
401 begin |
|
402 if (p <> nil) then |
|
403 FreeMem( p, 0 ); |
|
404 {@p := nil;} |
|
405 end; |
|
406 |
|
407 {$IFDEF PCRE_5_0} |
|
408 (* Called from PCRE as a result of the (?C) item. We print out where we are in |
|
409 the match. Yield zero unless more callouts than the fail count, or the callout |
|
410 data is not zero. *) |
|
411 |
|
412 function pcre_callout; |
|
413 begin |
|
414 end; |
|
415 {$ENDIF} |
|
416 |
|
417 {$IFDEF VIRTUALPASCAL} {&Cdecl-} {$ENDIF VIRTUALPASCAL} |
|
418 |
|
419 // Always include the newest version of the library |
|
420 {$IFDEF PCRE_7_7} |
|
421 {$L pcre77.lib} |
|
422 {$ELSE} |
|
423 {$IFDEF PCRE_7_0} |
|
424 {$L pcre70.lib} |
|
425 {$ELSE} |
|
426 {$IFDEF PCRE_5_0} |
|
427 {$L pcre50.lib} |
|
428 {$ELSE} |
|
429 {$IFDEF PCRE_3_7} |
|
430 {$L pcre37.lib} |
|
431 {$ENDIF PCRE_3_7} |
|
432 {$ENDIF PCRE_5_0} |
|
433 {$ENDIF PCRE_7_0} |
|
434 {$ENDIF PCRE_7_7} |
|
435 |
|
436 {TpcRegExp} |
|
437 |
|
438 constructor TpcRegExp.Init(const ARegExp:shortstring; AOptions:integer; ALocale : Pointer); |
|
439 var |
|
440 pRegExp : PChar; |
|
441 begin |
|
442 RegExp:=ARegExp; |
|
443 RegExpC:=nil; |
|
444 RegExpExt:=nil; |
|
445 Matches:=nil; |
|
446 MatchesCount:=0; |
|
447 Error:=true; |
|
448 ErrorMsg:=nil; |
|
449 ErrorPos:=0; |
|
450 RunTimeOptions := 0; |
|
451 if length(RegExp) < 255 then |
|
452 begin |
|
453 RegExp[length(RegExp)+1]:=#0; |
|
454 pRegExp:=@RegExp[1]; |
|
455 end |
|
456 else |
|
457 begin |
|
458 GetMem(pRegExp,length(RegExp)+1); |
|
459 pRegExp:=strpcopy(pRegExp,RegExp); |
|
460 end; |
|
461 RegExpC := pcre_compile( pRegExp, |
|
462 AOptions and PCRE_COMPILE_ALLOWED_OPTIONS, |
|
463 ErrorMsg, ErrorPos, ALocale); |
|
464 if length(RegExp) = 255 then |
|
465 StrDispose(pRegExp); |
|
466 if RegExpC = nil then |
|
467 exit; |
|
468 ErrorMsg:=nil; |
|
469 RegExpExt := pcre_study( RegExpC, 0, ErrorMsg ); |
|
470 if (RegExpExt = nil) and (ErrorMsg <> nil) then |
|
471 begin |
|
472 pcre_free(RegExpC); |
|
473 exit; |
|
474 end; |
|
475 GetMem(Matches,SizeOf(TMatchArray)); |
|
476 Error:=false; |
|
477 end; |
|
478 |
|
479 destructor TpcRegExp.Done; |
|
480 begin |
|
481 if RegExpC <> nil then |
|
482 pcre_free(RegExpC); |
|
483 if RegExpExt <> nil then |
|
484 pcre_free(RegExpExt); |
|
485 if Matches <> nil then |
|
486 FreeMem(Matches,SizeOf(TMatchArray)); |
|
487 end; |
|
488 |
|
489 function TpcRegExp.SearchNext( AStr: Pchar; ALen : longint ) : boolean; |
|
490 var Options: Integer; |
|
491 begin // must handle PCRE_ERROR_PARTIAL here |
|
492 Options := (RunTimeOptions or startup.MiscMultiData.cfgRegEx.DefaultOptions) and |
|
493 PCRE_EXEC_ALLOWED_OPTIONS; |
|
494 if MatchesCount > 0 then |
|
495 MatchesCount:=pcre_exec( RegExpC, RegExpExt, AStr, ALen, PMatchArray(Matches)^[1], |
|
496 Options, Matches, MAX_MATCHES ) else |
|
497 MatchesCount:=pcre_exec( RegExpC, RegExpExt, AStr, ALen, 0, |
|
498 Options, Matches, MAX_MATCHES ); |
|
499 { if MatchesCount = 0 then |
|
500 MatchesCount := MatchesCount div 3;} |
|
501 PartialMatch := MatchesCount = PCRE_ERROR_PARTIAL; |
|
502 SearchNext := MatchesCount > 0; |
|
503 end; |
|
504 |
|
505 function TpcRegExp.Search( AStr: Pchar; ALen : longint):boolean; |
|
506 begin |
|
507 MatchesCount:=0; |
|
508 Search:=SearchNext(AStr,ALen); |
|
509 SourceLen:=ALen; |
|
510 end; |
|
511 |
|
512 function TpcRegExp.SearchOfs( AStr: Pchar; ALen, AOfs: longint ) : boolean; |
|
513 var Options: Integer; |
|
514 begin |
|
515 MatchesCount:=0; |
|
516 Options := (RunTimeOptions or startup.MiscMultiData.cfgRegEx.DefaultOptions) and |
|
517 PCRE_EXEC_ALLOWED_OPTIONS; |
|
518 MatchesCount:=pcre_exec( RegExpC, RegExpExt, AStr, ALen, AOfs, |
|
519 Options, Matches, MAX_MATCHES ); |
|
520 PartialMatch := MatchesCount = PCRE_ERROR_PARTIAL; |
|
521 SearchOfs := MatchesCount > 0; |
|
522 SourceLen := ALen-AOfs; |
|
523 end; |
|
524 |
|
525 function TpcRegExp.MatchSub(ANom:integer; var Pos,Len:longint):boolean; |
|
526 begin |
|
527 if (MatchesCount > 0) and (ANom <= (MatchesCount-1)) then |
|
528 begin |
|
529 ANom:=ANom*2; |
|
530 Pos:=PMatchArray(Matches)^[ANom]; |
|
531 Len:=PMatchArray(Matches)^[ANom+1]-Pos; |
|
532 MatchSub:=true; |
|
533 end |
|
534 else |
|
535 MatchSub:=false; |
|
536 end; |
|
537 |
|
538 function TpcRegExp.MatchFull(var Pos,Len:longint):boolean; |
|
539 begin |
|
540 MatchFull:=MatchSub(0,Pos,Len); |
|
541 end; |
|
542 |
|
543 function TpcRegExp.GetSubStr(ANom: integer; AStr: Pchar):string; |
|
544 var |
|
545 s: ansistring; |
|
546 pos,len: longint; |
|
547 begin |
|
548 s:=''; |
|
549 if MatchSub(ANom, pos, len) then |
|
550 begin |
|
551 setlength(s, len); |
|
552 Move(AStr[pos], s[1], len); |
|
553 end; |
|
554 GetSubStr:=s; |
|
555 end; |
|
556 |
|
557 function TpcRegExp.GetPreSubStr(AStr: Pchar):string; |
|
558 var |
|
559 s: ansistring; |
|
560 l: longint; |
|
561 begin |
|
562 s:=''; |
|
563 if (MatchesCount > 0) then |
|
564 begin |
|
565 l:=PMatchArray(Matches)^[0]-1; |
|
566 if l > 0 then |
|
567 begin |
|
568 setlength(s,l); |
|
569 Move(AStr[1],s[1],l); |
|
570 end; |
|
571 end; |
|
572 GetPreSubStr:=s; |
|
573 end; |
|
574 |
|
575 function TpcRegExp.GetPostSubStr(AStr: Pchar):string; |
|
576 var |
|
577 s: ansistring; |
|
578 l: longint; |
|
579 ANom: integer; |
|
580 begin |
|
581 s:=''; |
|
582 if (MatchesCount > 0) then |
|
583 begin |
|
584 ANom:=(MatchesCount-1){*2} shl 1; |
|
585 l:=SourceLen-PMatchArray(Matches)^[ANom+1]+1; |
|
586 if l > 0 then |
|
587 begin |
|
588 setlength(s,l); |
|
589 Move(AStr[PMatchArray(Matches)^[ANom+1]],s[1],l); |
|
590 end; |
|
591 end; |
|
592 GetPostSubStr:=s; |
|
593 end; |
|
594 |
|
595 |
|
596 function TpcRegExp.GetFullStr(AStr: Pchar):string; |
|
597 var |
|
598 s: ansistring; |
|
599 l: longint; |
|
600 begin |
|
601 GetFullStr:=GetSubStr(0,AStr); |
|
602 end; |
|
603 |
|
604 function TpcRegExp.GetReplStr(AStr: Pchar; const ARepl: string):string; |
|
605 var |
|
606 s: ansistring; |
|
607 l,i,lasti: longint; |
|
608 begin |
|
609 l:=length(ARepl); |
|
610 i:=1; |
|
611 lasti:=1; |
|
612 s:=''; |
|
613 while i <= l do |
|
614 begin |
|
615 case ARepl[i] of |
|
616 '\' : |
|
617 begin |
|
618 if i < l then |
|
619 begin |
|
620 s:=s+copy(ARepl,lasti,i-lasti){+ARepl[i+1]}; |
|
621 {AH 17-10-05 support for POSIX \1-\9 backreferences} |
|
622 case ARepl[i+1] of |
|
623 '0' : s:=s+GetFullStr(AStr); |
|
624 '1'..'9' : s:=s+GetSubStr(ord(ARepl[i+1])-ord('0'),AStr); |
|
625 else s:=s+ARepl[i+1]; // copy the escaped character |
|
626 end; |
|
627 end; |
|
628 inc(i); |
|
629 lasti:=i+1; |
|
630 end; |
|
631 '$' : |
|
632 begin |
|
633 if i < l then |
|
634 begin |
|
635 s:=s+copy(ARepl,lasti,i-lasti); |
|
636 case ARepl[i+1] of |
|
637 '&' : s:=s+GetFullStr(AStr); |
|
638 '1'..'9' : s:=s+GetSubStr(ord(ARepl[i+1])-ord('0'),AStr); |
|
639 '`' : s:=s+GetPreSubStr(AStr); |
|
640 #39 : s:=s+GetPostSubStr(AStr); |
|
641 end; |
|
642 end; |
|
643 inc(i); |
|
644 lasti:=i+1; |
|
645 end; |
|
646 end; |
|
647 inc(i); |
|
648 end; |
|
649 if lasti <= {AH 25-10-2004 added =, else l==1 won't work} l then |
|
650 s:=s+copy(ARepl,lasti,l-lasti+1); |
|
651 GetReplStr:=s; |
|
652 end; |
|
653 |
|
654 function TpcRegExp.ErrorStr:string; |
|
655 begin |
|
656 ErrorStr:=StrPas(ErrorMsg); |
|
657 end; |
|
658 |
|
659 {TRegExpCollection} |
|
660 |
|
661 constructor TRegExpCollection.Init(AMaxRegExp: integer); |
|
662 begin |
|
663 Inherited Init(1,1); |
|
664 MaxRegExp:=AMaxRegExp; |
|
665 CompareModeInsert:=true; |
|
666 end; |
|
667 |
|
668 procedure TRegExpCollection.FreeItem(P: Pointer); |
|
669 begin |
|
670 if P <> nil then |
|
671 begin |
|
672 Dispose(PpcRegExp(P),Done); |
|
673 end; |
|
674 end; |
|
675 |
|
676 function TRegExpCollection.Compare(P1, P2: Pointer): Integer; |
|
677 //var |
|
678 // l,l1,l2,i : byte; |
|
679 //// wPos: pchar; |
|
680 begin |
|
681 if CompareModeInsert then |
|
682 begin |
|
683 // l1:=length(PpcRegExp(P1)^.RegExp); |
|
684 // l2:=length(PpcRegExp(P2)^.RegExp); |
|
685 // if l1 > l2 then l:=l2 else |
|
686 // l:=l1; |
|
687 // for i:=1 to l do |
|
688 // if PpcRegExp(P1).RegExp[i] <> PpcRegExp(P2).RegExp[i] then break; |
|
689 // if i <=l then |
|
690 // Compare:=ord(PpcRegExp(P1).RegExp[i])-ord(PpcRegExp(P2).RegExp[i]) else |
|
691 // Compare:=l1-l2; |
|
692 Compare := stringsx.PasStrCmp(PpcRegExp(P1).RegExp, PpcRegExp(P2).RegExp, False); |
|
693 end |
|
694 else |
|
695 begin |
|
696 // l1:=length(PpcRegExp(P1)^.RegExp); |
|
697 // l2:=length(SearchRegExp); |
|
698 // if l1 > l2 then l:=l2 else |
|
699 // l:=l1; |
|
700 // for i:=1 to l do |
|
701 // if PpcRegExp(P1).RegExp[i] <> SearchRegExp[i] then |
|
702 // begin |
|
703 // Compare:=ord(PpcRegExp(P1).RegExp[i])-ord(SearchRegExp[i]); |
|
704 // break; |
|
705 // end; |
|
706 // if i > l then Compare:=l1-l2; |
|
707 Compare := stringsx.PasStrCmp(PpcRegExp(P1).RegExp, SearchRegExp, False); |
|
708 end; |
|
709 end; |
|
710 |
|
711 function TRegExpCollection.Find(ARegExp:shortstring;var P: PpcRegExp):boolean; |
|
712 var I : integer; |
|
713 begin |
|
714 CompareModeInsert:=false; |
|
715 SearchRegExp:=ARegExp; |
|
716 if Search(nil,I) then |
|
717 begin |
|
718 P:=PpcRegExp(At(I)); |
|
719 Find:=true; |
|
720 end |
|
721 else |
|
722 begin |
|
723 P:=nil; |
|
724 Find:=false; |
|
725 end; |
|
726 CompareModeInsert:=true; |
|
727 end; |
|
728 |
|
729 function TRegExpCollection.CheckNew(ARegExp:shortstring):PpcRegExp; |
|
730 var |
|
731 P : PpcRegExp; |
|
732 begin |
|
733 if not Find(ARegExp,P) then |
|
734 begin |
|
735 if Count = MaxRegExp then |
|
736 AtFree(0); |
|
737 P:=New(ppcRegExp,Init(ARegExp,PCRE_CASELESS,nil)); |
|
738 Insert(P); |
|
739 end; |
|
740 CheckNew:=P; |
|
741 end; |
|
742 |
|
743 function pcGrepMatch(WildCard, aStr: string; AOptions:integer; ALocale : Pointer): Boolean; |
|
744 var |
|
745 PpcRE:PpcRegExp; |
|
746 begin |
|
747 PpcRE:=New(ppcRegExp,Init(WildCard,AOptions,Alocale)); |
|
748 pcGrepMatch:=PpcRE^.Search(pchar(AStr),Length(AStr)); |
|
749 Dispose(PpcRE,Done); |
|
750 end; |
|
751 |
|
752 function pcGrepSub(WildCard, aStr, aRepl: string; AOptions:integer; ALocale : Pointer): string; |
|
753 var |
|
754 PpcRE:PpcRegExp; |
|
755 begin |
|
756 PpcRE:=New(ppcRegExp,Init(WildCard,AOptions,Alocale)); |
|
757 if PpcRE^.Search(pchar(AStr),Length(AStr)) then |
|
758 pcGrepSub:=PpcRE^.GetReplStr(pchar(AStr),ARepl) |
|
759 else |
|
760 pcGrepSub:=''; |
|
761 Dispose(PpcRE,Done); |
|
762 end; |
|
763 |
|
764 function pcFastGrepMatch(WildCard, aStr: string): Boolean; |
|
765 var |
|
766 PpcRE:PpcRegExp; |
|
767 begin |
|
768 PpcRE:=PRegExpCache^.CheckNew(WildCard); |
|
769 pcFastGrepMatch:=PpcRE^.Search(pchar(AStr),Length(AStr)); |
|
770 end; |
|
771 |
|
772 function pcFastGrepSub(WildCard, aStr, aRepl: string): string; |
|
773 var |
|
774 PpcRE:PpcRegExp; |
|
775 begin |
|
776 PpcRE:=PRegExpCache^.CheckNew(WildCard); |
|
777 if PpcRE^.Search(pchar(AStr),Length(AStr)) then |
|
778 pcFastGrepSub:=PpcRE^.GetReplStr(pchar(AStr),ARepl) |
|
779 else |
|
780 pcFastGrepSub:=''; |
|
781 end; |
|
782 |
|
783 {$IFDEF PCRE_5_0} |
|
784 function pcGetVersion : pchar; assembler; {$FRAME-}{$USES none} |
|
785 asm |
|
786 call pcre_version |
|
787 end; |
|
788 {$ENDIF PCRE_5_0} |
|
789 |
|
790 function pcError; |
|
791 var P: ppcRegExp absolute pRegExp; |
|
792 begin |
|
793 Result := (P = nil) or P^.Error; |
|
794 If Result and (P <> nil) then |
|
795 begin |
|
796 { if P^.ErrorPos = 0 then |
|
797 MessageBox(GetString(erRegExpCompile)+'"'+P^.ErrorStr+'"', nil,mfConfirmation+mfOkButton) |
|
798 else} |
|
799 MessageBox(GetString(erRegExpCompile)+'"'+P^.ErrorStr+'"'+GetString(erRegExpCompPos), |
|
800 @P^.ErrorPos,mfConfirmation+mfOkButton); |
|
801 Dispose(P, Done); |
|
802 P:=nil; |
|
803 end; |
|
804 end; |
|
805 |
|
806 function pcInit; |
|
807 var Options : Integer; |
|
808 begin |
|
809 If CaseSens then Options := 0 else Options := PCRE_CASELESS; |
|
810 Result := New( PpcRegExp, Init( Pattern, |
|
811 {DefaultOptions} |
|
812 startup.MiscMultiData.cfgRegEx.DefaultOptions or Options, |
|
813 DefaultLocaleTable) ); |
|
814 end; |
|
815 |
|
816 Initialization |
|
817 PRegExpCache:=New(PRegExpCollection,Init(64)); |
|
818 Finalization |
|
819 Dispose(PRegExpCache,Done); |
|
820 End. |