libraries/spcre/libpcre/pcre/doc/pcresyntax.3
changeset 0 7f656887cf89
equal deleted inserted replaced
-1:000000000000 0:7f656887cf89
       
     1 .TH PCRESYNTAX 3
       
     2 .SH NAME
       
     3 PCRE - Perl-compatible regular expressions
       
     4 .SH "PCRE REGULAR EXPRESSION SYNTAX SUMMARY"
       
     5 .rs
       
     6 .sp
       
     7 The full syntax and semantics of the regular expressions that are supported by
       
     8 PCRE are described in the
       
     9 .\" HREF
       
    10 \fBpcrepattern\fP
       
    11 .\"
       
    12 documentation. This document contains just a quick-reference summary of the
       
    13 syntax.
       
    14 .
       
    15 .
       
    16 .SH "QUOTING"
       
    17 .rs
       
    18 .sp
       
    19   \ex         where x is non-alphanumeric is a literal x
       
    20   \eQ...\eE    treat enclosed characters as literal
       
    21 .
       
    22 .
       
    23 .SH "CHARACTERS"
       
    24 .rs
       
    25 .sp
       
    26   \ea         alarm, that is, the BEL character (hex 07)
       
    27   \ecx        "control-x", where x is any character
       
    28   \ee         escape (hex 1B)
       
    29   \ef         formfeed (hex 0C)
       
    30   \en         newline (hex 0A)
       
    31   \er         carriage return (hex 0D)
       
    32   \et         tab (hex 09)
       
    33   \eddd       character with octal code ddd, or backreference
       
    34   \exhh       character with hex code hh
       
    35   \ex{hhh..}  character with hex code hhh..
       
    36 .
       
    37 .
       
    38 .SH "CHARACTER TYPES"
       
    39 .rs
       
    40 .sp
       
    41   .          any character except newline;
       
    42                in dotall mode, any character whatsoever
       
    43   \eC         one byte, even in UTF-8 mode (best avoided)
       
    44   \ed         a decimal digit
       
    45   \eD         a character that is not a decimal digit
       
    46   \eh         a horizontal whitespace character
       
    47   \eH         a character that is not a horizontal whitespace character
       
    48   \ep{\fIxx\fP}     a character with the \fIxx\fP property
       
    49   \eP{\fIxx\fP}     a character without the \fIxx\fP property
       
    50   \eR         a newline sequence
       
    51   \es         a whitespace character
       
    52   \eS         a character that is not a whitespace character
       
    53   \ev         a vertical whitespace character
       
    54   \eV         a character that is not a vertical whitespace character
       
    55   \ew         a "word" character
       
    56   \eW         a "non-word" character
       
    57   \eX         an extended Unicode sequence
       
    58 .sp
       
    59 In PCRE, \ed, \eD, \es, \eS, \ew, and \eW recognize only ASCII characters.
       
    60 .
       
    61 .
       
    62 .SH "GENERAL CATEGORY PROPERTY CODES FOR \ep AND \eP"
       
    63 .rs
       
    64 .sp
       
    65   C          Other
       
    66   Cc         Control
       
    67   Cf         Format
       
    68   Cn         Unassigned
       
    69   Co         Private use
       
    70   Cs         Surrogate
       
    71 .sp
       
    72   L          Letter
       
    73   Ll         Lower case letter
       
    74   Lm         Modifier letter
       
    75   Lo         Other letter
       
    76   Lt         Title case letter
       
    77   Lu         Upper case letter
       
    78   L&         Ll, Lu, or Lt
       
    79 .sp
       
    80   M          Mark
       
    81   Mc         Spacing mark
       
    82   Me         Enclosing mark
       
    83   Mn         Non-spacing mark
       
    84 .sp
       
    85   N          Number
       
    86   Nd         Decimal number
       
    87   Nl         Letter number
       
    88   No         Other number
       
    89 .sp
       
    90   P          Punctuation
       
    91   Pc         Connector punctuation
       
    92   Pd         Dash punctuation
       
    93   Pe         Close punctuation
       
    94   Pf         Final punctuation
       
    95   Pi         Initial punctuation
       
    96   Po         Other punctuation
       
    97   Ps         Open punctuation
       
    98 .sp
       
    99   S          Symbol
       
   100   Sc         Currency symbol
       
   101   Sk         Modifier symbol
       
   102   Sm         Mathematical symbol
       
   103   So         Other symbol
       
   104 .sp
       
   105   Z          Separator
       
   106   Zl         Line separator
       
   107   Zp         Paragraph separator
       
   108   Zs         Space separator
       
   109 .
       
   110 .
       
   111 .SH "SCRIPT NAMES FOR \ep AND \eP"
       
   112 .rs
       
   113 .sp
       
   114 Arabic,
       
   115 Armenian,
       
   116 Balinese,
       
   117 Bengali,
       
   118 Bopomofo,
       
   119 Braille,
       
   120 Buginese,
       
   121 Buhid,
       
   122 Canadian_Aboriginal,
       
   123 Cherokee,
       
   124 Common,
       
   125 Coptic,
       
   126 Cuneiform,
       
   127 Cypriot,
       
   128 Cyrillic,
       
   129 Deseret,
       
   130 Devanagari,
       
   131 Ethiopic,
       
   132 Georgian,
       
   133 Glagolitic,
       
   134 Gothic,
       
   135 Greek,
       
   136 Gujarati,
       
   137 Gurmukhi,
       
   138 Han,
       
   139 Hangul,
       
   140 Hanunoo,
       
   141 Hebrew,
       
   142 Hiragana,
       
   143 Inherited,
       
   144 Kannada,
       
   145 Katakana,
       
   146 Kharoshthi,
       
   147 Khmer,
       
   148 Lao,
       
   149 Latin,
       
   150 Limbu,
       
   151 Linear_B,
       
   152 Malayalam,
       
   153 Mongolian,
       
   154 Myanmar,
       
   155 New_Tai_Lue,
       
   156 Nko,
       
   157 Ogham,
       
   158 Old_Italic,
       
   159 Old_Persian,
       
   160 Oriya,
       
   161 Osmanya,
       
   162 Phags_Pa,
       
   163 Phoenician,
       
   164 Runic,
       
   165 Shavian,
       
   166 Sinhala,
       
   167 Syloti_Nagri,
       
   168 Syriac,
       
   169 Tagalog,
       
   170 Tagbanwa,
       
   171 Tai_Le,
       
   172 Tamil,
       
   173 Telugu,
       
   174 Thaana,
       
   175 Thai,
       
   176 Tibetan,
       
   177 Tifinagh,
       
   178 Ugaritic,
       
   179 Yi.
       
   180 .
       
   181 .
       
   182 .SH "CHARACTER CLASSES"
       
   183 .rs
       
   184 .sp
       
   185   [...]       positive character class
       
   186   [^...]      negative character class
       
   187   [x-y]       range (can be used for hex characters)
       
   188   [[:xxx:]]   positive POSIX named set
       
   189   [[:^xxx:]]  negative POSIX named set
       
   190 .sp
       
   191   alnum       alphanumeric
       
   192   alpha       alphabetic
       
   193   ascii       0-127
       
   194   blank       space or tab
       
   195   cntrl       control character
       
   196   digit       decimal digit
       
   197   graph       printing, excluding space
       
   198   lower       lower case letter
       
   199   print       printing, including space
       
   200   punct       printing, excluding alphanumeric
       
   201   space       whitespace
       
   202   upper       upper case letter
       
   203   word        same as \ew
       
   204   xdigit      hexadecimal digit
       
   205 .sp
       
   206 In PCRE, POSIX character set names recognize only ASCII characters. You can use
       
   207 \eQ...\eE inside a character class.
       
   208 .
       
   209 .
       
   210 .SH "QUANTIFIERS"
       
   211 .rs
       
   212 .sp
       
   213   ?           0 or 1, greedy
       
   214   ?+          0 or 1, possessive
       
   215   ??          0 or 1, lazy
       
   216   *           0 or more, greedy
       
   217   *+          0 or more, possessive
       
   218   *?          0 or more, lazy
       
   219   +           1 or more, greedy
       
   220   ++          1 or more, possessive
       
   221   +?          1 or more, lazy
       
   222   {n}         exactly n
       
   223   {n,m}       at least n, no more than m, greedy
       
   224   {n,m}+      at least n, no more than m, possessive
       
   225   {n,m}?      at least n, no more than m, lazy
       
   226   {n,}        n or more, greedy
       
   227   {n,}+       n or more, possessive
       
   228   {n,}?       n or more, lazy
       
   229 .
       
   230 .
       
   231 .SH "ANCHORS AND SIMPLE ASSERTIONS"
       
   232 .rs
       
   233 .sp
       
   234   \eb          word boundary
       
   235   \eB          not a word boundary
       
   236   ^           start of subject
       
   237                also after internal newline in multiline mode
       
   238   \eA          start of subject
       
   239   $           end of subject
       
   240                also before newline at end of subject
       
   241                also before internal newline in multiline mode
       
   242   \eZ          end of subject
       
   243                also before newline at end of subject
       
   244   \ez          end of subject
       
   245   \eG          first matching position in subject
       
   246 .
       
   247 .
       
   248 .SH "MATCH POINT RESET"
       
   249 .rs
       
   250 .sp
       
   251   \eK          reset start of match
       
   252 .
       
   253 .
       
   254 .SH "ALTERNATION"
       
   255 .rs
       
   256 .sp
       
   257   expr|expr|expr...
       
   258 .
       
   259 .
       
   260 .SH "CAPTURING"
       
   261 .rs
       
   262 .sp
       
   263   (...)          capturing group
       
   264   (?<name>...)   named capturing group (Perl)
       
   265   (?'name'...)   named capturing group (Perl)
       
   266   (?P<name>...)  named capturing group (Python)
       
   267   (?:...)        non-capturing group
       
   268   (?|...)        non-capturing group; reset group numbers for
       
   269                   capturing groups in each alternative
       
   270 .
       
   271 .
       
   272 .SH "ATOMIC GROUPS"
       
   273 .rs
       
   274 .sp
       
   275   (?>...)        atomic, non-capturing group
       
   276 .
       
   277 .
       
   278 .
       
   279 .
       
   280 .SH "COMMENT"
       
   281 .rs
       
   282 .sp
       
   283   (?#....)       comment (not nestable)
       
   284 .
       
   285 .
       
   286 .SH "OPTION SETTING"
       
   287 .rs
       
   288 .sp
       
   289   (?i)           caseless
       
   290   (?J)           allow duplicate names
       
   291   (?m)           multiline
       
   292   (?s)           single line (dotall)
       
   293   (?U)           default ungreedy (lazy)
       
   294   (?x)           extended (ignore white space)
       
   295   (?-...)        unset option(s)
       
   296 .
       
   297 .
       
   298 .SH "LOOKAHEAD AND LOOKBEHIND ASSERTIONS"
       
   299 .rs
       
   300 .sp
       
   301   (?=...)        positive look ahead
       
   302   (?!...)        negative look ahead
       
   303   (?<=...)       positive look behind
       
   304   (?<!...)       negative look behind
       
   305 .sp
       
   306 Each top-level branch of a look behind must be of a fixed length.
       
   307 .
       
   308 .
       
   309 .SH "BACKREFERENCES"
       
   310 .rs
       
   311 .sp
       
   312   \en             reference by number (can be ambiguous)
       
   313   \egn            reference by number
       
   314   \eg{n}          reference by number
       
   315   \eg{-n}         relative reference by number
       
   316   \ek<name>       reference by name (Perl)
       
   317   \ek'name'       reference by name (Perl)
       
   318   \eg{name}       reference by name (Perl)
       
   319   \ek{name}       reference by name (.NET)
       
   320   (?P=name)      reference by name (Python)
       
   321 .
       
   322 .
       
   323 .SH "SUBROUTINE REFERENCES (POSSIBLY RECURSIVE)"
       
   324 .rs
       
   325 .sp
       
   326   (?R)           recurse whole pattern
       
   327   (?n)           call subpattern by absolute number
       
   328   (?+n)          call subpattern by relative number
       
   329   (?-n)          call subpattern by relative number
       
   330   (?&name)       call subpattern by name (Perl)
       
   331   (?P>name)      call subpattern by name (Python)
       
   332   \eg<name>       call subpattern by name (Oniguruma)
       
   333   \eg'name'       call subpattern by name (Oniguruma)
       
   334   \eg<n>          call subpattern by absolute number (Oniguruma)
       
   335   \eg'n'          call subpattern by absolute number (Oniguruma)
       
   336   \eg<+n>         call subpattern by relative number (PCRE extension)
       
   337   \eg'+n'         call subpattern by relative number (PCRE extension)
       
   338   \eg<-n>         call subpattern by relative number (PCRE extension)
       
   339   \eg'-n'         call subpattern by relative number (PCRE extension)
       
   340 .
       
   341 .
       
   342 .SH "CONDITIONAL PATTERNS"
       
   343 .rs
       
   344 .sp
       
   345   (?(condition)yes-pattern)
       
   346   (?(condition)yes-pattern|no-pattern)
       
   347 .sp
       
   348   (?(n)...       absolute reference condition
       
   349   (?(+n)...      relative reference condition
       
   350   (?(-n)...      relative reference condition
       
   351   (?(<name>)...  named reference condition (Perl)
       
   352   (?('name')...  named reference condition (Perl)
       
   353   (?(name)...    named reference condition (PCRE)
       
   354   (?(R)...       overall recursion condition
       
   355   (?(Rn)...      specific group recursion condition
       
   356   (?(R&name)...  specific recursion condition
       
   357   (?(DEFINE)...  define subpattern for reference
       
   358   (?(assert)...  assertion condition
       
   359 .
       
   360 .
       
   361 .SH "BACKTRACKING CONTROL"
       
   362 .rs
       
   363 .sp
       
   364 The following act immediately when they are reached:
       
   365 .sp
       
   366   (*ACCEPT)      force successful match
       
   367   (*FAIL)        force backtrack; synonym (*F)
       
   368 .sp
       
   369 The following act only when a subsequent match failure causes a backtrack to
       
   370 reach them. They all force a match failure, but they differ in what happens
       
   371 afterwards. Those that advance the start-of-match point do so only if the
       
   372 pattern is not anchored.
       
   373 .sp
       
   374   (*COMMIT)      overall failure, no advance of starting point
       
   375   (*PRUNE)       advance to next starting character
       
   376   (*SKIP)        advance start to current matching position
       
   377   (*THEN)        local failure, backtrack to next alternation
       
   378 .
       
   379 .
       
   380 .SH "NEWLINE CONVENTIONS"
       
   381 .rs
       
   382 .sp
       
   383 These are recognized only at the very start of the pattern or after a
       
   384 (*BSR_...) option.
       
   385 .sp
       
   386   (*CR)
       
   387   (*LF)
       
   388   (*CRLF)
       
   389   (*ANYCRLF)
       
   390   (*ANY)
       
   391 .
       
   392 .
       
   393 .SH "WHAT \eR MATCHES"
       
   394 .rs
       
   395 .sp
       
   396 These are recognized only at the very start of the pattern or after a
       
   397 (*...) option that sets the newline convention.
       
   398 .sp
       
   399   (*BSR_ANYCRLF)
       
   400   (*BSR_UNICODE)
       
   401 .
       
   402 .
       
   403 .SH "CALLOUTS"
       
   404 .rs
       
   405 .sp
       
   406   (?C)      callout
       
   407   (?Cn)     callout with data n
       
   408 .
       
   409 .
       
   410 .SH "SEE ALSO"
       
   411 .rs
       
   412 .sp
       
   413 \fBpcrepattern\fP(3), \fBpcreapi\fP(3), \fBpcrecallout\fP(3),
       
   414 \fBpcrematching\fP(3), \fBpcre\fP(3).
       
   415 .
       
   416 .
       
   417 .SH AUTHOR
       
   418 .rs
       
   419 .sp
       
   420 .nf
       
   421 Philip Hazel
       
   422 University Computing Service
       
   423 Cambridge CB2 3QH, England.
       
   424 .fi
       
   425 .
       
   426 .
       
   427 .SH REVISION
       
   428 .rs
       
   429 .sp
       
   430 .nf
       
   431 Last updated: 09 April 2008
       
   432 Copyright (c) 1997-2008 University of Cambridge.
       
   433 .fi