libraries/spcre/libpcre/pcre/pcredemo.c
changeset 0 7f656887cf89
equal deleted inserted replaced
-1:000000000000 0:7f656887cf89
       
     1 /*************************************************
       
     2 *           PCRE DEMONSTRATION PROGRAM           *
       
     3 *************************************************/
       
     4 
       
     5 /* This is a demonstration program to illustrate the most straightforward ways
       
     6 of calling the PCRE regular expression library from a C program. See the
       
     7 pcresample documentation for a short discussion ("man pcresample" if you have
       
     8 the PCRE man pages installed).
       
     9 
       
    10 In Unix-like environments, compile this program thuswise:
       
    11 
       
    12   gcc -Wall pcredemo.c -I/usr/local/include -L/usr/local/lib \
       
    13     -R/usr/local/lib -lpcre
       
    14 
       
    15 Replace "/usr/local/include" and "/usr/local/lib" with wherever the include and
       
    16 library files for PCRE are installed on your system. You don't need -I and -L
       
    17 if PCRE is installed in the standard system libraries. Only some operating
       
    18 systems (e.g. Solaris) use the -R option.
       
    19 
       
    20 Building under Windows:
       
    21 
       
    22 If you want to statically link this program against a non-dll .a file, you must
       
    23 define PCRE_STATIC before including pcre.h, otherwise the pcre_malloc() and
       
    24 pcre_free() exported functions will be declared __declspec(dllimport), with
       
    25 unwanted results. So in this environment, uncomment the following line. */
       
    26 
       
    27 /* #define PCRE_STATIC */
       
    28 
       
    29 #include <stdio.h>
       
    30 #include <string.h>
       
    31 #include <pcre.h>
       
    32 
       
    33 #define OVECCOUNT 30    /* should be a multiple of 3 */
       
    34 
       
    35 
       
    36 int main(int argc, char **argv)
       
    37 {
       
    38 pcre *re;
       
    39 const char *error;
       
    40 char *pattern;
       
    41 char *subject;
       
    42 unsigned char *name_table;
       
    43 int erroffset;
       
    44 int find_all;
       
    45 int namecount;
       
    46 int name_entry_size;
       
    47 int ovector[OVECCOUNT];
       
    48 int subject_length;
       
    49 int rc, i;
       
    50 
       
    51 
       
    52 /**************************************************************************
       
    53 * First, sort out the command line. There is only one possible option at  *
       
    54 * the moment, "-g" to request repeated matching to find all occurrences,  *
       
    55 * like Perl's /g option. We set the variable find_all to a non-zero value *
       
    56 * if the -g option is present. Apart from that, there must be exactly two *
       
    57 * arguments.                                                              *
       
    58 **************************************************************************/
       
    59 
       
    60 find_all = 0;
       
    61 for (i = 1; i < argc; i++)
       
    62   {
       
    63   if (strcmp(argv[i], "-g") == 0) find_all = 1;
       
    64     else break;
       
    65   }
       
    66 
       
    67 /* After the options, we require exactly two arguments, which are the pattern,
       
    68 and the subject string. */
       
    69 
       
    70 if (argc - i != 2)
       
    71   {
       
    72   printf("Two arguments required: a regex and a subject string\n");
       
    73   return 1;
       
    74   }
       
    75 
       
    76 pattern = argv[i];
       
    77 subject = argv[i+1];
       
    78 subject_length = (int)strlen(subject);
       
    79 
       
    80 
       
    81 /*************************************************************************
       
    82 * Now we are going to compile the regular expression pattern, and handle *
       
    83 * and errors that are detected.                                          *
       
    84 *************************************************************************/
       
    85 
       
    86 re = pcre_compile(
       
    87   pattern,              /* the pattern */
       
    88   0,                    /* default options */
       
    89   &error,               /* for error message */
       
    90   &erroffset,           /* for error offset */
       
    91   NULL);                /* use default character tables */
       
    92 
       
    93 /* Compilation failed: print the error message and exit */
       
    94 
       
    95 if (re == NULL)
       
    96   {
       
    97   printf("PCRE compilation failed at offset %d: %s\n", erroffset, error);
       
    98   return 1;
       
    99   }
       
   100 
       
   101 
       
   102 /*************************************************************************
       
   103 * If the compilation succeeded, we call PCRE again, in order to do a     *
       
   104 * pattern match against the subject string. This does just ONE match. If *
       
   105 * further matching is needed, it will be done below.                     *
       
   106 *************************************************************************/
       
   107 
       
   108 rc = pcre_exec(
       
   109   re,                   /* the compiled pattern */
       
   110   NULL,                 /* no extra data - we didn't study the pattern */
       
   111   subject,              /* the subject string */
       
   112   subject_length,       /* the length of the subject */
       
   113   0,                    /* start at offset 0 in the subject */
       
   114   0,                    /* default options */
       
   115   ovector,              /* output vector for substring information */
       
   116   OVECCOUNT);           /* number of elements in the output vector */
       
   117 
       
   118 /* Matching failed: handle error cases */
       
   119 
       
   120 if (rc < 0)
       
   121   {
       
   122   switch(rc)
       
   123     {
       
   124     case PCRE_ERROR_NOMATCH: printf("No match\n"); break;
       
   125     /*
       
   126     Handle other special cases if you like
       
   127     */
       
   128     default: printf("Matching error %d\n", rc); break;
       
   129     }
       
   130   pcre_free(re);     /* Release memory used for the compiled pattern */
       
   131   return 1;
       
   132   }
       
   133 
       
   134 /* Match succeded */
       
   135 
       
   136 printf("\nMatch succeeded at offset %d\n", ovector[0]);
       
   137 
       
   138 
       
   139 /*************************************************************************
       
   140 * We have found the first match within the subject string. If the output *
       
   141 * vector wasn't big enough, say so. Then output any substrings that were *
       
   142 * captured.                                                              *
       
   143 *************************************************************************/
       
   144 
       
   145 /* The output vector wasn't big enough */
       
   146 
       
   147 if (rc == 0)
       
   148   {
       
   149   rc = OVECCOUNT/3;
       
   150   printf("ovector only has room for %d captured substrings\n", rc - 1);
       
   151   }
       
   152 
       
   153 /* Show substrings stored in the output vector by number. Obviously, in a real
       
   154 application you might want to do things other than print them. */
       
   155 
       
   156 for (i = 0; i < rc; i++)
       
   157   {
       
   158   char *substring_start = subject + ovector[2*i];
       
   159   int substring_length = ovector[2*i+1] - ovector[2*i];
       
   160   printf("%2d: %.*s\n", i, substring_length, substring_start);
       
   161   }
       
   162 
       
   163 
       
   164 /**************************************************************************
       
   165 * That concludes the basic part of this demonstration program. We have    *
       
   166 * compiled a pattern, and performed a single match. The code that follows *
       
   167 * shows first how to access named substrings, and then how to code for    *
       
   168 * repeated matches on the same subject.                                   *
       
   169 **************************************************************************/
       
   170 
       
   171 /* See if there are any named substrings, and if so, show them by name. First
       
   172 we have to extract the count of named parentheses from the pattern. */
       
   173 
       
   174 (void)pcre_fullinfo(
       
   175   re,                   /* the compiled pattern */
       
   176   NULL,                 /* no extra data - we didn't study the pattern */
       
   177   PCRE_INFO_NAMECOUNT,  /* number of named substrings */
       
   178   &namecount);          /* where to put the answer */
       
   179 
       
   180 if (namecount <= 0) printf("No named substrings\n"); else
       
   181   {
       
   182   unsigned char *tabptr;
       
   183   printf("Named substrings\n");
       
   184 
       
   185   /* Before we can access the substrings, we must extract the table for
       
   186   translating names to numbers, and the size of each entry in the table. */
       
   187 
       
   188   (void)pcre_fullinfo(
       
   189     re,                       /* the compiled pattern */
       
   190     NULL,                     /* no extra data - we didn't study the pattern */
       
   191     PCRE_INFO_NAMETABLE,      /* address of the table */
       
   192     &name_table);             /* where to put the answer */
       
   193 
       
   194   (void)pcre_fullinfo(
       
   195     re,                       /* the compiled pattern */
       
   196     NULL,                     /* no extra data - we didn't study the pattern */
       
   197     PCRE_INFO_NAMEENTRYSIZE,  /* size of each entry in the table */
       
   198     &name_entry_size);        /* where to put the answer */
       
   199 
       
   200   /* Now we can scan the table and, for each entry, print the number, the name,
       
   201   and the substring itself. */
       
   202 
       
   203   tabptr = name_table;
       
   204   for (i = 0; i < namecount; i++)
       
   205     {
       
   206     int n = (tabptr[0] << 8) | tabptr[1];
       
   207     printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
       
   208       ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]);
       
   209     tabptr += name_entry_size;
       
   210     }
       
   211   }
       
   212 
       
   213 
       
   214 /*************************************************************************
       
   215 * If the "-g" option was given on the command line, we want to continue  *
       
   216 * to search for additional matches in the subject string, in a similar   *
       
   217 * way to the /g option in Perl. This turns out to be trickier than you   *
       
   218 * might think because of the possibility of matching an empty string.    *
       
   219 * What happens is as follows:                                            *
       
   220 *                                                                        *
       
   221 * If the previous match was NOT for an empty string, we can just start   *
       
   222 * the next match at the end of the previous one.                         *
       
   223 *                                                                        *
       
   224 * If the previous match WAS for an empty string, we can't do that, as it *
       
   225 * would lead to an infinite loop. Instead, a special call of pcre_exec() *
       
   226 * is made with the PCRE_NOTEMPTY and PCRE_ANCHORED flags set. The first  *
       
   227 * of these tells PCRE that an empty string is not a valid match; other   *
       
   228 * possibilities must be tried. The second flag restricts PCRE to one     *
       
   229 * match attempt at the initial string position. If this match succeeds,  *
       
   230 * an alternative to the empty string match has been found, and we can    *
       
   231 * proceed round the loop.                                                *
       
   232 *************************************************************************/
       
   233 
       
   234 if (!find_all)
       
   235   {
       
   236   pcre_free(re);   /* Release the memory used for the compiled pattern */
       
   237   return 0;        /* Finish unless -g was given */
       
   238   }
       
   239 
       
   240 /* Loop for second and subsequent matches */
       
   241 
       
   242 for (;;)
       
   243   {
       
   244   int options = 0;                 /* Normally no options */
       
   245   int start_offset = ovector[1];   /* Start at end of previous match */
       
   246 
       
   247   /* If the previous match was for an empty string, we are finished if we are
       
   248   at the end of the subject. Otherwise, arrange to run another match at the
       
   249   same point to see if a non-empty match can be found. */
       
   250 
       
   251   if (ovector[0] == ovector[1])
       
   252     {
       
   253     if (ovector[0] == subject_length) break;
       
   254     options = PCRE_NOTEMPTY | PCRE_ANCHORED;
       
   255     }
       
   256 
       
   257   /* Run the next matching operation */
       
   258 
       
   259   rc = pcre_exec(
       
   260     re,                   /* the compiled pattern */
       
   261     NULL,                 /* no extra data - we didn't study the pattern */
       
   262     subject,              /* the subject string */
       
   263     subject_length,       /* the length of the subject */
       
   264     start_offset,         /* starting offset in the subject */
       
   265     options,              /* options */
       
   266     ovector,              /* output vector for substring information */
       
   267     OVECCOUNT);           /* number of elements in the output vector */
       
   268 
       
   269   /* This time, a result of NOMATCH isn't an error. If the value in "options"
       
   270   is zero, it just means we have found all possible matches, so the loop ends.
       
   271   Otherwise, it means we have failed to find a non-empty-string match at a
       
   272   point where there was a previous empty-string match. In this case, we do what
       
   273   Perl does: advance the matching position by one, and continue. We do this by
       
   274   setting the "end of previous match" offset, because that is picked up at the
       
   275   top of the loop as the point at which to start again. */
       
   276 
       
   277   if (rc == PCRE_ERROR_NOMATCH)
       
   278     {
       
   279     if (options == 0) break;
       
   280     ovector[1] = start_offset + 1;
       
   281     continue;    /* Go round the loop again */
       
   282     }
       
   283 
       
   284   /* Other matching errors are not recoverable. */
       
   285 
       
   286   if (rc < 0)
       
   287     {
       
   288     printf("Matching error %d\n", rc);
       
   289     pcre_free(re);    /* Release memory used for the compiled pattern */
       
   290     return 1;
       
   291     }
       
   292 
       
   293   /* Match succeded */
       
   294 
       
   295   printf("\nMatch succeeded again at offset %d\n", ovector[0]);
       
   296 
       
   297   /* The match succeeded, but the output vector wasn't big enough. */
       
   298 
       
   299   if (rc == 0)
       
   300     {
       
   301     rc = OVECCOUNT/3;
       
   302     printf("ovector only has room for %d captured substrings\n", rc - 1);
       
   303     }
       
   304 
       
   305   /* As before, show substrings stored in the output vector by number, and then
       
   306   also any named substrings. */
       
   307 
       
   308   for (i = 0; i < rc; i++)
       
   309     {
       
   310     char *substring_start = subject + ovector[2*i];
       
   311     int substring_length = ovector[2*i+1] - ovector[2*i];
       
   312     printf("%2d: %.*s\n", i, substring_length, substring_start);
       
   313     }
       
   314 
       
   315   if (namecount <= 0) printf("No named substrings\n"); else
       
   316     {
       
   317     unsigned char *tabptr = name_table;
       
   318     printf("Named substrings\n");
       
   319     for (i = 0; i < namecount; i++)
       
   320       {
       
   321       int n = (tabptr[0] << 8) | tabptr[1];
       
   322       printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
       
   323         ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]);
       
   324       tabptr += name_entry_size;
       
   325       }
       
   326     }
       
   327   }      /* End of loop to find second and subsequent matches */
       
   328 
       
   329 printf("\n");
       
   330 pcre_free(re);       /* Release memory used for the compiled pattern */
       
   331 return 0;
       
   332 }
       
   333 
       
   334 /* End of pcredemo.c */