Engine directory for ticket #1

This commit is contained in:
DavidWyand-GG 2012-09-19 11:15:01 -04:00
parent 352279af7a
commit 7dbfe6994d
3795 changed files with 1363358 additions and 0 deletions

View file

@ -0,0 +1,5 @@
Don't use bits/type_traits.h on Linux - Andrew Galante, GG 8/2/2009: pcre_stringpiece.h
Both Mac and Linux support strtoq - Andrew Galante, GG 8/2/2009: config.h
Neither Mac nor Linux support _strtoi64 - Andrew Galante, GG 8/2/2009: config.h

251
Engine/lib/pcre/config.h Normal file
View file

@ -0,0 +1,251 @@
/* config.h. Generated from config.h.in by configure. */
/* config.h.in. Generated from configure.ac by autoheader. */
/* On Unix-like systems config.h.in is converted by "configure" into config.h.
Some other environments also support the use of "configure". PCRE is written in
Standard C, but there are a few non-standard things it can cope with, allowing
it to run on SunOS4 and other "close to standard" systems.
If you are going to build PCRE "by hand" on a system without "configure" you
should copy the distributed config.h.generic to config.h, and then set up the
macro definitions the way you need them. You must then add -DHAVE_CONFIG_H to
all of your compile commands, so that config.h is included at the start of
every source.
Alternatively, you can avoid editing by using -D on the compiler command line
to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H.
PCRE uses memmove() if HAVE_MEMMOVE is set to 1; otherwise it uses bcopy() if
HAVE_BCOPY is set to 1. If your system has neither bcopy() nor memmove(), set
them both to 0; an emulation function will be used. */
/* By default, the \R escape sequence matches any Unicode line ending
character or sequence of characters. If BSR_ANYCRLF is defined, this is
changed so that backslash-R matches only CR, LF, or CRLF. The build- time
default can be overridden by the user of PCRE at runtime. On systems that
support it, "configure" can be used to override the default. */
/* #undef BSR_ANYCRLF */
/* If you are compiling for a system that uses EBCDIC instead of ASCII
character codes, define this macro as 1. On systems that can use
"configure", this can be done via --enable-ebcdic. */
/* #undef EBCDIC */
/* Define to 1 if you have the `bcopy' function. */
/* #undef HAVE_BCOPY */
/* Define to 1 if you have the <bits/type_traits.h> header file. */
#define HAVE_BITS_TYPE_TRAITS_H 1
/* Define to 1 if you have the <bzlib.h> header file. */
/* #undef HAVE_BZLIB_H */
/* Define to 1 if you have the <dirent.h> header file. */
#define HAVE_DIRENT_H 1
/* Define to 1 if you have the <dlfcn.h> header file. */
/* #undef HAVE_DLFCN_H */
/* Define to 1 if you have the <inttypes.h> header file. */
#define HAVE_INTTYPES_H 1
/* Define to 1 if you have the <limits.h> header file. */
#define HAVE_LIMITS_H 1
/* Define to 1 if the system has the type `long long'. */
#define HAVE_LONG_LONG 1
/* Define to 1 if you have the `memmove' function. */
#define HAVE_MEMMOVE 1
/* Define to 1 if you have the <memory.h> header file. */
#define HAVE_MEMORY_H 1
/* Define to 1 if you have the <readline/history.h> header file. */
/* #undef HAVE_READLINE_HISTORY_H */
/* Define to 1 if you have the <readline/readline.h> header file. */
/* #undef HAVE_READLINE_READLINE_H */
/* Define to 1 if you have the <stdint.h> header file. */
#define HAVE_STDINT_H 1
/* Define to 1 if you have the <stdlib.h> header file. */
#define HAVE_STDLIB_H 1
/* Define to 1 if you have the `strerror' function. */
#define HAVE_STRERROR 1
/* Define to 1 if you have the <string> header file. */
#define HAVE_STRING 1
/* Define to 1 if you have the <strings.h> header file. */
#define HAVE_STRINGS_H 1
/* Define to 1 if you have the <string.h> header file. */
#define HAVE_STRING_H 1
/* Define to 1 if you have the `strtoll' function. */
#if defined(SN_TARGET_PS3)
#define HAVE_STRTOLL 1
#endif
/* Define to 1 if you have the `strtoq' function. */
/* Both Mac and Linux support strtoq - Andrew Galante, GG 8/2/2009 */
#ifdef __GNUC__
#define HAVE_STRTOQ 1
#endif
/* Define to 1 if you have the <sys/stat.h> header file. */
#define HAVE_SYS_STAT_H 1
/* Define to 1 if you have the <sys/types.h> header file. */
#define HAVE_SYS_TYPES_H 1
/* Define to 1 if you have the <type_traits.h> header file. */
/* #undef HAVE_TYPE_TRAITS_H */
/* Define to 1 if you have the <unistd.h> header file. */
#define HAVE_UNISTD_H 1
/* Define to 1 if the system has the type `unsigned long long'. */
#define HAVE_UNSIGNED_LONG_LONG 1
/* Define to 1 if you have the <windows.h> header file. */
#define HAVE_WINDOWS_H 1
/* Define to 1 if you have the <zlib.h> header file. */
/* #undef HAVE_ZLIB_H */
/* Define to 1 if you have the `_strtoi64' function. */
/* Neither Mac nor Linux support _strtoi64 - Andrew Galante, GG 8/2/2009 */
#ifndef __GNUC__
#define HAVE__STRTOI64 1
#endif
/* The value of LINK_SIZE determines the number of bytes used to store links
as offsets within the compiled regex. The default is 2, which allows for
compiled patterns up to 64K long. This covers the vast majority of cases.
However, PCRE can also be compiled to use 3 or 4 bytes instead. This allows
for longer patterns in extreme cases. On systems that support it,
"configure" can be used to override this default. */
#define LINK_SIZE 2
/* The value of MATCH_LIMIT determines the default number of times the
internal match() function can be called during a single execution of
pcre_exec(). There is a runtime interface for setting a different limit.
The limit exists in order to catch runaway regular expressions that take
for ever to determine that they do not match. The default is set very large
so that it does not accidentally catch legitimate cases. On systems that
support it, "configure" can be used to override this default default. */
#define MATCH_LIMIT 10000000
/* The above limit applies to all calls of match(), whether or not they
increase the recursion depth. In some environments it is desirable to limit
the depth of recursive calls of match() more strictly, in order to restrict
the maximum amount of stack (or heap, if NO_RECURSE is defined) that is
used. The value of MATCH_LIMIT_RECURSION applies only to recursive calls of
match(). To have any useful effect, it must be less than the value of
MATCH_LIMIT. The default is to use the same value as MATCH_LIMIT. There is
a runtime method for setting a different limit. On systems that support it,
"configure" can be used to override the default. */
#define MATCH_LIMIT_RECURSION MATCH_LIMIT
/* This limit is parameterized just in case anybody ever wants to change it.
Care must be taken if it is increased, because it guards against integer
overflow caused by enormously large patterns. */
#define MAX_NAME_COUNT 10000
/* This limit is parameterized just in case anybody ever wants to change it.
Care must be taken if it is increased, because it guards against integer
overflow caused by enormously large patterns. */
#define MAX_NAME_SIZE 32
/* The value of NEWLINE determines the newline character sequence. On systems
that support it, "configure" can be used to override the default, which is
10. The possible values are 10 (LF), 13 (CR), 3338 (CRLF), -1 (ANY), or -2
(ANYCRLF). */
#define NEWLINE 10
/* PCRE uses recursive function calls to handle backtracking while matching.
This can sometimes be a problem on systems that have stacks of limited
size. Define NO_RECURSE to get a version that doesn't use recursion in the
match() function; instead it creates its own stack by steam using
pcre_recurse_malloc() to obtain memory from the heap. For more detail, see
the comments and other stuff just above the match() function. On systems
that support it, "configure" can be used to set this in the Makefile (use
--disable-stack-for-recursion). */
/* #undef NO_RECURSE */
/* Name of package */
#define PACKAGE "pcre"
/* Define to the address where bug reports for this package should be sent. */
#define PACKAGE_BUGREPORT ""
/* Define to the full name of this package. */
#define PACKAGE_NAME "PCRE"
/* Define to the full name and version of this package. */
#define PACKAGE_STRING "PCRE 7.6"
/* Define to the one symbol short name of this package. */
#define PACKAGE_TARNAME "pcre"
/* Define to the version of this package. */
#define PACKAGE_VERSION "7.6"
/* If you are compiling for a system other than a Unix-like system or
Win32, and it needs some magic to be inserted before the definition
of a function that is exported by the library, define this macro to
contain the relevant magic. If you do not define this macro, it
defaults to "extern" for a C compiler and "extern C" for a C++
compiler on non-Win32 systems. This macro apears at the start of
every exported function that is part of the external API. It does
not appear on functions that are "external" in the C sense, but
which are internal to the library. */
/* #undef PCRE_EXP_DEFN */
/* Define if linking statically (TODO: make nice with Libtool) */
#define PCRE_STATIC 1
/* When calling PCRE via the POSIX interface, additional working storage is
required for holding the pointers to capturing substrings because PCRE
requires three integers per substring, whereas the POSIX interface provides
only two. If the number of expected substrings is small, the wrapper
function uses space on the stack, because this is faster than using
malloc() for each call. The threshold above which the stack is no longer
used is defined by POSIX_MALLOC_THRESHOLD. On systems that support it,
"configure" can be used to override this default. */
#define POSIX_MALLOC_THRESHOLD 10
/* Define to 1 if you have the ANSI C header files. */
#define STDC_HEADERS 1
/* Define to allow pcregrep to be linked with libbz2, so that it is able to
handle .bz2 files. */
/* #undef SUPPORT_LIBBZ2 */
/* Define to allow pcretest to be linked with libreadline. */
/* #undef SUPPORT_LIBREADLINE */
/* Define to allow pcregrep to be linked with libz, so that it is able to
handle .gz files. */
/* #undef SUPPORT_LIBZ */
/* Define to enable support for Unicode properties */
/* #undef SUPPORT_UCP */
/* Define to enable support for the UTF-8 Unicode encoding. */
/* #undef SUPPORT_UTF8 */
/* Version number of package */
#define VERSION "7.6"
/* Define to empty if `const' does not conform to ANSI C. */
/* #undef const */
/* Define to `unsigned int' if <sys/types.h> does not define. */
/* #undef size_t */

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

303
Engine/lib/pcre/pcre.h Normal file
View file

@ -0,0 +1,303 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* This is the public header file for the PCRE library, to be #included by
applications that call the PCRE functions.
Copyright (c) 1997-2008 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
#ifndef _PCRE_H
#define _PCRE_H
/* The current PCRE version information. */
#define PCRE_MAJOR 7
#define PCRE_MINOR 6
#define PCRE_PRERELEASE
#define PCRE_DATE 2008-01-28
/* When an application links to a PCRE DLL in Windows, the symbols that are
imported have to be identified as such. When building PCRE, the appropriate
export setting is defined in pcre_internal.h, which includes this file. So we
don't change existing definitions of PCRE_EXP_DECL and PCRECPP_EXP_DECL. */
#if defined(_WIN32) && !defined(PCRE_STATIC)
# ifndef PCRE_EXP_DECL
# define PCRE_EXP_DECL extern __declspec(dllimport)
# endif
# ifdef __cplusplus
# ifndef PCRECPP_EXP_DECL
# define PCRECPP_EXP_DECL extern __declspec(dllimport)
# endif
# ifndef PCRECPP_EXP_DEFN
# define PCRECPP_EXP_DEFN __declspec(dllimport)
# endif
# endif
#endif
/* By default, we use the standard "extern" declarations. */
#ifndef PCRE_EXP_DECL
# ifdef __cplusplus
# define PCRE_EXP_DECL extern "C"
# else
# define PCRE_EXP_DECL extern
# endif
#endif
#ifdef __cplusplus
# ifndef PCRECPP_EXP_DECL
# define PCRECPP_EXP_DECL extern
# endif
# ifndef PCRECPP_EXP_DEFN
# define PCRECPP_EXP_DEFN
# endif
#endif
/* Have to include stdlib.h in order to ensure that size_t is defined;
it is needed here for malloc. */
#include <stdlib.h>
/* Allow for C++ users */
#ifdef __cplusplus
extern "C" {
#endif
/* Options */
#define PCRE_CASELESS 0x00000001
#define PCRE_MULTILINE 0x00000002
#define PCRE_DOTALL 0x00000004
#define PCRE_EXTENDED 0x00000008
#define PCRE_ANCHORED 0x00000010
#define PCRE_DOLLAR_ENDONLY 0x00000020
#define PCRE_EXTRA 0x00000040
#define PCRE_NOTBOL 0x00000080
#define PCRE_NOTEOL 0x00000100
#define PCRE_UNGREEDY 0x00000200
#define PCRE_NOTEMPTY 0x00000400
#define PCRE_UTF8 0x00000800
#define PCRE_NO_AUTO_CAPTURE 0x00001000
#define PCRE_NO_UTF8_CHECK 0x00002000
#define PCRE_AUTO_CALLOUT 0x00004000
#define PCRE_PARTIAL 0x00008000
#define PCRE_DFA_SHORTEST 0x00010000
#define PCRE_DFA_RESTART 0x00020000
#define PCRE_FIRSTLINE 0x00040000
#define PCRE_DUPNAMES 0x00080000
#define PCRE_NEWLINE_CR 0x00100000
#define PCRE_NEWLINE_LF 0x00200000
#define PCRE_NEWLINE_CRLF 0x00300000
#define PCRE_NEWLINE_ANY 0x00400000
#define PCRE_NEWLINE_ANYCRLF 0x00500000
#define PCRE_BSR_ANYCRLF 0x00800000
#define PCRE_BSR_UNICODE 0x01000000
/* Exec-time and get/set-time error codes */
#define PCRE_ERROR_NOMATCH (-1)
#define PCRE_ERROR_NULL (-2)
#define PCRE_ERROR_BADOPTION (-3)
#define PCRE_ERROR_BADMAGIC (-4)
#define PCRE_ERROR_UNKNOWN_OPCODE (-5)
#define PCRE_ERROR_UNKNOWN_NODE (-5) /* For backward compatibility */
#define PCRE_ERROR_NOMEMORY (-6)
#define PCRE_ERROR_NOSUBSTRING (-7)
#define PCRE_ERROR_MATCHLIMIT (-8)
#define PCRE_ERROR_CALLOUT (-9) /* Never used by PCRE itself */
#define PCRE_ERROR_BADUTF8 (-10)
#define PCRE_ERROR_BADUTF8_OFFSET (-11)
#define PCRE_ERROR_PARTIAL (-12)
#define PCRE_ERROR_BADPARTIAL (-13)
#define PCRE_ERROR_INTERNAL (-14)
#define PCRE_ERROR_BADCOUNT (-15)
#define PCRE_ERROR_DFA_UITEM (-16)
#define PCRE_ERROR_DFA_UCOND (-17)
#define PCRE_ERROR_DFA_UMLIMIT (-18)
#define PCRE_ERROR_DFA_WSSIZE (-19)
#define PCRE_ERROR_DFA_RECURSE (-20)
#define PCRE_ERROR_RECURSIONLIMIT (-21)
#define PCRE_ERROR_NULLWSLIMIT (-22) /* No longer actually used */
#define PCRE_ERROR_BADNEWLINE (-23)
/* Request types for pcre_fullinfo() */
#define PCRE_INFO_OPTIONS 0
#define PCRE_INFO_SIZE 1
#define PCRE_INFO_CAPTURECOUNT 2
#define PCRE_INFO_BACKREFMAX 3
#define PCRE_INFO_FIRSTBYTE 4
#define PCRE_INFO_FIRSTCHAR 4 /* For backwards compatibility */
#define PCRE_INFO_FIRSTTABLE 5
#define PCRE_INFO_LASTLITERAL 6
#define PCRE_INFO_NAMEENTRYSIZE 7
#define PCRE_INFO_NAMECOUNT 8
#define PCRE_INFO_NAMETABLE 9
#define PCRE_INFO_STUDYSIZE 10
#define PCRE_INFO_DEFAULT_TABLES 11
#define PCRE_INFO_OKPARTIAL 12
#define PCRE_INFO_JCHANGED 13
#define PCRE_INFO_HASCRORLF 14
/* Request types for pcre_config(). Do not re-arrange, in order to remain
compatible. */
#define PCRE_CONFIG_UTF8 0
#define PCRE_CONFIG_NEWLINE 1
#define PCRE_CONFIG_LINK_SIZE 2
#define PCRE_CONFIG_POSIX_MALLOC_THRESHOLD 3
#define PCRE_CONFIG_MATCH_LIMIT 4
#define PCRE_CONFIG_STACKRECURSE 5
#define PCRE_CONFIG_UNICODE_PROPERTIES 6
#define PCRE_CONFIG_MATCH_LIMIT_RECURSION 7
#define PCRE_CONFIG_BSR 8
/* Bit flags for the pcre_extra structure. Do not re-arrange or redefine
these bits, just add new ones on the end, in order to remain compatible. */
#define PCRE_EXTRA_STUDY_DATA 0x0001
#define PCRE_EXTRA_MATCH_LIMIT 0x0002
#define PCRE_EXTRA_CALLOUT_DATA 0x0004
#define PCRE_EXTRA_TABLES 0x0008
#define PCRE_EXTRA_MATCH_LIMIT_RECURSION 0x0010
/* Types */
struct real_pcre; /* declaration; the definition is private */
typedef struct real_pcre pcre;
/* When PCRE is compiled as a C++ library, the subject pointer type can be
replaced with a custom type. For conventional use, the public interface is a
const char *. */
#ifndef PCRE_SPTR
#define PCRE_SPTR const char *
#endif
/* The structure for passing additional data to pcre_exec(). This is defined in
such as way as to be extensible. Always add new fields at the end, in order to
remain compatible. */
typedef struct pcre_extra {
unsigned long int flags; /* Bits for which fields are set */
void *study_data; /* Opaque data from pcre_study() */
unsigned long int match_limit; /* Maximum number of calls to match() */
void *callout_data; /* Data passed back in callouts */
const unsigned char *tables; /* Pointer to character tables */
unsigned long int match_limit_recursion; /* Max recursive calls to match() */
} pcre_extra;
/* The structure for passing out data via the pcre_callout_function. We use a
structure so that new fields can be added on the end in future versions,
without changing the API of the function, thereby allowing old clients to work
without modification. */
typedef struct pcre_callout_block {
int version; /* Identifies version of block */
/* ------------------------ Version 0 ------------------------------- */
int callout_number; /* Number compiled into pattern */
int *offset_vector; /* The offset vector */
PCRE_SPTR subject; /* The subject being matched */
int subject_length; /* The length of the subject */
int start_match; /* Offset to start of this match attempt */
int current_position; /* Where we currently are in the subject */
int capture_top; /* Max current capture */
int capture_last; /* Most recently closed capture */
void *callout_data; /* Data passed in with the call */
/* ------------------- Added for Version 1 -------------------------- */
int pattern_position; /* Offset to next item in the pattern */
int next_item_length; /* Length of next item in the pattern */
/* ------------------------------------------------------------------ */
} pcre_callout_block;
/* Indirection for store get and free functions. These can be set to
alternative malloc/free functions if required. Special ones are used in the
non-recursive case for "frames". There is also an optional callout function
that is triggered by the (?) regex item. For Virtual Pascal, these definitions
have to take another form. */
#ifndef VPCOMPAT
PCRE_EXP_DECL void *(*pcre_malloc)(size_t);
PCRE_EXP_DECL void (*pcre_free)(void *);
PCRE_EXP_DECL void *(*pcre_stack_malloc)(size_t);
PCRE_EXP_DECL void (*pcre_stack_free)(void *);
PCRE_EXP_DECL int (*pcre_callout)(pcre_callout_block *);
#else /* VPCOMPAT */
PCRE_EXP_DECL void *pcre_malloc(size_t);
PCRE_EXP_DECL void pcre_free(void *);
PCRE_EXP_DECL void *pcre_stack_malloc(size_t);
PCRE_EXP_DECL void pcre_stack_free(void *);
PCRE_EXP_DECL int pcre_callout(pcre_callout_block *);
#endif /* VPCOMPAT */
/* Exported PCRE functions */
PCRE_EXP_DECL pcre *pcre_compile(const char *, int, const char **, int *,
const unsigned char *);
PCRE_EXP_DECL pcre *pcre_compile2(const char *, int, int *, const char **,
int *, const unsigned char *);
PCRE_EXP_DECL int pcre_config(int, void *);
PCRE_EXP_DECL int pcre_copy_named_substring(const pcre *, const char *,
int *, int, const char *, char *, int);
PCRE_EXP_DECL int pcre_copy_substring(const char *, int *, int, int, char *,
int);
PCRE_EXP_DECL int pcre_dfa_exec(const pcre *, const pcre_extra *,
const char *, int, int, int, int *, int , int *, int);
PCRE_EXP_DECL int pcre_exec(const pcre *, const pcre_extra *, PCRE_SPTR,
int, int, int, int *, int);
PCRE_EXP_DECL void pcre_free_substring(const char *);
PCRE_EXP_DECL void pcre_free_substring_list(const char **);
PCRE_EXP_DECL int pcre_fullinfo(const pcre *, const pcre_extra *, int,
void *);
PCRE_EXP_DECL int pcre_get_named_substring(const pcre *, const char *,
int *, int, const char *, const char **);
PCRE_EXP_DECL int pcre_get_stringnumber(const pcre *, const char *);
PCRE_EXP_DECL int pcre_get_stringtable_entries(const pcre *, const char *,
char **, char **);
PCRE_EXP_DECL int pcre_get_substring(const char *, int *, int, int,
const char **);
PCRE_EXP_DECL int pcre_get_substring_list(const char *, int *, int,
const char ***);
PCRE_EXP_DECL int pcre_info(const pcre *, int *, int *);
PCRE_EXP_DECL const unsigned char *pcre_maketables(void);
PCRE_EXP_DECL int pcre_refcount(pcre *, int);
PCRE_EXP_DECL pcre_extra *pcre_study(const pcre *, int, const char **);
PCRE_EXP_DECL const char *pcre_version(void);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* End of pcre.h */

View file

@ -0,0 +1,193 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* This file was automatically written by the dftables auxiliary
program. It contains character tables that are used when no external
tables are passed to PCRE by the application that calls it. The tables
are used only for characters whose code values are less than 256.
The following #includes are present because without them gcc 4.x may remove
the array definition from the final binary if PCRE is built into a static
library and dead code stripping is activated. This leads to link errors.
Pulling in the header ensures that the array gets flagged as "someone
outside this compilation unit might reference this" and so it will always
be supplied to the linker. */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "pcre_internal.h"
const unsigned char _pcre_default_tables[] = {
/* This table is a lower casing table. */
0, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23,
24, 25, 26, 27, 28, 29, 30, 31,
32, 33, 34, 35, 36, 37, 38, 39,
40, 41, 42, 43, 44, 45, 46, 47,
48, 49, 50, 51, 52, 53, 54, 55,
56, 57, 58, 59, 60, 61, 62, 63,
64, 97, 98, 99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122, 91, 92, 93, 94, 95,
96, 97, 98, 99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
192,193,194,195,196,197,198,199,
200,201,202,203,204,205,206,207,
208,209,210,211,212,213,214,215,
216,217,218,219,220,221,222,223,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,247,
248,249,250,251,252,253,254,255,
/* This table is a case flipping table. */
0, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23,
24, 25, 26, 27, 28, 29, 30, 31,
32, 33, 34, 35, 36, 37, 38, 39,
40, 41, 42, 43, 44, 45, 46, 47,
48, 49, 50, 51, 52, 53, 54, 55,
56, 57, 58, 59, 60, 61, 62, 63,
64, 97, 98, 99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122, 91, 92, 93, 94, 95,
96, 65, 66, 67, 68, 69, 70, 71,
72, 73, 74, 75, 76, 77, 78, 79,
80, 81, 82, 83, 84, 85, 86, 87,
88, 89, 90,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
192,193,194,195,196,197,198,199,
200,201,202,203,204,205,206,207,
208,209,210,211,212,213,214,215,
216,217,218,219,220,221,222,223,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,247,
248,249,250,251,252,253,254,255,
/* This table contains bit maps for various character classes.
Each map is 32 bytes long and the bits run from the least
significant end of each byte. The classes that have their own
maps are: space, xdigit, digit, upper, lower, word, graph
print, punct, and cntrl. Other classes are built from combinations. */
0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
/* This table identifies various classes of character by individual bits:
0x01 white space character
0x02 letter
0x04 decimal digit
0x08 hexadecimal digit
0x10 alphanumeric or '_'
0x80 regular expression metacharacter or binary zero
*/
0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
/* End of pcre_chartables.c */

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,128 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2008 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains the external function pcre_config(). */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "pcre_internal.h"
/*************************************************
* Return info about what features are configured *
*************************************************/
/* This function has an extensible interface so that additional items can be
added compatibly.
Arguments:
what what information is required
where where to put the information
Returns: 0 if data returned, negative on error
*/
PCRE_EXP_DEFN int
pcre_config(int what, void *where)
{
switch (what)
{
case PCRE_CONFIG_UTF8:
#ifdef SUPPORT_UTF8
*((int *)where) = 1;
#else
*((int *)where) = 0;
#endif
break;
case PCRE_CONFIG_UNICODE_PROPERTIES:
#ifdef SUPPORT_UCP
*((int *)where) = 1;
#else
*((int *)where) = 0;
#endif
break;
case PCRE_CONFIG_NEWLINE:
*((int *)where) = NEWLINE;
break;
case PCRE_CONFIG_BSR:
#ifdef BSR_ANYCRLF
*((int *)where) = 1;
#else
*((int *)where) = 0;
#endif
break;
case PCRE_CONFIG_LINK_SIZE:
*((int *)where) = LINK_SIZE;
break;
case PCRE_CONFIG_POSIX_MALLOC_THRESHOLD:
*((int *)where) = POSIX_MALLOC_THRESHOLD;
break;
case PCRE_CONFIG_MATCH_LIMIT:
*((unsigned int *)where) = MATCH_LIMIT;
break;
case PCRE_CONFIG_MATCH_LIMIT_RECURSION:
*((unsigned int *)where) = MATCH_LIMIT_RECURSION;
break;
case PCRE_CONFIG_STACKRECURSE:
#ifdef NO_RECURSE
*((int *)where) = 0;
#else
*((int *)where) = 1;
#endif
break;
default: return PCRE_ERROR_BADOPTION;
}
return 0;
}
/* End of pcre_config.c */

File diff suppressed because it is too large Load diff

4940
Engine/lib/pcre/pcre_exec.c Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,165 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2008 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains the external function pcre_fullinfo(), which returns
information about a compiled pattern. */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "pcre_internal.h"
/*************************************************
* Return info about compiled pattern *
*************************************************/
/* This is a newer "info" function which has an extensible interface so
that additional items can be added compatibly.
Arguments:
argument_re points to compiled code
extra_data points extra data, or NULL
what what information is required
where where to put the information
Returns: 0 if data returned, negative on error
*/
PCRE_EXP_DEFN int
pcre_fullinfo(const pcre *argument_re, const pcre_extra *extra_data, int what,
void *where)
{
real_pcre internal_re;
pcre_study_data internal_study;
const real_pcre *re = (const real_pcre *)argument_re;
const pcre_study_data *study = NULL;
if (re == NULL || where == NULL) return PCRE_ERROR_NULL;
if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_STUDY_DATA) != 0)
study = (const pcre_study_data *)extra_data->study_data;
if (re->magic_number != MAGIC_NUMBER)
{
re = _pcre_try_flipped(re, &internal_re, study, &internal_study);
if (re == NULL) return PCRE_ERROR_BADMAGIC;
if (study != NULL) study = &internal_study;
}
switch (what)
{
case PCRE_INFO_OPTIONS:
*((unsigned long int *)where) = re->options & PUBLIC_OPTIONS;
break;
case PCRE_INFO_SIZE:
*((size_t *)where) = re->size;
break;
case PCRE_INFO_STUDYSIZE:
*((size_t *)where) = (study == NULL)? 0 : study->size;
break;
case PCRE_INFO_CAPTURECOUNT:
*((int *)where) = re->top_bracket;
break;
case PCRE_INFO_BACKREFMAX:
*((int *)where) = re->top_backref;
break;
case PCRE_INFO_FIRSTBYTE:
*((int *)where) =
((re->flags & PCRE_FIRSTSET) != 0)? re->first_byte :
((re->flags & PCRE_STARTLINE) != 0)? -1 : -2;
break;
/* Make sure we pass back the pointer to the bit vector in the external
block, not the internal copy (with flipped integer fields). */
case PCRE_INFO_FIRSTTABLE:
*((const uschar **)where) =
(study != NULL && (study->options & PCRE_STUDY_MAPPED) != 0)?
((const pcre_study_data *)extra_data->study_data)->start_bits : NULL;
break;
case PCRE_INFO_LASTLITERAL:
*((int *)where) =
((re->flags & PCRE_REQCHSET) != 0)? re->req_byte : -1;
break;
case PCRE_INFO_NAMEENTRYSIZE:
*((int *)where) = re->name_entry_size;
break;
case PCRE_INFO_NAMECOUNT:
*((int *)where) = re->name_count;
break;
case PCRE_INFO_NAMETABLE:
*((const uschar **)where) = (const uschar *)re + re->name_table_offset;
break;
case PCRE_INFO_DEFAULT_TABLES:
*((const uschar **)where) = (const uschar *)(_pcre_default_tables);
break;
case PCRE_INFO_OKPARTIAL:
*((int *)where) = (re->flags & PCRE_NOPARTIAL) == 0;
break;
case PCRE_INFO_JCHANGED:
*((int *)where) = (re->flags & PCRE_JCHANGED) != 0;
break;
case PCRE_INFO_HASCRORLF:
*((int *)where) = (re->flags & PCRE_HASCRORLF) != 0;
break;
default: return PCRE_ERROR_BADOPTION;
}
return 0;
}
/* End of pcre_fullinfo.c */

465
Engine/lib/pcre/pcre_get.c Normal file
View file

@ -0,0 +1,465 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2008 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains some convenience functions for extracting substrings
from the subject string after a regex match has succeeded. The original idea
for these functions came from Scott Wimer. */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "pcre_internal.h"
/*************************************************
* Find number for named string *
*************************************************/
/* This function is used by the get_first_set() function below, as well
as being generally available. It assumes that names are unique.
Arguments:
code the compiled regex
stringname the name whose number is required
Returns: the number of the named parentheses, or a negative number
(PCRE_ERROR_NOSUBSTRING) if not found
*/
int
pcre_get_stringnumber(const pcre *code, const char *stringname)
{
int rc;
int entrysize;
int top, bot;
uschar *nametable;
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
return rc;
if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
return rc;
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
return rc;
bot = 0;
while (top > bot)
{
int mid = (top + bot) / 2;
uschar *entry = nametable + entrysize*mid;
int c = strcmp(stringname, (char *)(entry + 2));
if (c == 0) return (entry[0] << 8) + entry[1];
if (c > 0) bot = mid + 1; else top = mid;
}
return PCRE_ERROR_NOSUBSTRING;
}
/*************************************************
* Find (multiple) entries for named string *
*************************************************/
/* This is used by the get_first_set() function below, as well as being
generally available. It is used when duplicated names are permitted.
Arguments:
code the compiled regex
stringname the name whose entries required
firstptr where to put the pointer to the first entry
lastptr where to put the pointer to the last entry
Returns: the length of each entry, or a negative number
(PCRE_ERROR_NOSUBSTRING) if not found
*/
int
pcre_get_stringtable_entries(const pcre *code, const char *stringname,
char **firstptr, char **lastptr)
{
int rc;
int entrysize;
int top, bot;
uschar *nametable, *lastentry;
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
return rc;
if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
return rc;
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
return rc;
lastentry = nametable + entrysize * (top - 1);
bot = 0;
while (top > bot)
{
int mid = (top + bot) / 2;
uschar *entry = nametable + entrysize*mid;
int c = strcmp(stringname, (char *)(entry + 2));
if (c == 0)
{
uschar *first = entry;
uschar *last = entry;
while (first > nametable)
{
if (strcmp(stringname, (char *)(first - entrysize + 2)) != 0) break;
first -= entrysize;
}
while (last < lastentry)
{
if (strcmp(stringname, (char *)(last + entrysize + 2)) != 0) break;
last += entrysize;
}
*firstptr = (char *)first;
*lastptr = (char *)last;
return entrysize;
}
if (c > 0) bot = mid + 1; else top = mid;
}
return PCRE_ERROR_NOSUBSTRING;
}
/*************************************************
* Find first set of multiple named strings *
*************************************************/
/* This function allows for duplicate names in the table of named substrings.
It returns the number of the first one that was set in a pattern match.
Arguments:
code the compiled regex
stringname the name of the capturing substring
ovector the vector of matched substrings
Returns: the number of the first that is set,
or the number of the last one if none are set,
or a negative number on error
*/
static int
get_first_set(const pcre *code, const char *stringname, int *ovector)
{
const real_pcre *re = (const real_pcre *)code;
int entrysize;
char *first, *last;
uschar *entry;
if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0)
return pcre_get_stringnumber(code, stringname);
entrysize = pcre_get_stringtable_entries(code, stringname, &first, &last);
if (entrysize <= 0) return entrysize;
for (entry = (uschar *)first; entry <= (uschar *)last; entry += entrysize)
{
int n = (entry[0] << 8) + entry[1];
if (ovector[n*2] >= 0) return n;
}
return (first[0] << 8) + first[1];
}
/*************************************************
* Copy captured string to given buffer *
*************************************************/
/* This function copies a single captured substring into a given buffer.
Note that we use memcpy() rather than strncpy() in case there are binary zeros
in the string.
Arguments:
subject the subject string that was matched
ovector pointer to the offsets table
stringcount the number of substrings that were captured
(i.e. the yield of the pcre_exec call, unless
that was zero, in which case it should be 1/3
of the offset table size)
stringnumber the number of the required substring
buffer where to put the substring
size the size of the buffer
Returns: if successful:
the length of the copied string, not including the zero
that is put on the end; can be zero
if not successful:
PCRE_ERROR_NOMEMORY (-6) buffer too small
PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
*/
int
pcre_copy_substring(const char *subject, int *ovector, int stringcount,
int stringnumber, char *buffer, int size)
{
int yield;
if (stringnumber < 0 || stringnumber >= stringcount)
return PCRE_ERROR_NOSUBSTRING;
stringnumber *= 2;
yield = ovector[stringnumber+1] - ovector[stringnumber];
if (size < yield + 1) return PCRE_ERROR_NOMEMORY;
memcpy(buffer, subject + ovector[stringnumber], yield);
buffer[yield] = 0;
return yield;
}
/*************************************************
* Copy named captured string to given buffer *
*************************************************/
/* This function copies a single captured substring into a given buffer,
identifying it by name. If the regex permits duplicate names, the first
substring that is set is chosen.
Arguments:
code the compiled regex
subject the subject string that was matched
ovector pointer to the offsets table
stringcount the number of substrings that were captured
(i.e. the yield of the pcre_exec call, unless
that was zero, in which case it should be 1/3
of the offset table size)
stringname the name of the required substring
buffer where to put the substring
size the size of the buffer
Returns: if successful:
the length of the copied string, not including the zero
that is put on the end; can be zero
if not successful:
PCRE_ERROR_NOMEMORY (-6) buffer too small
PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
*/
int
pcre_copy_named_substring(const pcre *code, const char *subject, int *ovector,
int stringcount, const char *stringname, char *buffer, int size)
{
int n = get_first_set(code, stringname, ovector);
if (n <= 0) return n;
return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
}
/*************************************************
* Copy all captured strings to new store *
*************************************************/
/* This function gets one chunk of store and builds a list of pointers and all
of the captured substrings in it. A NULL pointer is put on the end of the list.
Arguments:
subject the subject string that was matched
ovector pointer to the offsets table
stringcount the number of substrings that were captured
(i.e. the yield of the pcre_exec call, unless
that was zero, in which case it should be 1/3
of the offset table size)
listptr set to point to the list of pointers
Returns: if successful: 0
if not successful:
PCRE_ERROR_NOMEMORY (-6) failed to get store
*/
int
pcre_get_substring_list(const char *subject, int *ovector, int stringcount,
const char ***listptr)
{
int i;
int size = sizeof(char *);
int double_count = stringcount * 2;
char **stringlist;
char *p;
for (i = 0; i < double_count; i += 2)
size += sizeof(char *) + ovector[i+1] - ovector[i] + 1;
stringlist = (char **)(pcre_malloc)(size);
if (stringlist == NULL) return PCRE_ERROR_NOMEMORY;
*listptr = (const char **)stringlist;
p = (char *)(stringlist + stringcount + 1);
for (i = 0; i < double_count; i += 2)
{
int len = ovector[i+1] - ovector[i];
memcpy(p, subject + ovector[i], len);
*stringlist++ = p;
p += len;
*p++ = 0;
}
*stringlist = NULL;
return 0;
}
/*************************************************
* Free store obtained by get_substring_list *
*************************************************/
/* This function exists for the benefit of people calling PCRE from non-C
programs that can call its functions, but not free() or (pcre_free)() directly.
Argument: the result of a previous pcre_get_substring_list()
Returns: nothing
*/
void
pcre_free_substring_list(const char **pointer)
{
(pcre_free)((void *)pointer);
}
/*************************************************
* Copy captured string to new store *
*************************************************/
/* This function copies a single captured substring into a piece of new
store
Arguments:
subject the subject string that was matched
ovector pointer to the offsets table
stringcount the number of substrings that were captured
(i.e. the yield of the pcre_exec call, unless
that was zero, in which case it should be 1/3
of the offset table size)
stringnumber the number of the required substring
stringptr where to put a pointer to the substring
Returns: if successful:
the length of the string, not including the zero that
is put on the end; can be zero
if not successful:
PCRE_ERROR_NOMEMORY (-6) failed to get store
PCRE_ERROR_NOSUBSTRING (-7) substring not present
*/
int
pcre_get_substring(const char *subject, int *ovector, int stringcount,
int stringnumber, const char **stringptr)
{
int yield;
char *substring;
if (stringnumber < 0 || stringnumber >= stringcount)
return PCRE_ERROR_NOSUBSTRING;
stringnumber *= 2;
yield = ovector[stringnumber+1] - ovector[stringnumber];
substring = (char *)(pcre_malloc)(yield + 1);
if (substring == NULL) return PCRE_ERROR_NOMEMORY;
memcpy(substring, subject + ovector[stringnumber], yield);
substring[yield] = 0;
*stringptr = substring;
return yield;
}
/*************************************************
* Copy named captured string to new store *
*************************************************/
/* This function copies a single captured substring, identified by name, into
new store. If the regex permits duplicate names, the first substring that is
set is chosen.
Arguments:
code the compiled regex
subject the subject string that was matched
ovector pointer to the offsets table
stringcount the number of substrings that were captured
(i.e. the yield of the pcre_exec call, unless
that was zero, in which case it should be 1/3
of the offset table size)
stringname the name of the required substring
stringptr where to put the pointer
Returns: if successful:
the length of the copied string, not including the zero
that is put on the end; can be zero
if not successful:
PCRE_ERROR_NOMEMORY (-6) couldn't get memory
PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
*/
int
pcre_get_named_substring(const pcre *code, const char *subject, int *ovector,
int stringcount, const char *stringname, const char **stringptr)
{
int n = get_first_set(code, stringname, ovector);
if (n <= 0) return n;
return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
}
/*************************************************
* Free store obtained by get_substring *
*************************************************/
/* This function exists for the benefit of people calling PCRE from non-C
programs that can call its functions, but not free() or (pcre_free)() directly.
Argument: the result of a previous pcre_get_substring()
Returns: nothing
*/
void
pcre_free_substring(const char *pointer)
{
(pcre_free)((void *)pointer);
}
/* End of pcre_get.c */

View file

@ -0,0 +1,63 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2008 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains global variables that are exported by the PCRE library.
PCRE is thread-clean and doesn't use any global variables in the normal sense.
However, it calls memory allocation and freeing functions via the four
indirections below, and it can optionally do callouts, using the fifth
indirection. These values can be changed by the caller, but are shared between
all threads. However, when compiling for Virtual Pascal, things are done
differently, and global variables are not used (see pcre.in). */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "pcre_internal.h"
#ifndef VPCOMPAT
PCRE_EXP_DATA_DEFN void *(*pcre_malloc)(size_t) = malloc;
PCRE_EXP_DATA_DEFN void (*pcre_free)(void *) = free;
PCRE_EXP_DATA_DEFN void *(*pcre_stack_malloc)(size_t) = malloc;
PCRE_EXP_DATA_DEFN void (*pcre_stack_free)(void *) = free;
PCRE_EXP_DATA_DEFN int (*pcre_callout)(pcre_callout_block *) = NULL;
#endif
/* End of pcre_globals.c */

View file

@ -0,0 +1,93 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2008 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains the external function pcre_info(), which gives some
information about a compiled pattern. However, use of this function is now
deprecated, as it has been superseded by pcre_fullinfo(). */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "pcre_internal.h"
/*************************************************
* (Obsolete) Return info about compiled pattern *
*************************************************/
/* This is the original "info" function. It picks potentially useful data out
of the private structure, but its interface was too rigid. It remains for
backwards compatibility. The public options are passed back in an int - though
the re->options field has been expanded to a long int, all the public options
at the low end of it, and so even on 16-bit systems this will still be OK.
Therefore, I haven't changed the API for pcre_info().
Arguments:
argument_re points to compiled code
optptr where to pass back the options
first_byte where to pass back the first character,
or -1 if multiline and all branches start ^,
or -2 otherwise
Returns: number of capturing subpatterns
or negative values on error
*/
PCRE_EXP_DEFN int
pcre_info(const pcre *argument_re, int *optptr, int *first_byte)
{
real_pcre internal_re;
const real_pcre *re = (const real_pcre *)argument_re;
if (re == NULL) return PCRE_ERROR_NULL;
if (re->magic_number != MAGIC_NUMBER)
{
re = _pcre_try_flipped(re, &internal_re, NULL, NULL);
if (re == NULL) return PCRE_ERROR_BADMAGIC;
}
if (optptr != NULL) *optptr = (int)(re->options & PUBLIC_OPTIONS);
if (first_byte != NULL)
*first_byte = ((re->flags & PCRE_FIRSTSET) != 0)? re->first_byte :
((re->flags & PCRE_STARTLINE) != 0)? -1 : -2;
return re->top_bracket;
}
/* End of pcre_info.c */

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,143 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2008 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains the external function pcre_maketables(), which builds
character tables for PCRE in the current locale. The file is compiled on its
own as part of the PCRE library. However, it is also included in the
compilation of dftables.c, in which case the macro DFTABLES is defined. */
#ifndef DFTABLES
# ifdef HAVE_CONFIG_H
# include "config.h"
# endif
# include "pcre_internal.h"
#endif
/*************************************************
* Create PCRE character tables *
*************************************************/
/* This function builds a set of character tables for use by PCRE and returns
a pointer to them. They are build using the ctype functions, and consequently
their contents will depend upon the current locale setting. When compiled as
part of the library, the store is obtained via pcre_malloc(), but when compiled
inside dftables, use malloc().
Arguments: none
Returns: pointer to the contiguous block of data
*/
const unsigned char *
pcre_maketables(void)
{
unsigned char *yield, *p;
int i;
#ifndef DFTABLES
yield = (unsigned char*)(pcre_malloc)(tables_length);
#else
yield = (unsigned char*)malloc(tables_length);
#endif
if (yield == NULL) return NULL;
p = yield;
/* First comes the lower casing table */
for (i = 0; i < 256; i++) *p++ = tolower(i);
/* Next the case-flipping table */
for (i = 0; i < 256; i++) *p++ = islower(i)? toupper(i) : tolower(i);
/* Then the character class tables. Don't try to be clever and save effort on
exclusive ones - in some locales things may be different. Note that the table
for "space" includes everything "isspace" gives, including VT in the default
locale. This makes it work for the POSIX class [:space:]. Note also that it is
possible for a character to be alnum or alpha without being lower or upper,
such as "male and female ordinals" (\xAA and \xBA) in the fr_FR locale (at
least under Debian Linux's locales as of 12/2005). So we must test for alnum
specially. */
memset(p, 0, cbit_length);
for (i = 0; i < 256; i++)
{
if (isdigit(i)) p[cbit_digit + i/8] |= 1 << (i&7);
if (isupper(i)) p[cbit_upper + i/8] |= 1 << (i&7);
if (islower(i)) p[cbit_lower + i/8] |= 1 << (i&7);
if (isalnum(i)) p[cbit_word + i/8] |= 1 << (i&7);
if (i == '_') p[cbit_word + i/8] |= 1 << (i&7);
if (isspace(i)) p[cbit_space + i/8] |= 1 << (i&7);
if (isxdigit(i))p[cbit_xdigit + i/8] |= 1 << (i&7);
if (isgraph(i)) p[cbit_graph + i/8] |= 1 << (i&7);
if (isprint(i)) p[cbit_print + i/8] |= 1 << (i&7);
if (ispunct(i)) p[cbit_punct + i/8] |= 1 << (i&7);
if (iscntrl(i)) p[cbit_cntrl + i/8] |= 1 << (i&7);
}
p += cbit_length;
/* Finally, the character type table. In this, we exclude VT from the white
space chars, because Perl doesn't recognize it as such for \s and for comments
within regexes. */
for (i = 0; i < 256; i++)
{
int x = 0;
if (i != 0x0b && isspace(i)) x += ctype_space;
if (isalpha(i)) x += ctype_letter;
if (isdigit(i)) x += ctype_digit;
if (isxdigit(i)) x += ctype_xdigit;
if (isalnum(i) || i == '_') x += ctype_word;
/* Note: strchr includes the terminating zero in the characters it considers.
In this instance, that is ok because we want binary zero to be flagged as a
meta-character, which in this sense is any character that terminates a run
of data characters. */
if (strchr("\\*+?{^.$|()[", i) != 0) x += ctype_meta;
*p++ = x;
}
return yield;
}
/* End of pcre_maketables.c */

View file

@ -0,0 +1,164 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2008 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains internal functions for testing newlines when more than
one kind of newline is to be recognized. When a newline is found, its length is
returned. In principle, we could implement several newline "types", each
referring to a different set of newline characters. At present, PCRE supports
only NLTYPE_FIXED, which gets handled without these functions, NLTYPE_ANYCRLF,
and NLTYPE_ANY. The full list of Unicode newline characters is taken from
http://unicode.org/unicode/reports/tr18/. */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "pcre_internal.h"
/*************************************************
* Check for newline at given position *
*************************************************/
/* It is guaranteed that the initial value of ptr is less than the end of the
string that is being processed.
Arguments:
ptr pointer to possible newline
type the newline type
endptr pointer to the end of the string
lenptr where to return the length
utf8 TRUE if in utf8 mode
Returns: TRUE or FALSE
*/
BOOL
_pcre_is_newline(const uschar *ptr, int type, const uschar *endptr,
int *lenptr, BOOL utf8)
{
int c;
if (utf8) { GETCHAR(c, ptr); } else c = *ptr;
if (type == NLTYPE_ANYCRLF) switch(c)
{
case 0x000a: *lenptr = 1; return TRUE; /* LF */
case 0x000d: *lenptr = (ptr < endptr - 1 && ptr[1] == 0x0a)? 2 : 1;
return TRUE; /* CR */
default: return FALSE;
}
/* NLTYPE_ANY */
else switch(c)
{
case 0x000a: /* LF */
case 0x000b: /* VT */
case 0x000c: *lenptr = 1; return TRUE; /* FF */
case 0x000d: *lenptr = (ptr < endptr - 1 && ptr[1] == 0x0a)? 2 : 1;
return TRUE; /* CR */
case 0x0085: *lenptr = utf8? 2 : 1; return TRUE; /* NEL */
case 0x2028: /* LS */
case 0x2029: *lenptr = 3; return TRUE; /* PS */
default: return FALSE;
}
}
/*************************************************
* Check for newline at previous position *
*************************************************/
/* It is guaranteed that the initial value of ptr is greater than the start of
the string that is being processed.
Arguments:
ptr pointer to possible newline
type the newline type
startptr pointer to the start of the string
lenptr where to return the length
utf8 TRUE if in utf8 mode
Returns: TRUE or FALSE
*/
BOOL
_pcre_was_newline(const uschar *ptr, int type, const uschar *startptr,
int *lenptr, BOOL utf8)
{
int c;
ptr--;
#ifdef SUPPORT_UTF8
if (utf8)
{
BACKCHAR(ptr);
GETCHAR(c, ptr);
}
else c = *ptr;
#else /* no UTF-8 support */
c = *ptr;
#endif /* SUPPORT_UTF8 */
if (type == NLTYPE_ANYCRLF) switch(c)
{
case 0x000a: *lenptr = (ptr > startptr && ptr[-1] == 0x0d)? 2 : 1;
return TRUE; /* LF */
case 0x000d: *lenptr = 1; return TRUE; /* CR */
default: return FALSE;
}
else switch(c)
{
case 0x000a: *lenptr = (ptr > startptr && ptr[-1] == 0x0d)? 2 : 1;
return TRUE; /* LF */
case 0x000b: /* VT */
case 0x000c: /* FF */
case 0x000d: *lenptr = 1; return TRUE; /* CR */
case 0x0085: *lenptr = utf8? 2 : 1; return TRUE; /* NEL */
case 0x2028: /* LS */
case 0x2029: *lenptr = 3; return TRUE; /* PS */
default: return FALSE;
}
}
/* End of pcre_newline.c */

View file

@ -0,0 +1,85 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2008 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This file contains a private PCRE function that converts an ordinal
character value into a UTF8 string. */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "pcre_internal.h"
/*************************************************
* Convert character value to UTF-8 *
*************************************************/
/* This function takes an integer value in the range 0 - 0x7fffffff
and encodes it as a UTF-8 character in 0 to 6 bytes.
Arguments:
cvalue the character value
buffer pointer to buffer for result - at least 6 bytes long
Returns: number of characters placed in the buffer
*/
int
_pcre_ord2utf8(int cvalue, uschar *buffer)
{
#ifdef SUPPORT_UTF8
register int i, j;
for (i = 0; i < _pcre_utf8_table1_size; i++)
if (cvalue <= _pcre_utf8_table1[i]) break;
buffer += i;
for (j = i; j > 0; j--)
{
*buffer-- = 0x80 | (cvalue & 0x3f);
cvalue >>= 6;
}
*buffer = _pcre_utf8_table2[i] | cvalue;
return i + 1;
#else
return 0; /* Keep compiler happy; this function won't ever be */
#endif /* called when SUPPORT_UTF8 is not defined. */
}
/* End of pcre_ord2utf8.c */

View file

@ -0,0 +1,512 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2008 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains a PCRE private debugging function for printing out the
internal form of a compiled regular expression, along with some supporting
local functions. This source file is used in two places:
(1) It is #included by pcre_compile.c when it is compiled in debugging mode
(DEBUG defined in pcre_internal.h). It is not included in production compiles.
(2) It is always #included by pcretest.c, which can be asked to print out a
compiled regex for debugging purposes. */
/* Macro that decides whether a character should be output as a literal or in
hexadecimal. We don't use isprint() because that can vary from system to system
(even without the use of locales) and we want the output always to be the same,
for testing purposes. This macro is used in pcretest as well as in this file. */
#define PRINTABLE(c) ((c) >= 32 && (c) < 127)
/* The table of operator names. */
static const char *OP_names[] = { OP_NAME_LIST };
/*************************************************
* Print single- or multi-byte character *
*************************************************/
static int
print_char(FILE *f, uschar *ptr, BOOL utf8)
{
int c = *ptr;
#ifndef SUPPORT_UTF8
utf8 = utf8; /* Avoid compiler warning */
if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
return 0;
#else
if (!utf8 || (c & 0xc0) != 0xc0)
{
if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
return 0;
}
else
{
int i;
int a = _pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */
int s = 6*a;
c = (c & _pcre_utf8_table3[a]) << s;
for (i = 1; i <= a; i++)
{
/* This is a check for malformed UTF-8; it should only occur if the sanity
check has been turned off. Rather than swallow random bytes, just stop if
we hit a bad one. Print it with \X instead of \x as an indication. */
if ((ptr[i] & 0xc0) != 0x80)
{
fprintf(f, "\\X{%x}", c);
return i - 1;
}
/* The byte is OK */
s -= 6;
c |= (ptr[i] & 0x3f) << s;
}
if (c < 128) fprintf(f, "\\x%02x", c); else fprintf(f, "\\x{%x}", c);
return a;
}
#endif
}
/*************************************************
* Find Unicode property name *
*************************************************/
static const char *
get_ucpname(int ptype, int pvalue)
{
#ifdef SUPPORT_UCP
int i;
for (i = _pcre_utt_size - 1; i >= 0; i--)
{
if (ptype == _pcre_utt[i].type && pvalue == _pcre_utt[i].value) break;
}
return (i >= 0)? _pcre_utt_names + _pcre_utt[i].name_offset : "??";
#else
/* It gets harder and harder to shut off unwanted compiler warnings. */
ptype = ptype * pvalue;
return (ptype == pvalue)? "??" : "??";
#endif
}
/*************************************************
* Print compiled regex *
*************************************************/
/* Make this function work for a regex with integers either byte order.
However, we assume that what we are passed is a compiled regex. The
print_lengths flag controls whether offsets and lengths of items are printed.
They can be turned off from pcretest so that automatic tests on bytecode can be
written that do not depend on the value of LINK_SIZE. */
static void
pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths)
{
real_pcre *re = (real_pcre *)external_re;
uschar *codestart, *code;
BOOL utf8;
unsigned int options = re->options;
int offset = re->name_table_offset;
int count = re->name_count;
int size = re->name_entry_size;
if (re->magic_number != MAGIC_NUMBER)
{
offset = ((offset << 8) & 0xff00) | ((offset >> 8) & 0xff);
count = ((count << 8) & 0xff00) | ((count >> 8) & 0xff);
size = ((size << 8) & 0xff00) | ((size >> 8) & 0xff);
options = ((options << 24) & 0xff000000) |
((options << 8) & 0x00ff0000) |
((options >> 8) & 0x0000ff00) |
((options >> 24) & 0x000000ff);
}
code = codestart = (uschar *)re + offset + count * size;
utf8 = (options & PCRE_UTF8) != 0;
for(;;)
{
uschar *ccode;
int c;
int extra = 0;
if (print_lengths)
fprintf(f, "%3d ", (int)(code - codestart));
else
fprintf(f, " ");
switch(*code)
{
case OP_END:
fprintf(f, " %s\n", OP_names[*code]);
fprintf(f, "------------------------------------------------------------------\n");
return;
case OP_OPT:
fprintf(f, " %.2x %s", code[1], OP_names[*code]);
break;
case OP_CHAR:
fprintf(f, " ");
do
{
code++;
code += 1 + print_char(f, code, utf8);
}
while (*code == OP_CHAR);
fprintf(f, "\n");
continue;
case OP_CHARNC:
fprintf(f, " NC ");
do
{
code++;
code += 1 + print_char(f, code, utf8);
}
while (*code == OP_CHARNC);
fprintf(f, "\n");
continue;
case OP_CBRA:
case OP_SCBRA:
if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
else fprintf(f, " ");
fprintf(f, "%s %d", OP_names[*code], GET2(code, 1+LINK_SIZE));
break;
case OP_BRA:
case OP_SBRA:
case OP_KETRMAX:
case OP_KETRMIN:
case OP_ALT:
case OP_KET:
case OP_ASSERT:
case OP_ASSERT_NOT:
case OP_ASSERTBACK:
case OP_ASSERTBACK_NOT:
case OP_ONCE:
case OP_COND:
case OP_SCOND:
case OP_REVERSE:
if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
else fprintf(f, " ");
fprintf(f, "%s", OP_names[*code]);
break;
case OP_CREF:
fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
break;
case OP_RREF:
c = GET2(code, 1);
if (c == RREF_ANY)
fprintf(f, " Cond recurse any");
else
fprintf(f, " Cond recurse %d", c);
break;
case OP_DEF:
fprintf(f, " Cond def");
break;
case OP_STAR:
case OP_MINSTAR:
case OP_POSSTAR:
case OP_PLUS:
case OP_MINPLUS:
case OP_POSPLUS:
case OP_QUERY:
case OP_MINQUERY:
case OP_POSQUERY:
case OP_TYPESTAR:
case OP_TYPEMINSTAR:
case OP_TYPEPOSSTAR:
case OP_TYPEPLUS:
case OP_TYPEMINPLUS:
case OP_TYPEPOSPLUS:
case OP_TYPEQUERY:
case OP_TYPEMINQUERY:
case OP_TYPEPOSQUERY:
fprintf(f, " ");
if (*code >= OP_TYPESTAR)
{
fprintf(f, "%s", OP_names[code[1]]);
if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
{
fprintf(f, " %s ", get_ucpname(code[2], code[3]));
extra = 2;
}
}
else extra = print_char(f, code+1, utf8);
fprintf(f, "%s", OP_names[*code]);
break;
case OP_EXACT:
case OP_UPTO:
case OP_MINUPTO:
case OP_POSUPTO:
fprintf(f, " ");
extra = print_char(f, code+3, utf8);
fprintf(f, "{");
if (*code != OP_EXACT) fprintf(f, "0,");
fprintf(f, "%d}", GET2(code,1));
if (*code == OP_MINUPTO) fprintf(f, "?");
else if (*code == OP_POSUPTO) fprintf(f, "+");
break;
case OP_TYPEEXACT:
case OP_TYPEUPTO:
case OP_TYPEMINUPTO:
case OP_TYPEPOSUPTO:
fprintf(f, " %s", OP_names[code[3]]);
if (code[3] == OP_PROP || code[3] == OP_NOTPROP)
{
fprintf(f, " %s ", get_ucpname(code[4], code[5]));
extra = 2;
}
fprintf(f, "{");
if (*code != OP_TYPEEXACT) fprintf(f, "0,");
fprintf(f, "%d}", GET2(code,1));
if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
else if (*code == OP_TYPEPOSUPTO) fprintf(f, "+");
break;
case OP_NOT:
c = code[1];
if (PRINTABLE(c)) fprintf(f, " [^%c]", c);
else fprintf(f, " [^\\x%02x]", c);
break;
case OP_NOTSTAR:
case OP_NOTMINSTAR:
case OP_NOTPOSSTAR:
case OP_NOTPLUS:
case OP_NOTMINPLUS:
case OP_NOTPOSPLUS:
case OP_NOTQUERY:
case OP_NOTMINQUERY:
case OP_NOTPOSQUERY:
c = code[1];
if (PRINTABLE(c)) fprintf(f, " [^%c]", c);
else fprintf(f, " [^\\x%02x]", c);
fprintf(f, "%s", OP_names[*code]);
break;
case OP_NOTEXACT:
case OP_NOTUPTO:
case OP_NOTMINUPTO:
case OP_NOTPOSUPTO:
c = code[3];
if (PRINTABLE(c)) fprintf(f, " [^%c]{", c);
else fprintf(f, " [^\\x%02x]{", c);
if (*code != OP_NOTEXACT) fprintf(f, "0,");
fprintf(f, "%d}", GET2(code,1));
if (*code == OP_NOTMINUPTO) fprintf(f, "?");
else if (*code == OP_NOTPOSUPTO) fprintf(f, "+");
break;
case OP_RECURSE:
if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
else fprintf(f, " ");
fprintf(f, "%s", OP_names[*code]);
break;
case OP_REF:
fprintf(f, " \\%d", GET2(code,1));
ccode = code + _pcre_OP_lengths[*code];
goto CLASS_REF_REPEAT;
case OP_CALLOUT:
fprintf(f, " %s %d %d %d", OP_names[*code], code[1], GET(code,2),
GET(code, 2 + LINK_SIZE));
break;
case OP_PROP:
case OP_NOTPROP:
fprintf(f, " %s %s", OP_names[*code], get_ucpname(code[1], code[2]));
break;
/* OP_XCLASS can only occur in UTF-8 mode. However, there's no harm in
having this code always here, and it makes it less messy without all those
#ifdefs. */
case OP_CLASS:
case OP_NCLASS:
case OP_XCLASS:
{
int i, min, max;
BOOL printmap;
fprintf(f, " [");
if (*code == OP_XCLASS)
{
extra = GET(code, 1);
ccode = code + LINK_SIZE + 1;
printmap = (*ccode & XCL_MAP) != 0;
if ((*ccode++ & XCL_NOT) != 0) fprintf(f, "^");
}
else
{
printmap = TRUE;
ccode = code + 1;
}
/* Print a bit map */
if (printmap)
{
for (i = 0; i < 256; i++)
{
if ((ccode[i/8] & (1 << (i&7))) != 0)
{
int j;
for (j = i+1; j < 256; j++)
if ((ccode[j/8] & (1 << (j&7))) == 0) break;
if (i == '-' || i == ']') fprintf(f, "\\");
if (PRINTABLE(i)) fprintf(f, "%c", i);
else fprintf(f, "\\x%02x", i);
if (--j > i)
{
if (j != i + 1) fprintf(f, "-");
if (j == '-' || j == ']') fprintf(f, "\\");
if (PRINTABLE(j)) fprintf(f, "%c", j);
else fprintf(f, "\\x%02x", j);
}
i = j;
}
}
ccode += 32;
}
/* For an XCLASS there is always some additional data */
if (*code == OP_XCLASS)
{
int ch;
while ((ch = *ccode++) != XCL_END)
{
if (ch == XCL_PROP)
{
int ptype = *ccode++;
int pvalue = *ccode++;
fprintf(f, "\\p{%s}", get_ucpname(ptype, pvalue));
}
else if (ch == XCL_NOTPROP)
{
int ptype = *ccode++;
int pvalue = *ccode++;
fprintf(f, "\\P{%s}", get_ucpname(ptype, pvalue));
}
else
{
ccode += 1 + print_char(f, ccode, TRUE);
if (ch == XCL_RANGE)
{
fprintf(f, "-");
ccode += 1 + print_char(f, ccode, TRUE);
}
}
}
}
/* Indicate a non-UTF8 class which was created by negation */
fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : "");
/* Handle repeats after a class or a back reference */
CLASS_REF_REPEAT:
switch(*ccode)
{
case OP_CRSTAR:
case OP_CRMINSTAR:
case OP_CRPLUS:
case OP_CRMINPLUS:
case OP_CRQUERY:
case OP_CRMINQUERY:
fprintf(f, "%s", OP_names[*ccode]);
extra += _pcre_OP_lengths[*ccode];
break;
case OP_CRRANGE:
case OP_CRMINRANGE:
min = GET2(ccode,1);
max = GET2(ccode,3);
if (max == 0) fprintf(f, "{%d,}", min);
else fprintf(f, "{%d,%d}", min, max);
if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
extra += _pcre_OP_lengths[*ccode];
break;
/* Do nothing if it's not a repeat; this code stops picky compilers
warning about the lack of a default code path. */
default:
break;
}
}
break;
/* Anything else is just an item with no data*/
default:
fprintf(f, " %s", OP_names[*code]);
break;
}
code += _pcre_OP_lengths[*code] + extra;
fprintf(f, "\n");
}
}
/* End of pcre_printint.src */

View file

@ -0,0 +1,82 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2008 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains the external function pcre_refcount(), which is an
auxiliary function that can be used to maintain a reference count in a compiled
pattern data block. This might be helpful in applications where the block is
shared by different users. */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "pcre_internal.h"
/*************************************************
* Maintain reference count *
*************************************************/
/* The reference count is a 16-bit field, initialized to zero. It is not
possible to transfer a non-zero count from one host to a different host that
has a different byte order - though I can't see why anyone in their right mind
would ever want to do that!
Arguments:
argument_re points to compiled code
adjust value to add to the count
Returns: the (possibly updated) count value (a non-negative number), or
a negative error number
*/
PCRE_EXP_DEFN int
pcre_refcount(pcre *argument_re, int adjust)
{
real_pcre *re = (real_pcre *)argument_re;
if (re == NULL) return PCRE_ERROR_NULL;
re->ref_count = (-adjust > re->ref_count)? 0 :
(adjust + re->ref_count > 65535)? 65535 :
re->ref_count + adjust;
return re->ref_count;
}
/* End of pcre_refcount.c */

View file

@ -0,0 +1,199 @@
// Copyright (c) 2005, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: Sanjay Ghemawat
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <vector>
#include <assert.h>
#include "pcrecpp_internal.h"
#include "pcre_scanner.h"
using std::vector;
namespace pcrecpp {
Scanner::Scanner()
: data_(),
input_(data_),
skip_(NULL),
should_skip_(false),
skip_repeat_(false),
save_comments_(false),
comments_(NULL),
comments_offset_(0) {
}
Scanner::Scanner(const string& in)
: data_(in),
input_(data_),
skip_(NULL),
should_skip_(false),
skip_repeat_(false),
save_comments_(false),
comments_(NULL),
comments_offset_(0) {
}
Scanner::~Scanner() {
delete skip_;
delete comments_;
}
void Scanner::SetSkipExpression(const char* re) {
delete skip_;
if (re != NULL) {
skip_ = new RE(re);
should_skip_ = true;
skip_repeat_ = true;
ConsumeSkip();
} else {
skip_ = NULL;
should_skip_ = false;
skip_repeat_ = false;
}
}
void Scanner::Skip(const char* re) {
delete skip_;
if (re != NULL) {
skip_ = new RE(re);
should_skip_ = true;
skip_repeat_ = false;
ConsumeSkip();
} else {
skip_ = NULL;
should_skip_ = false;
skip_repeat_ = false;
}
}
void Scanner::DisableSkip() {
assert(skip_ != NULL);
should_skip_ = false;
}
void Scanner::EnableSkip() {
assert(skip_ != NULL);
should_skip_ = true;
ConsumeSkip();
}
int Scanner::LineNumber() const {
// TODO: Make it more efficient by keeping track of the last point
// where we computed line numbers and counting newlines since then.
// We could use std:count, but not all systems have it. :-(
int count = 1;
for (const char* p = data_.data(); p < input_.data(); ++p)
if (*p == '\n')
++count;
return count;
}
int Scanner::Offset() const {
return input_.data() - data_.c_str();
}
bool Scanner::LookingAt(const RE& re) const {
int consumed;
return re.DoMatch(input_, RE::ANCHOR_START, &consumed, 0, 0);
}
bool Scanner::Consume(const RE& re,
const Arg& arg0,
const Arg& arg1,
const Arg& arg2) {
const bool result = re.Consume(&input_, arg0, arg1, arg2);
if (result && should_skip_) ConsumeSkip();
return result;
}
// helper function to consume *skip_ and honour save_comments_
void Scanner::ConsumeSkip() {
const char* start_data = input_.data();
while (skip_->Consume(&input_)) {
if (!skip_repeat_) {
// Only one skip allowed.
break;
}
}
if (save_comments_) {
if (comments_ == NULL) {
comments_ = new vector<StringPiece>;
}
// already pointing one past end, so no need to +1
int length = input_.data() - start_data;
if (length > 0) {
comments_->push_back(StringPiece(start_data, length));
}
}
}
void Scanner::GetComments(int start, int end, vector<StringPiece> *ranges) {
// short circuit out if we've not yet initialized comments_
// (e.g., when save_comments is false)
if (!comments_) {
return;
}
// TODO: if we guarantee that comments_ will contain StringPieces
// that are ordered by their start, then we can do a binary search
// for the first StringPiece at or past start and then scan for the
// ones contained in the range, quit early (use equal_range or
// lower_bound)
for (vector<StringPiece>::const_iterator it = comments_->begin();
it != comments_->end(); ++it) {
if ((it->data() >= data_.c_str() + start &&
it->data() + it->size() <= data_.c_str() + end)) {
ranges->push_back(*it);
}
}
}
void Scanner::GetNextComments(vector<StringPiece> *ranges) {
// short circuit out if we've not yet initialized comments_
// (e.g., when save_comments is false)
if (!comments_) {
return;
}
for (vector<StringPiece>::const_iterator it =
comments_->begin() + comments_offset_;
it != comments_->end(); ++it) {
ranges->push_back(*it);
++comments_offset_;
}
}
} // namespace pcrecpp

View file

@ -0,0 +1,172 @@
// Copyright (c) 2005, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: Sanjay Ghemawat
//
// Regular-expression based scanner for parsing an input stream.
//
// Example 1: parse a sequence of "var = number" entries from input:
//
// Scanner scanner(input);
// string var;
// int number;
// scanner.SetSkipExpression("\\s+"); // Skip any white space we encounter
// while (scanner.Consume("(\\w+) = (\\d+)", &var, &number)) {
// ...;
// }
#ifndef _PCRE_SCANNER_H
#define _PCRE_SCANNER_H
#include <assert.h>
#include <string>
#include <vector>
#include <pcrecpp.h>
#include <pcre_stringpiece.h>
namespace pcrecpp {
class PCRECPP_EXP_DEFN Scanner {
public:
Scanner();
explicit Scanner(const std::string& input);
~Scanner();
// Return current line number. The returned line-number is
// one-based. I.e. it returns 1 + the number of consumed newlines.
//
// Note: this method may be slow. It may take time proportional to
// the size of the input.
int LineNumber() const;
// Return the byte-offset that the scanner is looking in the
// input data;
int Offset() const;
// Return true iff the start of the remaining input matches "re"
bool LookingAt(const RE& re) const;
// Return true iff all of the following are true
// a. the start of the remaining input matches "re",
// b. if any arguments are supplied, matched sub-patterns can be
// parsed and stored into the arguments.
// If it returns true, it skips over the matched input and any
// following input that matches the "skip" regular expression.
bool Consume(const RE& re,
const Arg& arg0 = RE::no_arg,
const Arg& arg1 = RE::no_arg,
const Arg& arg2 = RE::no_arg
// TODO: Allow more arguments?
);
// Set the "skip" regular expression. If after consuming some data,
// a prefix of the input matches this RE, it is automatically
// skipped. For example, a programming language scanner would use
// a skip RE that matches white space and comments.
//
// scanner.SetSkipExpression("\\s+|//.*|/[*](.|\n)*?[*]/");
//
// Skipping repeats as long as it succeeds. We used to let people do
// this by writing "(...)*" in the regular expression, but that added
// up to lots of recursive calls within the pcre library, so now we
// control repetition explicitly via the function call API.
//
// You can pass NULL for "re" if you do not want any data to be skipped.
void Skip(const char* re); // DEPRECATED; does *not* repeat
void SetSkipExpression(const char* re);
// Temporarily pause "skip"ing. This
// Skip("Foo"); code ; DisableSkip(); code; EnableSkip()
// is similar to
// Skip("Foo"); code ; Skip(NULL); code ; Skip("Foo");
// but avoids creating/deleting new RE objects.
void DisableSkip();
// Reenable previously paused skipping. Any prefix of the input
// that matches the skip pattern is immediately dropped.
void EnableSkip();
/***** Special wrappers around SetSkip() for some common idioms *****/
// Arranges to skip whitespace, C comments, C++ comments.
// The overall RE is a disjunction of the following REs:
// \\s whitespace
// //.*\n C++ comment
// /[*](.|\n)*?[*]/ C comment (x*? means minimal repetitions of x)
// We get repetition via the semantics of SetSkipExpression, not by using *
void SkipCXXComments() {
SetSkipExpression("\\s|//.*\n|/[*](?:\n|.)*?[*]/");
}
void set_save_comments(bool comments) {
save_comments_ = comments;
}
bool save_comments() {
return save_comments_;
}
// Append to vector ranges the comments found in the
// byte range [start,end] (inclusive) of the input data.
// Only comments that were extracted entirely within that
// range are returned: no range splitting of atomically-extracted
// comments is performed.
void GetComments(int start, int end, std::vector<StringPiece> *ranges);
// Append to vector ranges the comments added
// since the last time this was called. This
// functionality is provided for efficiency when
// interleaving scanning with parsing.
void GetNextComments(std::vector<StringPiece> *ranges);
private:
std::string data_; // All the input data
StringPiece input_; // Unprocessed input
RE* skip_; // If non-NULL, RE for skipping input
bool should_skip_; // If true, use skip_
bool skip_repeat_; // If true, repeat skip_ as long as it works
bool save_comments_; // If true, aggregate the skip expression
// the skipped comments
// TODO: later consider requiring that the StringPieces be added
// in order by their start position
std::vector<StringPiece> *comments_;
// the offset into comments_ that has been returned by GetNextComments
int comments_offset_;
// helper function to consume *skip_ and honour
// save_comments_
void ConsumeSkip();
};
} // namespace pcrecpp
#endif /* _PCRE_SCANNER_H */

View file

@ -0,0 +1,43 @@
// Copyright (c) 2005, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: wilsonh@google.com (Wilson Hsieh)
//
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <iostream>
#include "pcrecpp_internal.h"
#include "pcre_stringpiece.h"
std::ostream& operator<<(std::ostream& o, const pcrecpp::StringPiece& piece) {
return (o << piece.as_string());
}

View file

@ -0,0 +1,180 @@
// Copyright (c) 2005, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: Sanjay Ghemawat
//
// A string like object that points into another piece of memory.
// Useful for providing an interface that allows clients to easily
// pass in either a "const char*" or a "string".
//
// Arghh! I wish C++ literals were automatically of type "string".
#ifndef _PCRE_STRINGPIECE_H
#define _PCRE_STRINGPIECE_H
#include <string.h>
#include <string>
#include <iosfwd> // for ostream forward-declaration
// Don't use bits/type_traits.h on Linux - Andrew Galante, GG 8/2/2009
#if !defined(_MSC_VER) && !defined(__CELLOS_LV2__) && !defined(__APPLE__) && !defined(__linux__)
#ifdef __MINGW32__
#define HAVE_TYPE_TRAITS
#include <bits/type_traits.h>
#else
#define HAVE_TYPE_TRAITS
#include <type_traits.h>
#endif
#endif
#include <pcre.h>
using std::string;
namespace pcrecpp {
class PCRECPP_EXP_DEFN StringPiece {
private:
const char* ptr_;
int length_;
public:
// We provide non-explicit singleton constructors so users can pass
// in a "const char*" or a "string" wherever a "StringPiece" is
// expected.
StringPiece()
: ptr_(NULL), length_(0) { }
StringPiece(const char* str)
: ptr_(str), length_(static_cast<int>(strlen(ptr_))) { }
StringPiece(const unsigned char* str)
: ptr_(reinterpret_cast<const char*>(str)),
length_(static_cast<int>(strlen(ptr_))) { }
StringPiece(const string& str)
: ptr_(str.data()), length_(static_cast<int>(str.size())) { }
StringPiece(const char* offset, int len)
: ptr_(offset), length_(len) { }
// data() may return a pointer to a buffer with embedded NULs, and the
// returned buffer may or may not be null terminated. Therefore it is
// typically a mistake to pass data() to a routine that expects a NUL
// terminated string. Use "as_string().c_str()" if you really need to do
// this. Or better yet, change your routine so it does not rely on NUL
// termination.
const char* data() const { return ptr_; }
int size() const { return length_; }
bool empty() const { return length_ == 0; }
void clear() { ptr_ = NULL; length_ = 0; }
void set(const char* buffer, int len) { ptr_ = buffer; length_ = len; }
void set(const char* str) {
ptr_ = str;
length_ = static_cast<int>(strlen(str));
}
void set(const void* buffer, int len) {
ptr_ = reinterpret_cast<const char*>(buffer);
length_ = len;
}
char operator[](int i) const { return ptr_[i]; }
void remove_prefix(int n) {
ptr_ += n;
length_ -= n;
}
void remove_suffix(int n) {
length_ -= n;
}
bool operator==(const StringPiece& x) const {
return ((length_ == x.length_) &&
(memcmp(ptr_, x.ptr_, length_) == 0));
}
bool operator!=(const StringPiece& x) const {
return !(*this == x);
}
#define STRINGPIECE_BINARY_PREDICATE(cmp,auxcmp) \
bool operator cmp (const StringPiece& x) const { \
int r = memcmp(ptr_, x.ptr_, length_ < x.length_ ? length_ : x.length_); \
return ((r auxcmp 0) || ((r == 0) && (length_ cmp x.length_))); \
}
STRINGPIECE_BINARY_PREDICATE(<, <);
STRINGPIECE_BINARY_PREDICATE(<=, <);
STRINGPIECE_BINARY_PREDICATE(>=, >);
STRINGPIECE_BINARY_PREDICATE(>, >);
#undef STRINGPIECE_BINARY_PREDICATE
int compare(const StringPiece& x) const {
int r = memcmp(ptr_, x.ptr_, length_ < x.length_ ? length_ : x.length_);
if (r == 0) {
if (length_ < x.length_) r = -1;
else if (length_ > x.length_) r = +1;
}
return r;
}
string as_string() const {
return string(data(), size());
}
void CopyToString(string* target) const {
target->assign(ptr_, length_);
}
// Does "this" start with "x"
bool starts_with(const StringPiece& x) const {
return ((length_ >= x.length_) && (memcmp(ptr_, x.ptr_, x.length_) == 0));
}
};
} // namespace pcrecpp
// ------------------------------------------------------------------
// Functions used to create STL containers that use StringPiece
// Remember that a StringPiece's lifetime had better be less than
// that of the underlying string or char*. If it is not, then you
// cannot safely store a StringPiece into an STL container
// ------------------------------------------------------------------
#ifdef HAVE_TYPE_TRAITS
// This makes vector<StringPiece> really fast for some STL implementations
template<> struct __type_traits<pcrecpp::StringPiece> {
typedef __true_type has_trivial_default_constructor;
typedef __true_type has_trivial_copy_constructor;
typedef __true_type has_trivial_assignment_operator;
typedef __true_type has_trivial_destructor;
typedef __true_type is_POD_type;
};
#endif
// allow StringPiece to be logged
std::ostream& operator<<(std::ostream& o, const pcrecpp::StringPiece& piece);
#endif /* _PCRE_STRINGPIECE_H */

View file

@ -0,0 +1,579 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2008 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains the external function pcre_study(), along with local
supporting functions. */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "pcre_internal.h"
/* Returns from set_start_bits() */
enum { SSB_FAIL, SSB_DONE, SSB_CONTINUE };
/*************************************************
* Set a bit and maybe its alternate case *
*************************************************/
/* Given a character, set its bit in the table, and also the bit for the other
version of a letter if we are caseless.
Arguments:
start_bits points to the bit map
c is the character
caseless the caseless flag
cd the block with char table pointers
Returns: nothing
*/
static void
set_bit(uschar *start_bits, unsigned int c, BOOL caseless, compile_data *cd)
{
start_bits[c/8] |= (1 << (c&7));
if (caseless && (cd->ctypes[c] & ctype_letter) != 0)
start_bits[cd->fcc[c]/8] |= (1 << (cd->fcc[c]&7));
}
/*************************************************
* Create bitmap of starting bytes *
*************************************************/
/* This function scans a compiled unanchored expression recursively and
attempts to build a bitmap of the set of possible starting bytes. As time goes
by, we may be able to get more clever at doing this. The SSB_CONTINUE return is
useful for parenthesized groups in patterns such as (a*)b where the group
provides some optional starting bytes but scanning must continue at the outer
level to find at least one mandatory byte. At the outermost level, this
function fails unless the result is SSB_DONE.
Arguments:
code points to an expression
start_bits points to a 32-byte table, initialized to 0
caseless the current state of the caseless flag
utf8 TRUE if in UTF-8 mode
cd the block with char table pointers
Returns: SSB_FAIL => Failed to find any starting bytes
SSB_DONE => Found mandatory starting bytes
SSB_CONTINUE => Found optional starting bytes
*/
static int
set_start_bits(const uschar *code, uschar *start_bits, BOOL caseless,
BOOL utf8, compile_data *cd)
{
register int c;
int yield = SSB_DONE;
#if 0
/* ========================================================================= */
/* The following comment and code was inserted in January 1999. In May 2006,
when it was observed to cause compiler warnings about unused values, I took it
out again. If anybody is still using OS/2, they will have to put it back
manually. */
/* This next statement and the later reference to dummy are here in order to
trick the optimizer of the IBM C compiler for OS/2 into generating correct
code. Apparently IBM isn't going to fix the problem, and we would rather not
disable optimization (in this module it actually makes a big difference, and
the pcre module can use all the optimization it can get). */
volatile int dummy;
/* ========================================================================= */
#endif
do
{
const uschar *tcode = code + (((int)*code == OP_CBRA)? 3:1) + LINK_SIZE;
BOOL try_next = TRUE;
while (try_next) /* Loop for items in this branch */
{
int rc;
switch(*tcode)
{
/* Fail if we reach something we don't understand */
default:
return SSB_FAIL;
/* If we hit a bracket or a positive lookahead assertion, recurse to set
bits from within the subpattern. If it can't find anything, we have to
give up. If it finds some mandatory character(s), we are done for this
branch. Otherwise, carry on scanning after the subpattern. */
case OP_BRA:
case OP_SBRA:
case OP_CBRA:
case OP_SCBRA:
case OP_ONCE:
case OP_ASSERT:
rc = set_start_bits(tcode, start_bits, caseless, utf8, cd);
if (rc == SSB_FAIL) return SSB_FAIL;
if (rc == SSB_DONE) try_next = FALSE; else
{
do tcode += GET(tcode, 1); while (*tcode == OP_ALT);
tcode += 1 + LINK_SIZE;
}
break;
/* If we hit ALT or KET, it means we haven't found anything mandatory in
this branch, though we might have found something optional. For ALT, we
continue with the next alternative, but we have to arrange that the final
result from subpattern is SSB_CONTINUE rather than SSB_DONE. For KET,
return SSB_CONTINUE: if this is the top level, that indicates failure,
but after a nested subpattern, it causes scanning to continue. */
case OP_ALT:
yield = SSB_CONTINUE;
try_next = FALSE;
break;
case OP_KET:
case OP_KETRMAX:
case OP_KETRMIN:
return SSB_CONTINUE;
/* Skip over callout */
case OP_CALLOUT:
tcode += 2 + 2*LINK_SIZE;
break;
/* Skip over lookbehind and negative lookahead assertions */
case OP_ASSERT_NOT:
case OP_ASSERTBACK:
case OP_ASSERTBACK_NOT:
do tcode += GET(tcode, 1); while (*tcode == OP_ALT);
tcode += 1 + LINK_SIZE;
break;
/* Skip over an option setting, changing the caseless flag */
case OP_OPT:
caseless = (tcode[1] & PCRE_CASELESS) != 0;
tcode += 2;
break;
/* BRAZERO does the bracket, but carries on. */
case OP_BRAZERO:
case OP_BRAMINZERO:
if (set_start_bits(++tcode, start_bits, caseless, utf8, cd) == SSB_FAIL)
return SSB_FAIL;
/* =========================================================================
See the comment at the head of this function concerning the next line,
which was an old fudge for the benefit of OS/2.
dummy = 1;
========================================================================= */
do tcode += GET(tcode,1); while (*tcode == OP_ALT);
tcode += 1 + LINK_SIZE;
break;
/* Single-char * or ? sets the bit and tries the next item */
case OP_STAR:
case OP_MINSTAR:
case OP_POSSTAR:
case OP_QUERY:
case OP_MINQUERY:
case OP_POSQUERY:
set_bit(start_bits, tcode[1], caseless, cd);
tcode += 2;
#ifdef SUPPORT_UTF8
if (utf8 && tcode[-1] >= 0xc0)
tcode += _pcre_utf8_table4[tcode[-1] & 0x3f];
#endif
break;
/* Single-char upto sets the bit and tries the next */
case OP_UPTO:
case OP_MINUPTO:
case OP_POSUPTO:
set_bit(start_bits, tcode[3], caseless, cd);
tcode += 4;
#ifdef SUPPORT_UTF8
if (utf8 && tcode[-1] >= 0xc0)
tcode += _pcre_utf8_table4[tcode[-1] & 0x3f];
#endif
break;
/* At least one single char sets the bit and stops */
case OP_EXACT: /* Fall through */
tcode += 2;
case OP_CHAR:
case OP_CHARNC:
case OP_PLUS:
case OP_MINPLUS:
case OP_POSPLUS:
set_bit(start_bits, tcode[1], caseless, cd);
try_next = FALSE;
break;
/* Single character type sets the bits and stops */
case OP_NOT_DIGIT:
for (c = 0; c < 32; c++)
start_bits[c] |= ~cd->cbits[c+cbit_digit];
try_next = FALSE;
break;
case OP_DIGIT:
for (c = 0; c < 32; c++)
start_bits[c] |= cd->cbits[c+cbit_digit];
try_next = FALSE;
break;
/* The cbit_space table has vertical tab as whitespace; we have to
discard it. */
case OP_NOT_WHITESPACE:
for (c = 0; c < 32; c++)
{
int d = cd->cbits[c+cbit_space];
if (c == 1) d &= ~0x08;
start_bits[c] |= ~d;
}
try_next = FALSE;
break;
/* The cbit_space table has vertical tab as whitespace; we have to
discard it. */
case OP_WHITESPACE:
for (c = 0; c < 32; c++)
{
int d = cd->cbits[c+cbit_space];
if (c == 1) d &= ~0x08;
start_bits[c] |= d;
}
try_next = FALSE;
break;
case OP_NOT_WORDCHAR:
for (c = 0; c < 32; c++)
start_bits[c] |= ~cd->cbits[c+cbit_word];
try_next = FALSE;
break;
case OP_WORDCHAR:
for (c = 0; c < 32; c++)
start_bits[c] |= cd->cbits[c+cbit_word];
try_next = FALSE;
break;
/* One or more character type fudges the pointer and restarts, knowing
it will hit a single character type and stop there. */
case OP_TYPEPLUS:
case OP_TYPEMINPLUS:
tcode++;
break;
case OP_TYPEEXACT:
tcode += 3;
break;
/* Zero or more repeats of character types set the bits and then
try again. */
case OP_TYPEUPTO:
case OP_TYPEMINUPTO:
case OP_TYPEPOSUPTO:
tcode += 2; /* Fall through */
case OP_TYPESTAR:
case OP_TYPEMINSTAR:
case OP_TYPEPOSSTAR:
case OP_TYPEQUERY:
case OP_TYPEMINQUERY:
case OP_TYPEPOSQUERY:
switch(tcode[1])
{
case OP_ANY:
return SSB_FAIL;
case OP_NOT_DIGIT:
for (c = 0; c < 32; c++)
start_bits[c] |= ~cd->cbits[c+cbit_digit];
break;
case OP_DIGIT:
for (c = 0; c < 32; c++)
start_bits[c] |= cd->cbits[c+cbit_digit];
break;
/* The cbit_space table has vertical tab as whitespace; we have to
discard it. */
case OP_NOT_WHITESPACE:
for (c = 0; c < 32; c++)
{
int d = cd->cbits[c+cbit_space];
if (c == 1) d &= ~0x08;
start_bits[c] |= ~d;
}
break;
/* The cbit_space table has vertical tab as whitespace; we have to
discard it. */
case OP_WHITESPACE:
for (c = 0; c < 32; c++)
{
int d = cd->cbits[c+cbit_space];
if (c == 1) d &= ~0x08;
start_bits[c] |= d;
}
break;
case OP_NOT_WORDCHAR:
for (c = 0; c < 32; c++)
start_bits[c] |= ~cd->cbits[c+cbit_word];
break;
case OP_WORDCHAR:
for (c = 0; c < 32; c++)
start_bits[c] |= cd->cbits[c+cbit_word];
break;
}
tcode += 2;
break;
/* Character class where all the information is in a bit map: set the
bits and either carry on or not, according to the repeat count. If it was
a negative class, and we are operating with UTF-8 characters, any byte
with a value >= 0xc4 is a potentially valid starter because it starts a
character with a value > 255. */
case OP_NCLASS:
#ifdef SUPPORT_UTF8
if (utf8)
{
start_bits[24] |= 0xf0; /* Bits for 0xc4 - 0xc8 */
memset(start_bits+25, 0xff, 7); /* Bits for 0xc9 - 0xff */
}
#endif
/* Fall through */
case OP_CLASS:
{
tcode++;
/* In UTF-8 mode, the bits in a bit map correspond to character
values, not to byte values. However, the bit map we are constructing is
for byte values. So we have to do a conversion for characters whose
value is > 127. In fact, there are only two possible starting bytes for
characters in the range 128 - 255. */
#ifdef SUPPORT_UTF8
if (utf8)
{
for (c = 0; c < 16; c++) start_bits[c] |= tcode[c];
for (c = 128; c < 256; c++)
{
if ((tcode[c/8] && (1 << (c&7))) != 0)
{
int d = (c >> 6) | 0xc0; /* Set bit for this starter */
start_bits[d/8] |= (1 << (d&7)); /* and then skip on to the */
c = (c & 0xc0) + 0x40 - 1; /* next relevant character. */
}
}
}
/* In non-UTF-8 mode, the two bit maps are completely compatible. */
else
#endif
{
for (c = 0; c < 32; c++) start_bits[c] |= tcode[c];
}
/* Advance past the bit map, and act on what follows */
tcode += 32;
switch (*tcode)
{
case OP_CRSTAR:
case OP_CRMINSTAR:
case OP_CRQUERY:
case OP_CRMINQUERY:
tcode++;
break;
case OP_CRRANGE:
case OP_CRMINRANGE:
if (((tcode[1] << 8) + tcode[2]) == 0) tcode += 5;
else try_next = FALSE;
break;
default:
try_next = FALSE;
break;
}
}
break; /* End of bitmap class handling */
} /* End of switch */
} /* End of try_next loop */
code += GET(code, 1); /* Advance to next branch */
}
while (*code == OP_ALT);
return yield;
}
/*************************************************
* Study a compiled expression *
*************************************************/
/* This function is handed a compiled expression that it must study to produce
information that will speed up the matching. It returns a pcre_extra block
which then gets handed back to pcre_exec().
Arguments:
re points to the compiled expression
options contains option bits
errorptr points to where to place error messages;
set NULL unless error
Returns: pointer to a pcre_extra block, with study_data filled in and the
appropriate flag set;
NULL on error or if no optimization possible
*/
PCRE_EXP_DEFN pcre_extra *
pcre_study(const pcre *external_re, int options, const char **errorptr)
{
uschar start_bits[32];
pcre_extra *extra;
pcre_study_data *study;
const uschar *tables;
uschar *code;
compile_data compile_block;
const real_pcre *re = (const real_pcre *)external_re;
*errorptr = NULL;
if (re == NULL || re->magic_number != MAGIC_NUMBER)
{
*errorptr = "argument is not a compiled regular expression";
return NULL;
}
if ((options & ~PUBLIC_STUDY_OPTIONS) != 0)
{
*errorptr = "unknown or incorrect option bit(s) set";
return NULL;
}
code = (uschar *)re + re->name_table_offset +
(re->name_count * re->name_entry_size);
/* For an anchored pattern, or an unanchored pattern that has a first char, or
a multiline pattern that matches only at "line starts", no further processing
at present. */
if ((re->options & PCRE_ANCHORED) != 0 ||
(re->flags & (PCRE_FIRSTSET|PCRE_STARTLINE)) != 0)
return NULL;
/* Set the character tables in the block that is passed around */
tables = re->tables;
if (tables == NULL)
(void)pcre_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES,
(void *)(&tables));
compile_block.lcc = tables + lcc_offset;
compile_block.fcc = tables + fcc_offset;
compile_block.cbits = tables + cbits_offset;
compile_block.ctypes = tables + ctypes_offset;
/* See if we can find a fixed set of initial characters for the pattern. */
memset(start_bits, 0, 32 * sizeof(uschar));
if (set_start_bits(code, start_bits, (re->options & PCRE_CASELESS) != 0,
(re->options & PCRE_UTF8) != 0, &compile_block) != SSB_DONE) return NULL;
/* Get a pcre_extra block and a pcre_study_data block. The study data is put in
the latter, which is pointed to by the former, which may also get additional
data set later by the calling program. At the moment, the size of
pcre_study_data is fixed. We nevertheless save it in a field for returning via
the pcre_fullinfo() function so that if it becomes variable in the future, we
don't have to change that code. */
extra = (pcre_extra *)(pcre_malloc)
(sizeof(pcre_extra) + sizeof(pcre_study_data));
if (extra == NULL)
{
*errorptr = "failed to get memory";
return NULL;
}
study = (pcre_study_data *)((char *)extra + sizeof(pcre_extra));
extra->flags = PCRE_EXTRA_STUDY_DATA;
extra->study_data = study;
study->size = sizeof(pcre_study_data);
study->options = PCRE_STUDY_MAPPED;
memcpy(study->start_bits, start_bits, sizeof(start_bits));
return extra;
}
/* End of pcre_study.c */

View file

@ -0,0 +1,318 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2008 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains some fixed tables that are used by more than one of the
PCRE code modules. The tables are also #included by the pcretest program, which
uses macros to change their names from _pcre_xxx to xxxx, thereby avoiding name
clashes with the library. */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "pcre_internal.h"
/* Table of sizes for the fixed-length opcodes. It's defined in a macro so that
the definition is next to the definition of the opcodes in pcre_internal.h. */
const uschar _pcre_OP_lengths[] = { OP_LENGTHS };
/*************************************************
* Tables for UTF-8 support *
*************************************************/
/* These are the breakpoints for different numbers of bytes in a UTF-8
character. */
#ifdef SUPPORT_UTF8
const int _pcre_utf8_table1[] =
{ 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff};
const int _pcre_utf8_table1_size = sizeof(_pcre_utf8_table1)/sizeof(int);
/* These are the indicator bits and the mask for the data bits to set in the
first byte of a character, indexed by the number of additional bytes. */
const int _pcre_utf8_table2[] = { 0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
const int _pcre_utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
/* Table of the number of extra bytes, indexed by the first byte masked with
0x3f. The highest number for a valid UTF-8 first byte is in fact 0x3d. */
const uschar _pcre_utf8_table4[] = {
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
/* The pcre_utt[] table below translates Unicode property names into type and
code values. It is searched by binary chop, so must be in collating sequence of
name. Originally, the table contained pointers to the name strings in the first
field of each entry. However, that leads to a large number of relocations when
a shared library is dynamically loaded. A significant reduction is made by
putting all the names into a single, large string and then using offsets in the
table itself. Maintenance is more error-prone, but frequent changes to this
data is unlikely. */
const char _pcre_utt_names[] =
"Any\0"
"Arabic\0"
"Armenian\0"
"Balinese\0"
"Bengali\0"
"Bopomofo\0"
"Braille\0"
"Buginese\0"
"Buhid\0"
"C\0"
"Canadian_Aboriginal\0"
"Cc\0"
"Cf\0"
"Cherokee\0"
"Cn\0"
"Co\0"
"Common\0"
"Coptic\0"
"Cs\0"
"Cuneiform\0"
"Cypriot\0"
"Cyrillic\0"
"Deseret\0"
"Devanagari\0"
"Ethiopic\0"
"Georgian\0"
"Glagolitic\0"
"Gothic\0"
"Greek\0"
"Gujarati\0"
"Gurmukhi\0"
"Han\0"
"Hangul\0"
"Hanunoo\0"
"Hebrew\0"
"Hiragana\0"
"Inherited\0"
"Kannada\0"
"Katakana\0"
"Kharoshthi\0"
"Khmer\0"
"L\0"
"L&\0"
"Lao\0"
"Latin\0"
"Limbu\0"
"Linear_B\0"
"Ll\0"
"Lm\0"
"Lo\0"
"Lt\0"
"Lu\0"
"M\0"
"Malayalam\0"
"Mc\0"
"Me\0"
"Mn\0"
"Mongolian\0"
"Myanmar\0"
"N\0"
"Nd\0"
"New_Tai_Lue\0"
"Nko\0"
"Nl\0"
"No\0"
"Ogham\0"
"Old_Italic\0"
"Old_Persian\0"
"Oriya\0"
"Osmanya\0"
"P\0"
"Pc\0"
"Pd\0"
"Pe\0"
"Pf\0"
"Phags_Pa\0"
"Phoenician\0"
"Pi\0"
"Po\0"
"Ps\0"
"Runic\0"
"S\0"
"Sc\0"
"Shavian\0"
"Sinhala\0"
"Sk\0"
"Sm\0"
"So\0"
"Syloti_Nagri\0"
"Syriac\0"
"Tagalog\0"
"Tagbanwa\0"
"Tai_Le\0"
"Tamil\0"
"Telugu\0"
"Thaana\0"
"Thai\0"
"Tibetan\0"
"Tifinagh\0"
"Ugaritic\0"
"Yi\0"
"Z\0"
"Zl\0"
"Zp\0"
"Zs\0";
const ucp_type_table _pcre_utt[] = {
{ 0, PT_ANY, 0 },
{ 4, PT_SC, ucp_Arabic },
{ 11, PT_SC, ucp_Armenian },
{ 20, PT_SC, ucp_Balinese },
{ 29, PT_SC, ucp_Bengali },
{ 37, PT_SC, ucp_Bopomofo },
{ 46, PT_SC, ucp_Braille },
{ 54, PT_SC, ucp_Buginese },
{ 63, PT_SC, ucp_Buhid },
{ 69, PT_GC, ucp_C },
{ 71, PT_SC, ucp_Canadian_Aboriginal },
{ 91, PT_PC, ucp_Cc },
{ 94, PT_PC, ucp_Cf },
{ 97, PT_SC, ucp_Cherokee },
{ 106, PT_PC, ucp_Cn },
{ 109, PT_PC, ucp_Co },
{ 112, PT_SC, ucp_Common },
{ 119, PT_SC, ucp_Coptic },
{ 126, PT_PC, ucp_Cs },
{ 129, PT_SC, ucp_Cuneiform },
{ 139, PT_SC, ucp_Cypriot },
{ 147, PT_SC, ucp_Cyrillic },
{ 156, PT_SC, ucp_Deseret },
{ 164, PT_SC, ucp_Devanagari },
{ 175, PT_SC, ucp_Ethiopic },
{ 184, PT_SC, ucp_Georgian },
{ 193, PT_SC, ucp_Glagolitic },
{ 204, PT_SC, ucp_Gothic },
{ 211, PT_SC, ucp_Greek },
{ 217, PT_SC, ucp_Gujarati },
{ 226, PT_SC, ucp_Gurmukhi },
{ 235, PT_SC, ucp_Han },
{ 239, PT_SC, ucp_Hangul },
{ 246, PT_SC, ucp_Hanunoo },
{ 254, PT_SC, ucp_Hebrew },
{ 261, PT_SC, ucp_Hiragana },
{ 270, PT_SC, ucp_Inherited },
{ 280, PT_SC, ucp_Kannada },
{ 288, PT_SC, ucp_Katakana },
{ 297, PT_SC, ucp_Kharoshthi },
{ 308, PT_SC, ucp_Khmer },
{ 314, PT_GC, ucp_L },
{ 316, PT_LAMP, 0 },
{ 319, PT_SC, ucp_Lao },
{ 323, PT_SC, ucp_Latin },
{ 329, PT_SC, ucp_Limbu },
{ 335, PT_SC, ucp_Linear_B },
{ 344, PT_PC, ucp_Ll },
{ 347, PT_PC, ucp_Lm },
{ 350, PT_PC, ucp_Lo },
{ 353, PT_PC, ucp_Lt },
{ 356, PT_PC, ucp_Lu },
{ 359, PT_GC, ucp_M },
{ 361, PT_SC, ucp_Malayalam },
{ 371, PT_PC, ucp_Mc },
{ 374, PT_PC, ucp_Me },
{ 377, PT_PC, ucp_Mn },
{ 380, PT_SC, ucp_Mongolian },
{ 390, PT_SC, ucp_Myanmar },
{ 398, PT_GC, ucp_N },
{ 400, PT_PC, ucp_Nd },
{ 403, PT_SC, ucp_New_Tai_Lue },
{ 415, PT_SC, ucp_Nko },
{ 419, PT_PC, ucp_Nl },
{ 422, PT_PC, ucp_No },
{ 425, PT_SC, ucp_Ogham },
{ 431, PT_SC, ucp_Old_Italic },
{ 442, PT_SC, ucp_Old_Persian },
{ 454, PT_SC, ucp_Oriya },
{ 460, PT_SC, ucp_Osmanya },
{ 468, PT_GC, ucp_P },
{ 470, PT_PC, ucp_Pc },
{ 473, PT_PC, ucp_Pd },
{ 476, PT_PC, ucp_Pe },
{ 479, PT_PC, ucp_Pf },
{ 482, PT_SC, ucp_Phags_Pa },
{ 491, PT_SC, ucp_Phoenician },
{ 502, PT_PC, ucp_Pi },
{ 505, PT_PC, ucp_Po },
{ 508, PT_PC, ucp_Ps },
{ 511, PT_SC, ucp_Runic },
{ 517, PT_GC, ucp_S },
{ 519, PT_PC, ucp_Sc },
{ 522, PT_SC, ucp_Shavian },
{ 530, PT_SC, ucp_Sinhala },
{ 538, PT_PC, ucp_Sk },
{ 541, PT_PC, ucp_Sm },
{ 544, PT_PC, ucp_So },
{ 547, PT_SC, ucp_Syloti_Nagri },
{ 560, PT_SC, ucp_Syriac },
{ 567, PT_SC, ucp_Tagalog },
{ 575, PT_SC, ucp_Tagbanwa },
{ 584, PT_SC, ucp_Tai_Le },
{ 591, PT_SC, ucp_Tamil },
{ 597, PT_SC, ucp_Telugu },
{ 604, PT_SC, ucp_Thaana },
{ 611, PT_SC, ucp_Thai },
{ 616, PT_SC, ucp_Tibetan },
{ 624, PT_SC, ucp_Tifinagh },
{ 633, PT_SC, ucp_Ugaritic },
{ 642, PT_SC, ucp_Yi },
{ 645, PT_GC, ucp_Z },
{ 647, PT_PC, ucp_Zl },
{ 650, PT_PC, ucp_Zp },
{ 653, PT_PC, ucp_Zs }
};
const int _pcre_utt_size = sizeof(_pcre_utt)/sizeof(ucp_type_table);
#endif /* SUPPORT_UTF8 */
/* End of pcre_tables.c */

View file

@ -0,0 +1,137 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2008 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains an internal function that tests a compiled pattern to
see if it was compiled with the opposite endianness. If so, it uses an
auxiliary local function to flip the appropriate bytes. */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "pcre_internal.h"
/*************************************************
* Flip bytes in an integer *
*************************************************/
/* This function is called when the magic number in a regex doesn't match, in
order to flip its bytes to see if we are dealing with a pattern that was
compiled on a host of different endianness. If so, this function is used to
flip other byte values.
Arguments:
value the number to flip
n the number of bytes to flip (assumed to be 2 or 4)
Returns: the flipped value
*/
static unsigned long int
byteflip(unsigned long int value, int n)
{
if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);
return ((value & 0x000000ff) << 24) |
((value & 0x0000ff00) << 8) |
((value & 0x00ff0000) >> 8) |
((value & 0xff000000) >> 24);
}
/*************************************************
* Test for a byte-flipped compiled regex *
*************************************************/
/* This function is called from pcre_exec(), pcre_dfa_exec(), and also from
pcre_fullinfo(). Its job is to test whether the regex is byte-flipped - that
is, it was compiled on a system of opposite endianness. The function is called
only when the native MAGIC_NUMBER test fails. If the regex is indeed flipped,
we flip all the relevant values into a different data block, and return it.
Arguments:
re points to the regex
study points to study data, or NULL
internal_re points to a new regex block
internal_study points to a new study block
Returns: the new block if is is indeed a byte-flipped regex
NULL if it is not
*/
real_pcre *
_pcre_try_flipped(const real_pcre *re, real_pcre *internal_re,
const pcre_study_data *study, pcre_study_data *internal_study)
{
if (byteflip(re->magic_number, sizeof(re->magic_number)) != MAGIC_NUMBER)
return NULL;
*internal_re = *re; /* To copy other fields */
internal_re->size = byteflip(re->size, sizeof(re->size));
internal_re->options = byteflip(re->options, sizeof(re->options));
internal_re->flags = (pcre_uint16)byteflip(re->flags, sizeof(re->flags));
internal_re->top_bracket =
(pcre_uint16)byteflip(re->top_bracket, sizeof(re->top_bracket));
internal_re->top_backref =
(pcre_uint16)byteflip(re->top_backref, sizeof(re->top_backref));
internal_re->first_byte =
(pcre_uint16)byteflip(re->first_byte, sizeof(re->first_byte));
internal_re->req_byte =
(pcre_uint16)byteflip(re->req_byte, sizeof(re->req_byte));
internal_re->name_table_offset =
(pcre_uint16)byteflip(re->name_table_offset, sizeof(re->name_table_offset));
internal_re->name_entry_size =
(pcre_uint16)byteflip(re->name_entry_size, sizeof(re->name_entry_size));
internal_re->name_count =
(pcre_uint16)byteflip(re->name_count, sizeof(re->name_count));
if (study != NULL)
{
*internal_study = *study; /* To copy other fields */
internal_study->size = byteflip(study->size, sizeof(study->size));
internal_study->options = byteflip(study->options, sizeof(study->options));
}
return internal_re;
}
/* End of pcre_tryflipped.c */

View file

@ -0,0 +1,179 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2008 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains code for searching the table of Unicode character
properties. */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "pcre_internal.h"
#include "ucp.h" /* Category definitions */
#include "ucpinternal.h" /* Internal table details */
#include "ucptable.h" /* The table itself */
/* Table to translate from particular type value to the general value. */
static const int ucp_gentype[] = {
ucp_C, ucp_C, ucp_C, ucp_C, ucp_C, /* Cc, Cf, Cn, Co, Cs */
ucp_L, ucp_L, ucp_L, ucp_L, ucp_L, /* Ll, Lu, Lm, Lo, Lt */
ucp_M, ucp_M, ucp_M, /* Mc, Me, Mn */
ucp_N, ucp_N, ucp_N, /* Nd, Nl, No */
ucp_P, ucp_P, ucp_P, ucp_P, ucp_P, /* Pc, Pd, Pe, Pf, Pi */
ucp_P, ucp_P, /* Ps, Po */
ucp_S, ucp_S, ucp_S, ucp_S, /* Sc, Sk, Sm, So */
ucp_Z, ucp_Z, ucp_Z /* Zl, Zp, Zs */
};
/*************************************************
* Search table and return type *
*************************************************/
/* Three values are returned: the category is ucp_C, ucp_L, etc. The detailed
character type is ucp_Lu, ucp_Nd, etc. The script is ucp_Latin, etc.
Arguments:
c the character value
type_ptr the detailed character type is returned here
script_ptr the script is returned here
Returns: the character type category
*/
int
_pcre_ucp_findprop(const unsigned int c, int *type_ptr, int *script_ptr)
{
int bot = 0;
int top = sizeof(ucp_table)/sizeof(cnode);
int mid;
/* The table is searched using a binary chop. You might think that using
intermediate variables to hold some of the common expressions would speed
things up, but tests with gcc 3.4.4 on Linux showed that, on the contrary, it
makes things a lot slower. */
for (;;)
{
if (top <= bot)
{
*type_ptr = ucp_Cn;
*script_ptr = ucp_Common;
return ucp_C;
}
mid = (bot + top) >> 1;
if (c == (ucp_table[mid].f0 & f0_charmask)) break;
if (c < (ucp_table[mid].f0 & f0_charmask)) top = mid;
else
{
if ((ucp_table[mid].f0 & f0_rangeflag) != 0 &&
c <= (ucp_table[mid].f0 & f0_charmask) +
(ucp_table[mid].f1 & f1_rangemask)) break;
bot = mid + 1;
}
}
/* Found an entry in the table. Set the script and detailed type values, and
return the general type. */
*script_ptr = (ucp_table[mid].f0 & f0_scriptmask) >> f0_scriptshift;
*type_ptr = (ucp_table[mid].f1 & f1_typemask) >> f1_typeshift;
return ucp_gentype[*type_ptr];
}
/*************************************************
* Search table and return other case *
*************************************************/
/* If the given character is a letter, and there is another case for the
letter, return the other case. Otherwise, return -1.
Arguments:
c the character value
Returns: the other case or NOTACHAR if none
*/
unsigned int
_pcre_ucp_othercase(const unsigned int c)
{
int bot = 0;
int top = sizeof(ucp_table)/sizeof(cnode);
int mid, offset;
/* The table is searched using a binary chop. You might think that using
intermediate variables to hold some of the common expressions would speed
things up, but tests with gcc 3.4.4 on Linux showed that, on the contrary, it
makes things a lot slower. */
for (;;)
{
if (top <= bot) return -1;
mid = (bot + top) >> 1;
if (c == (ucp_table[mid].f0 & f0_charmask)) break;
if (c < (ucp_table[mid].f0 & f0_charmask)) top = mid;
else
{
if ((ucp_table[mid].f0 & f0_rangeflag) != 0 &&
c <= (ucp_table[mid].f0 & f0_charmask) +
(ucp_table[mid].f1 & f1_rangemask)) break;
bot = mid + 1;
}
}
/* Found an entry in the table. Return NOTACHAR for a range entry. Otherwise
return the other case if there is one, else NOTACHAR. */
if ((ucp_table[mid].f0 & f0_rangeflag) != 0) return NOTACHAR;
offset = ucp_table[mid].f1 & f1_casemask;
if ((offset & f1_caseneg) != 0) offset |= f1_caseneg;
return (offset == 0)? NOTACHAR : c + offset;
}
/* End of pcre_ucp_searchfuncs.c */

View file

@ -0,0 +1,162 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2008 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains an internal function for validating UTF-8 character
strings. */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "pcre_internal.h"
/*************************************************
* Validate a UTF-8 string *
*************************************************/
/* This function is called (optionally) at the start of compile or match, to
validate that a supposed UTF-8 string is actually valid. The early check means
that subsequent code can assume it is dealing with a valid string. The check
can be turned off for maximum performance, but the consequences of supplying
an invalid string are then undefined.
Originally, this function checked according to RFC 2279, allowing for values in
the range 0 to 0x7fffffff, up to 6 bytes long, but ensuring that they were in
the canonical format. Once somebody had pointed out RFC 3629 to me (it
obsoletes 2279), additional restrictions were applied. The values are now
limited to be between 0 and 0x0010ffff, no more than 4 bytes long, and the
subrange 0xd000 to 0xdfff is excluded.
Arguments:
string points to the string
length length of string, or -1 if the string is zero-terminated
Returns: < 0 if the string is a valid UTF-8 string
>= 0 otherwise; the value is the offset of the bad byte
*/
int
_pcre_valid_utf8(const uschar *string, int length)
{
#ifdef SUPPORT_UTF8
register const uschar *p;
if (length < 0)
{
for (p = string; *p != 0; p++);
length = p - string;
}
for (p = string; length-- > 0; p++)
{
register int ab;
register int c = *p;
if (c < 128) continue;
if (c < 0xc0) return p - string;
ab = _pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */
if (length < ab || ab > 3) return p - string;
length -= ab;
/* Check top bits in the second byte */
if ((*(++p) & 0xc0) != 0x80) return p - string;
/* Check for overlong sequences for each different length, and for the
excluded range 0xd000 to 0xdfff. */
switch (ab)
{
/* Check for xx00 000x (overlong sequence) */
case 1:
if ((c & 0x3e) == 0) return p - string;
continue; /* We know there aren't any more bytes to check */
/* Check for 1110 0000, xx0x xxxx (overlong sequence) or
1110 1101, 1010 xxxx (0xd000 - 0xdfff) */
case 2:
if ((c == 0xe0 && (*p & 0x20) == 0) ||
(c == 0xed && *p >= 0xa0))
return p - string;
break;
/* Check for 1111 0000, xx00 xxxx (overlong sequence) or
greater than 0x0010ffff (f4 8f bf bf) */
case 3:
if ((c == 0xf0 && (*p & 0x30) == 0) ||
(c > 0xf4 ) ||
(c == 0xf4 && *p > 0x8f))
return p - string;
break;
#if 0
/* These cases can no longer occur, as we restrict to a maximum of four
bytes nowadays. Leave the code here in case we ever want to add an option
for longer sequences. */
/* Check for 1111 1000, xx00 0xxx */
case 4:
if (c == 0xf8 && (*p & 0x38) == 0) return p - string;
break;
/* Check for leading 0xfe or 0xff, and then for 1111 1100, xx00 00xx */
case 5:
if (c == 0xfe || c == 0xff ||
(c == 0xfc && (*p & 0x3c) == 0)) return p - string;
break;
#endif
}
/* Check for valid bytes after the 2nd, if any; all must start 10 */
while (--ab > 0)
{
if ((*(++p) & 0xc0) != 0x80) return p - string;
}
}
#endif
return -1;
}
/* End of pcre_valid_utf8.c */

View file

@ -0,0 +1,90 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2008 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains the external function pcre_version(), which returns a
string that identifies the PCRE version that is in use. */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "pcre_internal.h"
/*************************************************
* Return version string *
*************************************************/
/* These macros are the standard way of turning unquoted text into C strings.
They allow macros like PCRE_MAJOR to be defined without quotes, which is
convenient for user programs that want to test its value. */
#define STRING(a) # a
#define XSTRING(s) STRING(s)
/* A problem turned up with PCRE_PRERELEASE, which is defined empty for
production releases. Originally, it was used naively in this code:
return XSTRING(PCRE_MAJOR)
"." XSTRING(PCRE_MINOR)
XSTRING(PCRE_PRERELEASE)
" " XSTRING(PCRE_DATE);
However, when PCRE_PRERELEASE is empty, this leads to an attempted expansion of
STRING(). The C standard states: "If (before argument substitution) any
argument consists of no preprocessing tokens, the behavior is undefined." It
turns out the gcc treats this case as a single empty string - which is what we
really want - but Visual C grumbles about the lack of an argument for the
macro. Unfortunately, both are within their rights. To cope with both ways of
handling this, I had resort to some messy hackery that does a test at run time.
I could find no way of detecting that a macro is defined as an empty string at
pre-processor time. This hack uses a standard trick for avoiding calling
the STRING macro with an empty argument when doing the test. */
PCRE_EXP_DEFN const char *
pcre_version(void)
{
return (XSTRING(Z PCRE_PRERELEASE)[1] == 0)?
XSTRING(PCRE_MAJOR.PCRE_MINOR PCRE_DATE) :
XSTRING(PCRE_MAJOR.PCRE_MINOR) XSTRING(PCRE_PRERELEASE PCRE_DATE);
}
/* End of pcre_version.c */

View file

@ -0,0 +1,148 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2008 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains an internal function that is used to match an extended
class (one that contains characters whose values are > 255). It is used by both
pcre_exec() and pcre_def_exec(). */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "pcre_internal.h"
/*************************************************
* Match character against an XCLASS *
*************************************************/
/* This function is called to match a character against an extended class that
might contain values > 255.
Arguments:
c the character
data points to the flag byte of the XCLASS data
Returns: TRUE if character matches, else FALSE
*/
BOOL
_pcre_xclass(int c, const uschar *data)
{
int t;
BOOL negated = (*data & XCL_NOT) != 0;
/* Character values < 256 are matched against a bitmap, if one is present. If
not, we still carry on, because there may be ranges that start below 256 in the
additional data. */
if (c < 256)
{
if ((*data & XCL_MAP) != 0 && (data[1 + c/8] & (1 << (c&7))) != 0)
return !negated; /* char found */
}
/* First skip the bit map if present. Then match against the list of Unicode
properties or large chars or ranges that end with a large char. We won't ever
encounter XCL_PROP or XCL_NOTPROP when UCP support is not compiled. */
if ((*data++ & XCL_MAP) != 0) data += 32;
while ((t = *data++) != XCL_END)
{
int x, y;
if (t == XCL_SINGLE)
{
GETCHARINC(x, data);
if (c == x) return !negated;
}
else if (t == XCL_RANGE)
{
GETCHARINC(x, data);
GETCHARINC(y, data);
if (c >= x && c <= y) return !negated;
}
#ifdef SUPPORT_UCP
else /* XCL_PROP & XCL_NOTPROP */
{
int chartype, script;
int category = _pcre_ucp_findprop(c, &chartype, &script);
switch(*data)
{
case PT_ANY:
if (t == XCL_PROP) return !negated;
break;
case PT_LAMP:
if ((chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt) ==
(t == XCL_PROP)) return !negated;
break;
case PT_GC:
if ((data[1] == category) == (t == XCL_PROP)) return !negated;
break;
case PT_PC:
if ((data[1] == chartype) == (t == XCL_PROP)) return !negated;
break;
case PT_SC:
if ((data[1] == script) == (t == XCL_PROP)) return !negated;
break;
/* This should never occur, but compilers may mutter if there is no
default. */
default:
return FALSE;
}
data += 2;
}
#endif /* SUPPORT_UCP */
}
return negated; /* char did not match */
}
/* End of pcre_xclass.c */

871
Engine/lib/pcre/pcrecpp.cc Normal file
View file

@ -0,0 +1,871 @@
// Copyright (c) 2005, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: Sanjay Ghemawat
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <stdlib.h>
#include <stdio.h>
#include <ctype.h>
#include <limits.h> /* for SHRT_MIN, USHRT_MAX, etc */
#include <assert.h>
#include <errno.h>
#include <string>
#include <algorithm>
#include "pcrecpp_internal.h"
#include "pcre.h"
#include "pcrecpp.h"
#include "pcre_stringpiece.h"
namespace pcrecpp {
// Maximum number of args we can set
static const int kMaxArgs = 16;
static const int kVecSize = (1 + kMaxArgs) * 3; // results + PCRE workspace
// Special object that stands-in for no argument
Arg RE::no_arg((void*)NULL);
// If a regular expression has no error, its error_ field points here
static const string empty_string;
// If the user doesn't ask for any options, we just use this one
static RE_Options default_options;
void RE::Init(const string& pat, const RE_Options* options) {
pattern_ = pat;
if (options == NULL) {
options_ = default_options;
} else {
options_ = *options;
}
error_ = &empty_string;
re_full_ = NULL;
re_partial_ = NULL;
re_partial_ = Compile(UNANCHORED);
if (re_partial_ != NULL) {
re_full_ = Compile(ANCHOR_BOTH);
}
}
void RE::Cleanup() {
if (re_full_ != NULL) (*pcre_free)(re_full_);
if (re_partial_ != NULL) (*pcre_free)(re_partial_);
if (error_ != &empty_string) delete error_;
}
RE::~RE() {
Cleanup();
}
pcre* RE::Compile(Anchor anchor) {
// First, convert RE_Options into pcre options
int pcre_options = 0;
pcre_options = options_.all_options();
// Special treatment for anchoring. This is needed because at
// runtime pcre only provides an option for anchoring at the
// beginning of a string (unless you use offset).
//
// There are three types of anchoring we want:
// UNANCHORED Compile the original pattern, and use
// a pcre unanchored match.
// ANCHOR_START Compile the original pattern, and use
// a pcre anchored match.
// ANCHOR_BOTH Tack a "\z" to the end of the original pattern
// and use a pcre anchored match.
const char* compile_error;
int eoffset;
pcre* re;
if (anchor != ANCHOR_BOTH) {
re = pcre_compile(pattern_.c_str(), pcre_options,
&compile_error, &eoffset, NULL);
} else {
// Tack a '\z' at the end of RE. Parenthesize it first so that
// the '\z' applies to all top-level alternatives in the regexp.
string wrapped = "(?:"; // A non-counting grouping operator
wrapped += pattern_;
wrapped += ")\\z";
re = pcre_compile(wrapped.c_str(), pcre_options,
&compile_error, &eoffset, NULL);
}
if (re == NULL) {
if (error_ == &empty_string) error_ = new string(compile_error);
}
return re;
}
/***** Matching interfaces *****/
bool RE::FullMatch(const StringPiece& text,
const Arg& ptr1,
const Arg& ptr2,
const Arg& ptr3,
const Arg& ptr4,
const Arg& ptr5,
const Arg& ptr6,
const Arg& ptr7,
const Arg& ptr8,
const Arg& ptr9,
const Arg& ptr10,
const Arg& ptr11,
const Arg& ptr12,
const Arg& ptr13,
const Arg& ptr14,
const Arg& ptr15,
const Arg& ptr16) const {
const Arg* args[kMaxArgs];
int n = 0;
if (&ptr1 == &no_arg) goto done; args[n++] = &ptr1;
if (&ptr2 == &no_arg) goto done; args[n++] = &ptr2;
if (&ptr3 == &no_arg) goto done; args[n++] = &ptr3;
if (&ptr4 == &no_arg) goto done; args[n++] = &ptr4;
if (&ptr5 == &no_arg) goto done; args[n++] = &ptr5;
if (&ptr6 == &no_arg) goto done; args[n++] = &ptr6;
if (&ptr7 == &no_arg) goto done; args[n++] = &ptr7;
if (&ptr8 == &no_arg) goto done; args[n++] = &ptr8;
if (&ptr9 == &no_arg) goto done; args[n++] = &ptr9;
if (&ptr10 == &no_arg) goto done; args[n++] = &ptr10;
if (&ptr11 == &no_arg) goto done; args[n++] = &ptr11;
if (&ptr12 == &no_arg) goto done; args[n++] = &ptr12;
if (&ptr13 == &no_arg) goto done; args[n++] = &ptr13;
if (&ptr14 == &no_arg) goto done; args[n++] = &ptr14;
if (&ptr15 == &no_arg) goto done; args[n++] = &ptr15;
if (&ptr16 == &no_arg) goto done; args[n++] = &ptr16;
done:
int consumed;
int vec[kVecSize];
return DoMatchImpl(text, ANCHOR_BOTH, &consumed, args, n, vec, kVecSize);
}
bool RE::PartialMatch(const StringPiece& text,
const Arg& ptr1,
const Arg& ptr2,
const Arg& ptr3,
const Arg& ptr4,
const Arg& ptr5,
const Arg& ptr6,
const Arg& ptr7,
const Arg& ptr8,
const Arg& ptr9,
const Arg& ptr10,
const Arg& ptr11,
const Arg& ptr12,
const Arg& ptr13,
const Arg& ptr14,
const Arg& ptr15,
const Arg& ptr16) const {
const Arg* args[kMaxArgs];
int n = 0;
if (&ptr1 == &no_arg) goto done; args[n++] = &ptr1;
if (&ptr2 == &no_arg) goto done; args[n++] = &ptr2;
if (&ptr3 == &no_arg) goto done; args[n++] = &ptr3;
if (&ptr4 == &no_arg) goto done; args[n++] = &ptr4;
if (&ptr5 == &no_arg) goto done; args[n++] = &ptr5;
if (&ptr6 == &no_arg) goto done; args[n++] = &ptr6;
if (&ptr7 == &no_arg) goto done; args[n++] = &ptr7;
if (&ptr8 == &no_arg) goto done; args[n++] = &ptr8;
if (&ptr9 == &no_arg) goto done; args[n++] = &ptr9;
if (&ptr10 == &no_arg) goto done; args[n++] = &ptr10;
if (&ptr11 == &no_arg) goto done; args[n++] = &ptr11;
if (&ptr12 == &no_arg) goto done; args[n++] = &ptr12;
if (&ptr13 == &no_arg) goto done; args[n++] = &ptr13;
if (&ptr14 == &no_arg) goto done; args[n++] = &ptr14;
if (&ptr15 == &no_arg) goto done; args[n++] = &ptr15;
if (&ptr16 == &no_arg) goto done; args[n++] = &ptr16;
done:
int consumed;
int vec[kVecSize];
return DoMatchImpl(text, UNANCHORED, &consumed, args, n, vec, kVecSize);
}
bool RE::Consume(StringPiece* input,
const Arg& ptr1,
const Arg& ptr2,
const Arg& ptr3,
const Arg& ptr4,
const Arg& ptr5,
const Arg& ptr6,
const Arg& ptr7,
const Arg& ptr8,
const Arg& ptr9,
const Arg& ptr10,
const Arg& ptr11,
const Arg& ptr12,
const Arg& ptr13,
const Arg& ptr14,
const Arg& ptr15,
const Arg& ptr16) const {
const Arg* args[kMaxArgs];
int n = 0;
if (&ptr1 == &no_arg) goto done; args[n++] = &ptr1;
if (&ptr2 == &no_arg) goto done; args[n++] = &ptr2;
if (&ptr3 == &no_arg) goto done; args[n++] = &ptr3;
if (&ptr4 == &no_arg) goto done; args[n++] = &ptr4;
if (&ptr5 == &no_arg) goto done; args[n++] = &ptr5;
if (&ptr6 == &no_arg) goto done; args[n++] = &ptr6;
if (&ptr7 == &no_arg) goto done; args[n++] = &ptr7;
if (&ptr8 == &no_arg) goto done; args[n++] = &ptr8;
if (&ptr9 == &no_arg) goto done; args[n++] = &ptr9;
if (&ptr10 == &no_arg) goto done; args[n++] = &ptr10;
if (&ptr11 == &no_arg) goto done; args[n++] = &ptr11;
if (&ptr12 == &no_arg) goto done; args[n++] = &ptr12;
if (&ptr13 == &no_arg) goto done; args[n++] = &ptr13;
if (&ptr14 == &no_arg) goto done; args[n++] = &ptr14;
if (&ptr15 == &no_arg) goto done; args[n++] = &ptr15;
if (&ptr16 == &no_arg) goto done; args[n++] = &ptr16;
done:
int consumed;
int vec[kVecSize];
if (DoMatchImpl(*input, ANCHOR_START, &consumed,
args, n, vec, kVecSize)) {
input->remove_prefix(consumed);
return true;
} else {
return false;
}
}
bool RE::FindAndConsume(StringPiece* input,
const Arg& ptr1,
const Arg& ptr2,
const Arg& ptr3,
const Arg& ptr4,
const Arg& ptr5,
const Arg& ptr6,
const Arg& ptr7,
const Arg& ptr8,
const Arg& ptr9,
const Arg& ptr10,
const Arg& ptr11,
const Arg& ptr12,
const Arg& ptr13,
const Arg& ptr14,
const Arg& ptr15,
const Arg& ptr16) const {
const Arg* args[kMaxArgs];
int n = 0;
if (&ptr1 == &no_arg) goto done; args[n++] = &ptr1;
if (&ptr2 == &no_arg) goto done; args[n++] = &ptr2;
if (&ptr3 == &no_arg) goto done; args[n++] = &ptr3;
if (&ptr4 == &no_arg) goto done; args[n++] = &ptr4;
if (&ptr5 == &no_arg) goto done; args[n++] = &ptr5;
if (&ptr6 == &no_arg) goto done; args[n++] = &ptr6;
if (&ptr7 == &no_arg) goto done; args[n++] = &ptr7;
if (&ptr8 == &no_arg) goto done; args[n++] = &ptr8;
if (&ptr9 == &no_arg) goto done; args[n++] = &ptr9;
if (&ptr10 == &no_arg) goto done; args[n++] = &ptr10;
if (&ptr11 == &no_arg) goto done; args[n++] = &ptr11;
if (&ptr12 == &no_arg) goto done; args[n++] = &ptr12;
if (&ptr13 == &no_arg) goto done; args[n++] = &ptr13;
if (&ptr14 == &no_arg) goto done; args[n++] = &ptr14;
if (&ptr15 == &no_arg) goto done; args[n++] = &ptr15;
if (&ptr16 == &no_arg) goto done; args[n++] = &ptr16;
done:
int consumed;
int vec[kVecSize];
if (DoMatchImpl(*input, UNANCHORED, &consumed,
args, n, vec, kVecSize)) {
input->remove_prefix(consumed);
return true;
} else {
return false;
}
}
bool RE::Replace(const StringPiece& rewrite,
string *str) const {
int vec[kVecSize];
int matches = TryMatch(*str, 0, UNANCHORED, vec, kVecSize);
if (matches == 0)
return false;
string s;
if (!Rewrite(&s, rewrite, *str, vec, matches))
return false;
assert(vec[0] >= 0);
assert(vec[1] >= 0);
str->replace(vec[0], vec[1] - vec[0], s);
return true;
}
// Returns PCRE_NEWLINE_CRLF, PCRE_NEWLINE_CR, or PCRE_NEWLINE_LF.
// Note that PCRE_NEWLINE_CRLF is defined to be P_N_CR | P_N_LF.
// Modified by PH to add PCRE_NEWLINE_ANY and PCRE_NEWLINE_ANYCRLF.
static int NewlineMode(int pcre_options) {
// TODO: if we can make it threadsafe, cache this var
int newline_mode = 0;
/* if (newline_mode) return newline_mode; */ // do this once it's cached
if (pcre_options & (PCRE_NEWLINE_CRLF|PCRE_NEWLINE_CR|PCRE_NEWLINE_LF|
PCRE_NEWLINE_ANY|PCRE_NEWLINE_ANYCRLF)) {
newline_mode = (pcre_options &
(PCRE_NEWLINE_CRLF|PCRE_NEWLINE_CR|PCRE_NEWLINE_LF|
PCRE_NEWLINE_ANY|PCRE_NEWLINE_ANYCRLF));
} else {
int newline;
pcre_config(PCRE_CONFIG_NEWLINE, &newline);
if (newline == 10)
newline_mode = PCRE_NEWLINE_LF;
else if (newline == 13)
newline_mode = PCRE_NEWLINE_CR;
else if (newline == 3338)
newline_mode = PCRE_NEWLINE_CRLF;
else if (newline == -1)
newline_mode = PCRE_NEWLINE_ANY;
else if (newline == -2)
newline_mode = PCRE_NEWLINE_ANYCRLF;
else
assert("" == "Unexpected return value from pcre_config(NEWLINE)");
}
return newline_mode;
}
int RE::GlobalReplace(const StringPiece& rewrite,
string *str) const {
int count = 0;
int vec[kVecSize];
string out;
int start = 0;
int lastend = -1;
while (start <= static_cast<int>(str->length())) {
int matches = TryMatch(*str, start, UNANCHORED, vec, kVecSize);
if (matches <= 0)
break;
int matchstart = vec[0], matchend = vec[1];
assert(matchstart >= start);
assert(matchend >= matchstart);
if (matchstart == matchend && matchstart == lastend) {
// advance one character if we matched an empty string at the same
// place as the last match occurred
matchend = start + 1;
// If the current char is CR and we're in CRLF mode, skip LF too.
// Note it's better to call pcre_fullinfo() than to examine
// all_options(), since options_ could have changed bewteen
// compile-time and now, but this is simpler and safe enough.
// Modified by PH to add ANY and ANYCRLF.
if (start+1 < static_cast<int>(str->length()) &&
(*str)[start] == '\r' && (*str)[start+1] == '\n' &&
(NewlineMode(options_.all_options()) == PCRE_NEWLINE_CRLF ||
NewlineMode(options_.all_options()) == PCRE_NEWLINE_ANY ||
NewlineMode(options_.all_options()) == PCRE_NEWLINE_ANYCRLF)
) {
matchend++;
}
// We also need to advance more than one char if we're in utf8 mode.
#ifdef SUPPORT_UTF8
if (options_.utf8()) {
while (matchend < static_cast<int>(str->length()) &&
((*str)[matchend] & 0xc0) == 0x80)
matchend++;
}
#endif
if (matchend <= static_cast<int>(str->length()))
out.append(*str, start, matchend - start);
start = matchend;
} else {
out.append(*str, start, matchstart - start);
Rewrite(&out, rewrite, *str, vec, matches);
start = matchend;
lastend = matchend;
count++;
}
}
if (count == 0)
return 0;
if (start < static_cast<int>(str->length()))
out.append(*str, start, str->length() - start);
swap(out, *str);
return count;
}
bool RE::Extract(const StringPiece& rewrite,
const StringPiece& text,
string *out) const {
int vec[kVecSize];
int matches = TryMatch(text, 0, UNANCHORED, vec, kVecSize);
if (matches == 0)
return false;
out->erase();
return Rewrite(out, rewrite, text, vec, matches);
}
/*static*/ string RE::QuoteMeta(const StringPiece& unquoted) {
string result;
// Escape any ascii character not in [A-Za-z_0-9].
//
// Note that it's legal to escape a character even if it has no
// special meaning in a regular expression -- so this function does
// that. (This also makes it identical to the perl function of the
// same name; see `perldoc -f quotemeta`.)
for (int ii = 0; ii < unquoted.size(); ++ii) {
// Note that using 'isalnum' here raises the benchmark time from
// 32ns to 58ns:
if ((unquoted[ii] < 'a' || unquoted[ii] > 'z') &&
(unquoted[ii] < 'A' || unquoted[ii] > 'Z') &&
(unquoted[ii] < '0' || unquoted[ii] > '9') &&
unquoted[ii] != '_' &&
// If this is the part of a UTF8 or Latin1 character, we need
// to copy this byte without escaping. Experimentally this is
// what works correctly with the regexp library.
!(unquoted[ii] & 128)) {
result += '\\';
}
result += unquoted[ii];
}
return result;
}
/***** Actual matching and rewriting code *****/
int RE::TryMatch(const StringPiece& text,
int startpos,
Anchor anchor,
int *vec,
int vecsize) const {
pcre* re = (anchor == ANCHOR_BOTH) ? re_full_ : re_partial_;
if (re == NULL) {
//fprintf(stderr, "Matching against invalid re: %s\n", error_->c_str());
return 0;
}
pcre_extra extra = { 0, 0, 0, 0, 0, 0 };
if (options_.match_limit() > 0) {
extra.flags |= PCRE_EXTRA_MATCH_LIMIT;
extra.match_limit = options_.match_limit();
}
if (options_.match_limit_recursion() > 0) {
extra.flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
extra.match_limit_recursion = options_.match_limit_recursion();
}
int rc = pcre_exec(re, // The regular expression object
&extra,
(text.data() == NULL) ? "" : text.data(),
text.size(),
startpos,
(anchor == UNANCHORED) ? 0 : PCRE_ANCHORED,
vec,
vecsize);
// Handle errors
if (rc == PCRE_ERROR_NOMATCH) {
return 0;
} else if (rc < 0) {
//fprintf(stderr, "Unexpected return code: %d when matching '%s'\n",
// re, pattern_.c_str());
return 0;
} else if (rc == 0) {
// pcre_exec() returns 0 as a special case when the number of
// capturing subpatterns exceeds the size of the vector.
// When this happens, there is a match and the output vector
// is filled, but we miss out on the positions of the extra subpatterns.
rc = vecsize / 2;
}
return rc;
}
bool RE::DoMatchImpl(const StringPiece& text,
Anchor anchor,
int* consumed,
const Arg* const* args,
int n,
int* vec,
int vecsize) const {
assert((1 + n) * 3 <= vecsize); // results + PCRE workspace
int matches = TryMatch(text, 0, anchor, vec, vecsize);
assert(matches >= 0); // TryMatch never returns negatives
if (matches == 0)
return false;
*consumed = vec[1];
if (n == 0 || args == NULL) {
// We are not interested in results
return true;
}
if (NumberOfCapturingGroups() < n) {
// RE has fewer capturing groups than number of arg pointers passed in
return false;
}
// If we got here, we must have matched the whole pattern.
// We do not need (can not do) any more checks on the value of 'matches' here
// -- see the comment for TryMatch.
for (int i = 0; i < n; i++) {
const int start = vec[2*(i+1)];
const int limit = vec[2*(i+1)+1];
if (!args[i]->Parse(text.data() + start, limit-start)) {
// TODO: Should we indicate what the error was?
return false;
}
}
return true;
}
bool RE::DoMatch(const StringPiece& text,
Anchor anchor,
int* consumed,
const Arg* const args[],
int n) const {
assert(n >= 0);
size_t const vecsize = (1 + n) * 3; // results + PCRE workspace
// (as for kVecSize)
int space[21]; // use stack allocation for small vecsize (common case)
int* vec = vecsize <= 21 ? space : new int[vecsize];
bool retval = DoMatchImpl(text, anchor, consumed, args, n, vec, vecsize);
if (vec != space) delete [] vec;
return retval;
}
bool RE::Rewrite(string *out, const StringPiece &rewrite,
const StringPiece &text, int *vec, int veclen) const {
for (const char *s = rewrite.data(), *end = s + rewrite.size();
s < end; s++) {
int c = *s;
if (c == '\\') {
c = *++s;
if (isdigit(c)) {
int n = (c - '0');
if (n >= veclen) {
//fprintf(stderr, requested group %d in regexp %.*s\n",
// n, rewrite.size(), rewrite.data());
return false;
}
int start = vec[2 * n];
if (start >= 0)
out->append(text.data() + start, vec[2 * n + 1] - start);
} else if (c == '\\') {
out->push_back('\\');
} else {
//fprintf(stderr, "invalid rewrite pattern: %.*s\n",
// rewrite.size(), rewrite.data());
return false;
}
} else {
out->push_back(c);
}
}
return true;
}
// Return the number of capturing subpatterns, or -1 if the
// regexp wasn't valid on construction.
int RE::NumberOfCapturingGroups() const {
if (re_partial_ == NULL) return -1;
int result;
int pcre_retval = pcre_fullinfo(re_partial_, // The regular expression object
NULL, // We did not study the pattern
PCRE_INFO_CAPTURECOUNT,
&result);
assert(pcre_retval == 0);
return result;
}
/***** Parsers for various types *****/
bool Arg::parse_null(const char* str, int n, void* dest) {
// We fail if somebody asked us to store into a non-NULL void* pointer
return (dest == NULL);
}
bool Arg::parse_string(const char* str, int n, void* dest) {
if (dest == NULL) return true;
reinterpret_cast<string*>(dest)->assign(str, n);
return true;
}
bool Arg::parse_stringpiece(const char* str, int n, void* dest) {
if (dest == NULL) return true;
reinterpret_cast<StringPiece*>(dest)->set(str, n);
return true;
}
bool Arg::parse_char(const char* str, int n, void* dest) {
if (n != 1) return false;
if (dest == NULL) return true;
*(reinterpret_cast<char*>(dest)) = str[0];
return true;
}
bool Arg::parse_uchar(const char* str, int n, void* dest) {
if (n != 1) return false;
if (dest == NULL) return true;
*(reinterpret_cast<unsigned char*>(dest)) = str[0];
return true;
}
// Largest number spec that we are willing to parse
static const int kMaxNumberLength = 32;
// REQUIRES "buf" must have length at least kMaxNumberLength+1
// REQUIRES "n > 0"
// Copies "str" into "buf" and null-terminates if necessary.
// Returns one of:
// a. "str" if no termination is needed
// b. "buf" if the string was copied and null-terminated
// c. "" if the input was invalid and has no hope of being parsed
static const char* TerminateNumber(char* buf, const char* str, int n) {
if ((n > 0) && isspace(*str)) {
// We are less forgiving than the strtoxxx() routines and do not
// allow leading spaces.
return "";
}
// See if the character right after the input text may potentially
// look like a digit.
if (isdigit(str[n]) ||
((str[n] >= 'a') && (str[n] <= 'f')) ||
((str[n] >= 'A') && (str[n] <= 'F'))) {
if (n > kMaxNumberLength) return ""; // Input too big to be a valid number
memcpy(buf, str, n);
buf[n] = '\0';
return buf;
} else {
// We can parse right out of the supplied string, so return it.
return str;
}
}
bool Arg::parse_long_radix(const char* str,
int n,
void* dest,
int radix) {
if (n == 0) return false;
char buf[kMaxNumberLength+1];
str = TerminateNumber(buf, str, n);
char* end;
errno = 0;
long r = strtol(str, &end, radix);
if (end != str + n) return false; // Leftover junk
if (errno) return false;
if (dest == NULL) return true;
*(reinterpret_cast<long*>(dest)) = r;
return true;
}
bool Arg::parse_ulong_radix(const char* str,
int n,
void* dest,
int radix) {
if (n == 0) return false;
char buf[kMaxNumberLength+1];
str = TerminateNumber(buf, str, n);
if (str[0] == '-') return false; // strtoul() on a negative number?!
char* end;
errno = 0;
unsigned long r = strtoul(str, &end, radix);
if (end != str + n) return false; // Leftover junk
if (errno) return false;
if (dest == NULL) return true;
*(reinterpret_cast<unsigned long*>(dest)) = r;
return true;
}
bool Arg::parse_short_radix(const char* str,
int n,
void* dest,
int radix) {
long r;
if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse
if (r < SHRT_MIN || r > SHRT_MAX) return false; // Out of range
if (dest == NULL) return true;
*(reinterpret_cast<short*>(dest)) = static_cast<short>(r);
return true;
}
bool Arg::parse_ushort_radix(const char* str,
int n,
void* dest,
int radix) {
unsigned long r;
if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse
if (r > USHRT_MAX) return false; // Out of range
if (dest == NULL) return true;
*(reinterpret_cast<unsigned short*>(dest)) = static_cast<unsigned short>(r);
return true;
}
bool Arg::parse_int_radix(const char* str,
int n,
void* dest,
int radix) {
long r;
if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse
if (r < INT_MIN || r > INT_MAX) return false; // Out of range
if (dest == NULL) return true;
*(reinterpret_cast<int*>(dest)) = r;
return true;
}
bool Arg::parse_uint_radix(const char* str,
int n,
void* dest,
int radix) {
unsigned long r;
if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse
if (r > UINT_MAX) return false; // Out of range
if (dest == NULL) return true;
*(reinterpret_cast<unsigned int*>(dest)) = r;
return true;
}
bool Arg::parse_longlong_radix(const char* str,
int n,
void* dest,
int radix) {
#ifndef HAVE_LONG_LONG
return false;
#else
if (n == 0) return false;
char buf[kMaxNumberLength+1];
str = TerminateNumber(buf, str, n);
char* end;
errno = 0;
#if defined HAVE_STRTOQ
long long r = strtoq(str, &end, radix);
#elif defined HAVE_STRTOLL
long long r = strtoll(str, &end, radix);
#elif defined HAVE__STRTOI64
long long r = _strtoi64(str, &end, radix);
#else
#error parse_longlong_radix: cannot convert input to a long-long
#endif
if (end != str + n) return false; // Leftover junk
if (errno) return false;
if (dest == NULL) return true;
*(reinterpret_cast<long long*>(dest)) = r;
return true;
#endif /* HAVE_LONG_LONG */
}
bool Arg::parse_ulonglong_radix(const char* str,
int n,
void* dest,
int radix) {
#ifndef HAVE_UNSIGNED_LONG_LONG
return false;
#else
if (n == 0) return false;
char buf[kMaxNumberLength+1];
str = TerminateNumber(buf, str, n);
if (str[0] == '-') return false; // strtoull() on a negative number?!
char* end;
errno = 0;
#if defined HAVE_STRTOQ
unsigned long long r = strtouq(str, &end, radix);
#elif defined HAVE_STRTOLL
unsigned long long r = strtoull(str, &end, radix);
#elif defined HAVE__STRTOI64
unsigned long long r = _strtoui64(str, &end, radix);
#else
#error parse_ulonglong_radix: cannot convert input to a long-long
#endif
if (end != str + n) return false; // Leftover junk
if (errno) return false;
if (dest == NULL) return true;
*(reinterpret_cast<unsigned long long*>(dest)) = r;
return true;
#endif /* HAVE_UNSIGNED_LONG_LONG */
}
bool Arg::parse_double(const char* str, int n, void* dest) {
if (n == 0) return false;
static const int kMaxLength = 200;
char buf[kMaxLength];
if (n >= kMaxLength) return false;
memcpy(buf, str, n);
buf[n] = '\0';
errno = 0;
char* end;
double r = strtod(buf, &end);
if (end != buf + n) return false; // Leftover junk
if (errno) return false;
if (dest == NULL) return true;
*(reinterpret_cast<double*>(dest)) = r;
return true;
}
bool Arg::parse_float(const char* str, int n, void* dest) {
double r;
if (!parse_double(str, n, &r)) return false;
if (dest == NULL) return true;
*(reinterpret_cast<float*>(dest)) = static_cast<float>(r);
return true;
}
#define DEFINE_INTEGER_PARSERS(name) \
bool Arg::parse_##name(const char* str, int n, void* dest) { \
return parse_##name##_radix(str, n, dest, 10); \
} \
bool Arg::parse_##name##_hex(const char* str, int n, void* dest) { \
return parse_##name##_radix(str, n, dest, 16); \
} \
bool Arg::parse_##name##_octal(const char* str, int n, void* dest) { \
return parse_##name##_radix(str, n, dest, 8); \
} \
bool Arg::parse_##name##_cradix(const char* str, int n, void* dest) { \
return parse_##name##_radix(str, n, dest, 0); \
}
DEFINE_INTEGER_PARSERS(short) /* */
DEFINE_INTEGER_PARSERS(ushort) /* */
DEFINE_INTEGER_PARSERS(int) /* Don't use semicolons after these */
DEFINE_INTEGER_PARSERS(uint) /* statements because they can cause */
DEFINE_INTEGER_PARSERS(long) /* compiler warnings if the checking */
DEFINE_INTEGER_PARSERS(ulong) /* level is turned up high enough. */
DEFINE_INTEGER_PARSERS(longlong) /* */
DEFINE_INTEGER_PARSERS(ulonglong) /* */
#undef DEFINE_INTEGER_PARSERS
} // namespace pcrecpp

700
Engine/lib/pcre/pcrecpp.h Normal file
View file

@ -0,0 +1,700 @@
// Copyright (c) 2005, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: Sanjay Ghemawat
// Support for PCRE_XXX modifiers added by Giuseppe Maxia, July 2005
#ifndef _PCRECPP_H
#define _PCRECPP_H
// C++ interface to the pcre regular-expression library. RE supports
// Perl-style regular expressions (with extensions like \d, \w, \s,
// ...).
//
// -----------------------------------------------------------------------
// REGEXP SYNTAX:
//
// This module is part of the pcre library and hence supports its syntax
// for regular expressions.
//
// The syntax is pretty similar to Perl's. For those not familiar
// with Perl's regular expressions, here are some examples of the most
// commonly used extensions:
//
// "hello (\\w+) world" -- \w matches a "word" character
// "version (\\d+)" -- \d matches a digit
// "hello\\s+world" -- \s matches any whitespace character
// "\\b(\\w+)\\b" -- \b matches empty string at a word boundary
// "(?i)hello" -- (?i) turns on case-insensitive matching
// "/\\*(.*?)\\*/" -- .*? matches . minimum no. of times possible
//
// -----------------------------------------------------------------------
// MATCHING INTERFACE:
//
// The "FullMatch" operation checks that supplied text matches a
// supplied pattern exactly.
//
// Example: successful match
// pcrecpp::RE re("h.*o");
// re.FullMatch("hello");
//
// Example: unsuccessful match (requires full match):
// pcrecpp::RE re("e");
// !re.FullMatch("hello");
//
// Example: creating a temporary RE object:
// pcrecpp::RE("h.*o").FullMatch("hello");
//
// You can pass in a "const char*" or a "string" for "text". The
// examples below tend to use a const char*.
//
// You can, as in the different examples above, store the RE object
// explicitly in a variable or use a temporary RE object. The
// examples below use one mode or the other arbitrarily. Either
// could correctly be used for any of these examples.
//
// -----------------------------------------------------------------------
// MATCHING WITH SUB-STRING EXTRACTION:
//
// You can supply extra pointer arguments to extract matched subpieces.
//
// Example: extracts "ruby" into "s" and 1234 into "i"
// int i;
// string s;
// pcrecpp::RE re("(\\w+):(\\d+)");
// re.FullMatch("ruby:1234", &s, &i);
//
// Example: does not try to extract any extra sub-patterns
// re.FullMatch("ruby:1234", &s);
//
// Example: does not try to extract into NULL
// re.FullMatch("ruby:1234", NULL, &i);
//
// Example: integer overflow causes failure
// !re.FullMatch("ruby:1234567891234", NULL, &i);
//
// Example: fails because there aren't enough sub-patterns:
// !pcrecpp::RE("\\w+:\\d+").FullMatch("ruby:1234", &s);
//
// Example: fails because string cannot be stored in integer
// !pcrecpp::RE("(.*)").FullMatch("ruby", &i);
//
// The provided pointer arguments can be pointers to any scalar numeric
// type, or one of
// string (matched piece is copied to string)
// StringPiece (StringPiece is mutated to point to matched piece)
// T (where "bool T::ParseFrom(const char*, int)" exists)
// NULL (the corresponding matched sub-pattern is not copied)
//
// CAVEAT: An optional sub-pattern that does not exist in the matched
// string is assigned the empty string. Therefore, the following will
// return false (because the empty string is not a valid number):
// int number;
// pcrecpp::RE::FullMatch("abc", "[a-z]+(\\d+)?", &number);
//
// -----------------------------------------------------------------------
// DO_MATCH
//
// The matching interface supports at most 16 arguments per call.
// If you need more, consider using the more general interface
// pcrecpp::RE::DoMatch(). See pcrecpp.h for the signature for DoMatch.
//
// -----------------------------------------------------------------------
// PARTIAL MATCHES
//
// You can use the "PartialMatch" operation when you want the pattern
// to match any substring of the text.
//
// Example: simple search for a string:
// pcrecpp::RE("ell").PartialMatch("hello");
//
// Example: find first number in a string:
// int number;
// pcrecpp::RE re("(\\d+)");
// re.PartialMatch("x*100 + 20", &number);
// assert(number == 100);
//
// -----------------------------------------------------------------------
// UTF-8 AND THE MATCHING INTERFACE:
//
// By default, pattern and text are plain text, one byte per character.
// The UTF8 flag, passed to the constructor, causes both pattern
// and string to be treated as UTF-8 text, still a byte stream but
// potentially multiple bytes per character. In practice, the text
// is likelier to be UTF-8 than the pattern, but the match returned
// may depend on the UTF8 flag, so always use it when matching
// UTF8 text. E.g., "." will match one byte normally but with UTF8
// set may match up to three bytes of a multi-byte character.
//
// Example:
// pcrecpp::RE_Options options;
// options.set_utf8();
// pcrecpp::RE re(utf8_pattern, options);
// re.FullMatch(utf8_string);
//
// Example: using the convenience function UTF8():
// pcrecpp::RE re(utf8_pattern, pcrecpp::UTF8());
// re.FullMatch(utf8_string);
//
// NOTE: The UTF8 option is ignored if pcre was not configured with the
// --enable-utf8 flag.
//
// -----------------------------------------------------------------------
// PASSING MODIFIERS TO THE REGULAR EXPRESSION ENGINE
//
// PCRE defines some modifiers to change the behavior of the regular
// expression engine.
// The C++ wrapper defines an auxiliary class, RE_Options, as a vehicle
// to pass such modifiers to a RE class.
//
// Currently, the following modifiers are supported
//
// modifier description Perl corresponding
//
// PCRE_CASELESS case insensitive match /i
// PCRE_MULTILINE multiple lines match /m
// PCRE_DOTALL dot matches newlines /s
// PCRE_DOLLAR_ENDONLY $ matches only at end N/A
// PCRE_EXTRA strict escape parsing N/A
// PCRE_EXTENDED ignore whitespaces /x
// PCRE_UTF8 handles UTF8 chars built-in
// PCRE_UNGREEDY reverses * and *? N/A
// PCRE_NO_AUTO_CAPTURE disables matching parens N/A (*)
//
// (For a full account on how each modifier works, please check the
// PCRE API reference manual).
//
// (*) Both Perl and PCRE allow non matching parentheses by means of the
// "?:" modifier within the pattern itself. e.g. (?:ab|cd) does not
// capture, while (ab|cd) does.
//
// For each modifier, there are two member functions whose name is made
// out of the modifier in lowercase, without the "PCRE_" prefix. For
// instance, PCRE_CASELESS is handled by
// bool caseless(),
// which returns true if the modifier is set, and
// RE_Options & set_caseless(bool),
// which sets or unsets the modifier.
//
// Moreover, PCRE_EXTRA_MATCH_LIMIT can be accessed through the
// set_match_limit() and match_limit() member functions.
// Setting match_limit to a non-zero value will limit the executation of
// pcre to keep it from doing bad things like blowing the stack or taking
// an eternity to return a result. A value of 5000 is good enough to stop
// stack blowup in a 2MB thread stack. Setting match_limit to zero will
// disable match limiting. Alternately, you can set match_limit_recursion()
// which uses PCRE_EXTRA_MATCH_LIMIT_RECURSION to limit how much pcre
// recurses. match_limit() caps the number of matches pcre does;
// match_limit_recrusion() caps the depth of recursion.
//
// Normally, to pass one or more modifiers to a RE class, you declare
// a RE_Options object, set the appropriate options, and pass this
// object to a RE constructor. Example:
//
// RE_options opt;
// opt.set_caseless(true);
//
// if (RE("HELLO", opt).PartialMatch("hello world")) ...
//
// RE_options has two constructors. The default constructor takes no
// arguments and creates a set of flags that are off by default.
//
// The optional parameter 'option_flags' is to facilitate transfer
// of legacy code from C programs. This lets you do
// RE(pattern, RE_Options(PCRE_CASELESS|PCRE_MULTILINE)).PartialMatch(str);
//
// But new code is better off doing
// RE(pattern,
// RE_Options().set_caseless(true).set_multiline(true)).PartialMatch(str);
// (See below)
//
// If you are going to pass one of the most used modifiers, there are some
// convenience functions that return a RE_Options class with the
// appropriate modifier already set:
// CASELESS(), UTF8(), MULTILINE(), DOTALL(), EXTENDED()
//
// If you need to set several options at once, and you don't want to go
// through the pains of declaring a RE_Options object and setting several
// options, there is a parallel method that give you such ability on the
// fly. You can concatenate several set_xxxxx member functions, since each
// of them returns a reference to its class object. e.g.: to pass
// PCRE_CASELESS, PCRE_EXTENDED, and PCRE_MULTILINE to a RE with one
// statement, you may write
//
// RE(" ^ xyz \\s+ .* blah$", RE_Options()
// .set_caseless(true)
// .set_extended(true)
// .set_multiline(true)).PartialMatch(sometext);
//
// -----------------------------------------------------------------------
// SCANNING TEXT INCREMENTALLY
//
// The "Consume" operation may be useful if you want to repeatedly
// match regular expressions at the front of a string and skip over
// them as they match. This requires use of the "StringPiece" type,
// which represents a sub-range of a real string. Like RE, StringPiece
// is defined in the pcrecpp namespace.
//
// Example: read lines of the form "var = value" from a string.
// string contents = ...; // Fill string somehow
// pcrecpp::StringPiece input(contents); // Wrap in a StringPiece
//
// string var;
// int value;
// pcrecpp::RE re("(\\w+) = (\\d+)\n");
// while (re.Consume(&input, &var, &value)) {
// ...;
// }
//
// Each successful call to "Consume" will set "var/value", and also
// advance "input" so it points past the matched text.
//
// The "FindAndConsume" operation is similar to "Consume" but does not
// anchor your match at the beginning of the string. For example, you
// could extract all words from a string by repeatedly calling
// pcrecpp::RE("(\\w+)").FindAndConsume(&input, &word)
//
// -----------------------------------------------------------------------
// PARSING HEX/OCTAL/C-RADIX NUMBERS
//
// By default, if you pass a pointer to a numeric value, the
// corresponding text is interpreted as a base-10 number. You can
// instead wrap the pointer with a call to one of the operators Hex(),
// Octal(), or CRadix() to interpret the text in another base. The
// CRadix operator interprets C-style "0" (base-8) and "0x" (base-16)
// prefixes, but defaults to base-10.
//
// Example:
// int a, b, c, d;
// pcrecpp::RE re("(.*) (.*) (.*) (.*)");
// re.FullMatch("100 40 0100 0x40",
// pcrecpp::Octal(&a), pcrecpp::Hex(&b),
// pcrecpp::CRadix(&c), pcrecpp::CRadix(&d));
// will leave 64 in a, b, c, and d.
//
// -----------------------------------------------------------------------
// REPLACING PARTS OF STRINGS
//
// You can replace the first match of "pattern" in "str" with
// "rewrite". Within "rewrite", backslash-escaped digits (\1 to \9)
// can be used to insert text matching corresponding parenthesized
// group from the pattern. \0 in "rewrite" refers to the entire
// matching text. E.g.,
//
// string s = "yabba dabba doo";
// pcrecpp::RE("b+").Replace("d", &s);
//
// will leave "s" containing "yada dabba doo". The result is true if
// the pattern matches and a replacement occurs, or false otherwise.
//
// GlobalReplace() is like Replace(), except that it replaces all
// occurrences of the pattern in the string with the rewrite.
// Replacements are not subject to re-matching. E.g.,
//
// string s = "yabba dabba doo";
// pcrecpp::RE("b+").GlobalReplace("d", &s);
//
// will leave "s" containing "yada dada doo". It returns the number
// of replacements made.
//
// Extract() is like Replace(), except that if the pattern matches,
// "rewrite" is copied into "out" (an additional argument) with
// substitutions. The non-matching portions of "text" are ignored.
// Returns true iff a match occurred and the extraction happened
// successfully. If no match occurs, the string is left unaffected.
#include <string>
#include <pcre.h>
#include <pcrecpparg.h> // defines the Arg class
// This isn't technically needed here, but we include it
// anyway so folks who include pcrecpp.h don't have to.
#include <pcre_stringpiece.h>
namespace pcrecpp {
#define PCRE_SET_OR_CLEAR(b, o) \
if (b) all_options_ |= (o); else all_options_ &= ~(o); \
return *this
#define PCRE_IS_SET(o) \
(all_options_ & o) == o
/***** Compiling regular expressions: the RE class *****/
// RE_Options allow you to set options to be passed along to pcre,
// along with other options we put on top of pcre.
// Only 9 modifiers, plus match_limit and match_limit_recursion,
// are supported now.
class PCRECPP_EXP_DEFN RE_Options {
public:
// constructor
RE_Options() : match_limit_(0), match_limit_recursion_(0), all_options_(0) {}
// alternative constructor.
// To facilitate transfer of legacy code from C programs
//
// This lets you do
// RE(pattern, RE_Options(PCRE_CASELESS|PCRE_MULTILINE)).PartialMatch(str);
// But new code is better off doing
// RE(pattern,
// RE_Options().set_caseless(true).set_multiline(true)).PartialMatch(str);
RE_Options(int option_flags) : match_limit_(0), match_limit_recursion_(0),
all_options_(option_flags) {}
// we're fine with the default destructor, copy constructor, etc.
// accessors and mutators
int match_limit() const { return match_limit_; };
RE_Options &set_match_limit(int limit) {
match_limit_ = limit;
return *this;
}
int match_limit_recursion() const { return match_limit_recursion_; };
RE_Options &set_match_limit_recursion(int limit) {
match_limit_recursion_ = limit;
return *this;
}
bool caseless() const {
return PCRE_IS_SET(PCRE_CASELESS);
}
RE_Options &set_caseless(bool x) {
PCRE_SET_OR_CLEAR(x, PCRE_CASELESS);
}
bool multiline() const {
return PCRE_IS_SET(PCRE_MULTILINE);
}
RE_Options &set_multiline(bool x) {
PCRE_SET_OR_CLEAR(x, PCRE_MULTILINE);
}
bool dotall() const {
return PCRE_IS_SET(PCRE_DOTALL);
}
RE_Options &set_dotall(bool x) {
PCRE_SET_OR_CLEAR(x, PCRE_DOTALL);
}
bool extended() const {
return PCRE_IS_SET(PCRE_EXTENDED);
}
RE_Options &set_extended(bool x) {
PCRE_SET_OR_CLEAR(x, PCRE_EXTENDED);
}
bool dollar_endonly() const {
return PCRE_IS_SET(PCRE_DOLLAR_ENDONLY);
}
RE_Options &set_dollar_endonly(bool x) {
PCRE_SET_OR_CLEAR(x, PCRE_DOLLAR_ENDONLY);
}
bool extra() const {
return PCRE_IS_SET(PCRE_EXTRA);
}
RE_Options &set_extra(bool x) {
PCRE_SET_OR_CLEAR(x, PCRE_EXTRA);
}
bool ungreedy() const {
return PCRE_IS_SET(PCRE_UNGREEDY);
}
RE_Options &set_ungreedy(bool x) {
PCRE_SET_OR_CLEAR(x, PCRE_UNGREEDY);
}
bool utf8() const {
return PCRE_IS_SET(PCRE_UTF8);
}
RE_Options &set_utf8(bool x) {
PCRE_SET_OR_CLEAR(x, PCRE_UTF8);
}
bool no_auto_capture() const {
return PCRE_IS_SET(PCRE_NO_AUTO_CAPTURE);
}
RE_Options &set_no_auto_capture(bool x) {
PCRE_SET_OR_CLEAR(x, PCRE_NO_AUTO_CAPTURE);
}
RE_Options &set_all_options(int opt) {
all_options_ = opt;
return *this;
}
int all_options() const {
return all_options_ ;
}
// TODO: add other pcre flags
private:
int match_limit_;
int match_limit_recursion_;
int all_options_;
};
// These functions return some common RE_Options
static inline RE_Options UTF8() {
return RE_Options().set_utf8(true);
}
static inline RE_Options CASELESS() {
return RE_Options().set_caseless(true);
}
static inline RE_Options MULTILINE() {
return RE_Options().set_multiline(true);
}
static inline RE_Options DOTALL() {
return RE_Options().set_dotall(true);
}
static inline RE_Options EXTENDED() {
return RE_Options().set_extended(true);
}
// Interface for regular expression matching. Also corresponds to a
// pre-compiled regular expression. An "RE" object is safe for
// concurrent use by multiple threads.
class PCRECPP_EXP_DEFN RE {
public:
// We provide implicit conversions from strings so that users can
// pass in a string or a "const char*" wherever an "RE" is expected.
RE(const string& pat) { Init(pat, NULL); }
RE(const string& pat, const RE_Options& option) { Init(pat, &option); }
RE(const char* pat) { Init(pat, NULL); }
RE(const char* pat, const RE_Options& option) { Init(pat, &option); }
RE(const unsigned char* pat) {
Init(reinterpret_cast<const char*>(pat), NULL);
}
RE(const unsigned char* pat, const RE_Options& option) {
Init(reinterpret_cast<const char*>(pat), &option);
}
// Copy constructor & assignment - note that these are expensive
// because they recompile the expression.
RE(const RE& re) { Init(re.pattern_, &re.options_); }
const RE& operator=(const RE& re) {
if (this != &re) {
Cleanup();
// This is the code that originally came from Google
// Init(re.pattern_.c_str(), &re.options_);
// This is the replacement from Ari Pollak
Init(re.pattern_, &re.options_);
}
return *this;
}
~RE();
// The string specification for this RE. E.g.
// RE re("ab*c?d+");
// re.pattern(); // "ab*c?d+"
const string& pattern() const { return pattern_; }
// If RE could not be created properly, returns an error string.
// Else returns the empty string.
const string& error() const { return *error_; }
/***** The useful part: the matching interface *****/
// This is provided so one can do pattern.ReplaceAll() just as
// easily as ReplaceAll(pattern-text, ....)
bool FullMatch(const StringPiece& text,
const Arg& ptr1 = no_arg,
const Arg& ptr2 = no_arg,
const Arg& ptr3 = no_arg,
const Arg& ptr4 = no_arg,
const Arg& ptr5 = no_arg,
const Arg& ptr6 = no_arg,
const Arg& ptr7 = no_arg,
const Arg& ptr8 = no_arg,
const Arg& ptr9 = no_arg,
const Arg& ptr10 = no_arg,
const Arg& ptr11 = no_arg,
const Arg& ptr12 = no_arg,
const Arg& ptr13 = no_arg,
const Arg& ptr14 = no_arg,
const Arg& ptr15 = no_arg,
const Arg& ptr16 = no_arg) const;
bool PartialMatch(const StringPiece& text,
const Arg& ptr1 = no_arg,
const Arg& ptr2 = no_arg,
const Arg& ptr3 = no_arg,
const Arg& ptr4 = no_arg,
const Arg& ptr5 = no_arg,
const Arg& ptr6 = no_arg,
const Arg& ptr7 = no_arg,
const Arg& ptr8 = no_arg,
const Arg& ptr9 = no_arg,
const Arg& ptr10 = no_arg,
const Arg& ptr11 = no_arg,
const Arg& ptr12 = no_arg,
const Arg& ptr13 = no_arg,
const Arg& ptr14 = no_arg,
const Arg& ptr15 = no_arg,
const Arg& ptr16 = no_arg) const;
bool Consume(StringPiece* input,
const Arg& ptr1 = no_arg,
const Arg& ptr2 = no_arg,
const Arg& ptr3 = no_arg,
const Arg& ptr4 = no_arg,
const Arg& ptr5 = no_arg,
const Arg& ptr6 = no_arg,
const Arg& ptr7 = no_arg,
const Arg& ptr8 = no_arg,
const Arg& ptr9 = no_arg,
const Arg& ptr10 = no_arg,
const Arg& ptr11 = no_arg,
const Arg& ptr12 = no_arg,
const Arg& ptr13 = no_arg,
const Arg& ptr14 = no_arg,
const Arg& ptr15 = no_arg,
const Arg& ptr16 = no_arg) const;
bool FindAndConsume(StringPiece* input,
const Arg& ptr1 = no_arg,
const Arg& ptr2 = no_arg,
const Arg& ptr3 = no_arg,
const Arg& ptr4 = no_arg,
const Arg& ptr5 = no_arg,
const Arg& ptr6 = no_arg,
const Arg& ptr7 = no_arg,
const Arg& ptr8 = no_arg,
const Arg& ptr9 = no_arg,
const Arg& ptr10 = no_arg,
const Arg& ptr11 = no_arg,
const Arg& ptr12 = no_arg,
const Arg& ptr13 = no_arg,
const Arg& ptr14 = no_arg,
const Arg& ptr15 = no_arg,
const Arg& ptr16 = no_arg) const;
bool Replace(const StringPiece& rewrite,
string *str) const;
int GlobalReplace(const StringPiece& rewrite,
string *str) const;
bool Extract(const StringPiece &rewrite,
const StringPiece &text,
string *out) const;
// Escapes all potentially meaningful regexp characters in
// 'unquoted'. The returned string, used as a regular expression,
// will exactly match the original string. For example,
// 1.5-2.0?
// may become:
// 1\.5\-2\.0\?
static string QuoteMeta(const StringPiece& unquoted);
/***** Generic matching interface *****/
// Type of match (TODO: Should be restructured as part of RE_Options)
enum Anchor {
UNANCHORED, // No anchoring
ANCHOR_START, // Anchor at start only
ANCHOR_BOTH // Anchor at start and end
};
// General matching routine. Stores the length of the match in
// "*consumed" if successful.
bool DoMatch(const StringPiece& text,
Anchor anchor,
int* consumed,
const Arg* const* args, int n) const;
// Return the number of capturing subpatterns, or -1 if the
// regexp wasn't valid on construction.
int NumberOfCapturingGroups() const;
// The default value for an argument, to indicate no arg was passed in
static Arg no_arg;
private:
void Init(const string& pattern, const RE_Options* options);
void Cleanup();
// Match against "text", filling in "vec" (up to "vecsize" * 2/3) with
// pairs of integers for the beginning and end positions of matched
// text. The first pair corresponds to the entire matched text;
// subsequent pairs correspond, in order, to parentheses-captured
// matches. Returns the number of pairs (one more than the number of
// the last subpattern with a match) if matching was successful
// and zero if the match failed.
// I.e. for RE("(foo)|(bar)|(baz)") it will return 2, 3, and 4 when matching
// against "foo", "bar", and "baz" respectively.
// When matching RE("(foo)|hello") against "hello", it will return 1.
// But the values for all subpattern are filled in into "vec".
int TryMatch(const StringPiece& text,
int startpos,
Anchor anchor,
int *vec,
int vecsize) const;
// Append the "rewrite" string, with backslash subsitutions from "text"
// and "vec", to string "out".
bool Rewrite(string *out,
const StringPiece& rewrite,
const StringPiece& text,
int *vec,
int veclen) const;
// internal implementation for DoMatch
bool DoMatchImpl(const StringPiece& text,
Anchor anchor,
int* consumed,
const Arg* const args[],
int n,
int* vec,
int vecsize) const;
// Compile the regexp for the specified anchoring mode
pcre* Compile(Anchor anchor);
string pattern_;
RE_Options options_;
pcre* re_full_; // For full matches
pcre* re_partial_; // For partial matches
const string* error_; // Error indicator (or points to empty string)
};
} // namespace pcrecpp
#endif /* _PCRECPP_H */

View file

@ -0,0 +1,68 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/*
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
#ifndef PCRECPP_INTERNAL_H
#define PCRECPP_INTERNAL_H
/* When compiling a DLL for Windows, the exported symbols have to be declared
using some MS magic. I found some useful information on this web page:
http://msdn2.microsoft.com/en-us/library/y4h7bcy6(VS.80).aspx. According to the
information there, using __declspec(dllexport) without "extern" we have a
definition; with "extern" we have a declaration. The settings here override the
setting in pcre.h. We use:
PCRECPP_EXP_DECL for declarations
PCRECPP_EXP_DEFN for definitions of exported functions
*/
#ifndef PCRECPP_EXP_DECL
# ifdef _WIN32
# ifndef PCRE_STATIC
# define PCRECPP_EXP_DECL extern __declspec(dllexport)
# define PCRECPP_EXP_DEFN __declspec(dllexport)
# else
# define PCRECPP_EXP_DECL extern
# define PCRECPP_EXP_DEFN
# endif
# else
# define PCRECPP_EXP_DECL extern
# define PCRECPP_EXP_DEFN
# endif
#endif
#endif /* PCRECPP_INTERNAL_H */
/* End of pcrecpp_internal.h */

View file

@ -0,0 +1,174 @@
// Copyright (c) 2005, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: Sanjay Ghemawat
#ifndef _PCRECPPARG_H
#define _PCRECPPARG_H
#include <stdlib.h> // for NULL
#include <string>
#include <pcre.h>
namespace pcrecpp {
class StringPiece;
// Hex/Octal/Binary?
// Special class for parsing into objects that define a ParseFrom() method
template <class T>
class _RE_MatchObject {
public:
static inline bool Parse(const char* str, int n, void* dest) {
if (dest == NULL) return true;
T* object = reinterpret_cast<T*>(dest);
return object->ParseFrom(str, n);
}
};
class PCRECPP_EXP_DEFN Arg {
public:
// Empty constructor so we can declare arrays of Arg
Arg();
// Constructor specially designed for NULL arguments
Arg(void*);
typedef bool (*Parser)(const char* str, int n, void* dest);
// Type-specific parsers
#define PCRE_MAKE_PARSER(type,name) \
Arg(type* p) : arg_(p), parser_(name) { } \
Arg(type* p, Parser parser) : arg_(p), parser_(parser) { }
PCRE_MAKE_PARSER(char, parse_char);
PCRE_MAKE_PARSER(unsigned char, parse_uchar);
PCRE_MAKE_PARSER(short, parse_short);
PCRE_MAKE_PARSER(unsigned short, parse_ushort);
PCRE_MAKE_PARSER(int, parse_int);
PCRE_MAKE_PARSER(unsigned int, parse_uint);
PCRE_MAKE_PARSER(long, parse_long);
PCRE_MAKE_PARSER(unsigned long, parse_ulong);
#if 1
PCRE_MAKE_PARSER(long long, parse_longlong);
#endif
#if 1
PCRE_MAKE_PARSER(unsigned long long, parse_ulonglong);
#endif
PCRE_MAKE_PARSER(float, parse_float);
PCRE_MAKE_PARSER(double, parse_double);
PCRE_MAKE_PARSER(std::string, parse_string);
PCRE_MAKE_PARSER(StringPiece, parse_stringpiece);
#undef PCRE_MAKE_PARSER
// Generic constructor
template <class T> Arg(T*, Parser parser);
// Generic constructor template
template <class T> Arg(T* p)
: arg_(p), parser_(_RE_MatchObject<T>::Parse) {
}
// Parse the data
bool Parse(const char* str, int n) const;
private:
void* arg_;
Parser parser_;
static bool parse_null (const char* str, int n, void* dest);
static bool parse_char (const char* str, int n, void* dest);
static bool parse_uchar (const char* str, int n, void* dest);
static bool parse_float (const char* str, int n, void* dest);
static bool parse_double (const char* str, int n, void* dest);
static bool parse_string (const char* str, int n, void* dest);
static bool parse_stringpiece (const char* str, int n, void* dest);
#define PCRE_DECLARE_INTEGER_PARSER(name) \
private: \
static bool parse_ ## name(const char* str, int n, void* dest); \
static bool parse_ ## name ## _radix( \
const char* str, int n, void* dest, int radix); \
public: \
static bool parse_ ## name ## _hex(const char* str, int n, void* dest); \
static bool parse_ ## name ## _octal(const char* str, int n, void* dest); \
static bool parse_ ## name ## _cradix(const char* str, int n, void* dest)
PCRE_DECLARE_INTEGER_PARSER(short);
PCRE_DECLARE_INTEGER_PARSER(ushort);
PCRE_DECLARE_INTEGER_PARSER(int);
PCRE_DECLARE_INTEGER_PARSER(uint);
PCRE_DECLARE_INTEGER_PARSER(long);
PCRE_DECLARE_INTEGER_PARSER(ulong);
PCRE_DECLARE_INTEGER_PARSER(longlong);
PCRE_DECLARE_INTEGER_PARSER(ulonglong);
#undef PCRE_DECLARE_INTEGER_PARSER
};
inline Arg::Arg() : arg_(NULL), parser_(parse_null) { }
inline Arg::Arg(void* p) : arg_(p), parser_(parse_null) { }
inline bool Arg::Parse(const char* str, int n) const {
return (*parser_)(str, n, arg_);
}
// This part of the parser, appropriate only for ints, deals with bases
#define MAKE_INTEGER_PARSER(type, name) \
inline Arg Hex(type* ptr) { \
return Arg(ptr, Arg::parse_ ## name ## _hex); } \
inline Arg Octal(type* ptr) { \
return Arg(ptr, Arg::parse_ ## name ## _octal); } \
inline Arg CRadix(type* ptr) { \
return Arg(ptr, Arg::parse_ ## name ## _cradix); }
MAKE_INTEGER_PARSER(short, short) /* */
MAKE_INTEGER_PARSER(unsigned short, ushort) /* */
MAKE_INTEGER_PARSER(int, int) /* Don't use semicolons */
MAKE_INTEGER_PARSER(unsigned int, uint) /* after these statement */
MAKE_INTEGER_PARSER(long, long) /* because they can cause */
MAKE_INTEGER_PARSER(unsigned long, ulong) /* compiler warnings if */
#if 1 /* the checking level is */
MAKE_INTEGER_PARSER(long long, longlong) /* turned up high enough. */
#endif /* */
#if 1 /* */
MAKE_INTEGER_PARSER(unsigned long long, ulonglong) /* */
#endif
#undef PCRE_IS_SET
#undef PCRE_SET_OR_CLEAR
#undef MAKE_INTEGER_PARSER
} // namespace pcrecpp
#endif /* _PCRECPPARG_H */

142
Engine/lib/pcre/pcreposix.h Normal file
View file

@ -0,0 +1,142 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
#ifndef _PCREPOSIX_H
#define _PCREPOSIX_H
/* This is the header for the POSIX wrapper interface to the PCRE Perl-
Compatible Regular Expression library. It defines the things POSIX says should
be there. I hope.
Copyright (c) 1997-2008 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* Have to include stdlib.h in order to ensure that size_t is defined. */
#include <stdlib.h>
/* Allow for C++ users */
#ifdef __cplusplus
extern "C" {
#endif
/* Options, mostly defined by POSIX, but with a couple of extras. */
#define REG_ICASE 0x0001
#define REG_NEWLINE 0x0002
#define REG_NOTBOL 0x0004
#define REG_NOTEOL 0x0008
#define REG_DOTALL 0x0010 /* NOT defined by POSIX. */
#define REG_NOSUB 0x0020
#define REG_UTF8 0x0040 /* NOT defined by POSIX. */
/* This is not used by PCRE, but by defining it we make it easier
to slot PCRE into existing programs that make POSIX calls. */
#define REG_EXTENDED 0
/* Error values. Not all these are relevant or used by the wrapper. */
enum {
REG_ASSERT = 1, /* internal error ? */
REG_BADBR, /* invalid repeat counts in {} */
REG_BADPAT, /* pattern error */
REG_BADRPT, /* ? * + invalid */
REG_EBRACE, /* unbalanced {} */
REG_EBRACK, /* unbalanced [] */
REG_ECOLLATE, /* collation error - not relevant */
REG_ECTYPE, /* bad class */
REG_EESCAPE, /* bad escape sequence */
REG_EMPTY, /* empty expression */
REG_EPAREN, /* unbalanced () */
REG_ERANGE, /* bad range inside [] */
REG_ESIZE, /* expression too big */
REG_ESPACE, /* failed to get memory */
REG_ESUBREG, /* bad back reference */
REG_INVARG, /* bad argument */
REG_NOMATCH /* match failed */
};
/* The structure representing a compiled regular expression. */
typedef struct {
void *re_pcre;
size_t re_nsub;
size_t re_erroffset;
} regex_t;
/* The structure in which a captured offset is returned. */
typedef int regoff_t;
typedef struct {
regoff_t rm_so;
regoff_t rm_eo;
} regmatch_t;
/* When an application links to a PCRE DLL in Windows, the symbols that are
imported have to be identified as such. When building PCRE, the appropriate
export settings are needed, and are set in pcreposix.c before including this
file. */
#if defined(_WIN32) && !defined(PCRE_STATIC) && !defined(PCREPOSIX_EXP_DECL)
# define PCREPOSIX_EXP_DECL extern __declspec(dllimport)
# define PCREPOSIX_EXP_DEFN __declspec(dllimport)
#endif
/* By default, we use the standard "extern" declarations. */
#ifndef PCREPOSIX_EXP_DECL
# ifdef __cplusplus
# define PCREPOSIX_EXP_DECL extern "C"
# define PCREPOSIX_EXP_DEFN extern "C"
# else
# define PCREPOSIX_EXP_DECL extern
# define PCREPOSIX_EXP_DEFN extern
# endif
#endif
/* The functions */
PCREPOSIX_EXP_DECL int regcomp(regex_t *, const char *, int);
PCREPOSIX_EXP_DECL int regexec(const regex_t *, const char *, size_t,
regmatch_t *, int);
PCREPOSIX_EXP_DECL size_t regerror(int, const regex_t *, char *, size_t);
PCREPOSIX_EXP_DECL void regfree(regex_t *);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* End of pcreposix.h */

133
Engine/lib/pcre/ucp.h Normal file
View file

@ -0,0 +1,133 @@
/*************************************************
* Unicode Property Table handler *
*************************************************/
#ifndef _UCP_H
#define _UCP_H
/* This file contains definitions of the property values that are returned by
the function _pcre_ucp_findprop(). New values that are added for new releases
of Unicode should always be at the end of each enum, for backwards
compatibility. */
/* These are the general character categories. */
enum {
ucp_C, /* Other */
ucp_L, /* Letter */
ucp_M, /* Mark */
ucp_N, /* Number */
ucp_P, /* Punctuation */
ucp_S, /* Symbol */
ucp_Z /* Separator */
};
/* These are the particular character types. */
enum {
ucp_Cc, /* Control */
ucp_Cf, /* Format */
ucp_Cn, /* Unassigned */
ucp_Co, /* Private use */
ucp_Cs, /* Surrogate */
ucp_Ll, /* Lower case letter */
ucp_Lm, /* Modifier letter */
ucp_Lo, /* Other letter */
ucp_Lt, /* Title case letter */
ucp_Lu, /* Upper case letter */
ucp_Mc, /* Spacing mark */
ucp_Me, /* Enclosing mark */
ucp_Mn, /* Non-spacing mark */
ucp_Nd, /* Decimal number */
ucp_Nl, /* Letter number */
ucp_No, /* Other number */
ucp_Pc, /* Connector punctuation */
ucp_Pd, /* Dash punctuation */
ucp_Pe, /* Close punctuation */
ucp_Pf, /* Final punctuation */
ucp_Pi, /* Initial punctuation */
ucp_Po, /* Other punctuation */
ucp_Ps, /* Open punctuation */
ucp_Sc, /* Currency symbol */
ucp_Sk, /* Modifier symbol */
ucp_Sm, /* Mathematical symbol */
ucp_So, /* Other symbol */
ucp_Zl, /* Line separator */
ucp_Zp, /* Paragraph separator */
ucp_Zs /* Space separator */
};
/* These are the script identifications. */
enum {
ucp_Arabic,
ucp_Armenian,
ucp_Bengali,
ucp_Bopomofo,
ucp_Braille,
ucp_Buginese,
ucp_Buhid,
ucp_Canadian_Aboriginal,
ucp_Cherokee,
ucp_Common,
ucp_Coptic,
ucp_Cypriot,
ucp_Cyrillic,
ucp_Deseret,
ucp_Devanagari,
ucp_Ethiopic,
ucp_Georgian,
ucp_Glagolitic,
ucp_Gothic,
ucp_Greek,
ucp_Gujarati,
ucp_Gurmukhi,
ucp_Han,
ucp_Hangul,
ucp_Hanunoo,
ucp_Hebrew,
ucp_Hiragana,
ucp_Inherited,
ucp_Kannada,
ucp_Katakana,
ucp_Kharoshthi,
ucp_Khmer,
ucp_Lao,
ucp_Latin,
ucp_Limbu,
ucp_Linear_B,
ucp_Malayalam,
ucp_Mongolian,
ucp_Myanmar,
ucp_New_Tai_Lue,
ucp_Ogham,
ucp_Old_Italic,
ucp_Old_Persian,
ucp_Oriya,
ucp_Osmanya,
ucp_Runic,
ucp_Shavian,
ucp_Sinhala,
ucp_Syloti_Nagri,
ucp_Syriac,
ucp_Tagalog,
ucp_Tagbanwa,
ucp_Tai_Le,
ucp_Tamil,
ucp_Telugu,
ucp_Thaana,
ucp_Thai,
ucp_Tibetan,
ucp_Tifinagh,
ucp_Ugaritic,
ucp_Yi,
ucp_Balinese, /* New for Unicode 5.0.0 */
ucp_Cuneiform, /* New for Unicode 5.0.0 */
ucp_Nko, /* New for Unicode 5.0.0 */
ucp_Phags_Pa, /* New for Unicode 5.0.0 */
ucp_Phoenician /* New for Unicode 5.0.0 */
};
#endif
/* End of ucp.h */

View file

@ -0,0 +1,92 @@
/*************************************************
* Unicode Property Table handler *
*************************************************/
#ifndef _UCPINTERNAL_H
#define _UCPINTERNAL_H
/* Internal header file defining the layout of the bits in each pair of 32-bit
words that form a data item in the table. */
typedef struct cnode {
pcre_uint32 f0;
pcre_uint32 f1;
} cnode;
/* Things for the f0 field */
#define f0_scriptmask 0xff000000 /* Mask for script field */
#define f0_scriptshift 24 /* Shift for script value */
#define f0_rangeflag 0x00f00000 /* Flag for a range item */
#define f0_charmask 0x001fffff /* Mask for code point value */
/* Things for the f1 field */
#define f1_typemask 0xfc000000 /* Mask for char type field */
#define f1_typeshift 26 /* Shift for the type field */
#define f1_rangemask 0x0000ffff /* Mask for a range offset */
#define f1_casemask 0x0000ffff /* Mask for a case offset */
#define f1_caseneg 0xffff8000 /* Bits for negation */
/* The data consists of a vector of structures of type cnode. The two unsigned
32-bit integers are used as follows:
(f0) (1) The most significant byte holds the script number. The numbers are
defined by the enum in ucp.h.
(2) The 0x00800000 bit is set if this entry defines a range of characters.
It is not set if this entry defines a single character
(3) The 0x00600000 bits are spare.
(4) The 0x001fffff bits contain the code point. No Unicode code point will
ever be greater than 0x0010ffff, so this should be OK for ever.
(f1) (1) The 0xfc000000 bits contain the character type number. The numbers are
defined by an enum in ucp.h.
(2) The 0x03ff0000 bits are spare.
(3) The 0x0000ffff bits contain EITHER the unsigned offset to the top of
range if this entry defines a range, OR the *signed* offset to the
character's "other case" partner if this entry defines a single
character. There is no partner if the value is zero.
-------------------------------------------------------------------------------
| script (8) |.|.|.| codepoint (21) || type (6) |.|.| spare (8) | offset (16) |
-------------------------------------------------------------------------------
| | | | |
| | |-> spare | |-> spare
| | |
| |-> spare |-> spare
|
|-> range flag
The upper/lower casing information is set only for characters that come in
pairs. The non-one-to-one mappings in the Unicode data are ignored.
When searching the data, proceed as follows:
(1) Set up for a binary chop search.
(2) If the top is not greater than the bottom, the character is not in the
table. Its type must therefore be "Cn" ("Undefined").
(3) Find the middle vector element.
(4) Extract the code point and compare. If equal, we are done.
(5) If the test character is smaller, set the top to the current point, and
goto (2).
(6) If the current entry defines a range, compute the last character by adding
the offset, and see if the test character is within the range. If it is,
we are done.
(7) Otherwise, set the bottom to one element past the current point and goto
(2).
*/
#endif /* _UCPINTERNAL_H */
/* End of ucpinternal.h */

3088
Engine/lib/pcre/ucptable.h Normal file

File diff suppressed because it is too large Load diff