mirror of
https://github.com/TorqueGameEngines/Torque3D.git
synced 2026-04-29 16:25:42 +00:00
Engine directory for ticket #1
This commit is contained in:
parent
352279af7a
commit
7dbfe6994d
3795 changed files with 1363358 additions and 0 deletions
5
Engine/lib/pcre/changes.txt
Normal file
5
Engine/lib/pcre/changes.txt
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
Don't use bits/type_traits.h on Linux - Andrew Galante, GG 8/2/2009: pcre_stringpiece.h
|
||||
|
||||
Both Mac and Linux support strtoq - Andrew Galante, GG 8/2/2009: config.h
|
||||
|
||||
Neither Mac nor Linux support _strtoi64 - Andrew Galante, GG 8/2/2009: config.h
|
||||
251
Engine/lib/pcre/config.h
Normal file
251
Engine/lib/pcre/config.h
Normal file
|
|
@ -0,0 +1,251 @@
|
|||
/* config.h. Generated from config.h.in by configure. */
|
||||
/* config.h.in. Generated from configure.ac by autoheader. */
|
||||
|
||||
|
||||
/* On Unix-like systems config.h.in is converted by "configure" into config.h.
|
||||
Some other environments also support the use of "configure". PCRE is written in
|
||||
Standard C, but there are a few non-standard things it can cope with, allowing
|
||||
it to run on SunOS4 and other "close to standard" systems.
|
||||
|
||||
If you are going to build PCRE "by hand" on a system without "configure" you
|
||||
should copy the distributed config.h.generic to config.h, and then set up the
|
||||
macro definitions the way you need them. You must then add -DHAVE_CONFIG_H to
|
||||
all of your compile commands, so that config.h is included at the start of
|
||||
every source.
|
||||
|
||||
Alternatively, you can avoid editing by using -D on the compiler command line
|
||||
to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H.
|
||||
|
||||
PCRE uses memmove() if HAVE_MEMMOVE is set to 1; otherwise it uses bcopy() if
|
||||
HAVE_BCOPY is set to 1. If your system has neither bcopy() nor memmove(), set
|
||||
them both to 0; an emulation function will be used. */
|
||||
|
||||
/* By default, the \R escape sequence matches any Unicode line ending
|
||||
character or sequence of characters. If BSR_ANYCRLF is defined, this is
|
||||
changed so that backslash-R matches only CR, LF, or CRLF. The build-time
|
||||
default can be overridden by the user of PCRE at runtime. On systems that
|
||||
support it, "configure" can be used to override the default. */
|
||||
/* #undef BSR_ANYCRLF */
|
||||
|
||||
/* If you are compiling for a system that uses EBCDIC instead of ASCII
|
||||
character codes, define this macro as 1. On systems that can use
|
||||
"configure", this can be done via --enable-ebcdic. */
|
||||
/* #undef EBCDIC */
|
||||
|
||||
/* Define to 1 if you have the `bcopy' function. */
|
||||
/* #undef HAVE_BCOPY */
|
||||
|
||||
/* Define to 1 if you have the <bits/type_traits.h> header file. */
|
||||
#define HAVE_BITS_TYPE_TRAITS_H 1
|
||||
|
||||
/* Define to 1 if you have the <bzlib.h> header file. */
|
||||
/* #undef HAVE_BZLIB_H */
|
||||
|
||||
/* Define to 1 if you have the <dirent.h> header file. */
|
||||
#define HAVE_DIRENT_H 1
|
||||
|
||||
/* Define to 1 if you have the <dlfcn.h> header file. */
|
||||
/* #undef HAVE_DLFCN_H */
|
||||
|
||||
/* Define to 1 if you have the <inttypes.h> header file. */
|
||||
#define HAVE_INTTYPES_H 1
|
||||
|
||||
/* Define to 1 if you have the <limits.h> header file. */
|
||||
#define HAVE_LIMITS_H 1
|
||||
|
||||
/* Define to 1 if the system has the type `long long'. */
|
||||
#define HAVE_LONG_LONG 1
|
||||
|
||||
/* Define to 1 if you have the `memmove' function. */
|
||||
#define HAVE_MEMMOVE 1
|
||||
|
||||
/* Define to 1 if you have the <memory.h> header file. */
|
||||
#define HAVE_MEMORY_H 1
|
||||
|
||||
/* Define to 1 if you have the <readline/history.h> header file. */
|
||||
/* #undef HAVE_READLINE_HISTORY_H */
|
||||
|
||||
/* Define to 1 if you have the <readline/readline.h> header file. */
|
||||
/* #undef HAVE_READLINE_READLINE_H */
|
||||
|
||||
/* Define to 1 if you have the <stdint.h> header file. */
|
||||
#define HAVE_STDINT_H 1
|
||||
|
||||
/* Define to 1 if you have the <stdlib.h> header file. */
|
||||
#define HAVE_STDLIB_H 1
|
||||
|
||||
/* Define to 1 if you have the `strerror' function. */
|
||||
#define HAVE_STRERROR 1
|
||||
|
||||
/* Define to 1 if you have the <string> header file. */
|
||||
#define HAVE_STRING 1
|
||||
|
||||
/* Define to 1 if you have the <strings.h> header file. */
|
||||
#define HAVE_STRINGS_H 1
|
||||
|
||||
/* Define to 1 if you have the <string.h> header file. */
|
||||
#define HAVE_STRING_H 1
|
||||
|
||||
/* Define to 1 if you have the `strtoll' function. */
|
||||
#if defined(SN_TARGET_PS3)
|
||||
#define HAVE_STRTOLL 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the `strtoq' function. */
|
||||
/* Both Mac and Linux support strtoq - Andrew Galante, GG 8/2/2009 */
|
||||
#ifdef __GNUC__
|
||||
#define HAVE_STRTOQ 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the <sys/stat.h> header file. */
|
||||
#define HAVE_SYS_STAT_H 1
|
||||
|
||||
/* Define to 1 if you have the <sys/types.h> header file. */
|
||||
#define HAVE_SYS_TYPES_H 1
|
||||
|
||||
/* Define to 1 if you have the <type_traits.h> header file. */
|
||||
/* #undef HAVE_TYPE_TRAITS_H */
|
||||
|
||||
/* Define to 1 if you have the <unistd.h> header file. */
|
||||
#define HAVE_UNISTD_H 1
|
||||
|
||||
/* Define to 1 if the system has the type `unsigned long long'. */
|
||||
#define HAVE_UNSIGNED_LONG_LONG 1
|
||||
|
||||
/* Define to 1 if you have the <windows.h> header file. */
|
||||
#define HAVE_WINDOWS_H 1
|
||||
|
||||
/* Define to 1 if you have the <zlib.h> header file. */
|
||||
/* #undef HAVE_ZLIB_H */
|
||||
|
||||
/* Define to 1 if you have the `_strtoi64' function. */
|
||||
/* Neither Mac nor Linux support _strtoi64 - Andrew Galante, GG 8/2/2009 */
|
||||
#ifndef __GNUC__
|
||||
#define HAVE__STRTOI64 1
|
||||
#endif
|
||||
|
||||
/* The value of LINK_SIZE determines the number of bytes used to store links
|
||||
as offsets within the compiled regex. The default is 2, which allows for
|
||||
compiled patterns up to 64K long. This covers the vast majority of cases.
|
||||
However, PCRE can also be compiled to use 3 or 4 bytes instead. This allows
|
||||
for longer patterns in extreme cases. On systems that support it,
|
||||
"configure" can be used to override this default. */
|
||||
#define LINK_SIZE 2
|
||||
|
||||
/* The value of MATCH_LIMIT determines the default number of times the
|
||||
internal match() function can be called during a single execution of
|
||||
pcre_exec(). There is a runtime interface for setting a different limit.
|
||||
The limit exists in order to catch runaway regular expressions that take
|
||||
for ever to determine that they do not match. The default is set very large
|
||||
so that it does not accidentally catch legitimate cases. On systems that
|
||||
support it, "configure" can be used to override this default. */
|
||||
#define MATCH_LIMIT 10000000
|
||||
|
||||
/* The above limit applies to all calls of match(), whether or not they
|
||||
increase the recursion depth. In some environments it is desirable to limit
|
||||
the depth of recursive calls of match() more strictly, in order to restrict
|
||||
the maximum amount of stack (or heap, if NO_RECURSE is defined) that is
|
||||
used. The value of MATCH_LIMIT_RECURSION applies only to recursive calls of
|
||||
match(). To have any useful effect, it must be less than the value of
|
||||
MATCH_LIMIT. The default is to use the same value as MATCH_LIMIT. There is
|
||||
a runtime method for setting a different limit. On systems that support it,
|
||||
"configure" can be used to override the default. */
|
||||
#define MATCH_LIMIT_RECURSION MATCH_LIMIT
|
||||
|
||||
/* This limit is parameterized just in case anybody ever wants to change it.
|
||||
Care must be taken if it is increased, because it guards against integer
|
||||
overflow caused by enormously large patterns. */
|
||||
#define MAX_NAME_COUNT 10000
|
||||
|
||||
/* This limit is parameterized just in case anybody ever wants to change it.
|
||||
Care must be taken if it is increased, because it guards against integer
|
||||
overflow caused by enormously large patterns. */
|
||||
#define MAX_NAME_SIZE 32
|
||||
|
||||
/* The value of NEWLINE determines the newline character sequence. On systems
|
||||
that support it, "configure" can be used to override the default, which is
|
||||
10. The possible values are 10 (LF), 13 (CR), 3338 (CRLF), -1 (ANY), or -2
|
||||
(ANYCRLF). */
|
||||
#define NEWLINE 10
|
||||
|
||||
/* PCRE uses recursive function calls to handle backtracking while matching.
|
||||
This can sometimes be a problem on systems that have stacks of limited
|
||||
size. Define NO_RECURSE to get a version that doesn't use recursion in the
|
||||
match() function; instead it creates its own stack by steam using
|
||||
pcre_recurse_malloc() to obtain memory from the heap. For more detail, see
|
||||
the comments and other stuff just above the match() function. On systems
|
||||
that support it, "configure" can be used to set this in the Makefile (use
|
||||
--disable-stack-for-recursion). */
|
||||
/* #undef NO_RECURSE */
|
||||
|
||||
/* Name of package */
|
||||
#define PACKAGE "pcre"
|
||||
|
||||
/* Define to the address where bug reports for this package should be sent. */
|
||||
#define PACKAGE_BUGREPORT ""
|
||||
|
||||
/* Define to the full name of this package. */
|
||||
#define PACKAGE_NAME "PCRE"
|
||||
|
||||
/* Define to the full name and version of this package. */
|
||||
#define PACKAGE_STRING "PCRE 7.6"
|
||||
|
||||
/* Define to the one symbol short name of this package. */
|
||||
#define PACKAGE_TARNAME "pcre"
|
||||
|
||||
/* Define to the version of this package. */
|
||||
#define PACKAGE_VERSION "7.6"
|
||||
|
||||
|
||||
/* If you are compiling for a system other than a Unix-like system or
|
||||
Win32, and it needs some magic to be inserted before the definition
|
||||
of a function that is exported by the library, define this macro to
|
||||
contain the relevant magic. If you do not define this macro, it
|
||||
defaults to "extern" for a C compiler and "extern C" for a C++
|
||||
compiler on non-Win32 systems. This macro appears at the start of
|
||||
every exported function that is part of the external API. It does
|
||||
not appear on functions that are "external" in the C sense, but
|
||||
which are internal to the library. */
|
||||
/* #undef PCRE_EXP_DEFN */
|
||||
|
||||
/* Define if linking statically (TODO: make nice with Libtool) */
|
||||
#define PCRE_STATIC 1
|
||||
|
||||
/* When calling PCRE via the POSIX interface, additional working storage is
|
||||
required for holding the pointers to capturing substrings because PCRE
|
||||
requires three integers per substring, whereas the POSIX interface provides
|
||||
only two. If the number of expected substrings is small, the wrapper
|
||||
function uses space on the stack, because this is faster than using
|
||||
malloc() for each call. The threshold above which the stack is no longer
|
||||
used is defined by POSIX_MALLOC_THRESHOLD. On systems that support it,
|
||||
"configure" can be used to override this default. */
|
||||
#define POSIX_MALLOC_THRESHOLD 10
|
||||
|
||||
/* Define to 1 if you have the ANSI C header files. */
|
||||
#define STDC_HEADERS 1
|
||||
|
||||
/* Define to allow pcregrep to be linked with libbz2, so that it is able to
|
||||
handle .bz2 files. */
|
||||
/* #undef SUPPORT_LIBBZ2 */
|
||||
|
||||
/* Define to allow pcretest to be linked with libreadline. */
|
||||
/* #undef SUPPORT_LIBREADLINE */
|
||||
|
||||
/* Define to allow pcregrep to be linked with libz, so that it is able to
|
||||
handle .gz files. */
|
||||
/* #undef SUPPORT_LIBZ */
|
||||
|
||||
/* Define to enable support for Unicode properties */
|
||||
/* #undef SUPPORT_UCP */
|
||||
|
||||
/* Define to enable support for the UTF-8 Unicode encoding. */
|
||||
/* #undef SUPPORT_UTF8 */
|
||||
|
||||
/* Version number of package */
|
||||
#define VERSION "7.6"
|
||||
|
||||
/* Define to empty if `const' does not conform to ANSI C. */
|
||||
/* #undef const */
|
||||
|
||||
/* Define to `unsigned int' if <sys/types.h> does not define. */
|
||||
/* #undef size_t */
|
||||
BIN
Engine/lib/pcre/lib/mac/libpcre.a
Normal file
BIN
Engine/lib/pcre/lib/mac/libpcre.a
Normal file
Binary file not shown.
BIN
Engine/lib/pcre/lib/mac/libpcrecpp.a
Normal file
BIN
Engine/lib/pcre/lib/mac/libpcrecpp.a
Normal file
Binary file not shown.
BIN
Engine/lib/pcre/lib/mingw/libpcre.a
Normal file
BIN
Engine/lib/pcre/lib/mingw/libpcre.a
Normal file
Binary file not shown.
BIN
Engine/lib/pcre/lib/mingw/libpcrecpp.a
Normal file
BIN
Engine/lib/pcre/lib/mingw/libpcrecpp.a
Normal file
Binary file not shown.
BIN
Engine/lib/pcre/lib/vc8/pcre-d.lib
Normal file
BIN
Engine/lib/pcre/lib/vc8/pcre-d.lib
Normal file
Binary file not shown.
BIN
Engine/lib/pcre/lib/vc8/pcre.lib
Normal file
BIN
Engine/lib/pcre/lib/vc8/pcre.lib
Normal file
Binary file not shown.
BIN
Engine/lib/pcre/lib/vc8/pcrecpp-d.lib
Normal file
BIN
Engine/lib/pcre/lib/vc8/pcrecpp-d.lib
Normal file
Binary file not shown.
BIN
Engine/lib/pcre/lib/vc8/pcrecpp.lib
Normal file
BIN
Engine/lib/pcre/lib/vc8/pcrecpp.lib
Normal file
Binary file not shown.
BIN
Engine/lib/pcre/lib/vc9/pcre-d.lib
Normal file
BIN
Engine/lib/pcre/lib/vc9/pcre-d.lib
Normal file
Binary file not shown.
BIN
Engine/lib/pcre/lib/vc9/pcre.lib
Normal file
BIN
Engine/lib/pcre/lib/vc9/pcre.lib
Normal file
Binary file not shown.
BIN
Engine/lib/pcre/lib/vc9/pcrecpp-d.lib
Normal file
BIN
Engine/lib/pcre/lib/vc9/pcrecpp-d.lib
Normal file
Binary file not shown.
BIN
Engine/lib/pcre/lib/vc9/pcrecpp.lib
Normal file
BIN
Engine/lib/pcre/lib/vc9/pcrecpp.lib
Normal file
Binary file not shown.
303
Engine/lib/pcre/pcre.h
Normal file
303
Engine/lib/pcre/pcre.h
Normal file
|
|
@ -0,0 +1,303 @@
|
|||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* This is the public header file for the PCRE library, to be #included by
|
||||
applications that call the PCRE functions.
|
||||
|
||||
Copyright (c) 1997-2008 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifndef _PCRE_H
|
||||
#define _PCRE_H
|
||||
|
||||
/* The current PCRE version information. */
|
||||
|
||||
#define PCRE_MAJOR 7
|
||||
#define PCRE_MINOR 6
|
||||
#define PCRE_PRERELEASE
|
||||
#define PCRE_DATE 2008-01-28
|
||||
|
||||
/* When an application links to a PCRE DLL in Windows, the symbols that are
|
||||
imported have to be identified as such. When building PCRE, the appropriate
|
||||
export setting is defined in pcre_internal.h, which includes this file. So we
|
||||
don't change existing definitions of PCRE_EXP_DECL and PCRECPP_EXP_DECL. */
|
||||
|
||||
#if defined(_WIN32) && !defined(PCRE_STATIC)
|
||||
# ifndef PCRE_EXP_DECL
|
||||
# define PCRE_EXP_DECL extern __declspec(dllimport)
|
||||
# endif
|
||||
# ifdef __cplusplus
|
||||
# ifndef PCRECPP_EXP_DECL
|
||||
# define PCRECPP_EXP_DECL extern __declspec(dllimport)
|
||||
# endif
|
||||
# ifndef PCRECPP_EXP_DEFN
|
||||
# define PCRECPP_EXP_DEFN __declspec(dllimport)
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* By default, we use the standard "extern" declarations. */
|
||||
|
||||
#ifndef PCRE_EXP_DECL
|
||||
# ifdef __cplusplus
|
||||
# define PCRE_EXP_DECL extern "C"
|
||||
# else
|
||||
# define PCRE_EXP_DECL extern
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
# ifndef PCRECPP_EXP_DECL
|
||||
# define PCRECPP_EXP_DECL extern
|
||||
# endif
|
||||
# ifndef PCRECPP_EXP_DEFN
|
||||
# define PCRECPP_EXP_DEFN
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* Have to include stdlib.h in order to ensure that size_t is defined;
|
||||
it is needed here for malloc. */
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
/* Allow for C++ users */
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Options */
|
||||
|
||||
#define PCRE_CASELESS 0x00000001
|
||||
#define PCRE_MULTILINE 0x00000002
|
||||
#define PCRE_DOTALL 0x00000004
|
||||
#define PCRE_EXTENDED 0x00000008
|
||||
#define PCRE_ANCHORED 0x00000010
|
||||
#define PCRE_DOLLAR_ENDONLY 0x00000020
|
||||
#define PCRE_EXTRA 0x00000040
|
||||
#define PCRE_NOTBOL 0x00000080
|
||||
#define PCRE_NOTEOL 0x00000100
|
||||
#define PCRE_UNGREEDY 0x00000200
|
||||
#define PCRE_NOTEMPTY 0x00000400
|
||||
#define PCRE_UTF8 0x00000800
|
||||
#define PCRE_NO_AUTO_CAPTURE 0x00001000
|
||||
#define PCRE_NO_UTF8_CHECK 0x00002000
|
||||
#define PCRE_AUTO_CALLOUT 0x00004000
|
||||
#define PCRE_PARTIAL 0x00008000
|
||||
#define PCRE_DFA_SHORTEST 0x00010000
|
||||
#define PCRE_DFA_RESTART 0x00020000
|
||||
#define PCRE_FIRSTLINE 0x00040000
|
||||
#define PCRE_DUPNAMES 0x00080000
|
||||
#define PCRE_NEWLINE_CR 0x00100000
|
||||
#define PCRE_NEWLINE_LF 0x00200000
|
||||
#define PCRE_NEWLINE_CRLF 0x00300000
|
||||
#define PCRE_NEWLINE_ANY 0x00400000
|
||||
#define PCRE_NEWLINE_ANYCRLF 0x00500000
|
||||
#define PCRE_BSR_ANYCRLF 0x00800000
|
||||
#define PCRE_BSR_UNICODE 0x01000000
|
||||
|
||||
/* Exec-time and get/set-time error codes */
|
||||
|
||||
#define PCRE_ERROR_NOMATCH (-1)
|
||||
#define PCRE_ERROR_NULL (-2)
|
||||
#define PCRE_ERROR_BADOPTION (-3)
|
||||
#define PCRE_ERROR_BADMAGIC (-4)
|
||||
#define PCRE_ERROR_UNKNOWN_OPCODE (-5)
|
||||
#define PCRE_ERROR_UNKNOWN_NODE (-5) /* For backward compatibility */
|
||||
#define PCRE_ERROR_NOMEMORY (-6)
|
||||
#define PCRE_ERROR_NOSUBSTRING (-7)
|
||||
#define PCRE_ERROR_MATCHLIMIT (-8)
|
||||
#define PCRE_ERROR_CALLOUT (-9) /* Never used by PCRE itself */
|
||||
#define PCRE_ERROR_BADUTF8 (-10)
|
||||
#define PCRE_ERROR_BADUTF8_OFFSET (-11)
|
||||
#define PCRE_ERROR_PARTIAL (-12)
|
||||
#define PCRE_ERROR_BADPARTIAL (-13)
|
||||
#define PCRE_ERROR_INTERNAL (-14)
|
||||
#define PCRE_ERROR_BADCOUNT (-15)
|
||||
#define PCRE_ERROR_DFA_UITEM (-16)
|
||||
#define PCRE_ERROR_DFA_UCOND (-17)
|
||||
#define PCRE_ERROR_DFA_UMLIMIT (-18)
|
||||
#define PCRE_ERROR_DFA_WSSIZE (-19)
|
||||
#define PCRE_ERROR_DFA_RECURSE (-20)
|
||||
#define PCRE_ERROR_RECURSIONLIMIT (-21)
|
||||
#define PCRE_ERROR_NULLWSLIMIT (-22) /* No longer actually used */
|
||||
#define PCRE_ERROR_BADNEWLINE (-23)
|
||||
|
||||
/* Request types for pcre_fullinfo() */
|
||||
|
||||
#define PCRE_INFO_OPTIONS 0
|
||||
#define PCRE_INFO_SIZE 1
|
||||
#define PCRE_INFO_CAPTURECOUNT 2
|
||||
#define PCRE_INFO_BACKREFMAX 3
|
||||
#define PCRE_INFO_FIRSTBYTE 4
|
||||
#define PCRE_INFO_FIRSTCHAR 4 /* For backwards compatibility */
|
||||
#define PCRE_INFO_FIRSTTABLE 5
|
||||
#define PCRE_INFO_LASTLITERAL 6
|
||||
#define PCRE_INFO_NAMEENTRYSIZE 7
|
||||
#define PCRE_INFO_NAMECOUNT 8
|
||||
#define PCRE_INFO_NAMETABLE 9
|
||||
#define PCRE_INFO_STUDYSIZE 10
|
||||
#define PCRE_INFO_DEFAULT_TABLES 11
|
||||
#define PCRE_INFO_OKPARTIAL 12
|
||||
#define PCRE_INFO_JCHANGED 13
|
||||
#define PCRE_INFO_HASCRORLF 14
|
||||
|
||||
/* Request types for pcre_config(). Do not re-arrange, in order to remain
|
||||
compatible. */
|
||||
|
||||
#define PCRE_CONFIG_UTF8 0
|
||||
#define PCRE_CONFIG_NEWLINE 1
|
||||
#define PCRE_CONFIG_LINK_SIZE 2
|
||||
#define PCRE_CONFIG_POSIX_MALLOC_THRESHOLD 3
|
||||
#define PCRE_CONFIG_MATCH_LIMIT 4
|
||||
#define PCRE_CONFIG_STACKRECURSE 5
|
||||
#define PCRE_CONFIG_UNICODE_PROPERTIES 6
|
||||
#define PCRE_CONFIG_MATCH_LIMIT_RECURSION 7
|
||||
#define PCRE_CONFIG_BSR 8
|
||||
|
||||
/* Bit flags for the pcre_extra structure. Do not re-arrange or redefine
|
||||
these bits, just add new ones on the end, in order to remain compatible. */
|
||||
|
||||
#define PCRE_EXTRA_STUDY_DATA 0x0001
|
||||
#define PCRE_EXTRA_MATCH_LIMIT 0x0002
|
||||
#define PCRE_EXTRA_CALLOUT_DATA 0x0004
|
||||
#define PCRE_EXTRA_TABLES 0x0008
|
||||
#define PCRE_EXTRA_MATCH_LIMIT_RECURSION 0x0010
|
||||
|
||||
/* Types */
|
||||
|
||||
struct real_pcre; /* declaration; the definition is private */
|
||||
typedef struct real_pcre pcre;
|
||||
|
||||
/* When PCRE is compiled as a C++ library, the subject pointer type can be
|
||||
replaced with a custom type. For conventional use, the public interface is a
|
||||
const char *. */
|
||||
|
||||
#ifndef PCRE_SPTR
|
||||
#define PCRE_SPTR const char *
|
||||
#endif
|
||||
|
||||
/* The structure for passing additional data to pcre_exec(). This is defined in
|
||||
such a way as to be extensible. Always add new fields at the end, in order to
|
||||
remain compatible. */
|
||||
|
||||
typedef struct pcre_extra {
|
||||
unsigned long int flags; /* Bits for which fields are set (PCRE_EXTRA_xxx values) */
|
||||
void *study_data; /* Opaque data from pcre_study() */
|
||||
unsigned long int match_limit; /* Maximum number of calls to match() */
|
||||
void *callout_data; /* Data passed back in callouts */
|
||||
const unsigned char *tables; /* Pointer to character tables */
|
||||
unsigned long int match_limit_recursion; /* Max recursive calls to match() */
|
||||
} pcre_extra;
|
||||
|
||||
/* The structure for passing out data via the pcre_callout_function. We use a
|
||||
structure so that new fields can be added on the end in future versions,
|
||||
without changing the API of the function, thereby allowing old clients to work
|
||||
without modification. */
|
||||
|
||||
/* Block handed by pointer to the pcre_callout function. The fields are grouped
by the block version that introduced them; new fields are added at the end. */
typedef struct pcre_callout_block {
|
||||
int version; /* Identifies version of block */
|
||||
/* ------------------------ Version 0 ------------------------------- */
|
||||
int callout_number; /* Number compiled into pattern */
|
||||
int *offset_vector; /* The offset vector */
|
||||
PCRE_SPTR subject; /* The subject being matched (PCRE_SPTR is const char * unless overridden) */
|
||||
int subject_length; /* The length of the subject */
|
||||
int start_match; /* Offset to start of this match attempt */
|
||||
int current_position; /* Where we currently are in the subject */
|
||||
int capture_top; /* Max current capture */
|
||||
int capture_last; /* Most recently closed capture */
|
||||
void *callout_data; /* Data passed in with the call (presumably pcre_extra.callout_data - confirm) */
|
||||
/* ------------------- Added for Version 1 -------------------------- */
|
||||
int pattern_position; /* Offset to next item in the pattern */
|
||||
int next_item_length; /* Length of next item in the pattern */
|
||||
/* ------------------------------------------------------------------ */
|
||||
} pcre_callout_block;
|
||||
|
||||
/* Indirection for store get and free functions. These can be set to
|
||||
alternative malloc/free functions if required. Special ones are used in the
|
||||
non-recursive case for "frames". There is also an optional callout function
|
||||
that is triggered by the (?C) regex item. For Virtual Pascal, these definitions
|
||||
have to take another form. */
|
||||
|
||||
#ifndef VPCOMPAT
|
||||
PCRE_EXP_DECL void *(*pcre_malloc)(size_t);
|
||||
PCRE_EXP_DECL void (*pcre_free)(void *);
|
||||
PCRE_EXP_DECL void *(*pcre_stack_malloc)(size_t);
|
||||
PCRE_EXP_DECL void (*pcre_stack_free)(void *);
|
||||
PCRE_EXP_DECL int (*pcre_callout)(pcre_callout_block *);
|
||||
#else /* VPCOMPAT */
|
||||
PCRE_EXP_DECL void *pcre_malloc(size_t);
|
||||
PCRE_EXP_DECL void pcre_free(void *);
|
||||
PCRE_EXP_DECL void *pcre_stack_malloc(size_t);
|
||||
PCRE_EXP_DECL void pcre_stack_free(void *);
|
||||
PCRE_EXP_DECL int pcre_callout(pcre_callout_block *);
|
||||
#endif /* VPCOMPAT */
|
||||
|
||||
/* Exported PCRE functions */
|
||||
|
||||
PCRE_EXP_DECL pcre *pcre_compile(const char *, int, const char **, int *,
|
||||
const unsigned char *);
|
||||
PCRE_EXP_DECL pcre *pcre_compile2(const char *, int, int *, const char **,
|
||||
int *, const unsigned char *);
|
||||
PCRE_EXP_DECL int pcre_config(int, void *);
|
||||
PCRE_EXP_DECL int pcre_copy_named_substring(const pcre *, const char *,
|
||||
int *, int, const char *, char *, int);
|
||||
PCRE_EXP_DECL int pcre_copy_substring(const char *, int *, int, int, char *,
|
||||
int);
|
||||
PCRE_EXP_DECL int pcre_dfa_exec(const pcre *, const pcre_extra *,
|
||||
const char *, int, int, int, int *, int , int *, int);
|
||||
PCRE_EXP_DECL int pcre_exec(const pcre *, const pcre_extra *, PCRE_SPTR,
|
||||
int, int, int, int *, int);
|
||||
PCRE_EXP_DECL void pcre_free_substring(const char *);
|
||||
PCRE_EXP_DECL void pcre_free_substring_list(const char **);
|
||||
PCRE_EXP_DECL int pcre_fullinfo(const pcre *, const pcre_extra *, int,
|
||||
void *);
|
||||
PCRE_EXP_DECL int pcre_get_named_substring(const pcre *, const char *,
|
||||
int *, int, const char *, const char **);
|
||||
PCRE_EXP_DECL int pcre_get_stringnumber(const pcre *, const char *);
|
||||
PCRE_EXP_DECL int pcre_get_stringtable_entries(const pcre *, const char *,
|
||||
char **, char **);
|
||||
PCRE_EXP_DECL int pcre_get_substring(const char *, int *, int, int,
|
||||
const char **);
|
||||
PCRE_EXP_DECL int pcre_get_substring_list(const char *, int *, int,
|
||||
const char ***);
|
||||
PCRE_EXP_DECL int pcre_info(const pcre *, int *, int *);
|
||||
PCRE_EXP_DECL const unsigned char *pcre_maketables(void);
|
||||
PCRE_EXP_DECL int pcre_refcount(pcre *, int);
|
||||
PCRE_EXP_DECL pcre_extra *pcre_study(const pcre *, int, const char **);
|
||||
PCRE_EXP_DECL const char *pcre_version(void);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* End of pcre.h */
|
||||
193
Engine/lib/pcre/pcre_chartables.c
Normal file
193
Engine/lib/pcre/pcre_chartables.c
Normal file
|
|
@ -0,0 +1,193 @@
|
|||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* This file was automatically written by the dftables auxiliary
|
||||
program. It contains character tables that are used when no external
|
||||
tables are passed to PCRE by the application that calls it. The tables
|
||||
are used only for characters whose code values are less than 256.
|
||||
|
||||
The following #includes are present because without them gcc 4.x may remove
|
||||
the array definition from the final binary if PCRE is built into a static
|
||||
library and dead code stripping is activated. This leads to link errors.
|
||||
Pulling in the header ensures that the array gets flagged as "someone
|
||||
outside this compilation unit might reference this" and so it will always
|
||||
be supplied to the linker. */
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
const unsigned char _pcre_default_tables[] = {
|
||||
|
||||
/* This table is a lower casing table. */
|
||||
|
||||
0, 1, 2, 3, 4, 5, 6, 7,
|
||||
8, 9, 10, 11, 12, 13, 14, 15,
|
||||
16, 17, 18, 19, 20, 21, 22, 23,
|
||||
24, 25, 26, 27, 28, 29, 30, 31,
|
||||
32, 33, 34, 35, 36, 37, 38, 39,
|
||||
40, 41, 42, 43, 44, 45, 46, 47,
|
||||
48, 49, 50, 51, 52, 53, 54, 55,
|
||||
56, 57, 58, 59, 60, 61, 62, 63,
|
||||
64, 97, 98, 99,100,101,102,103,
|
||||
104,105,106,107,108,109,110,111,
|
||||
112,113,114,115,116,117,118,119,
|
||||
120,121,122, 91, 92, 93, 94, 95,
|
||||
96, 97, 98, 99,100,101,102,103,
|
||||
104,105,106,107,108,109,110,111,
|
||||
112,113,114,115,116,117,118,119,
|
||||
120,121,122,123,124,125,126,127,
|
||||
128,129,130,131,132,133,134,135,
|
||||
136,137,138,139,140,141,142,143,
|
||||
144,145,146,147,148,149,150,151,
|
||||
152,153,154,155,156,157,158,159,
|
||||
160,161,162,163,164,165,166,167,
|
||||
168,169,170,171,172,173,174,175,
|
||||
176,177,178,179,180,181,182,183,
|
||||
184,185,186,187,188,189,190,191,
|
||||
192,193,194,195,196,197,198,199,
|
||||
200,201,202,203,204,205,206,207,
|
||||
208,209,210,211,212,213,214,215,
|
||||
216,217,218,219,220,221,222,223,
|
||||
224,225,226,227,228,229,230,231,
|
||||
232,233,234,235,236,237,238,239,
|
||||
240,241,242,243,244,245,246,247,
|
||||
248,249,250,251,252,253,254,255,
|
||||
|
||||
/* This table is a case flipping table. */
|
||||
|
||||
0, 1, 2, 3, 4, 5, 6, 7,
|
||||
8, 9, 10, 11, 12, 13, 14, 15,
|
||||
16, 17, 18, 19, 20, 21, 22, 23,
|
||||
24, 25, 26, 27, 28, 29, 30, 31,
|
||||
32, 33, 34, 35, 36, 37, 38, 39,
|
||||
40, 41, 42, 43, 44, 45, 46, 47,
|
||||
48, 49, 50, 51, 52, 53, 54, 55,
|
||||
56, 57, 58, 59, 60, 61, 62, 63,
|
||||
64, 97, 98, 99,100,101,102,103,
|
||||
104,105,106,107,108,109,110,111,
|
||||
112,113,114,115,116,117,118,119,
|
||||
120,121,122, 91, 92, 93, 94, 95,
|
||||
96, 65, 66, 67, 68, 69, 70, 71,
|
||||
72, 73, 74, 75, 76, 77, 78, 79,
|
||||
80, 81, 82, 83, 84, 85, 86, 87,
|
||||
88, 89, 90,123,124,125,126,127,
|
||||
128,129,130,131,132,133,134,135,
|
||||
136,137,138,139,140,141,142,143,
|
||||
144,145,146,147,148,149,150,151,
|
||||
152,153,154,155,156,157,158,159,
|
||||
160,161,162,163,164,165,166,167,
|
||||
168,169,170,171,172,173,174,175,
|
||||
176,177,178,179,180,181,182,183,
|
||||
184,185,186,187,188,189,190,191,
|
||||
192,193,194,195,196,197,198,199,
|
||||
200,201,202,203,204,205,206,207,
|
||||
208,209,210,211,212,213,214,215,
|
||||
216,217,218,219,220,221,222,223,
|
||||
224,225,226,227,228,229,230,231,
|
||||
232,233,234,235,236,237,238,239,
|
||||
240,241,242,243,244,245,246,247,
|
||||
248,249,250,251,252,253,254,255,
|
||||
|
||||
/* This table contains bit maps for various character classes.
|
||||
Each map is 32 bytes long and the bits run from the least
|
||||
significant end of each byte. The classes that have their own
|
||||
maps are: space, xdigit, digit, upper, lower, word, graph,
|
||||
print, punct, and cntrl. Other classes are built from combinations. */
|
||||
|
||||
0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
|
||||
0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
|
||||
0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
|
||||
0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
|
||||
0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
|
||||
0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
/* This table identifies various classes of character by individual bits:
|
||||
0x01 white space character
|
||||
0x02 letter
|
||||
0x04 decimal digit
|
||||
0x08 hexadecimal digit
|
||||
0x10 alphanumeric or '_'
|
||||
0x80 regular expression metacharacter or binary zero
|
||||
*/
|
||||
|
||||
0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
|
||||
0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
|
||||
0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
|
||||
0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
|
||||
0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
|
||||
0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
|
||||
0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
|
||||
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
|
||||
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
|
||||
0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
|
||||
0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
|
||||
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
|
||||
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
|
||||
0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
|
||||
|
||||
/* End of pcre_chartables.c */
|
||||
6221
Engine/lib/pcre/pcre_compile.c
Normal file
6221
Engine/lib/pcre/pcre_compile.c
Normal file
File diff suppressed because it is too large
Load diff
128
Engine/lib/pcre/pcre_config.c
Normal file
128
Engine/lib/pcre/pcre_config.c
Normal file
|
|
@ -0,0 +1,128 @@
|
|||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2008 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains the external function pcre_config(). */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Return info about what features are configured *
|
||||
*************************************************/
|
||||
|
||||
/* This function has an extensible interface so that additional items can be
|
||||
added compatibly.
|
||||
|
||||
Arguments:
|
||||
what what information is required
|
||||
where where to put the information
|
||||
|
||||
Returns: 0 if data returned, negative on error
|
||||
*/
|
||||
|
||||
PCRE_EXP_DEFN int
|
||||
pcre_config(int what, void *where)
|
||||
{
|
||||
switch (what)
|
||||
{
|
||||
case PCRE_CONFIG_UTF8:
|
||||
#ifdef SUPPORT_UTF8
|
||||
*((int *)where) = 1;
|
||||
#else
|
||||
*((int *)where) = 0;
|
||||
#endif
|
||||
break;
|
||||
|
||||
case PCRE_CONFIG_UNICODE_PROPERTIES:
|
||||
#ifdef SUPPORT_UCP
|
||||
*((int *)where) = 1;
|
||||
#else
|
||||
*((int *)where) = 0;
|
||||
#endif
|
||||
break;
|
||||
|
||||
case PCRE_CONFIG_NEWLINE:
|
||||
*((int *)where) = NEWLINE;
|
||||
break;
|
||||
|
||||
case PCRE_CONFIG_BSR:
|
||||
#ifdef BSR_ANYCRLF
|
||||
*((int *)where) = 1;
|
||||
#else
|
||||
*((int *)where) = 0;
|
||||
#endif
|
||||
break;
|
||||
|
||||
case PCRE_CONFIG_LINK_SIZE:
|
||||
*((int *)where) = LINK_SIZE;
|
||||
break;
|
||||
|
||||
case PCRE_CONFIG_POSIX_MALLOC_THRESHOLD:
|
||||
*((int *)where) = POSIX_MALLOC_THRESHOLD;
|
||||
break;
|
||||
|
||||
case PCRE_CONFIG_MATCH_LIMIT:
|
||||
*((unsigned int *)where) = MATCH_LIMIT;
|
||||
break;
|
||||
|
||||
case PCRE_CONFIG_MATCH_LIMIT_RECURSION:
|
||||
*((unsigned int *)where) = MATCH_LIMIT_RECURSION;
|
||||
break;
|
||||
|
||||
case PCRE_CONFIG_STACKRECURSE:
|
||||
#ifdef NO_RECURSE
|
||||
*((int *)where) = 0;
|
||||
#else
|
||||
*((int *)where) = 1;
|
||||
#endif
|
||||
break;
|
||||
|
||||
default: return PCRE_ERROR_BADOPTION;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* End of pcre_config.c */
|
||||
2896
Engine/lib/pcre/pcre_dfa_exec.c
Normal file
2896
Engine/lib/pcre/pcre_dfa_exec.c
Normal file
File diff suppressed because it is too large
Load diff
4940
Engine/lib/pcre/pcre_exec.c
Normal file
4940
Engine/lib/pcre/pcre_exec.c
Normal file
File diff suppressed because it is too large
Load diff
165
Engine/lib/pcre/pcre_fullinfo.c
Normal file
165
Engine/lib/pcre/pcre_fullinfo.c
Normal file
|
|
@ -0,0 +1,165 @@
|
|||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2008 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains the external function pcre_fullinfo(), which returns
|
||||
information about a compiled pattern. */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Return info about compiled pattern *
|
||||
*************************************************/
|
||||
|
||||
/* This is a newer "info" function which has an extensible interface so
|
||||
that additional items can be added compatibly.
|
||||
|
||||
Arguments:
|
||||
argument_re points to compiled code
|
||||
extra_data points to extra data, or NULL
|
||||
what what information is required
|
||||
where where to put the information
|
||||
|
||||
Returns: 0 if data returned, negative on error
|
||||
*/
|
||||
|
||||
PCRE_EXP_DEFN int
|
||||
pcre_fullinfo(const pcre *argument_re, const pcre_extra *extra_data, int what,
|
||||
void *where)
|
||||
{
|
||||
real_pcre internal_re;
|
||||
pcre_study_data internal_study;
|
||||
const real_pcre *re = (const real_pcre *)argument_re;
|
||||
const pcre_study_data *study = NULL;
|
||||
|
||||
if (re == NULL || where == NULL) return PCRE_ERROR_NULL;
|
||||
|
||||
if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_STUDY_DATA) != 0)
|
||||
study = (const pcre_study_data *)extra_data->study_data;
|
||||
|
||||
if (re->magic_number != MAGIC_NUMBER)
|
||||
{
|
||||
re = _pcre_try_flipped(re, &internal_re, study, &internal_study);
|
||||
if (re == NULL) return PCRE_ERROR_BADMAGIC;
|
||||
if (study != NULL) study = &internal_study;
|
||||
}
|
||||
|
||||
switch (what)
|
||||
{
|
||||
case PCRE_INFO_OPTIONS:
|
||||
*((unsigned long int *)where) = re->options & PUBLIC_OPTIONS;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_SIZE:
|
||||
*((size_t *)where) = re->size;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_STUDYSIZE:
|
||||
*((size_t *)where) = (study == NULL)? 0 : study->size;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_CAPTURECOUNT:
|
||||
*((int *)where) = re->top_bracket;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_BACKREFMAX:
|
||||
*((int *)where) = re->top_backref;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_FIRSTBYTE:
|
||||
*((int *)where) =
|
||||
((re->flags & PCRE_FIRSTSET) != 0)? re->first_byte :
|
||||
((re->flags & PCRE_STARTLINE) != 0)? -1 : -2;
|
||||
break;
|
||||
|
||||
/* Make sure we pass back the pointer to the bit vector in the external
|
||||
block, not the internal copy (with flipped integer fields). */
|
||||
|
||||
case PCRE_INFO_FIRSTTABLE:
|
||||
*((const uschar **)where) =
|
||||
(study != NULL && (study->options & PCRE_STUDY_MAPPED) != 0)?
|
||||
((const pcre_study_data *)extra_data->study_data)->start_bits : NULL;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_LASTLITERAL:
|
||||
*((int *)where) =
|
||||
((re->flags & PCRE_REQCHSET) != 0)? re->req_byte : -1;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_NAMEENTRYSIZE:
|
||||
*((int *)where) = re->name_entry_size;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_NAMECOUNT:
|
||||
*((int *)where) = re->name_count;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_NAMETABLE:
|
||||
*((const uschar **)where) = (const uschar *)re + re->name_table_offset;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_DEFAULT_TABLES:
|
||||
*((const uschar **)where) = (const uschar *)(_pcre_default_tables);
|
||||
break;
|
||||
|
||||
case PCRE_INFO_OKPARTIAL:
|
||||
*((int *)where) = (re->flags & PCRE_NOPARTIAL) == 0;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_JCHANGED:
|
||||
*((int *)where) = (re->flags & PCRE_JCHANGED) != 0;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_HASCRORLF:
|
||||
*((int *)where) = (re->flags & PCRE_HASCRORLF) != 0;
|
||||
break;
|
||||
|
||||
default: return PCRE_ERROR_BADOPTION;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* End of pcre_fullinfo.c */
|
||||
465
Engine/lib/pcre/pcre_get.c
Normal file
465
Engine/lib/pcre/pcre_get.c
Normal file
|
|
@ -0,0 +1,465 @@
|
|||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2008 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains some convenience functions for extracting substrings
|
||||
from the subject string after a regex match has succeeded. The original idea
|
||||
for these functions came from Scott Wimer. */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Find number for named string *
|
||||
*************************************************/
|
||||
|
||||
/* This function is used by the get_first_set() function below, as well
|
||||
as being generally available. It assumes that names are unique.
|
||||
|
||||
Arguments:
|
||||
code the compiled regex
|
||||
stringname the name whose number is required
|
||||
|
||||
Returns: the number of the named parentheses, or a negative number
|
||||
(PCRE_ERROR_NOSUBSTRING) if not found
|
||||
*/
|
||||
|
||||
int
|
||||
pcre_get_stringnumber(const pcre *code, const char *stringname)
|
||||
{
|
||||
int rc;
|
||||
int entrysize;
|
||||
int top, bot;
|
||||
uschar *nametable;
|
||||
|
||||
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
|
||||
return rc;
|
||||
if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
|
||||
|
||||
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
|
||||
return rc;
|
||||
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
|
||||
return rc;
|
||||
|
||||
bot = 0;
|
||||
while (top > bot)
|
||||
{
|
||||
int mid = (top + bot) / 2;
|
||||
uschar *entry = nametable + entrysize*mid;
|
||||
int c = strcmp(stringname, (char *)(entry + 2));
|
||||
if (c == 0) return (entry[0] << 8) + entry[1];
|
||||
if (c > 0) bot = mid + 1; else top = mid;
|
||||
}
|
||||
|
||||
return PCRE_ERROR_NOSUBSTRING;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Find (multiple) entries for named string *
|
||||
*************************************************/
|
||||
|
||||
/* This is used by the get_first_set() function below, as well as being
|
||||
generally available. It is used when duplicated names are permitted.
|
||||
|
||||
Arguments:
|
||||
code the compiled regex
|
||||
stringname the name whose entries are required
|
||||
firstptr where to put the pointer to the first entry
|
||||
lastptr where to put the pointer to the last entry
|
||||
|
||||
Returns: the length of each entry, or a negative number
|
||||
(PCRE_ERROR_NOSUBSTRING) if not found
|
||||
*/
|
||||
|
||||
int
|
||||
pcre_get_stringtable_entries(const pcre *code, const char *stringname,
|
||||
char **firstptr, char **lastptr)
|
||||
{
|
||||
int rc;
|
||||
int entrysize;
|
||||
int top, bot;
|
||||
uschar *nametable, *lastentry;
|
||||
|
||||
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
|
||||
return rc;
|
||||
if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
|
||||
|
||||
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
|
||||
return rc;
|
||||
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
|
||||
return rc;
|
||||
|
||||
lastentry = nametable + entrysize * (top - 1);
|
||||
bot = 0;
|
||||
while (top > bot)
|
||||
{
|
||||
int mid = (top + bot) / 2;
|
||||
uschar *entry = nametable + entrysize*mid;
|
||||
int c = strcmp(stringname, (char *)(entry + 2));
|
||||
if (c == 0)
|
||||
{
|
||||
uschar *first = entry;
|
||||
uschar *last = entry;
|
||||
while (first > nametable)
|
||||
{
|
||||
if (strcmp(stringname, (char *)(first - entrysize + 2)) != 0) break;
|
||||
first -= entrysize;
|
||||
}
|
||||
while (last < lastentry)
|
||||
{
|
||||
if (strcmp(stringname, (char *)(last + entrysize + 2)) != 0) break;
|
||||
last += entrysize;
|
||||
}
|
||||
*firstptr = (char *)first;
|
||||
*lastptr = (char *)last;
|
||||
return entrysize;
|
||||
}
|
||||
if (c > 0) bot = mid + 1; else top = mid;
|
||||
}
|
||||
|
||||
return PCRE_ERROR_NOSUBSTRING;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Find first set of multiple named strings *
|
||||
*************************************************/
|
||||
|
||||
/* This function allows for duplicate names in the table of named substrings.
|
||||
It returns the number of the first one that was set in a pattern match.
|
||||
|
||||
Arguments:
|
||||
code the compiled regex
|
||||
stringname the name of the capturing substring
|
||||
ovector the vector of matched substrings
|
||||
|
||||
Returns: the number of the first that is set,
|
||||
or the number of the last one if none are set,
|
||||
or a negative number on error
|
||||
*/
|
||||
|
||||
static int
|
||||
get_first_set(const pcre *code, const char *stringname, int *ovector)
|
||||
{
|
||||
const real_pcre *re = (const real_pcre *)code;
|
||||
int entrysize;
|
||||
char *first, *last;
|
||||
uschar *entry;
|
||||
if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0)
|
||||
return pcre_get_stringnumber(code, stringname);
|
||||
entrysize = pcre_get_stringtable_entries(code, stringname, &first, &last);
|
||||
if (entrysize <= 0) return entrysize;
|
||||
for (entry = (uschar *)first; entry <= (uschar *)last; entry += entrysize)
|
||||
{
|
||||
int n = (entry[0] << 8) + entry[1];
|
||||
if (ovector[n*2] >= 0) return n;
|
||||
}
|
||||
return (first[0] << 8) + first[1];
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Copy captured string to given buffer *
|
||||
*************************************************/
|
||||
|
||||
/* This function copies a single captured substring into a given buffer.
|
||||
Note that we use memcpy() rather than strncpy() in case there are binary zeros
|
||||
in the string.
|
||||
|
||||
Arguments:
|
||||
subject the subject string that was matched
|
||||
ovector pointer to the offsets table
|
||||
stringcount the number of substrings that were captured
|
||||
(i.e. the yield of the pcre_exec call, unless
|
||||
that was zero, in which case it should be 1/3
|
||||
of the offset table size)
|
||||
stringnumber the number of the required substring
|
||||
buffer where to put the substring
|
||||
size the size of the buffer
|
||||
|
||||
Returns: if successful:
|
||||
the length of the copied string, not including the zero
|
||||
that is put on the end; can be zero
|
||||
if not successful:
|
||||
PCRE_ERROR_NOMEMORY (-6) buffer too small
|
||||
PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
|
||||
*/
|
||||
|
||||
int
|
||||
pcre_copy_substring(const char *subject, int *ovector, int stringcount,
|
||||
int stringnumber, char *buffer, int size)
|
||||
{
|
||||
int yield;
|
||||
if (stringnumber < 0 || stringnumber >= stringcount)
|
||||
return PCRE_ERROR_NOSUBSTRING;
|
||||
stringnumber *= 2;
|
||||
yield = ovector[stringnumber+1] - ovector[stringnumber];
|
||||
if (size < yield + 1) return PCRE_ERROR_NOMEMORY;
|
||||
memcpy(buffer, subject + ovector[stringnumber], yield);
|
||||
buffer[yield] = 0;
|
||||
return yield;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Copy named captured string to given buffer *
|
||||
*************************************************/
|
||||
|
||||
/* This function copies a single captured substring into a given buffer,
|
||||
identifying it by name. If the regex permits duplicate names, the first
|
||||
substring that is set is chosen.
|
||||
|
||||
Arguments:
|
||||
code the compiled regex
|
||||
subject the subject string that was matched
|
||||
ovector pointer to the offsets table
|
||||
stringcount the number of substrings that were captured
|
||||
(i.e. the yield of the pcre_exec call, unless
|
||||
that was zero, in which case it should be 1/3
|
||||
of the offset table size)
|
||||
stringname the name of the required substring
|
||||
buffer where to put the substring
|
||||
size the size of the buffer
|
||||
|
||||
Returns: if successful:
|
||||
the length of the copied string, not including the zero
|
||||
that is put on the end; can be zero
|
||||
if not successful:
|
||||
PCRE_ERROR_NOMEMORY (-6) buffer too small
|
||||
PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
|
||||
*/
|
||||
|
||||
int
|
||||
pcre_copy_named_substring(const pcre *code, const char *subject, int *ovector,
|
||||
int stringcount, const char *stringname, char *buffer, int size)
|
||||
{
|
||||
int n = get_first_set(code, stringname, ovector);
|
||||
if (n <= 0) return n;
|
||||
return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Copy all captured strings to new store *
|
||||
*************************************************/
|
||||
|
||||
/* This function gets one chunk of store and builds a list of pointers and all
|
||||
of the captured substrings in it. A NULL pointer is put on the end of the list.
|
||||
|
||||
Arguments:
|
||||
subject the subject string that was matched
|
||||
ovector pointer to the offsets table
|
||||
stringcount the number of substrings that were captured
|
||||
(i.e. the yield of the pcre_exec call, unless
|
||||
that was zero, in which case it should be 1/3
|
||||
of the offset table size)
|
||||
listptr set to point to the list of pointers
|
||||
|
||||
Returns: if successful: 0
|
||||
if not successful:
|
||||
PCRE_ERROR_NOMEMORY (-6) failed to get store
|
||||
*/
|
||||
|
||||
int
|
||||
pcre_get_substring_list(const char *subject, int *ovector, int stringcount,
|
||||
const char ***listptr)
|
||||
{
|
||||
int i;
|
||||
int size = sizeof(char *);
|
||||
int double_count = stringcount * 2;
|
||||
char **stringlist;
|
||||
char *p;
|
||||
|
||||
for (i = 0; i < double_count; i += 2)
|
||||
size += sizeof(char *) + ovector[i+1] - ovector[i] + 1;
|
||||
|
||||
stringlist = (char **)(pcre_malloc)(size);
|
||||
if (stringlist == NULL) return PCRE_ERROR_NOMEMORY;
|
||||
|
||||
*listptr = (const char **)stringlist;
|
||||
p = (char *)(stringlist + stringcount + 1);
|
||||
|
||||
for (i = 0; i < double_count; i += 2)
|
||||
{
|
||||
int len = ovector[i+1] - ovector[i];
|
||||
memcpy(p, subject + ovector[i], len);
|
||||
*stringlist++ = p;
|
||||
p += len;
|
||||
*p++ = 0;
|
||||
}
|
||||
|
||||
*stringlist = NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Free store obtained by get_substring_list *
|
||||
*************************************************/
|
||||
|
||||
/* This function exists for the benefit of people calling PCRE from non-C
|
||||
programs that can call its functions, but not free() or (pcre_free)() directly.
|
||||
|
||||
Argument: the result of a previous pcre_get_substring_list()
|
||||
Returns: nothing
|
||||
*/
|
||||
|
||||
void
|
||||
pcre_free_substring_list(const char **pointer)
|
||||
{
|
||||
(pcre_free)((void *)pointer);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Copy captured string to new store *
|
||||
*************************************************/
|
||||
|
||||
/* This function copies a single captured substring into a piece of new
|
||||
store
|
||||
|
||||
Arguments:
|
||||
subject the subject string that was matched
|
||||
ovector pointer to the offsets table
|
||||
stringcount the number of substrings that were captured
|
||||
(i.e. the yield of the pcre_exec call, unless
|
||||
that was zero, in which case it should be 1/3
|
||||
of the offset table size)
|
||||
stringnumber the number of the required substring
|
||||
stringptr where to put a pointer to the substring
|
||||
|
||||
Returns: if successful:
|
||||
the length of the string, not including the zero that
|
||||
is put on the end; can be zero
|
||||
if not successful:
|
||||
PCRE_ERROR_NOMEMORY (-6) failed to get store
|
||||
PCRE_ERROR_NOSUBSTRING (-7) substring not present
|
||||
*/
|
||||
|
||||
int
|
||||
pcre_get_substring(const char *subject, int *ovector, int stringcount,
|
||||
int stringnumber, const char **stringptr)
|
||||
{
|
||||
int yield;
|
||||
char *substring;
|
||||
if (stringnumber < 0 || stringnumber >= stringcount)
|
||||
return PCRE_ERROR_NOSUBSTRING;
|
||||
stringnumber *= 2;
|
||||
yield = ovector[stringnumber+1] - ovector[stringnumber];
|
||||
substring = (char *)(pcre_malloc)(yield + 1);
|
||||
if (substring == NULL) return PCRE_ERROR_NOMEMORY;
|
||||
memcpy(substring, subject + ovector[stringnumber], yield);
|
||||
substring[yield] = 0;
|
||||
*stringptr = substring;
|
||||
return yield;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Copy named captured string to new store *
|
||||
*************************************************/
|
||||
|
||||
/* This function copies a single captured substring, identified by name, into
|
||||
new store. If the regex permits duplicate names, the first substring that is
|
||||
set is chosen.
|
||||
|
||||
Arguments:
|
||||
code the compiled regex
|
||||
subject the subject string that was matched
|
||||
ovector pointer to the offsets table
|
||||
stringcount the number of substrings that were captured
|
||||
(i.e. the yield of the pcre_exec call, unless
|
||||
that was zero, in which case it should be 1/3
|
||||
of the offset table size)
|
||||
stringname the name of the required substring
|
||||
stringptr where to put the pointer
|
||||
|
||||
Returns: if successful:
|
||||
the length of the copied string, not including the zero
|
||||
that is put on the end; can be zero
|
||||
if not successful:
|
||||
PCRE_ERROR_NOMEMORY (-6) couldn't get memory
|
||||
PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
|
||||
*/
|
||||
|
||||
int
|
||||
pcre_get_named_substring(const pcre *code, const char *subject, int *ovector,
|
||||
int stringcount, const char *stringname, const char **stringptr)
|
||||
{
|
||||
int n = get_first_set(code, stringname, ovector);
|
||||
if (n <= 0) return n;
|
||||
return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Free store obtained by get_substring *
|
||||
*************************************************/
|
||||
|
||||
/* This function exists for the benefit of people calling PCRE from non-C
|
||||
programs that can call its functions, but not free() or (pcre_free)() directly.
|
||||
|
||||
Argument: the result of a previous pcre_get_substring()
|
||||
Returns: nothing
|
||||
*/
|
||||
|
||||
void
|
||||
pcre_free_substring(const char *pointer)
|
||||
{
|
||||
(pcre_free)((void *)pointer);
|
||||
}
|
||||
|
||||
/* End of pcre_get.c */
|
||||
63
Engine/lib/pcre/pcre_globals.c
Normal file
63
Engine/lib/pcre/pcre_globals.c
Normal file
|
|
@ -0,0 +1,63 @@
|
|||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2008 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains global variables that are exported by the PCRE library.
|
||||
PCRE is thread-clean and doesn't use any global variables in the normal sense.
|
||||
However, it calls memory allocation and freeing functions via the four
|
||||
indirections below, and it can optionally do callouts, using the fifth
|
||||
indirection. These values can be changed by the caller, but are shared between
|
||||
all threads. However, when compiling for Virtual Pascal, things are done
|
||||
differently, and global variables are not used (see pcre.in). */
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
/* The five function-pointer indirections. They are defined only when VPCOMPAT
is not defined. The four memory functions start out pointing at the standard
malloc() and free(); the callout pointer starts out as NULL. */
#ifndef VPCOMPAT
|
||||
/* Allocator indirection, initially malloc(). */
PCRE_EXP_DATA_DEFN void *(*pcre_malloc)(size_t) = malloc;
|
||||
/* Matching release indirection, initially free(). */
PCRE_EXP_DATA_DEFN void (*pcre_free)(void *) = free;
|
||||
/* A second, separately replaceable allocator, initially malloc().
NOTE(review): presumably used for match-time "stack" frames - confirm against
the matching code, which is not visible here. */
PCRE_EXP_DATA_DEFN void *(*pcre_stack_malloc)(size_t) = malloc;
|
||||
/* Release indirection paired with pcre_stack_malloc, initially free(). */
PCRE_EXP_DATA_DEFN void (*pcre_stack_free)(void *) = free;
|
||||
/* Optional callout function; NULL until the caller sets one. */
PCRE_EXP_DATA_DEFN int (*pcre_callout)(pcre_callout_block *) = NULL;
|
||||
#endif
|
||||
|
||||
/* End of pcre_globals.c */
|
||||
93
Engine/lib/pcre/pcre_info.c
Normal file
93
Engine/lib/pcre/pcre_info.c
Normal file
|
|
@ -0,0 +1,93 @@
|
|||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2008 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains the external function pcre_info(), which gives some
|
||||
information about a compiled pattern. However, use of this function is now
|
||||
deprecated, as it has been superseded by pcre_fullinfo(). */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
/*************************************************
|
||||
* (Obsolete) Return info about compiled pattern *
|
||||
*************************************************/
|
||||
|
||||
/* This is the original "info" function. It picks potentially useful data out
|
||||
of the private structure, but its interface was too rigid. It remains for
|
||||
backwards compatibility. The public options are passed back in an int - though
|
||||
the re->options field has been expanded to a long int, all the public options
|
||||
at the low end of it, and so even on 16-bit systems this will still be OK.
|
||||
Therefore, I haven't changed the API for pcre_info().
|
||||
|
||||
Arguments:
|
||||
argument_re points to compiled code
|
||||
optptr where to pass back the options
|
||||
first_byte where to pass back the first character,
|
||||
or -1 if multiline and all branches start ^,
|
||||
or -2 otherwise
|
||||
|
||||
Returns: number of capturing subpatterns
|
||||
or negative values on error
|
||||
*/
|
||||
|
||||
PCRE_EXP_DEFN int
|
||||
pcre_info(const pcre *argument_re, int *optptr, int *first_byte)
|
||||
{
|
||||
real_pcre internal_re;
|
||||
const real_pcre *re = (const real_pcre *)argument_re;
|
||||
if (re == NULL) return PCRE_ERROR_NULL;
|
||||
if (re->magic_number != MAGIC_NUMBER)
|
||||
{
|
||||
re = _pcre_try_flipped(re, &internal_re, NULL, NULL);
|
||||
if (re == NULL) return PCRE_ERROR_BADMAGIC;
|
||||
}
|
||||
if (optptr != NULL) *optptr = (int)(re->options & PUBLIC_OPTIONS);
|
||||
if (first_byte != NULL)
|
||||
*first_byte = ((re->flags & PCRE_FIRSTSET) != 0)? re->first_byte :
|
||||
((re->flags & PCRE_STARTLINE) != 0)? -1 : -2;
|
||||
return re->top_bracket;
|
||||
}
|
||||
|
||||
/* End of pcre_info.c */
|
||||
1126
Engine/lib/pcre/pcre_internal.h
Normal file
1126
Engine/lib/pcre/pcre_internal.h
Normal file
File diff suppressed because it is too large
Load diff
143
Engine/lib/pcre/pcre_maketables.c
Normal file
143
Engine/lib/pcre/pcre_maketables.c
Normal file
|
|
@ -0,0 +1,143 @@
|
|||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2008 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains the external function pcre_maketables(), which builds
|
||||
character tables for PCRE in the current locale. The file is compiled on its
|
||||
own as part of the PCRE library. However, it is also included in the
|
||||
compilation of dftables.c, in which case the macro DFTABLES is defined. */
|
||||
|
||||
|
||||
#ifndef DFTABLES
|
||||
# ifdef HAVE_CONFIG_H
|
||||
# include "config.h"
|
||||
# endif
|
||||
# include "pcre_internal.h"
|
||||
#endif
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Create PCRE character tables *
|
||||
*************************************************/
|
||||
|
||||
/* This function builds a set of character tables for use by PCRE and returns
|
||||
a pointer to them. They are build using the ctype functions, and consequently
|
||||
their contents will depend upon the current locale setting. When compiled as
|
||||
part of the library, the store is obtained via pcre_malloc(), but when compiled
|
||||
inside dftables, use malloc().
|
||||
|
||||
Arguments: none
|
||||
Returns: pointer to the contiguous block of data
|
||||
*/
|
||||
|
||||
const unsigned char *
|
||||
pcre_maketables(void)
|
||||
{
|
||||
unsigned char *yield, *p;
|
||||
int i;
|
||||
|
||||
#ifndef DFTABLES
|
||||
yield = (unsigned char*)(pcre_malloc)(tables_length);
|
||||
#else
|
||||
yield = (unsigned char*)malloc(tables_length);
|
||||
#endif
|
||||
|
||||
if (yield == NULL) return NULL;
|
||||
p = yield;
|
||||
|
||||
/* First comes the lower casing table */
|
||||
|
||||
for (i = 0; i < 256; i++) *p++ = tolower(i);
|
||||
|
||||
/* Next the case-flipping table */
|
||||
|
||||
for (i = 0; i < 256; i++) *p++ = islower(i)? toupper(i) : tolower(i);
|
||||
|
||||
/* Then the character class tables. Don't try to be clever and save effort on
|
||||
exclusive ones - in some locales things may be different. Note that the table
|
||||
for "space" includes everything "isspace" gives, including VT in the default
|
||||
locale. This makes it work for the POSIX class [:space:]. Note also that it is
|
||||
possible for a character to be alnum or alpha without being lower or upper,
|
||||
such as "male and female ordinals" (\xAA and \xBA) in the fr_FR locale (at
|
||||
least under Debian Linux's locales as of 12/2005). So we must test for alnum
|
||||
specially. */
|
||||
|
||||
memset(p, 0, cbit_length);
|
||||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
if (isdigit(i)) p[cbit_digit + i/8] |= 1 << (i&7);
|
||||
if (isupper(i)) p[cbit_upper + i/8] |= 1 << (i&7);
|
||||
if (islower(i)) p[cbit_lower + i/8] |= 1 << (i&7);
|
||||
if (isalnum(i)) p[cbit_word + i/8] |= 1 << (i&7);
|
||||
if (i == '_') p[cbit_word + i/8] |= 1 << (i&7);
|
||||
if (isspace(i)) p[cbit_space + i/8] |= 1 << (i&7);
|
||||
if (isxdigit(i))p[cbit_xdigit + i/8] |= 1 << (i&7);
|
||||
if (isgraph(i)) p[cbit_graph + i/8] |= 1 << (i&7);
|
||||
if (isprint(i)) p[cbit_print + i/8] |= 1 << (i&7);
|
||||
if (ispunct(i)) p[cbit_punct + i/8] |= 1 << (i&7);
|
||||
if (iscntrl(i)) p[cbit_cntrl + i/8] |= 1 << (i&7);
|
||||
}
|
||||
p += cbit_length;
|
||||
|
||||
/* Finally, the character type table. In this, we exclude VT from the white
|
||||
space chars, because Perl doesn't recognize it as such for \s and for comments
|
||||
within regexes. */
|
||||
|
||||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
int x = 0;
|
||||
if (i != 0x0b && isspace(i)) x += ctype_space;
|
||||
if (isalpha(i)) x += ctype_letter;
|
||||
if (isdigit(i)) x += ctype_digit;
|
||||
if (isxdigit(i)) x += ctype_xdigit;
|
||||
if (isalnum(i) || i == '_') x += ctype_word;
|
||||
|
||||
/* Note: strchr includes the terminating zero in the characters it considers.
|
||||
In this instance, that is ok because we want binary zero to be flagged as a
|
||||
meta-character, which in this sense is any character that terminates a run
|
||||
of data characters. */
|
||||
|
||||
if (strchr("\\*+?{^.$|()[", i) != 0) x += ctype_meta;
|
||||
*p++ = x;
|
||||
}
|
||||
|
||||
return yield;
|
||||
}
|
||||
|
||||
/* End of pcre_maketables.c */
|
||||
164
Engine/lib/pcre/pcre_newline.c
Normal file
164
Engine/lib/pcre/pcre_newline.c
Normal file
|
|
@ -0,0 +1,164 @@
|
|||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2008 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains internal functions for testing newlines when more than
|
||||
one kind of newline is to be recognized. When a newline is found, its length is
|
||||
returned. In principle, we could implement several newline "types", each
|
||||
referring to a different set of newline characters. At present, PCRE supports
|
||||
only NLTYPE_FIXED, which gets handled without these functions, NLTYPE_ANYCRLF,
|
||||
and NLTYPE_ANY. The full list of Unicode newline characters is taken from
|
||||
http://unicode.org/unicode/reports/tr18/. */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Check for newline at given position *
|
||||
*************************************************/
|
||||
|
||||
/* It is guaranteed that the initial value of ptr is less than the end of the
|
||||
string that is being processed.
|
||||
|
||||
Arguments:
|
||||
ptr pointer to possible newline
|
||||
type the newline type
|
||||
endptr pointer to the end of the string
|
||||
lenptr where to return the length
|
||||
utf8 TRUE if in utf8 mode
|
||||
|
||||
Returns: TRUE or FALSE
|
||||
*/
|
||||
|
||||
BOOL
|
||||
_pcre_is_newline(const uschar *ptr, int type, const uschar *endptr,
|
||||
int *lenptr, BOOL utf8)
|
||||
{
|
||||
int c;
|
||||
if (utf8) { GETCHAR(c, ptr); } else c = *ptr;
|
||||
|
||||
if (type == NLTYPE_ANYCRLF) switch(c)
|
||||
{
|
||||
case 0x000a: *lenptr = 1; return TRUE; /* LF */
|
||||
case 0x000d: *lenptr = (ptr < endptr - 1 && ptr[1] == 0x0a)? 2 : 1;
|
||||
return TRUE; /* CR */
|
||||
default: return FALSE;
|
||||
}
|
||||
|
||||
/* NLTYPE_ANY */
|
||||
|
||||
else switch(c)
|
||||
{
|
||||
case 0x000a: /* LF */
|
||||
case 0x000b: /* VT */
|
||||
case 0x000c: *lenptr = 1; return TRUE; /* FF */
|
||||
case 0x000d: *lenptr = (ptr < endptr - 1 && ptr[1] == 0x0a)? 2 : 1;
|
||||
return TRUE; /* CR */
|
||||
case 0x0085: *lenptr = utf8? 2 : 1; return TRUE; /* NEL */
|
||||
case 0x2028: /* LS */
|
||||
case 0x2029: *lenptr = 3; return TRUE; /* PS */
|
||||
default: return FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Check for newline at previous position *
|
||||
*************************************************/
|
||||
|
||||
/* It is guaranteed that the initial value of ptr is greater than the start of
|
||||
the string that is being processed.
|
||||
|
||||
Arguments:
|
||||
ptr pointer to possible newline
|
||||
type the newline type
|
||||
startptr pointer to the start of the string
|
||||
lenptr where to return the length
|
||||
utf8 TRUE if in utf8 mode
|
||||
|
||||
Returns: TRUE or FALSE
|
||||
*/
|
||||
|
||||
BOOL
|
||||
_pcre_was_newline(const uschar *ptr, int type, const uschar *startptr,
|
||||
int *lenptr, BOOL utf8)
|
||||
{
|
||||
int c;
|
||||
ptr--;
|
||||
#ifdef SUPPORT_UTF8
|
||||
if (utf8)
|
||||
{
|
||||
BACKCHAR(ptr);
|
||||
GETCHAR(c, ptr);
|
||||
}
|
||||
else c = *ptr;
|
||||
#else /* no UTF-8 support */
|
||||
c = *ptr;
|
||||
#endif /* SUPPORT_UTF8 */
|
||||
|
||||
if (type == NLTYPE_ANYCRLF) switch(c)
|
||||
{
|
||||
case 0x000a: *lenptr = (ptr > startptr && ptr[-1] == 0x0d)? 2 : 1;
|
||||
return TRUE; /* LF */
|
||||
case 0x000d: *lenptr = 1; return TRUE; /* CR */
|
||||
default: return FALSE;
|
||||
}
|
||||
|
||||
else switch(c)
|
||||
{
|
||||
case 0x000a: *lenptr = (ptr > startptr && ptr[-1] == 0x0d)? 2 : 1;
|
||||
return TRUE; /* LF */
|
||||
case 0x000b: /* VT */
|
||||
case 0x000c: /* FF */
|
||||
case 0x000d: *lenptr = 1; return TRUE; /* CR */
|
||||
case 0x0085: *lenptr = utf8? 2 : 1; return TRUE; /* NEL */
|
||||
case 0x2028: /* LS */
|
||||
case 0x2029: *lenptr = 3; return TRUE; /* PS */
|
||||
default: return FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
/* End of pcre_newline.c */
|
||||
85
Engine/lib/pcre/pcre_ord2utf8.c
Normal file
85
Engine/lib/pcre/pcre_ord2utf8.c
Normal file
|
|
@ -0,0 +1,85 @@
|
|||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2008 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This file contains a private PCRE function that converts an ordinal
|
||||
character value into a UTF8 string. */
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Convert character value to UTF-8 *
|
||||
*************************************************/
|
||||
|
||||
/* This function takes an integer value in the range 0 - 0x7fffffff
|
||||
and encodes it as a UTF-8 character in 0 to 6 bytes.
|
||||
|
||||
Arguments:
|
||||
cvalue the character value
|
||||
buffer pointer to buffer for result - at least 6 bytes long
|
||||
|
||||
Returns: number of characters placed in the buffer
|
||||
*/
|
||||
|
||||
int
|
||||
_pcre_ord2utf8(int cvalue, uschar *buffer)
|
||||
{
|
||||
#ifdef SUPPORT_UTF8
|
||||
register int i, j;
|
||||
for (i = 0; i < _pcre_utf8_table1_size; i++)
|
||||
if (cvalue <= _pcre_utf8_table1[i]) break;
|
||||
buffer += i;
|
||||
for (j = i; j > 0; j--)
|
||||
{
|
||||
*buffer-- = 0x80 | (cvalue & 0x3f);
|
||||
cvalue >>= 6;
|
||||
}
|
||||
*buffer = _pcre_utf8_table2[i] | cvalue;
|
||||
return i + 1;
|
||||
#else
|
||||
return 0; /* Keep compiler happy; this function won't ever be */
|
||||
#endif /* called when SUPPORT_UTF8 is not defined. */
|
||||
}
|
||||
|
||||
/* End of pcre_ord2utf8.c */
|
||||
512
Engine/lib/pcre/pcre_printint.src
Normal file
512
Engine/lib/pcre/pcre_printint.src
Normal file
|
|
@ -0,0 +1,512 @@
|
|||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2008 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains a PCRE private debugging function for printing out the
|
||||
internal form of a compiled regular expression, along with some supporting
|
||||
local functions. This source file is used in two places:
|
||||
|
||||
(1) It is #included by pcre_compile.c when it is compiled in debugging mode
|
||||
(DEBUG defined in pcre_internal.h). It is not included in production compiles.
|
||||
|
||||
(2) It is always #included by pcretest.c, which can be asked to print out a
|
||||
compiled regex for debugging purposes. */
|
||||
|
||||
|
||||
/* Macro that decides whether a character should be output as a literal or in
|
||||
hexadecimal. We don't use isprint() because that can vary from system to system
|
||||
(even without the use of locales) and we want the output always to be the same,
|
||||
for testing purposes. This macro is used in pcretest as well as in this file. */
|
||||
|
||||
/* True for values 32 to 126 inclusive. The argument is evaluated twice, so it
must not have side effects. */
#define PRINTABLE(c) ((c) >= 32 && (c) < 127)
|
||||
|
||||
/* The table of operator names, expanded from OP_NAME_LIST. It is indexed by
opcode value, e.g. OP_names[*code]. */
|
||||
|
||||
static const char *OP_names[] = { OP_NAME_LIST };
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Print single- or multi-byte character *
|
||||
*************************************************/
|
||||
|
||||
static int
|
||||
print_char(FILE *f, uschar *ptr, BOOL utf8)
|
||||
{
|
||||
int c = *ptr;
|
||||
|
||||
#ifndef SUPPORT_UTF8
|
||||
utf8 = utf8; /* Avoid compiler warning */
|
||||
if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
|
||||
return 0;
|
||||
|
||||
#else
|
||||
if (!utf8 || (c & 0xc0) != 0xc0)
|
||||
{
|
||||
if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
|
||||
return 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
int i;
|
||||
int a = _pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */
|
||||
int s = 6*a;
|
||||
c = (c & _pcre_utf8_table3[a]) << s;
|
||||
for (i = 1; i <= a; i++)
|
||||
{
|
||||
/* This is a check for malformed UTF-8; it should only occur if the sanity
|
||||
check has been turned off. Rather than swallow random bytes, just stop if
|
||||
we hit a bad one. Print it with \X instead of \x as an indication. */
|
||||
|
||||
if ((ptr[i] & 0xc0) != 0x80)
|
||||
{
|
||||
fprintf(f, "\\X{%x}", c);
|
||||
return i - 1;
|
||||
}
|
||||
|
||||
/* The byte is OK */
|
||||
|
||||
s -= 6;
|
||||
c |= (ptr[i] & 0x3f) << s;
|
||||
}
|
||||
if (c < 128) fprintf(f, "\\x%02x", c); else fprintf(f, "\\x{%x}", c);
|
||||
return a;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Find Unicode property name *
|
||||
*************************************************/
|
||||
|
||||
/* Look up the name of a Unicode property from its type and value.

Arguments:
  ptype      the property type
  pvalue     the property value

Returns:     the name of the matching _pcre_utt entry with the highest index,
             or "??" if there is no match or SUPPORT_UCP is not defined
*/

static const char *
get_ucpname(int ptype, int pvalue)
{
#ifdef SUPPORT_UCP
  int i = _pcre_utt_size;

  /* Scan the table from its last entry down to its first. */

  while (--i >= 0)
  {
    if (ptype == _pcre_utt[i].type && pvalue == _pcre_utt[i].value)
      return _pcre_utt_names + _pcre_utt[i].name_offset;
  }
  return "??";
#else
  /* The arguments are not needed here; reference them so that the compiler
  does not warn about unused parameters. */
  (void)ptype;
  (void)pvalue;
  return "??";
#endif
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Print compiled regex *
|
||||
*************************************************/
|
||||
|
||||
/* Make this function work for a regex with integers in either byte order.
|
||||
However, we assume that what we are passed is a compiled regex. The
|
||||
print_lengths flag controls whether offsets and lengths of items are printed.
|
||||
They can be turned off from pcretest so that automatic tests on bytecode can be
|
||||
written that do not depend on the value of LINK_SIZE. */
|
||||
|
||||
static void
|
||||
pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths)
|
||||
{
|
||||
real_pcre *re = (real_pcre *)external_re;
|
||||
uschar *codestart, *code;
|
||||
BOOL utf8;
|
||||
|
||||
unsigned int options = re->options;
|
||||
int offset = re->name_table_offset;
|
||||
int count = re->name_count;
|
||||
int size = re->name_entry_size;
|
||||
|
||||
if (re->magic_number != MAGIC_NUMBER)
|
||||
{
|
||||
offset = ((offset << 8) & 0xff00) | ((offset >> 8) & 0xff);
|
||||
count = ((count << 8) & 0xff00) | ((count >> 8) & 0xff);
|
||||
size = ((size << 8) & 0xff00) | ((size >> 8) & 0xff);
|
||||
options = ((options << 24) & 0xff000000) |
|
||||
((options << 8) & 0x00ff0000) |
|
||||
((options >> 8) & 0x0000ff00) |
|
||||
((options >> 24) & 0x000000ff);
|
||||
}
|
||||
|
||||
code = codestart = (uschar *)re + offset + count * size;
|
||||
utf8 = (options & PCRE_UTF8) != 0;
|
||||
|
||||
for(;;)
|
||||
{
|
||||
uschar *ccode;
|
||||
int c;
|
||||
int extra = 0;
|
||||
|
||||
if (print_lengths)
|
||||
fprintf(f, "%3d ", (int)(code - codestart));
|
||||
else
|
||||
fprintf(f, " ");
|
||||
|
||||
switch(*code)
|
||||
{
|
||||
case OP_END:
|
||||
fprintf(f, " %s\n", OP_names[*code]);
|
||||
fprintf(f, "------------------------------------------------------------------\n");
|
||||
return;
|
||||
|
||||
case OP_OPT:
|
||||
fprintf(f, " %.2x %s", code[1], OP_names[*code]);
|
||||
break;
|
||||
|
||||
case OP_CHAR:
|
||||
fprintf(f, " ");
|
||||
do
|
||||
{
|
||||
code++;
|
||||
code += 1 + print_char(f, code, utf8);
|
||||
}
|
||||
while (*code == OP_CHAR);
|
||||
fprintf(f, "\n");
|
||||
continue;
|
||||
|
||||
case OP_CHARNC:
|
||||
fprintf(f, " NC ");
|
||||
do
|
||||
{
|
||||
code++;
|
||||
code += 1 + print_char(f, code, utf8);
|
||||
}
|
||||
while (*code == OP_CHARNC);
|
||||
fprintf(f, "\n");
|
||||
continue;
|
||||
|
||||
case OP_CBRA:
|
||||
case OP_SCBRA:
|
||||
if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
|
||||
else fprintf(f, " ");
|
||||
fprintf(f, "%s %d", OP_names[*code], GET2(code, 1+LINK_SIZE));
|
||||
break;
|
||||
|
||||
case OP_BRA:
|
||||
case OP_SBRA:
|
||||
case OP_KETRMAX:
|
||||
case OP_KETRMIN:
|
||||
case OP_ALT:
|
||||
case OP_KET:
|
||||
case OP_ASSERT:
|
||||
case OP_ASSERT_NOT:
|
||||
case OP_ASSERTBACK:
|
||||
case OP_ASSERTBACK_NOT:
|
||||
case OP_ONCE:
|
||||
case OP_COND:
|
||||
case OP_SCOND:
|
||||
case OP_REVERSE:
|
||||
if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
|
||||
else fprintf(f, " ");
|
||||
fprintf(f, "%s", OP_names[*code]);
|
||||
break;
|
||||
|
||||
case OP_CREF:
|
||||
fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
|
||||
break;
|
||||
|
||||
case OP_RREF:
|
||||
c = GET2(code, 1);
|
||||
if (c == RREF_ANY)
|
||||
fprintf(f, " Cond recurse any");
|
||||
else
|
||||
fprintf(f, " Cond recurse %d", c);
|
||||
break;
|
||||
|
||||
case OP_DEF:
|
||||
fprintf(f, " Cond def");
|
||||
break;
|
||||
|
||||
case OP_STAR:
|
||||
case OP_MINSTAR:
|
||||
case OP_POSSTAR:
|
||||
case OP_PLUS:
|
||||
case OP_MINPLUS:
|
||||
case OP_POSPLUS:
|
||||
case OP_QUERY:
|
||||
case OP_MINQUERY:
|
||||
case OP_POSQUERY:
|
||||
case OP_TYPESTAR:
|
||||
case OP_TYPEMINSTAR:
|
||||
case OP_TYPEPOSSTAR:
|
||||
case OP_TYPEPLUS:
|
||||
case OP_TYPEMINPLUS:
|
||||
case OP_TYPEPOSPLUS:
|
||||
case OP_TYPEQUERY:
|
||||
case OP_TYPEMINQUERY:
|
||||
case OP_TYPEPOSQUERY:
|
||||
fprintf(f, " ");
|
||||
if (*code >= OP_TYPESTAR)
|
||||
{
|
||||
fprintf(f, "%s", OP_names[code[1]]);
|
||||
if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
|
||||
{
|
||||
fprintf(f, " %s ", get_ucpname(code[2], code[3]));
|
||||
extra = 2;
|
||||
}
|
||||
}
|
||||
else extra = print_char(f, code+1, utf8);
|
||||
fprintf(f, "%s", OP_names[*code]);
|
||||
break;
|
||||
|
||||
case OP_EXACT:
|
||||
case OP_UPTO:
|
||||
case OP_MINUPTO:
|
||||
case OP_POSUPTO:
|
||||
fprintf(f, " ");
|
||||
extra = print_char(f, code+3, utf8);
|
||||
fprintf(f, "{");
|
||||
if (*code != OP_EXACT) fprintf(f, "0,");
|
||||
fprintf(f, "%d}", GET2(code,1));
|
||||
if (*code == OP_MINUPTO) fprintf(f, "?");
|
||||
else if (*code == OP_POSUPTO) fprintf(f, "+");
|
||||
break;
|
||||
|
||||
case OP_TYPEEXACT:
|
||||
case OP_TYPEUPTO:
|
||||
case OP_TYPEMINUPTO:
|
||||
case OP_TYPEPOSUPTO:
|
||||
fprintf(f, " %s", OP_names[code[3]]);
|
||||
if (code[3] == OP_PROP || code[3] == OP_NOTPROP)
|
||||
{
|
||||
fprintf(f, " %s ", get_ucpname(code[4], code[5]));
|
||||
extra = 2;
|
||||
}
|
||||
fprintf(f, "{");
|
||||
if (*code != OP_TYPEEXACT) fprintf(f, "0,");
|
||||
fprintf(f, "%d}", GET2(code,1));
|
||||
if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
|
||||
else if (*code == OP_TYPEPOSUPTO) fprintf(f, "+");
|
||||
break;
|
||||
|
||||
case OP_NOT:
|
||||
c = code[1];
|
||||
if (PRINTABLE(c)) fprintf(f, " [^%c]", c);
|
||||
else fprintf(f, " [^\\x%02x]", c);
|
||||
break;
|
||||
|
||||
case OP_NOTSTAR:
|
||||
case OP_NOTMINSTAR:
|
||||
case OP_NOTPOSSTAR:
|
||||
case OP_NOTPLUS:
|
||||
case OP_NOTMINPLUS:
|
||||
case OP_NOTPOSPLUS:
|
||||
case OP_NOTQUERY:
|
||||
case OP_NOTMINQUERY:
|
||||
case OP_NOTPOSQUERY:
|
||||
c = code[1];
|
||||
if (PRINTABLE(c)) fprintf(f, " [^%c]", c);
|
||||
else fprintf(f, " [^\\x%02x]", c);
|
||||
fprintf(f, "%s", OP_names[*code]);
|
||||
break;
|
||||
|
||||
case OP_NOTEXACT:
|
||||
case OP_NOTUPTO:
|
||||
case OP_NOTMINUPTO:
|
||||
case OP_NOTPOSUPTO:
|
||||
c = code[3];
|
||||
if (PRINTABLE(c)) fprintf(f, " [^%c]{", c);
|
||||
else fprintf(f, " [^\\x%02x]{", c);
|
||||
if (*code != OP_NOTEXACT) fprintf(f, "0,");
|
||||
fprintf(f, "%d}", GET2(code,1));
|
||||
if (*code == OP_NOTMINUPTO) fprintf(f, "?");
|
||||
else if (*code == OP_NOTPOSUPTO) fprintf(f, "+");
|
||||
break;
|
||||
|
||||
case OP_RECURSE:
|
||||
if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
|
||||
else fprintf(f, " ");
|
||||
fprintf(f, "%s", OP_names[*code]);
|
||||
break;
|
||||
|
||||
case OP_REF:
|
||||
fprintf(f, " \\%d", GET2(code,1));
|
||||
ccode = code + _pcre_OP_lengths[*code];
|
||||
goto CLASS_REF_REPEAT;
|
||||
|
||||
case OP_CALLOUT:
|
||||
fprintf(f, " %s %d %d %d", OP_names[*code], code[1], GET(code,2),
|
||||
GET(code, 2 + LINK_SIZE));
|
||||
break;
|
||||
|
||||
case OP_PROP:
|
||||
case OP_NOTPROP:
|
||||
fprintf(f, " %s %s", OP_names[*code], get_ucpname(code[1], code[2]));
|
||||
break;
|
||||
|
||||
/* OP_XCLASS can only occur in UTF-8 mode. However, there's no harm in
|
||||
having this code always here, and it makes it less messy without all those
|
||||
#ifdefs. */
|
||||
|
||||
case OP_CLASS:
|
||||
case OP_NCLASS:
|
||||
case OP_XCLASS:
|
||||
{
|
||||
int i, min, max;
|
||||
BOOL printmap;
|
||||
|
||||
fprintf(f, " [");
|
||||
|
||||
if (*code == OP_XCLASS)
|
||||
{
|
||||
extra = GET(code, 1);
|
||||
ccode = code + LINK_SIZE + 1;
|
||||
printmap = (*ccode & XCL_MAP) != 0;
|
||||
if ((*ccode++ & XCL_NOT) != 0) fprintf(f, "^");
|
||||
}
|
||||
else
|
||||
{
|
||||
printmap = TRUE;
|
||||
ccode = code + 1;
|
||||
}
|
||||
|
||||
/* Print a bit map */
|
||||
|
||||
if (printmap)
|
||||
{
|
||||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
if ((ccode[i/8] & (1 << (i&7))) != 0)
|
||||
{
|
||||
int j;
|
||||
for (j = i+1; j < 256; j++)
|
||||
if ((ccode[j/8] & (1 << (j&7))) == 0) break;
|
||||
if (i == '-' || i == ']') fprintf(f, "\\");
|
||||
if (PRINTABLE(i)) fprintf(f, "%c", i);
|
||||
else fprintf(f, "\\x%02x", i);
|
||||
if (--j > i)
|
||||
{
|
||||
if (j != i + 1) fprintf(f, "-");
|
||||
if (j == '-' || j == ']') fprintf(f, "\\");
|
||||
if (PRINTABLE(j)) fprintf(f, "%c", j);
|
||||
else fprintf(f, "\\x%02x", j);
|
||||
}
|
||||
i = j;
|
||||
}
|
||||
}
|
||||
ccode += 32;
|
||||
}
|
||||
|
||||
/* For an XCLASS there is always some additional data */
|
||||
|
||||
if (*code == OP_XCLASS)
|
||||
{
|
||||
int ch;
|
||||
while ((ch = *ccode++) != XCL_END)
|
||||
{
|
||||
if (ch == XCL_PROP)
|
||||
{
|
||||
int ptype = *ccode++;
|
||||
int pvalue = *ccode++;
|
||||
fprintf(f, "\\p{%s}", get_ucpname(ptype, pvalue));
|
||||
}
|
||||
else if (ch == XCL_NOTPROP)
|
||||
{
|
||||
int ptype = *ccode++;
|
||||
int pvalue = *ccode++;
|
||||
fprintf(f, "\\P{%s}", get_ucpname(ptype, pvalue));
|
||||
}
|
||||
else
|
||||
{
|
||||
ccode += 1 + print_char(f, ccode, TRUE);
|
||||
if (ch == XCL_RANGE)
|
||||
{
|
||||
fprintf(f, "-");
|
||||
ccode += 1 + print_char(f, ccode, TRUE);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Indicate a non-UTF8 class which was created by negation */
|
||||
|
||||
fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : "");
|
||||
|
||||
/* Handle repeats after a class or a back reference */
|
||||
|
||||
CLASS_REF_REPEAT:
|
||||
switch(*ccode)
|
||||
{
|
||||
case OP_CRSTAR:
|
||||
case OP_CRMINSTAR:
|
||||
case OP_CRPLUS:
|
||||
case OP_CRMINPLUS:
|
||||
case OP_CRQUERY:
|
||||
case OP_CRMINQUERY:
|
||||
fprintf(f, "%s", OP_names[*ccode]);
|
||||
extra += _pcre_OP_lengths[*ccode];
|
||||
break;
|
||||
|
||||
case OP_CRRANGE:
|
||||
case OP_CRMINRANGE:
|
||||
min = GET2(ccode,1);
|
||||
max = GET2(ccode,3);
|
||||
if (max == 0) fprintf(f, "{%d,}", min);
|
||||
else fprintf(f, "{%d,%d}", min, max);
|
||||
if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
|
||||
extra += _pcre_OP_lengths[*ccode];
|
||||
break;
|
||||
|
||||
/* Do nothing if it's not a repeat; this code stops picky compilers
|
||||
warning about the lack of a default code path. */
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
/* Anything else is just an item with no data*/
|
||||
|
||||
default:
|
||||
fprintf(f, " %s", OP_names[*code]);
|
||||
break;
|
||||
}
|
||||
|
||||
code += _pcre_OP_lengths[*code] + extra;
|
||||
fprintf(f, "\n");
|
||||
}
|
||||
}
|
||||
|
||||
/* End of pcre_printint.src */
|
||||
82
Engine/lib/pcre/pcre_refcount.c
Normal file
82
Engine/lib/pcre/pcre_refcount.c
Normal file
|
|
@ -0,0 +1,82 @@
|
|||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2008 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains the external function pcre_refcount(), which is an
|
||||
auxiliary function that can be used to maintain a reference count in a compiled
|
||||
pattern data block. This might be helpful in applications where the block is
|
||||
shared by different users. */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Maintain reference count *
|
||||
*************************************************/
|
||||
|
||||
/* The reference count is a 16-bit field, initialized to zero. It is not
|
||||
possible to transfer a non-zero count from one host to a different host that
|
||||
has a different byte order - though I can't see why anyone in their right mind
|
||||
would ever want to do that!
|
||||
|
||||
Arguments:
|
||||
argument_re points to compiled code
|
||||
adjust value to add to the count
|
||||
|
||||
Returns: the (possibly updated) count value (a non-negative number), or
|
||||
a negative error number
|
||||
*/
|
||||
|
||||
PCRE_EXP_DEFN int
|
||||
pcre_refcount(pcre *argument_re, int adjust)
|
||||
{
|
||||
real_pcre *re = (real_pcre *)argument_re;
|
||||
if (re == NULL) return PCRE_ERROR_NULL;
|
||||
re->ref_count = (-adjust > re->ref_count)? 0 :
|
||||
(adjust + re->ref_count > 65535)? 65535 :
|
||||
re->ref_count + adjust;
|
||||
return re->ref_count;
|
||||
}
|
||||
|
||||
/* End of pcre_refcount.c */
|
||||
199
Engine/lib/pcre/pcre_scanner.cc
Normal file
199
Engine/lib/pcre/pcre_scanner.cc
Normal file
|
|
@ -0,0 +1,199 @@
|
|||
// Copyright (c) 2005, Google Inc.
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following disclaimer
|
||||
// in the documentation and/or other materials provided with the
|
||||
// distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Author: Sanjay Ghemawat
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include <vector>
|
||||
#include <assert.h>
|
||||
|
||||
#include "pcrecpp_internal.h"
|
||||
#include "pcre_scanner.h"
|
||||
|
||||
using std::vector;
|
||||
|
||||
namespace pcrecpp {
|
||||
|
||||
// Default constructor: scans an empty input, with no skip pattern and
// comment saving turned off.
Scanner::Scanner()
    : data_(),
      input_(data_),            // view over data_, the member initialized above
      skip_(NULL),
      should_skip_(false),
      skip_repeat_(false),
      save_comments_(false),
      comments_(NULL),          // allocated lazily by ConsumeSkip()
      comments_offset_(0) {}
|
||||
|
||||
// Constructs a scanner over a private copy of "in".  No skip pattern is
// installed and comment saving is turned off.
Scanner::Scanner(const string& in)
    : data_(in),
      input_(data_),            // view over the copy held in data_
      skip_(NULL),
      should_skip_(false),
      skip_repeat_(false),
      save_comments_(false),
      comments_(NULL),          // allocated lazily by ConsumeSkip()
      comments_offset_(0) {}
|
||||
|
||||
// Releases the skip pattern and the saved-comment list.  Either pointer
// may still be NULL, which delete accepts.
Scanner::~Scanner() {
  delete skip_;
  delete comments_;
}
|
||||
|
||||
void Scanner::SetSkipExpression(const char* re) {
|
||||
delete skip_;
|
||||
if (re != NULL) {
|
||||
skip_ = new RE(re);
|
||||
should_skip_ = true;
|
||||
skip_repeat_ = true;
|
||||
ConsumeSkip();
|
||||
} else {
|
||||
skip_ = NULL;
|
||||
should_skip_ = false;
|
||||
skip_repeat_ = false;
|
||||
}
|
||||
}
|
||||
|
||||
void Scanner::Skip(const char* re) {
|
||||
delete skip_;
|
||||
if (re != NULL) {
|
||||
skip_ = new RE(re);
|
||||
should_skip_ = true;
|
||||
skip_repeat_ = false;
|
||||
ConsumeSkip();
|
||||
} else {
|
||||
skip_ = NULL;
|
||||
should_skip_ = false;
|
||||
skip_repeat_ = false;
|
||||
}
|
||||
}
|
||||
|
||||
// Pauses skipping while keeping the installed pattern for a later
// EnableSkip().  A skip pattern must already be installed.
void Scanner::DisableSkip() {
  assert(skip_ != NULL);
  should_skip_ = false;
}
|
||||
|
||||
// Resumes skipping with the installed pattern and immediately skips any
// matching prefix of the remaining input.  A skip pattern must already be
// installed.
void Scanner::EnableSkip() {
  assert(skip_ != NULL);
  should_skip_ = true;
  ConsumeSkip();
}
|
||||
|
||||
int Scanner::LineNumber() const {
|
||||
// TODO: Make it more efficient by keeping track of the last point
|
||||
// where we computed line numbers and counting newlines since then.
|
||||
// We could use std:count, but not all systems have it. :-(
|
||||
int count = 1;
|
||||
for (const char* p = data_.data(); p < input_.data(); ++p)
|
||||
if (*p == '\n')
|
||||
++count;
|
||||
return count;
|
||||
}
|
||||
|
||||
int Scanner::Offset() const {
|
||||
return input_.data() - data_.c_str();
|
||||
}
|
||||
|
||||
bool Scanner::LookingAt(const RE& re) const {
|
||||
int consumed;
|
||||
return re.DoMatch(input_, RE::ANCHOR_START, &consumed, 0, 0);
|
||||
}
|
||||
|
||||
|
||||
bool Scanner::Consume(const RE& re,
|
||||
const Arg& arg0,
|
||||
const Arg& arg1,
|
||||
const Arg& arg2) {
|
||||
const bool result = re.Consume(&input_, arg0, arg1, arg2);
|
||||
if (result && should_skip_) ConsumeSkip();
|
||||
return result;
|
||||
}
|
||||
|
||||
// helper function to consume *skip_ and honour save_comments_
|
||||
void Scanner::ConsumeSkip() {
|
||||
const char* start_data = input_.data();
|
||||
while (skip_->Consume(&input_)) {
|
||||
if (!skip_repeat_) {
|
||||
// Only one skip allowed.
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (save_comments_) {
|
||||
if (comments_ == NULL) {
|
||||
comments_ = new vector<StringPiece>;
|
||||
}
|
||||
// already pointing one past end, so no need to +1
|
||||
int length = input_.data() - start_data;
|
||||
if (length > 0) {
|
||||
comments_->push_back(StringPiece(start_data, length));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void Scanner::GetComments(int start, int end, vector<StringPiece> *ranges) {
|
||||
// short circuit out if we've not yet initialized comments_
|
||||
// (e.g., when save_comments is false)
|
||||
if (!comments_) {
|
||||
return;
|
||||
}
|
||||
// TODO: if we guarantee that comments_ will contain StringPieces
|
||||
// that are ordered by their start, then we can do a binary search
|
||||
// for the first StringPiece at or past start and then scan for the
|
||||
// ones contained in the range, quit early (use equal_range or
|
||||
// lower_bound)
|
||||
for (vector<StringPiece>::const_iterator it = comments_->begin();
|
||||
it != comments_->end(); ++it) {
|
||||
if ((it->data() >= data_.c_str() + start &&
|
||||
it->data() + it->size() <= data_.c_str() + end)) {
|
||||
ranges->push_back(*it);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Appends to *ranges the saved comments that earlier calls have not yet
// returned, and advances comments_offset_ past them.  Does nothing if no
// comments were ever saved.
void Scanner::GetNextComments(vector<StringPiece> *ranges) {
  // short circuit out if we've not yet initialized comments_
  // (e.g., when save_comments is false)
  if (comments_ == NULL) return;

  vector<StringPiece>::const_iterator next =
      comments_->begin() + comments_offset_;
  while (next != comments_->end()) {
    ranges->push_back(*next);
    ++comments_offset_;
    ++next;
  }
}
|
||||
|
||||
} // namespace pcrecpp
|
||||
172
Engine/lib/pcre/pcre_scanner.h
Normal file
172
Engine/lib/pcre/pcre_scanner.h
Normal file
|
|
@ -0,0 +1,172 @@
|
|||
// Copyright (c) 2005, Google Inc.
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following disclaimer
|
||||
// in the documentation and/or other materials provided with the
|
||||
// distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Author: Sanjay Ghemawat
|
||||
//
|
||||
// Regular-expression based scanner for parsing an input stream.
|
||||
//
|
||||
// Example 1: parse a sequence of "var = number" entries from input:
|
||||
//
|
||||
// Scanner scanner(input);
|
||||
// string var;
|
||||
// int number;
|
||||
// scanner.SetSkipExpression("\\s+"); // Skip any white space we encounter
|
||||
// while (scanner.Consume("(\\w+) = (\\d+)", &var, &number)) {
|
||||
// ...;
|
||||
// }
|
||||
|
||||
#ifndef _PCRE_SCANNER_H
|
||||
#define _PCRE_SCANNER_H
|
||||
|
||||
#include <assert.h>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include <pcrecpp.h>
|
||||
#include <pcre_stringpiece.h>
|
||||
|
||||
namespace pcrecpp {
|
||||
|
||||
class PCRECPP_EXP_DEFN Scanner {
|
||||
public:
|
||||
Scanner();
|
||||
explicit Scanner(const std::string& input);
|
||||
~Scanner();
|
||||
|
||||
// Return current line number. The returned line-number is
|
||||
// one-based. I.e. it returns 1 + the number of consumed newlines.
|
||||
//
|
||||
// Note: this method may be slow. It may take time proportional to
|
||||
// the size of the input.
|
||||
int LineNumber() const;
|
||||
|
||||
// Return the byte-offset that the scanner is looking in the
|
||||
// input data;
|
||||
int Offset() const;
|
||||
|
||||
// Return true iff the start of the remaining input matches "re"
|
||||
bool LookingAt(const RE& re) const;
|
||||
|
||||
// Return true iff all of the following are true
|
||||
// a. the start of the remaining input matches "re",
|
||||
// b. if any arguments are supplied, matched sub-patterns can be
|
||||
// parsed and stored into the arguments.
|
||||
// If it returns true, it skips over the matched input and any
|
||||
// following input that matches the "skip" regular expression.
|
||||
bool Consume(const RE& re,
|
||||
const Arg& arg0 = RE::no_arg,
|
||||
const Arg& arg1 = RE::no_arg,
|
||||
const Arg& arg2 = RE::no_arg
|
||||
// TODO: Allow more arguments?
|
||||
);
|
||||
|
||||
// Set the "skip" regular expression. If after consuming some data,
|
||||
// a prefix of the input matches this RE, it is automatically
|
||||
// skipped. For example, a programming language scanner would use
|
||||
// a skip RE that matches white space and comments.
|
||||
//
|
||||
// scanner.SetSkipExpression("\\s+|//.*|/[*](.|\n)*?[*]/");
|
||||
//
|
||||
// Skipping repeats as long as it succeeds. We used to let people do
|
||||
// this by writing "(...)*" in the regular expression, but that added
|
||||
// up to lots of recursive calls within the pcre library, so now we
|
||||
// control repetition explicitly via the function call API.
|
||||
//
|
||||
// You can pass NULL for "re" if you do not want any data to be skipped.
|
||||
void Skip(const char* re); // DEPRECATED; does *not* repeat
|
||||
void SetSkipExpression(const char* re);
|
||||
|
||||
// Temporarily pause "skip"ing. This
|
||||
// Skip("Foo"); code ; DisableSkip(); code; EnableSkip()
|
||||
// is similar to
|
||||
// Skip("Foo"); code ; Skip(NULL); code ; Skip("Foo");
|
||||
// but avoids creating/deleting new RE objects.
|
||||
void DisableSkip();
|
||||
|
||||
// Reenable previously paused skipping. Any prefix of the input
|
||||
// that matches the skip pattern is immediately dropped.
|
||||
void EnableSkip();
|
||||
|
||||
/***** Special wrappers around SetSkip() for some common idioms *****/
|
||||
|
||||
// Arranges to skip whitespace, C comments, C++ comments.
|
||||
// The overall RE is a disjunction of the following REs:
|
||||
// \\s whitespace
|
||||
// //.*\n C++ comment
|
||||
// /[*](.|\n)*?[*]/ C comment (x*? means minimal repetitions of x)
|
||||
// We get repetition via the semantics of SetSkipExpression, not by using *
|
||||
void SkipCXXComments() {
|
||||
SetSkipExpression("\\s|//.*\n|/[*](?:\n|.)*?[*]/");
|
||||
}
|
||||
|
||||
void set_save_comments(bool comments) {
|
||||
save_comments_ = comments;
|
||||
}
|
||||
|
||||
bool save_comments() {
|
||||
return save_comments_;
|
||||
}
|
||||
|
||||
// Append to vector ranges the comments found in the
|
||||
// byte range [start,end] (inclusive) of the input data.
|
||||
// Only comments that were extracted entirely within that
|
||||
// range are returned: no range splitting of atomically-extracted
|
||||
// comments is performed.
|
||||
void GetComments(int start, int end, std::vector<StringPiece> *ranges);
|
||||
|
||||
// Append to vector ranges the comments added
|
||||
// since the last time this was called. This
|
||||
// functionality is provided for efficiency when
|
||||
// interleaving scanning with parsing.
|
||||
void GetNextComments(std::vector<StringPiece> *ranges);
|
||||
|
||||
private:
|
||||
std::string data_; // All the input data
|
||||
StringPiece input_; // Unprocessed input
|
||||
RE* skip_; // If non-NULL, RE for skipping input
|
||||
bool should_skip_; // If true, use skip_
|
||||
bool skip_repeat_; // If true, repeat skip_ as long as it works
|
||||
bool save_comments_; // If true, aggregate the skip expression
|
||||
|
||||
// the skipped comments
|
||||
// TODO: later consider requiring that the StringPieces be added
|
||||
// in order by their start position
|
||||
std::vector<StringPiece> *comments_;
|
||||
|
||||
// the offset into comments_ that has been returned by GetNextComments
|
||||
int comments_offset_;
|
||||
|
||||
// helper function to consume *skip_ and honour
|
||||
// save_comments_
|
||||
void ConsumeSkip();
|
||||
};
|
||||
|
||||
} // namespace pcrecpp
|
||||
|
||||
#endif /* _PCRE_SCANNER_H */
|
||||
43
Engine/lib/pcre/pcre_stringpiece.cc
Normal file
43
Engine/lib/pcre/pcre_stringpiece.cc
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
// Copyright (c) 2005, Google Inc.
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following disclaimer
|
||||
// in the documentation and/or other materials provided with the
|
||||
// distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Author: wilsonh@google.com (Wilson Hsieh)
|
||||
//
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include <iostream>
|
||||
#include "pcrecpp_internal.h"
|
||||
#include "pcre_stringpiece.h"
|
||||
|
||||
// Writes the bytes of "piece" to "o" by streaming a std::string copy of
// it, and returns the stream.
std::ostream& operator<<(std::ostream& o, const pcrecpp::StringPiece& piece) {
  const std::string text = piece.as_string();
  o << text;
  return o;
}
|
||||
180
Engine/lib/pcre/pcre_stringpiece.h
Normal file
180
Engine/lib/pcre/pcre_stringpiece.h
Normal file
|
|
@ -0,0 +1,180 @@
|
|||
// Copyright (c) 2005, Google Inc.
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following disclaimer
|
||||
// in the documentation and/or other materials provided with the
|
||||
// distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Author: Sanjay Ghemawat
|
||||
//
|
||||
// A string like object that points into another piece of memory.
|
||||
// Useful for providing an interface that allows clients to easily
|
||||
// pass in either a "const char*" or a "string".
|
||||
//
|
||||
// Arghh! I wish C++ literals were automatically of type "string".
|
||||
|
||||
#ifndef _PCRE_STRINGPIECE_H
|
||||
#define _PCRE_STRINGPIECE_H
|
||||
|
||||
#include <string.h>
|
||||
#include <string>
|
||||
#include <iosfwd> // for ostream forward-declaration
|
||||
|
||||
// Don't use bits/type_traits.h on Linux - Andrew Galante, GG 8/2/2009
|
||||
#if !defined(_MSC_VER) && !defined(__CELLOS_LV2__) && !defined(__APPLE__) && !defined(__linux__)
|
||||
#ifdef __MINGW32__
|
||||
#define HAVE_TYPE_TRAITS
|
||||
#include <bits/type_traits.h>
|
||||
#else
|
||||
#define HAVE_TYPE_TRAITS
|
||||
#include <type_traits.h>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#include <pcre.h>
|
||||
|
||||
using std::string;
|
||||
|
||||
namespace pcrecpp {
|
||||
|
||||
// A non-owning (pointer, length) view of a run of bytes.  The piece does
// not copy or free the bytes it refers to.
class PCRECPP_EXP_DEFN StringPiece {
 private:
  const char*   ptr_;
  int           length_;

  // Length of a NUL-terminated string, treating a NULL pointer as empty
  // (strlen() itself must not be given NULL).
  static int SafeLength(const char* str) {
    return (str == NULL) ? 0 : static_cast<int>(strlen(str));
  }

  // memcmp() that is not called at all when there is nothing to compare.
  // Empty pieces may hold NULL pointers, and passing those to memcmp()
  // is undefined even for a zero length.
  static int CompareBytes(const char* a, const char* b, int n) {
    return (n > 0) ? memcmp(a, b, n) : 0;
  }

 public:
  // We provide non-explicit singleton constructors so users can pass
  // in a "const char*" or a "string" wherever a "StringPiece" is
  // expected.  A NULL "const char*" yields an empty piece.
  StringPiece()
    : ptr_(NULL), length_(0) { }
  StringPiece(const char* str)
    : ptr_(str), length_(SafeLength(str)) { }
  StringPiece(const unsigned char* str)
    : ptr_(reinterpret_cast<const char*>(str)),
      length_(SafeLength(reinterpret_cast<const char*>(str))) { }
  StringPiece(const string& str)
    : ptr_(str.data()), length_(static_cast<int>(str.size())) { }
  StringPiece(const char* offset, int len)
    : ptr_(offset), length_(len) { }

  // data() may return a pointer to a buffer with embedded NULs, and the
  // returned buffer may or may not be null terminated.  Therefore it is
  // typically a mistake to pass data() to a routine that expects a NUL
  // terminated string.  Use "as_string().c_str()" if you really need to do
  // this.  Or better yet, change your routine so it does not rely on NUL
  // termination.
  const char* data() const { return ptr_; }
  int size() const { return length_; }
  bool empty() const { return length_ == 0; }

  void clear() { ptr_ = NULL; length_ = 0; }
  void set(const char* buffer, int len) { ptr_ = buffer; length_ = len; }
  // Point at a NUL-terminated string; NULL yields an empty piece.
  void set(const char* str) {
    ptr_ = str;
    length_ = SafeLength(str);
  }
  void set(const void* buffer, int len) {
    ptr_ = reinterpret_cast<const char*>(buffer);
    length_ = len;
  }

  // Unchecked byte access; "i" must be a valid index.
  char operator[](int i) const { return ptr_[i]; }

  // Drop the first n bytes (unchecked).
  void remove_prefix(int n) {
    ptr_ += n;
    length_ -= n;
  }

  // Drop the last n bytes (unchecked).
  void remove_suffix(int n) {
    length_ -= n;
  }

  bool operator==(const StringPiece& x) const {
    return ((length_ == x.length_) &&
            (CompareBytes(ptr_, x.ptr_, length_) == 0));
  }
  bool operator!=(const StringPiece& x) const {
    return !(*this == x);
  }

  // Ordering: compare the common prefix bytewise; if it is equal, the
  // lengths decide.
#define STRINGPIECE_BINARY_PREDICATE(cmp,auxcmp)                             \
  bool operator cmp (const StringPiece& x) const {                           \
    int r = CompareBytes(ptr_, x.ptr_,                                       \
                         length_ < x.length_ ? length_ : x.length_);         \
    return ((r auxcmp 0) || ((r == 0) && (length_ cmp x.length_)));          \
  }
  STRINGPIECE_BINARY_PREDICATE(<,  <)
  STRINGPIECE_BINARY_PREDICATE(<=, <)
  STRINGPIECE_BINARY_PREDICATE(>=, >)
  STRINGPIECE_BINARY_PREDICATE(>,  >)
#undef STRINGPIECE_BINARY_PREDICATE

  // Three-way comparison: negative, zero or positive as "this" orders
  // before, equal to or after "x".
  int compare(const StringPiece& x) const {
    int r = CompareBytes(ptr_, x.ptr_,
                         length_ < x.length_ ? length_ : x.length_);
    if (r == 0) {
      if (length_ < x.length_) r = -1;
      else if (length_ > x.length_) r = +1;
    }
    return r;
  }

  // Return a std::string copy of the referenced bytes.
  string as_string() const {
    return string(data(), size());
  }

  // Replace the contents of *target with a copy of the referenced bytes.
  void CopyToString(string* target) const {
    target->assign(ptr_, length_);
  }

  // Does "this" start with "x"
  bool starts_with(const StringPiece& x) const {
    return ((length_ >= x.length_) &&
            (CompareBytes(ptr_, x.ptr_, x.length_) == 0));
  }
};
|
||||
|
||||
} // namespace pcrecpp
|
||||
|
||||
// ------------------------------------------------------------------
|
||||
// Functions used to create STL containers that use StringPiece
|
||||
// Remember that a StringPiece's lifetime had better be less than
|
||||
// that of the underlying string or char*. If it is not, then you
|
||||
// cannot safely store a StringPiece into an STL container
|
||||
// ------------------------------------------------------------------
|
||||
|
||||
#ifdef HAVE_TYPE_TRAITS
|
||||
// This makes vector<StringPiece> really fast for some STL implementations
|
||||
template<> struct __type_traits<pcrecpp::StringPiece> {
|
||||
typedef __true_type has_trivial_default_constructor;
|
||||
typedef __true_type has_trivial_copy_constructor;
|
||||
typedef __true_type has_trivial_assignment_operator;
|
||||
typedef __true_type has_trivial_destructor;
|
||||
typedef __true_type is_POD_type;
|
||||
};
|
||||
#endif
|
||||
|
||||
// allow StringPiece to be logged
|
||||
std::ostream& operator<<(std::ostream& o, const pcrecpp::StringPiece& piece);
|
||||
|
||||
#endif /* _PCRE_STRINGPIECE_H */
|
||||
579
Engine/lib/pcre/pcre_study.c
Normal file
579
Engine/lib/pcre/pcre_study.c
Normal file
|
|
@ -0,0 +1,579 @@
|
|||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2008 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains the external function pcre_study(), along with local
|
||||
supporting functions. */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
/* Returns from set_start_bits() */
|
||||
|
||||
/* SSB_FAIL:     failed to find any starting bytes
   SSB_DONE:     found mandatory starting bytes
   SSB_CONTINUE: found only optional starting bytes; at the outermost level
                 pcre_study() treats anything other than SSB_DONE as failure */
enum { SSB_FAIL, SSB_DONE, SSB_CONTINUE };
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Set a bit and maybe its alternate case *
|
||||
*************************************************/
|
||||
|
||||
/* Given a character, set its bit in the table, and also the bit for the other
|
||||
version of a letter if we are caseless.
|
||||
|
||||
Arguments:
|
||||
start_bits points to the bit map
|
||||
c is the character
|
||||
caseless the caseless flag
|
||||
cd the block with char table pointers
|
||||
|
||||
Returns: nothing
|
||||
*/
|
||||
|
||||
static void
|
||||
set_bit(uschar *start_bits, unsigned int c, BOOL caseless, compile_data *cd)
|
||||
{
|
||||
start_bits[c/8] |= (1 << (c&7));
|
||||
if (caseless && (cd->ctypes[c] & ctype_letter) != 0)
|
||||
start_bits[cd->fcc[c]/8] |= (1 << (cd->fcc[c]&7));
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Create bitmap of starting bytes *
|
||||
*************************************************/
|
||||
|
||||
/* This function scans a compiled unanchored expression recursively and
|
||||
attempts to build a bitmap of the set of possible starting bytes. As time goes
|
||||
by, we may be able to get more clever at doing this. The SSB_CONTINUE return is
|
||||
useful for parenthesized groups in patterns such as (a*)b where the group
|
||||
provides some optional starting bytes but scanning must continue at the outer
|
||||
level to find at least one mandatory byte. At the outermost level, this
|
||||
function fails unless the result is SSB_DONE.
|
||||
|
||||
Arguments:
|
||||
code points to an expression
|
||||
start_bits points to a 32-byte table, initialized to 0
|
||||
caseless the current state of the caseless flag
|
||||
utf8 TRUE if in UTF-8 mode
|
||||
cd the block with char table pointers
|
||||
|
||||
Returns: SSB_FAIL => Failed to find any starting bytes
|
||||
SSB_DONE => Found mandatory starting bytes
|
||||
SSB_CONTINUE => Found optional starting bytes
|
||||
*/
|
||||
|
||||
static int
|
||||
set_start_bits(const uschar *code, uschar *start_bits, BOOL caseless,
|
||||
BOOL utf8, compile_data *cd)
|
||||
{
|
||||
register int c;
|
||||
int yield = SSB_DONE;
|
||||
|
||||
#if 0
|
||||
/* ========================================================================= */
|
||||
/* The following comment and code was inserted in January 1999. In May 2006,
|
||||
when it was observed to cause compiler warnings about unused values, I took it
|
||||
out again. If anybody is still using OS/2, they will have to put it back
|
||||
manually. */
|
||||
|
||||
/* This next statement and the later reference to dummy are here in order to
|
||||
trick the optimizer of the IBM C compiler for OS/2 into generating correct
|
||||
code. Apparently IBM isn't going to fix the problem, and we would rather not
|
||||
disable optimization (in this module it actually makes a big difference, and
|
||||
the pcre module can use all the optimization it can get). */
|
||||
|
||||
volatile int dummy;
|
||||
/* ========================================================================= */
|
||||
#endif
|
||||
|
||||
do
|
||||
{
|
||||
const uschar *tcode = code + (((int)*code == OP_CBRA)? 3:1) + LINK_SIZE;
|
||||
BOOL try_next = TRUE;
|
||||
|
||||
while (try_next) /* Loop for items in this branch */
|
||||
{
|
||||
int rc;
|
||||
switch(*tcode)
|
||||
{
|
||||
/* Fail if we reach something we don't understand */
|
||||
|
||||
default:
|
||||
return SSB_FAIL;
|
||||
|
||||
/* If we hit a bracket or a positive lookahead assertion, recurse to set
|
||||
bits from within the subpattern. If it can't find anything, we have to
|
||||
give up. If it finds some mandatory character(s), we are done for this
|
||||
branch. Otherwise, carry on scanning after the subpattern. */
|
||||
|
||||
case OP_BRA:
|
||||
case OP_SBRA:
|
||||
case OP_CBRA:
|
||||
case OP_SCBRA:
|
||||
case OP_ONCE:
|
||||
case OP_ASSERT:
|
||||
rc = set_start_bits(tcode, start_bits, caseless, utf8, cd);
|
||||
if (rc == SSB_FAIL) return SSB_FAIL;
|
||||
if (rc == SSB_DONE) try_next = FALSE; else
|
||||
{
|
||||
do tcode += GET(tcode, 1); while (*tcode == OP_ALT);
|
||||
tcode += 1 + LINK_SIZE;
|
||||
}
|
||||
break;
|
||||
|
||||
/* If we hit ALT or KET, it means we haven't found anything mandatory in
|
||||
this branch, though we might have found something optional. For ALT, we
|
||||
continue with the next alternative, but we have to arrange that the final
|
||||
result from subpattern is SSB_CONTINUE rather than SSB_DONE. For KET,
|
||||
return SSB_CONTINUE: if this is the top level, that indicates failure,
|
||||
but after a nested subpattern, it causes scanning to continue. */
|
||||
|
||||
case OP_ALT:
|
||||
yield = SSB_CONTINUE;
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
case OP_KET:
|
||||
case OP_KETRMAX:
|
||||
case OP_KETRMIN:
|
||||
return SSB_CONTINUE;
|
||||
|
||||
/* Skip over callout */
|
||||
|
||||
case OP_CALLOUT:
|
||||
tcode += 2 + 2*LINK_SIZE;
|
||||
break;
|
||||
|
||||
/* Skip over lookbehind and negative lookahead assertions */
|
||||
|
||||
case OP_ASSERT_NOT:
|
||||
case OP_ASSERTBACK:
|
||||
case OP_ASSERTBACK_NOT:
|
||||
do tcode += GET(tcode, 1); while (*tcode == OP_ALT);
|
||||
tcode += 1 + LINK_SIZE;
|
||||
break;
|
||||
|
||||
/* Skip over an option setting, changing the caseless flag */
|
||||
|
||||
case OP_OPT:
|
||||
caseless = (tcode[1] & PCRE_CASELESS) != 0;
|
||||
tcode += 2;
|
||||
break;
|
||||
|
||||
/* BRAZERO does the bracket, but carries on. */
|
||||
|
||||
case OP_BRAZERO:
|
||||
case OP_BRAMINZERO:
|
||||
if (set_start_bits(++tcode, start_bits, caseless, utf8, cd) == SSB_FAIL)
|
||||
return SSB_FAIL;
|
||||
/* =========================================================================
|
||||
See the comment at the head of this function concerning the next line,
|
||||
which was an old fudge for the benefit of OS/2.
|
||||
dummy = 1;
|
||||
========================================================================= */
|
||||
do tcode += GET(tcode,1); while (*tcode == OP_ALT);
|
||||
tcode += 1 + LINK_SIZE;
|
||||
break;
|
||||
|
||||
/* Single-char * or ? sets the bit and tries the next item */
|
||||
|
||||
case OP_STAR:
|
||||
case OP_MINSTAR:
|
||||
case OP_POSSTAR:
|
||||
case OP_QUERY:
|
||||
case OP_MINQUERY:
|
||||
case OP_POSQUERY:
|
||||
set_bit(start_bits, tcode[1], caseless, cd);
|
||||
tcode += 2;
|
||||
#ifdef SUPPORT_UTF8
|
||||
if (utf8 && tcode[-1] >= 0xc0)
|
||||
tcode += _pcre_utf8_table4[tcode[-1] & 0x3f];
|
||||
#endif
|
||||
break;
|
||||
|
||||
/* Single-char upto sets the bit and tries the next */
|
||||
|
||||
case OP_UPTO:
|
||||
case OP_MINUPTO:
|
||||
case OP_POSUPTO:
|
||||
set_bit(start_bits, tcode[3], caseless, cd);
|
||||
tcode += 4;
|
||||
#ifdef SUPPORT_UTF8
|
||||
if (utf8 && tcode[-1] >= 0xc0)
|
||||
tcode += _pcre_utf8_table4[tcode[-1] & 0x3f];
|
||||
#endif
|
||||
break;
|
||||
|
||||
/* At least one single char sets the bit and stops */
|
||||
|
||||
case OP_EXACT: /* Fall through */
|
||||
tcode += 2;
|
||||
|
||||
case OP_CHAR:
|
||||
case OP_CHARNC:
|
||||
case OP_PLUS:
|
||||
case OP_MINPLUS:
|
||||
case OP_POSPLUS:
|
||||
set_bit(start_bits, tcode[1], caseless, cd);
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
/* Single character type sets the bits and stops */
|
||||
|
||||
case OP_NOT_DIGIT:
|
||||
for (c = 0; c < 32; c++)
|
||||
start_bits[c] |= ~cd->cbits[c+cbit_digit];
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
case OP_DIGIT:
|
||||
for (c = 0; c < 32; c++)
|
||||
start_bits[c] |= cd->cbits[c+cbit_digit];
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
/* The cbit_space table has vertical tab as whitespace; we have to
|
||||
discard it. */
|
||||
|
||||
case OP_NOT_WHITESPACE:
|
||||
for (c = 0; c < 32; c++)
|
||||
{
|
||||
int d = cd->cbits[c+cbit_space];
|
||||
if (c == 1) d &= ~0x08;
|
||||
start_bits[c] |= ~d;
|
||||
}
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
/* The cbit_space table has vertical tab as whitespace; we have to
|
||||
discard it. */
|
||||
|
||||
case OP_WHITESPACE:
|
||||
for (c = 0; c < 32; c++)
|
||||
{
|
||||
int d = cd->cbits[c+cbit_space];
|
||||
if (c == 1) d &= ~0x08;
|
||||
start_bits[c] |= d;
|
||||
}
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
case OP_NOT_WORDCHAR:
|
||||
for (c = 0; c < 32; c++)
|
||||
start_bits[c] |= ~cd->cbits[c+cbit_word];
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
case OP_WORDCHAR:
|
||||
for (c = 0; c < 32; c++)
|
||||
start_bits[c] |= cd->cbits[c+cbit_word];
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
/* One or more character type fudges the pointer and restarts, knowing
|
||||
it will hit a single character type and stop there. */
|
||||
|
||||
case OP_TYPEPLUS:
|
||||
case OP_TYPEMINPLUS:
|
||||
tcode++;
|
||||
break;
|
||||
|
||||
case OP_TYPEEXACT:
|
||||
tcode += 3;
|
||||
break;
|
||||
|
||||
/* Zero or more repeats of character types set the bits and then
|
||||
try again. */
|
||||
|
||||
case OP_TYPEUPTO:
|
||||
case OP_TYPEMINUPTO:
|
||||
case OP_TYPEPOSUPTO:
|
||||
tcode += 2; /* Fall through */
|
||||
|
||||
case OP_TYPESTAR:
|
||||
case OP_TYPEMINSTAR:
|
||||
case OP_TYPEPOSSTAR:
|
||||
case OP_TYPEQUERY:
|
||||
case OP_TYPEMINQUERY:
|
||||
case OP_TYPEPOSQUERY:
|
||||
switch(tcode[1])
|
||||
{
|
||||
case OP_ANY:
|
||||
return SSB_FAIL;
|
||||
|
||||
case OP_NOT_DIGIT:
|
||||
for (c = 0; c < 32; c++)
|
||||
start_bits[c] |= ~cd->cbits[c+cbit_digit];
|
||||
break;
|
||||
|
||||
case OP_DIGIT:
|
||||
for (c = 0; c < 32; c++)
|
||||
start_bits[c] |= cd->cbits[c+cbit_digit];
|
||||
break;
|
||||
|
||||
/* The cbit_space table has vertical tab as whitespace; we have to
|
||||
discard it. */
|
||||
|
||||
case OP_NOT_WHITESPACE:
|
||||
for (c = 0; c < 32; c++)
|
||||
{
|
||||
int d = cd->cbits[c+cbit_space];
|
||||
if (c == 1) d &= ~0x08;
|
||||
start_bits[c] |= ~d;
|
||||
}
|
||||
break;
|
||||
|
||||
/* The cbit_space table has vertical tab as whitespace; we have to
|
||||
discard it. */
|
||||
|
||||
case OP_WHITESPACE:
|
||||
for (c = 0; c < 32; c++)
|
||||
{
|
||||
int d = cd->cbits[c+cbit_space];
|
||||
if (c == 1) d &= ~0x08;
|
||||
start_bits[c] |= d;
|
||||
}
|
||||
break;
|
||||
|
||||
case OP_NOT_WORDCHAR:
|
||||
for (c = 0; c < 32; c++)
|
||||
start_bits[c] |= ~cd->cbits[c+cbit_word];
|
||||
break;
|
||||
|
||||
case OP_WORDCHAR:
|
||||
for (c = 0; c < 32; c++)
|
||||
start_bits[c] |= cd->cbits[c+cbit_word];
|
||||
break;
|
||||
}
|
||||
|
||||
tcode += 2;
|
||||
break;
|
||||
|
||||
/* Character class where all the information is in a bit map: set the
|
||||
bits and either carry on or not, according to the repeat count. If it was
|
||||
a negative class, and we are operating with UTF-8 characters, any byte
|
||||
with a value >= 0xc4 is a potentially valid starter because it starts a
|
||||
character with a value > 255. */
|
||||
|
||||
case OP_NCLASS:
|
||||
#ifdef SUPPORT_UTF8
|
||||
if (utf8)
|
||||
{
|
||||
start_bits[24] |= 0xf0; /* Bits for 0xc4 - 0xc8 */
|
||||
memset(start_bits+25, 0xff, 7); /* Bits for 0xc9 - 0xff */
|
||||
}
|
||||
#endif
|
||||
/* Fall through */
|
||||
|
||||
case OP_CLASS:
|
||||
{
|
||||
tcode++;
|
||||
|
||||
/* In UTF-8 mode, the bits in a bit map correspond to character
|
||||
values, not to byte values. However, the bit map we are constructing is
|
||||
for byte values. So we have to do a conversion for characters whose
|
||||
value is > 127. In fact, there are only two possible starting bytes for
|
||||
characters in the range 128 - 255. */
|
||||
|
||||
#ifdef SUPPORT_UTF8
|
||||
if (utf8)
|
||||
{
|
||||
for (c = 0; c < 16; c++) start_bits[c] |= tcode[c];
|
||||
for (c = 128; c < 256; c++)
|
||||
{
|
||||
if ((tcode[c/8] && (1 << (c&7))) != 0)
|
||||
{
|
||||
int d = (c >> 6) | 0xc0; /* Set bit for this starter */
|
||||
start_bits[d/8] |= (1 << (d&7)); /* and then skip on to the */
|
||||
c = (c & 0xc0) + 0x40 - 1; /* next relevant character. */
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* In non-UTF-8 mode, the two bit maps are completely compatible. */
|
||||
|
||||
else
|
||||
#endif
|
||||
{
|
||||
for (c = 0; c < 32; c++) start_bits[c] |= tcode[c];
|
||||
}
|
||||
|
||||
/* Advance past the bit map, and act on what follows */
|
||||
|
||||
tcode += 32;
|
||||
switch (*tcode)
|
||||
{
|
||||
case OP_CRSTAR:
|
||||
case OP_CRMINSTAR:
|
||||
case OP_CRQUERY:
|
||||
case OP_CRMINQUERY:
|
||||
tcode++;
|
||||
break;
|
||||
|
||||
case OP_CRRANGE:
|
||||
case OP_CRMINRANGE:
|
||||
if (((tcode[1] << 8) + tcode[2]) == 0) tcode += 5;
|
||||
else try_next = FALSE;
|
||||
break;
|
||||
|
||||
default:
|
||||
try_next = FALSE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
break; /* End of bitmap class handling */
|
||||
|
||||
} /* End of switch */
|
||||
} /* End of try_next loop */
|
||||
|
||||
code += GET(code, 1); /* Advance to next branch */
|
||||
}
|
||||
while (*code == OP_ALT);
|
||||
return yield;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Study a compiled expression *
|
||||
*************************************************/
|
||||
|
||||
/* This function is handed a compiled expression that it must study to produce
|
||||
information that will speed up the matching. It returns a pcre_extra block
|
||||
which then gets handed back to pcre_exec().
|
||||
|
||||
Arguments:
|
||||
re points to the compiled expression
|
||||
options contains option bits
|
||||
errorptr points to where to place error messages;
|
||||
set NULL unless error
|
||||
|
||||
Returns: pointer to a pcre_extra block, with study_data filled in and the
|
||||
appropriate flag set;
|
||||
NULL on error or if no optimization possible
|
||||
*/
|
||||
|
||||
PCRE_EXP_DEFN pcre_extra *
pcre_study(const pcre *external_re, int options, const char **errorptr)
{
  const real_pcre *re = (const real_pcre *)external_re;
  const uschar *tables;
  uschar *code;
  uschar start_bits[32];
  compile_data compile_block;
  BOOL is_caseless;
  BOOL is_utf8;
  pcre_extra *extra;
  pcre_study_data *study;

  /* No error until proven otherwise; errorptr is written unconditionally. */

  *errorptr = NULL;

  /* Validate the arguments */

  if (re == NULL || re->magic_number != MAGIC_NUMBER)
  {
    *errorptr = "argument is not a compiled regular expression";
    return NULL;
  }

  if ((options & ~PUBLIC_STUDY_OPTIONS) != 0)
  {
    *errorptr = "unknown or incorrect option bit(s) set";
    return NULL;
  }

  /* For an anchored pattern, or an unanchored pattern that has a first char,
  or a multiline pattern that matches only at "line starts", no further
  processing at present. */

  if ((re->options & PCRE_ANCHORED) != 0) return NULL;
  if ((re->flags & (PCRE_FIRSTSET|PCRE_STARTLINE)) != 0) return NULL;

  /* Set the character tables in the block that is passed around. If the
  pattern carries no tables of its own, ask pcre_fullinfo() for the default
  ones. */

  tables = re->tables;
  if (tables == NULL)
  {
    (void)pcre_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES,
      (void *)(&tables));
  }

  compile_block.lcc = tables + lcc_offset;
  compile_block.fcc = tables + fcc_offset;
  compile_block.cbits = tables + cbits_offset;
  compile_block.ctypes = tables + ctypes_offset;

  /* The scan starts just past the name table that follows the header. */

  code = (uschar *)re + re->name_table_offset +
    (re->name_count * re->name_entry_size);

  /* See if we can find a fixed set of initial characters for the pattern.
  Anything short of SSB_DONE means there is nothing useful to record. */

  memset(start_bits, 0, sizeof(start_bits));
  is_caseless = (re->options & PCRE_CASELESS) != 0;
  is_utf8 = (re->options & PCRE_UTF8) != 0;

  if (set_start_bits(code, start_bits, is_caseless, is_utf8,
        &compile_block) != SSB_DONE)
    return NULL;

  /* Get a pcre_extra block and a pcre_study_data block in one allocation: the
  study data sits immediately after the pcre_extra, which points to it and
  which may also get additional data set later by the calling program. At the
  moment, the size of pcre_study_data is fixed. We nevertheless save it in a
  field for returning via the pcre_fullinfo() function so that if it becomes
  variable in the future, we don't have to change that code. */

  extra = (pcre_extra *)(pcre_malloc)
    (sizeof(pcre_extra) + sizeof(pcre_study_data));

  if (extra == NULL)
  {
    *errorptr = "failed to get memory";
    return NULL;
  }

  study = (pcre_study_data *)(extra + 1);
  study->size = sizeof(pcre_study_data);
  study->options = PCRE_STUDY_MAPPED;
  memcpy(study->start_bits, start_bits, sizeof(start_bits));

  extra->flags = PCRE_EXTRA_STUDY_DATA;
  extra->study_data = study;

  return extra;
}
|
||||
|
||||
/* End of pcre_study.c */
|
||||
318
Engine/lib/pcre/pcre_tables.c
Normal file
318
Engine/lib/pcre/pcre_tables.c
Normal file
|
|
@ -0,0 +1,318 @@
|
|||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2008 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains some fixed tables that are used by more than one of the
|
||||
PCRE code modules. The tables are also #included by the pcretest program, which
|
||||
uses macros to change their names from _pcre_xxx to xxxx, thereby avoiding name
|
||||
clashes with the library. */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
/* Table of sizes for the fixed-length opcodes. It's defined in a macro so that
|
||||
the definition is next to the definition of the opcodes in pcre_internal.h. */
|
||||
|
||||
const uschar _pcre_OP_lengths[] = { OP_LENGTHS };
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Tables for UTF-8 support *
|
||||
*************************************************/
|
||||
|
||||
/* These are the breakpoints for different numbers of bytes in a UTF-8
|
||||
character. */
|
||||
|
||||
#ifdef SUPPORT_UTF8
|
||||
|
||||
const int _pcre_utf8_table1[] =
|
||||
{ 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff};
|
||||
|
||||
const int _pcre_utf8_table1_size = sizeof(_pcre_utf8_table1)/sizeof(int);
|
||||
|
||||
/* These are the indicator bits and the mask for the data bits to set in the
|
||||
first byte of a character, indexed by the number of additional bytes. */
|
||||
|
||||
const int _pcre_utf8_table2[] = { 0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
|
||||
const int _pcre_utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
|
||||
|
||||
/* Table of the number of extra bytes, indexed by the first byte masked with
|
||||
0x3f. The highest number for a valid UTF-8 first byte is in fact 0x3d. */
|
||||
|
||||
const uschar _pcre_utf8_table4[] = {
|
||||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
||||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
||||
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
|
||||
3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
|
||||
|
||||
/* The pcre_utt[] table below translates Unicode property names into type and
|
||||
code values. It is searched by binary chop, so must be in collating sequence of
|
||||
name. Originally, the table contained pointers to the name strings in the first
|
||||
field of each entry. However, that leads to a large number of relocations when
|
||||
a shared library is dynamically loaded. A significant reduction is made by
|
||||
putting all the names into a single, large string and then using offsets in the
|
||||
table itself. Maintenance is more error-prone, but frequent changes to this
|
||||
data are unlikely. */
|
||||
|
||||
const char _pcre_utt_names[] =
|
||||
"Any\0"
|
||||
"Arabic\0"
|
||||
"Armenian\0"
|
||||
"Balinese\0"
|
||||
"Bengali\0"
|
||||
"Bopomofo\0"
|
||||
"Braille\0"
|
||||
"Buginese\0"
|
||||
"Buhid\0"
|
||||
"C\0"
|
||||
"Canadian_Aboriginal\0"
|
||||
"Cc\0"
|
||||
"Cf\0"
|
||||
"Cherokee\0"
|
||||
"Cn\0"
|
||||
"Co\0"
|
||||
"Common\0"
|
||||
"Coptic\0"
|
||||
"Cs\0"
|
||||
"Cuneiform\0"
|
||||
"Cypriot\0"
|
||||
"Cyrillic\0"
|
||||
"Deseret\0"
|
||||
"Devanagari\0"
|
||||
"Ethiopic\0"
|
||||
"Georgian\0"
|
||||
"Glagolitic\0"
|
||||
"Gothic\0"
|
||||
"Greek\0"
|
||||
"Gujarati\0"
|
||||
"Gurmukhi\0"
|
||||
"Han\0"
|
||||
"Hangul\0"
|
||||
"Hanunoo\0"
|
||||
"Hebrew\0"
|
||||
"Hiragana\0"
|
||||
"Inherited\0"
|
||||
"Kannada\0"
|
||||
"Katakana\0"
|
||||
"Kharoshthi\0"
|
||||
"Khmer\0"
|
||||
"L\0"
|
||||
"L&\0"
|
||||
"Lao\0"
|
||||
"Latin\0"
|
||||
"Limbu\0"
|
||||
"Linear_B\0"
|
||||
"Ll\0"
|
||||
"Lm\0"
|
||||
"Lo\0"
|
||||
"Lt\0"
|
||||
"Lu\0"
|
||||
"M\0"
|
||||
"Malayalam\0"
|
||||
"Mc\0"
|
||||
"Me\0"
|
||||
"Mn\0"
|
||||
"Mongolian\0"
|
||||
"Myanmar\0"
|
||||
"N\0"
|
||||
"Nd\0"
|
||||
"New_Tai_Lue\0"
|
||||
"Nko\0"
|
||||
"Nl\0"
|
||||
"No\0"
|
||||
"Ogham\0"
|
||||
"Old_Italic\0"
|
||||
"Old_Persian\0"
|
||||
"Oriya\0"
|
||||
"Osmanya\0"
|
||||
"P\0"
|
||||
"Pc\0"
|
||||
"Pd\0"
|
||||
"Pe\0"
|
||||
"Pf\0"
|
||||
"Phags_Pa\0"
|
||||
"Phoenician\0"
|
||||
"Pi\0"
|
||||
"Po\0"
|
||||
"Ps\0"
|
||||
"Runic\0"
|
||||
"S\0"
|
||||
"Sc\0"
|
||||
"Shavian\0"
|
||||
"Sinhala\0"
|
||||
"Sk\0"
|
||||
"Sm\0"
|
||||
"So\0"
|
||||
"Syloti_Nagri\0"
|
||||
"Syriac\0"
|
||||
"Tagalog\0"
|
||||
"Tagbanwa\0"
|
||||
"Tai_Le\0"
|
||||
"Tamil\0"
|
||||
"Telugu\0"
|
||||
"Thaana\0"
|
||||
"Thai\0"
|
||||
"Tibetan\0"
|
||||
"Tifinagh\0"
|
||||
"Ugaritic\0"
|
||||
"Yi\0"
|
||||
"Z\0"
|
||||
"Zl\0"
|
||||
"Zp\0"
|
||||
"Zs\0";
|
||||
|
||||
const ucp_type_table _pcre_utt[] = {
|
||||
{ 0, PT_ANY, 0 },
|
||||
{ 4, PT_SC, ucp_Arabic },
|
||||
{ 11, PT_SC, ucp_Armenian },
|
||||
{ 20, PT_SC, ucp_Balinese },
|
||||
{ 29, PT_SC, ucp_Bengali },
|
||||
{ 37, PT_SC, ucp_Bopomofo },
|
||||
{ 46, PT_SC, ucp_Braille },
|
||||
{ 54, PT_SC, ucp_Buginese },
|
||||
{ 63, PT_SC, ucp_Buhid },
|
||||
{ 69, PT_GC, ucp_C },
|
||||
{ 71, PT_SC, ucp_Canadian_Aboriginal },
|
||||
{ 91, PT_PC, ucp_Cc },
|
||||
{ 94, PT_PC, ucp_Cf },
|
||||
{ 97, PT_SC, ucp_Cherokee },
|
||||
{ 106, PT_PC, ucp_Cn },
|
||||
{ 109, PT_PC, ucp_Co },
|
||||
{ 112, PT_SC, ucp_Common },
|
||||
{ 119, PT_SC, ucp_Coptic },
|
||||
{ 126, PT_PC, ucp_Cs },
|
||||
{ 129, PT_SC, ucp_Cuneiform },
|
||||
{ 139, PT_SC, ucp_Cypriot },
|
||||
{ 147, PT_SC, ucp_Cyrillic },
|
||||
{ 156, PT_SC, ucp_Deseret },
|
||||
{ 164, PT_SC, ucp_Devanagari },
|
||||
{ 175, PT_SC, ucp_Ethiopic },
|
||||
{ 184, PT_SC, ucp_Georgian },
|
||||
{ 193, PT_SC, ucp_Glagolitic },
|
||||
{ 204, PT_SC, ucp_Gothic },
|
||||
{ 211, PT_SC, ucp_Greek },
|
||||
{ 217, PT_SC, ucp_Gujarati },
|
||||
{ 226, PT_SC, ucp_Gurmukhi },
|
||||
{ 235, PT_SC, ucp_Han },
|
||||
{ 239, PT_SC, ucp_Hangul },
|
||||
{ 246, PT_SC, ucp_Hanunoo },
|
||||
{ 254, PT_SC, ucp_Hebrew },
|
||||
{ 261, PT_SC, ucp_Hiragana },
|
||||
{ 270, PT_SC, ucp_Inherited },
|
||||
{ 280, PT_SC, ucp_Kannada },
|
||||
{ 288, PT_SC, ucp_Katakana },
|
||||
{ 297, PT_SC, ucp_Kharoshthi },
|
||||
{ 308, PT_SC, ucp_Khmer },
|
||||
{ 314, PT_GC, ucp_L },
|
||||
{ 316, PT_LAMP, 0 },
|
||||
{ 319, PT_SC, ucp_Lao },
|
||||
{ 323, PT_SC, ucp_Latin },
|
||||
{ 329, PT_SC, ucp_Limbu },
|
||||
{ 335, PT_SC, ucp_Linear_B },
|
||||
{ 344, PT_PC, ucp_Ll },
|
||||
{ 347, PT_PC, ucp_Lm },
|
||||
{ 350, PT_PC, ucp_Lo },
|
||||
{ 353, PT_PC, ucp_Lt },
|
||||
{ 356, PT_PC, ucp_Lu },
|
||||
{ 359, PT_GC, ucp_M },
|
||||
{ 361, PT_SC, ucp_Malayalam },
|
||||
{ 371, PT_PC, ucp_Mc },
|
||||
{ 374, PT_PC, ucp_Me },
|
||||
{ 377, PT_PC, ucp_Mn },
|
||||
{ 380, PT_SC, ucp_Mongolian },
|
||||
{ 390, PT_SC, ucp_Myanmar },
|
||||
{ 398, PT_GC, ucp_N },
|
||||
{ 400, PT_PC, ucp_Nd },
|
||||
{ 403, PT_SC, ucp_New_Tai_Lue },
|
||||
{ 415, PT_SC, ucp_Nko },
|
||||
{ 419, PT_PC, ucp_Nl },
|
||||
{ 422, PT_PC, ucp_No },
|
||||
{ 425, PT_SC, ucp_Ogham },
|
||||
{ 431, PT_SC, ucp_Old_Italic },
|
||||
{ 442, PT_SC, ucp_Old_Persian },
|
||||
{ 454, PT_SC, ucp_Oriya },
|
||||
{ 460, PT_SC, ucp_Osmanya },
|
||||
{ 468, PT_GC, ucp_P },
|
||||
{ 470, PT_PC, ucp_Pc },
|
||||
{ 473, PT_PC, ucp_Pd },
|
||||
{ 476, PT_PC, ucp_Pe },
|
||||
{ 479, PT_PC, ucp_Pf },
|
||||
{ 482, PT_SC, ucp_Phags_Pa },
|
||||
{ 491, PT_SC, ucp_Phoenician },
|
||||
{ 502, PT_PC, ucp_Pi },
|
||||
{ 505, PT_PC, ucp_Po },
|
||||
{ 508, PT_PC, ucp_Ps },
|
||||
{ 511, PT_SC, ucp_Runic },
|
||||
{ 517, PT_GC, ucp_S },
|
||||
{ 519, PT_PC, ucp_Sc },
|
||||
{ 522, PT_SC, ucp_Shavian },
|
||||
{ 530, PT_SC, ucp_Sinhala },
|
||||
{ 538, PT_PC, ucp_Sk },
|
||||
{ 541, PT_PC, ucp_Sm },
|
||||
{ 544, PT_PC, ucp_So },
|
||||
{ 547, PT_SC, ucp_Syloti_Nagri },
|
||||
{ 560, PT_SC, ucp_Syriac },
|
||||
{ 567, PT_SC, ucp_Tagalog },
|
||||
{ 575, PT_SC, ucp_Tagbanwa },
|
||||
{ 584, PT_SC, ucp_Tai_Le },
|
||||
{ 591, PT_SC, ucp_Tamil },
|
||||
{ 597, PT_SC, ucp_Telugu },
|
||||
{ 604, PT_SC, ucp_Thaana },
|
||||
{ 611, PT_SC, ucp_Thai },
|
||||
{ 616, PT_SC, ucp_Tibetan },
|
||||
{ 624, PT_SC, ucp_Tifinagh },
|
||||
{ 633, PT_SC, ucp_Ugaritic },
|
||||
{ 642, PT_SC, ucp_Yi },
|
||||
{ 645, PT_GC, ucp_Z },
|
||||
{ 647, PT_PC, ucp_Zl },
|
||||
{ 650, PT_PC, ucp_Zp },
|
||||
{ 653, PT_PC, ucp_Zs }
|
||||
};
|
||||
|
||||
const int _pcre_utt_size = sizeof(_pcre_utt)/sizeof(ucp_type_table);
|
||||
|
||||
#endif /* SUPPORT_UTF8 */
|
||||
|
||||
/* End of pcre_tables.c */
|
||||
137
Engine/lib/pcre/pcre_try_flipped.c
Normal file
137
Engine/lib/pcre/pcre_try_flipped.c
Normal file
|
|
@ -0,0 +1,137 @@
|
|||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2008 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains an internal function that tests a compiled pattern to
|
||||
see if it was compiled with the opposite endianness. If so, it uses an
|
||||
auxiliary local function to flip the appropriate bytes. */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Flip bytes in an integer *
|
||||
*************************************************/
|
||||
|
||||
/* This function is called when the magic number in a regex doesn't match, in
|
||||
order to flip its bytes to see if we are dealing with a pattern that was
|
||||
compiled on a host of different endianness. If so, this function is used to
|
||||
flip other byte values.
|
||||
|
||||
Arguments:
|
||||
value the number to flip
|
||||
n the number of bytes to flip (assumed to be 2 or 4)
|
||||
|
||||
Returns: the flipped value
|
||||
*/
|
||||
|
||||
/* Reverse the byte order of the low-order 2 (n == 2) or 4 (any other n) bytes
of value. Bits above the bytes being reversed never reach the result. */

static unsigned long int
byteflip(unsigned long int value, int n)
{
/* Split the four low-order bytes apart, least significant first. */

const unsigned long int b0 = value & 0xff;
const unsigned long int b1 = (value >> 8) & 0xff;
const unsigned long int b2 = (value >> 16) & 0xff;
const unsigned long int b3 = (value >> 24) & 0xff;

/* Two-byte quantity: only the bottom pair is exchanged. */

if (n == 2) return (b0 << 8) | b1;

/* Otherwise treat the value as a four-byte quantity and mirror all four. */

return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Test for a byte-flipped compiled regex *
|
||||
*************************************************/
|
||||
|
||||
/* This function is called from pcre_exec(), pcre_dfa_exec(), and also from
|
||||
pcre_fullinfo(). Its job is to test whether the regex is byte-flipped - that
|
||||
is, it was compiled on a system of opposite endianness. The function is called
|
||||
only when the native MAGIC_NUMBER test fails. If the regex is indeed flipped,
|
||||
we flip all the relevant values into a different data block, and return it.
|
||||
|
||||
Arguments:
|
||||
re points to the regex
|
||||
study points to study data, or NULL
|
||||
internal_re points to a new regex block
|
||||
internal_study points to a new study block
|
||||
|
||||
Returns: the new block if it is indeed a byte-flipped regex
|
||||
NULL if it is not
|
||||
*/
|
||||
|
||||
real_pcre *
|
||||
_pcre_try_flipped(const real_pcre *re, real_pcre *internal_re,
|
||||
const pcre_study_data *study, pcre_study_data *internal_study)
|
||||
{
|
||||
if (byteflip(re->magic_number, sizeof(re->magic_number)) != MAGIC_NUMBER)
|
||||
return NULL;
|
||||
|
||||
*internal_re = *re; /* To copy other fields */
|
||||
internal_re->size = byteflip(re->size, sizeof(re->size));
|
||||
internal_re->options = byteflip(re->options, sizeof(re->options));
|
||||
internal_re->flags = (pcre_uint16)byteflip(re->flags, sizeof(re->flags));
|
||||
internal_re->top_bracket =
|
||||
(pcre_uint16)byteflip(re->top_bracket, sizeof(re->top_bracket));
|
||||
internal_re->top_backref =
|
||||
(pcre_uint16)byteflip(re->top_backref, sizeof(re->top_backref));
|
||||
internal_re->first_byte =
|
||||
(pcre_uint16)byteflip(re->first_byte, sizeof(re->first_byte));
|
||||
internal_re->req_byte =
|
||||
(pcre_uint16)byteflip(re->req_byte, sizeof(re->req_byte));
|
||||
internal_re->name_table_offset =
|
||||
(pcre_uint16)byteflip(re->name_table_offset, sizeof(re->name_table_offset));
|
||||
internal_re->name_entry_size =
|
||||
(pcre_uint16)byteflip(re->name_entry_size, sizeof(re->name_entry_size));
|
||||
internal_re->name_count =
|
||||
(pcre_uint16)byteflip(re->name_count, sizeof(re->name_count));
|
||||
|
||||
if (study != NULL)
|
||||
{
|
||||
*internal_study = *study; /* To copy other fields */
|
||||
internal_study->size = byteflip(study->size, sizeof(study->size));
|
||||
internal_study->options = byteflip(study->options, sizeof(study->options));
|
||||
}
|
||||
|
||||
return internal_re;
|
||||
}
|
||||
|
||||
/* End of pcre_tryflipped.c */
|
||||
179
Engine/lib/pcre/pcre_ucp_searchfuncs.c
Normal file
179
Engine/lib/pcre/pcre_ucp_searchfuncs.c
Normal file
|
|
@ -0,0 +1,179 @@
|
|||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2008 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains code for searching the table of Unicode character
|
||||
properties. */
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
#include "ucp.h" /* Category definitions */
|
||||
#include "ucpinternal.h" /* Internal table details */
|
||||
#include "ucptable.h" /* The table itself */
|
||||
|
||||
|
||||
/* Table to translate from particular type value to the general value. */
|
||||
|
||||
static const int ucp_gentype[] = {
|
||||
ucp_C, ucp_C, ucp_C, ucp_C, ucp_C, /* Cc, Cf, Cn, Co, Cs */
|
||||
ucp_L, ucp_L, ucp_L, ucp_L, ucp_L, /* Ll, Lu, Lm, Lo, Lt */
|
||||
ucp_M, ucp_M, ucp_M, /* Mc, Me, Mn */
|
||||
ucp_N, ucp_N, ucp_N, /* Nd, Nl, No */
|
||||
ucp_P, ucp_P, ucp_P, ucp_P, ucp_P, /* Pc, Pd, Pe, Pf, Pi */
|
||||
ucp_P, ucp_P, /* Ps, Po */
|
||||
ucp_S, ucp_S, ucp_S, ucp_S, /* Sc, Sk, Sm, So */
|
||||
ucp_Z, ucp_Z, ucp_Z /* Zl, Zp, Zs */
|
||||
};
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Search table and return type *
|
||||
*************************************************/
|
||||
|
||||
/* Three values are returned: the category is ucp_C, ucp_L, etc. The detailed
|
||||
character type is ucp_Lu, ucp_Nd, etc. The script is ucp_Latin, etc.
|
||||
|
||||
Arguments:
|
||||
c the character value
|
||||
type_ptr the detailed character type is returned here
|
||||
script_ptr the script is returned here
|
||||
|
||||
Returns: the character type category
|
||||
*/
|
||||
|
||||
int
|
||||
_pcre_ucp_findprop(const unsigned int c, int *type_ptr, int *script_ptr)
|
||||
{
|
||||
int bot = 0;
|
||||
int top = sizeof(ucp_table)/sizeof(cnode);
|
||||
int mid;
|
||||
|
||||
/* The table is searched using a binary chop. You might think that using
|
||||
intermediate variables to hold some of the common expressions would speed
|
||||
things up, but tests with gcc 3.4.4 on Linux showed that, on the contrary, it
|
||||
makes things a lot slower. */
|
||||
|
||||
for (;;)
|
||||
{
|
||||
if (top <= bot)
|
||||
{
|
||||
*type_ptr = ucp_Cn;
|
||||
*script_ptr = ucp_Common;
|
||||
return ucp_C;
|
||||
}
|
||||
mid = (bot + top) >> 1;
|
||||
if (c == (ucp_table[mid].f0 & f0_charmask)) break;
|
||||
if (c < (ucp_table[mid].f0 & f0_charmask)) top = mid;
|
||||
else
|
||||
{
|
||||
if ((ucp_table[mid].f0 & f0_rangeflag) != 0 &&
|
||||
c <= (ucp_table[mid].f0 & f0_charmask) +
|
||||
(ucp_table[mid].f1 & f1_rangemask)) break;
|
||||
bot = mid + 1;
|
||||
}
|
||||
}
|
||||
|
||||
/* Found an entry in the table. Set the script and detailed type values, and
|
||||
return the general type. */
|
||||
|
||||
*script_ptr = (ucp_table[mid].f0 & f0_scriptmask) >> f0_scriptshift;
|
||||
*type_ptr = (ucp_table[mid].f1 & f1_typemask) >> f1_typeshift;
|
||||
|
||||
return ucp_gentype[*type_ptr];
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Search table and return other case *
|
||||
*************************************************/
|
||||
|
||||
/* If the given character is a letter, and there is another case for the
|
||||
letter, return the other case. Otherwise, return NOTACHAR.
|
||||
|
||||
Arguments:
|
||||
c the character value
|
||||
|
||||
Returns: the other case or NOTACHAR if none
|
||||
*/
|
||||
|
||||
unsigned int
|
||||
_pcre_ucp_othercase(const unsigned int c)
|
||||
{
|
||||
int bot = 0;
|
||||
int top = sizeof(ucp_table)/sizeof(cnode);
|
||||
int mid, offset;
|
||||
|
||||
/* The table is searched using a binary chop. You might think that using
|
||||
intermediate variables to hold some of the common expressions would speed
|
||||
things up, but tests with gcc 3.4.4 on Linux showed that, on the contrary, it
|
||||
makes things a lot slower. */
|
||||
|
||||
for (;;)
|
||||
{
|
||||
if (top <= bot) return -1;
|
||||
mid = (bot + top) >> 1;
|
||||
if (c == (ucp_table[mid].f0 & f0_charmask)) break;
|
||||
if (c < (ucp_table[mid].f0 & f0_charmask)) top = mid;
|
||||
else
|
||||
{
|
||||
if ((ucp_table[mid].f0 & f0_rangeflag) != 0 &&
|
||||
c <= (ucp_table[mid].f0 & f0_charmask) +
|
||||
(ucp_table[mid].f1 & f1_rangemask)) break;
|
||||
bot = mid + 1;
|
||||
}
|
||||
}
|
||||
|
||||
/* Found an entry in the table. Return NOTACHAR for a range entry. Otherwise
|
||||
return the other case if there is one, else NOTACHAR. */
|
||||
|
||||
if ((ucp_table[mid].f0 & f0_rangeflag) != 0) return NOTACHAR;
|
||||
|
||||
offset = ucp_table[mid].f1 & f1_casemask;
|
||||
if ((offset & f1_caseneg) != 0) offset |= f1_caseneg;
|
||||
return (offset == 0)? NOTACHAR : c + offset;
|
||||
}
|
||||
|
||||
|
||||
/* End of pcre_ucp_searchfuncs.c */
|
||||
162
Engine/lib/pcre/pcre_valid_utf8.c
Normal file
162
Engine/lib/pcre/pcre_valid_utf8.c
Normal file
|
|
@ -0,0 +1,162 @@
|
|||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2008 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains an internal function for validating UTF-8 character
|
||||
strings. */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Validate a UTF-8 string *
|
||||
*************************************************/
|
||||
|
||||
/* This function is called (optionally) at the start of compile or match, to
|
||||
validate that a supposed UTF-8 string is actually valid. The early check means
|
||||
that subsequent code can assume it is dealing with a valid string. The check
|
||||
can be turned off for maximum performance, but the consequences of supplying
|
||||
an invalid string are then undefined.
|
||||
|
||||
Originally, this function checked according to RFC 2279, allowing for values in
|
||||
the range 0 to 0x7fffffff, up to 6 bytes long, but ensuring that they were in
|
||||
the canonical format. Once somebody had pointed out RFC 3629 to me (it
|
||||
obsoletes 2279), additional restrictions were applied. The values are now
|
||||
limited to be between 0 and 0x0010ffff, no more than 4 bytes long, and the
|
||||
subrange 0xd800 to 0xdfff is excluded.
|
||||
|
||||
Arguments:
|
||||
string points to the string
|
||||
length length of string, or -1 if the string is zero-terminated
|
||||
|
||||
Returns: < 0 if the string is a valid UTF-8 string
|
||||
>= 0 otherwise; the value is the offset of the bad byte
|
||||
*/
|
||||
|
||||
int
|
||||
_pcre_valid_utf8(const uschar *string, int length)
|
||||
{
|
||||
#ifdef SUPPORT_UTF8
|
||||
register const uschar *p;
|
||||
|
||||
if (length < 0)
|
||||
{
|
||||
for (p = string; *p != 0; p++);
|
||||
length = p - string;
|
||||
}
|
||||
|
||||
for (p = string; length-- > 0; p++)
|
||||
{
|
||||
register int ab;
|
||||
register int c = *p;
|
||||
if (c < 128) continue;
|
||||
if (c < 0xc0) return p - string;
|
||||
ab = _pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */
|
||||
if (length < ab || ab > 3) return p - string;
|
||||
length -= ab;
|
||||
|
||||
/* Check top bits in the second byte */
|
||||
if ((*(++p) & 0xc0) != 0x80) return p - string;
|
||||
|
||||
/* Check for overlong sequences for each different length, and for the
|
||||
excluded range 0xd000 to 0xdfff. */
|
||||
|
||||
switch (ab)
|
||||
{
|
||||
/* Check for xx00 000x (overlong sequence) */
|
||||
|
||||
case 1:
|
||||
if ((c & 0x3e) == 0) return p - string;
|
||||
continue; /* We know there aren't any more bytes to check */
|
||||
|
||||
/* Check for 1110 0000, xx0x xxxx (overlong sequence) or
|
||||
1110 1101, 1010 xxxx (0xd000 - 0xdfff) */
|
||||
|
||||
case 2:
|
||||
if ((c == 0xe0 && (*p & 0x20) == 0) ||
|
||||
(c == 0xed && *p >= 0xa0))
|
||||
return p - string;
|
||||
break;
|
||||
|
||||
/* Check for 1111 0000, xx00 xxxx (overlong sequence) or
|
||||
greater than 0x0010ffff (f4 8f bf bf) */
|
||||
|
||||
case 3:
|
||||
if ((c == 0xf0 && (*p & 0x30) == 0) ||
|
||||
(c > 0xf4 ) ||
|
||||
(c == 0xf4 && *p > 0x8f))
|
||||
return p - string;
|
||||
break;
|
||||
|
||||
#if 0
|
||||
/* These cases can no longer occur, as we restrict to a maximum of four
|
||||
bytes nowadays. Leave the code here in case we ever want to add an option
|
||||
for longer sequences. */
|
||||
|
||||
/* Check for 1111 1000, xx00 0xxx */
|
||||
case 4:
|
||||
if (c == 0xf8 && (*p & 0x38) == 0) return p - string;
|
||||
break;
|
||||
|
||||
/* Check for leading 0xfe or 0xff, and then for 1111 1100, xx00 00xx */
|
||||
case 5:
|
||||
if (c == 0xfe || c == 0xff ||
|
||||
(c == 0xfc && (*p & 0x3c) == 0)) return p - string;
|
||||
break;
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
/* Check for valid bytes after the 2nd, if any; all must start 10 */
|
||||
while (--ab > 0)
|
||||
{
|
||||
if ((*(++p) & 0xc0) != 0x80) return p - string;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* End of pcre_valid_utf8.c */
|
||||
90
Engine/lib/pcre/pcre_version.c
Normal file
90
Engine/lib/pcre/pcre_version.c
Normal file
|
|
@ -0,0 +1,90 @@
|
|||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2008 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains the external function pcre_version(), which returns a
|
||||
string that identifies the PCRE version that is in use. */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Return version string *
|
||||
*************************************************/
|
||||
|
||||
/* These macros are the standard way of turning unquoted text into C strings.
|
||||
They allow macros like PCRE_MAJOR to be defined without quotes, which is
|
||||
convenient for user programs that want to test its value. */
|
||||
|
||||
#define STRING(a) # a
|
||||
#define XSTRING(s) STRING(s)
|
||||
|
||||
/* A problem turned up with PCRE_PRERELEASE, which is defined empty for
|
||||
production releases. Originally, it was used naively in this code:
|
||||
|
||||
return XSTRING(PCRE_MAJOR)
|
||||
"." XSTRING(PCRE_MINOR)
|
||||
XSTRING(PCRE_PRERELEASE)
|
||||
" " XSTRING(PCRE_DATE);
|
||||
|
||||
However, when PCRE_PRERELEASE is empty, this leads to an attempted expansion of
|
||||
STRING(). The C standard states: "If (before argument substitution) any
|
||||
argument consists of no preprocessing tokens, the behavior is undefined." It
|
||||
turns out the gcc treats this case as a single empty string - which is what we
|
||||
really want - but Visual C grumbles about the lack of an argument for the
|
||||
macro. Unfortunately, both are within their rights. To cope with both ways of
|
||||
handling this, I had resort to some messy hackery that does a test at run time.
|
||||
I could find no way of detecting that a macro is defined as an empty string at
|
||||
pre-processor time. This hack uses a standard trick for avoiding calling
|
||||
the STRING macro with an empty argument when doing the test. */
|
||||
|
||||
PCRE_EXP_DEFN const char *
|
||||
pcre_version(void)
|
||||
{
|
||||
return (XSTRING(Z PCRE_PRERELEASE)[1] == 0)?
|
||||
XSTRING(PCRE_MAJOR.PCRE_MINOR PCRE_DATE) :
|
||||
XSTRING(PCRE_MAJOR.PCRE_MINOR) XSTRING(PCRE_PRERELEASE PCRE_DATE);
|
||||
}
|
||||
|
||||
/* End of pcre_version.c */
|
||||
148
Engine/lib/pcre/pcre_xclass.c
Normal file
148
Engine/lib/pcre/pcre_xclass.c
Normal file
|
|
@ -0,0 +1,148 @@
|
|||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2008 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains an internal function that is used to match an extended
|
||||
class (one that contains characters whose values are > 255). It is used by both
|
||||
pcre_exec() and pcre_dfa_exec(). */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Match character against an XCLASS *
|
||||
*************************************************/
|
||||
|
||||
/* This function is called to match a character against an extended class that
|
||||
might contain values > 255.
|
||||
|
||||
Arguments:
|
||||
c the character
|
||||
data points to the flag byte of the XCLASS data
|
||||
|
||||
Returns: TRUE if character matches, else FALSE
|
||||
*/
|
||||
|
||||
BOOL
|
||||
_pcre_xclass(int c, const uschar *data)
|
||||
{
|
||||
int t;
|
||||
BOOL negated = (*data & XCL_NOT) != 0;
|
||||
|
||||
/* Character values < 256 are matched against a bitmap, if one is present. If
|
||||
not, we still carry on, because there may be ranges that start below 256 in the
|
||||
additional data. */
|
||||
|
||||
if (c < 256)
|
||||
{
|
||||
if ((*data & XCL_MAP) != 0 && (data[1 + c/8] & (1 << (c&7))) != 0)
|
||||
return !negated; /* char found */
|
||||
}
|
||||
|
||||
/* First skip the bit map if present. Then match against the list of Unicode
|
||||
properties or large chars or ranges that end with a large char. We won't ever
|
||||
encounter XCL_PROP or XCL_NOTPROP when UCP support is not compiled. */
|
||||
|
||||
if ((*data++ & XCL_MAP) != 0) data += 32;
|
||||
|
||||
while ((t = *data++) != XCL_END)
|
||||
{
|
||||
int x, y;
|
||||
if (t == XCL_SINGLE)
|
||||
{
|
||||
GETCHARINC(x, data);
|
||||
if (c == x) return !negated;
|
||||
}
|
||||
else if (t == XCL_RANGE)
|
||||
{
|
||||
GETCHARINC(x, data);
|
||||
GETCHARINC(y, data);
|
||||
if (c >= x && c <= y) return !negated;
|
||||
}
|
||||
|
||||
#ifdef SUPPORT_UCP
|
||||
else /* XCL_PROP & XCL_NOTPROP */
|
||||
{
|
||||
int chartype, script;
|
||||
int category = _pcre_ucp_findprop(c, &chartype, &script);
|
||||
|
||||
switch(*data)
|
||||
{
|
||||
case PT_ANY:
|
||||
if (t == XCL_PROP) return !negated;
|
||||
break;
|
||||
|
||||
case PT_LAMP:
|
||||
if ((chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt) ==
|
||||
(t == XCL_PROP)) return !negated;
|
||||
break;
|
||||
|
||||
case PT_GC:
|
||||
if ((data[1] == category) == (t == XCL_PROP)) return !negated;
|
||||
break;
|
||||
|
||||
case PT_PC:
|
||||
if ((data[1] == chartype) == (t == XCL_PROP)) return !negated;
|
||||
break;
|
||||
|
||||
case PT_SC:
|
||||
if ((data[1] == script) == (t == XCL_PROP)) return !negated;
|
||||
break;
|
||||
|
||||
/* This should never occur, but compilers may mutter if there is no
|
||||
default. */
|
||||
|
||||
default:
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
data += 2;
|
||||
}
|
||||
#endif /* SUPPORT_UCP */
|
||||
}
|
||||
|
||||
return negated; /* char did not match */
|
||||
}
|
||||
|
||||
/* End of pcre_xclass.c */
|
||||
871
Engine/lib/pcre/pcrecpp.cc
Normal file
871
Engine/lib/pcre/pcrecpp.cc
Normal file
|
|
@ -0,0 +1,871 @@
|
|||
// Copyright (c) 2005, Google Inc.
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following disclaimer
|
||||
// in the documentation and/or other materials provided with the
|
||||
// distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Author: Sanjay Ghemawat
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <ctype.h>
|
||||
#include <limits.h> /* for SHRT_MIN, USHRT_MAX, etc */
|
||||
#include <assert.h>
|
||||
#include <errno.h>
|
||||
#include <string>
|
||||
#include <algorithm>
|
||||
|
||||
#include "pcrecpp_internal.h"
|
||||
#include "pcre.h"
|
||||
#include "pcrecpp.h"
|
||||
#include "pcre_stringpiece.h"
|
||||
|
||||
|
||||
namespace pcrecpp {
|
||||
|
||||
// Maximum number of Arg pointers a match call can take (FullMatch collects ptr1..ptr16 into an array of this size)
|
||||
static const int kMaxArgs = 16;
|
||||
static const int kVecSize = (1 + kMaxArgs) * 3; // size of the int vector passed to DoMatchImpl: results + PCRE workspace
|
||||
|
||||
// Special object that stands in for "no argument"; FullMatch compares argument addresses against it to find the end of the caller's list
|
||||
Arg RE::no_arg((void*)NULL);
|
||||
|
||||
// If a regular expression has no error, its error_ field points here; Cleanup() and Compile() compare error_ against this address
|
||||
static const string empty_string;
|
||||
|
||||
// If the user doesn't ask for any options (Init() is given a NULL options pointer), we just use this one
|
||||
static RE_Options default_options;
|
||||
|
||||
void RE::Init(const string& pat, const RE_Options* options) {
|
||||
pattern_ = pat;
|
||||
if (options == NULL) {
|
||||
options_ = default_options;
|
||||
} else {
|
||||
options_ = *options;
|
||||
}
|
||||
error_ = &empty_string;
|
||||
re_full_ = NULL;
|
||||
re_partial_ = NULL;
|
||||
|
||||
re_partial_ = Compile(UNANCHORED);
|
||||
if (re_partial_ != NULL) {
|
||||
re_full_ = Compile(ANCHOR_BOTH);
|
||||
}
|
||||
}
|
||||
|
||||
void RE::Cleanup() {
|
||||
if (re_full_ != NULL) (*pcre_free)(re_full_);
|
||||
if (re_partial_ != NULL) (*pcre_free)(re_partial_);
|
||||
if (error_ != &empty_string) delete error_;
|
||||
}
|
||||
|
||||
|
||||
// Destruction simply hands over to Cleanup().
RE::~RE() {
  this->Cleanup();
}
|
||||
|
||||
|
||||
// Compile pattern_ for the requested kind of anchoring and return the compiled
// pattern, or NULL on failure. The first failure's message is stored in
// error_.
pcre* RE::Compile(Anchor anchor) {
  // Convert RE_Options into pcre options.
  const int pcre_options = options_.all_options();

  // Anchoring needs special treatment, because at runtime pcre only provides
  // an option for anchoring at the beginning of a string (unless you use
  // offset). The three kinds of anchoring are handled like this:
  //    UNANCHORED      Compile the original pattern, and use
  //                    a pcre unanchored match.
  //    ANCHOR_START    Compile the original pattern, and use
  //                    a pcre anchored match.
  //    ANCHOR_BOTH     Tack a "\z" to the end of the original pattern
  //                    and use a pcre anchored match.
  string wrapped;
  const char* source = pattern_.c_str();
  if (anchor == ANCHOR_BOTH) {
    // Parenthesize the pattern first, using a non-counting group, so that
    // the '\z' applies to all top-level alternatives in the regexp.
    wrapped = "(?:";
    wrapped += pattern_;
    wrapped += ")\\z";
    source = wrapped.c_str();
  }

  const char* compile_error;
  int eoffset;
  pcre* re = pcre_compile(source, pcre_options,
                          &compile_error, &eoffset, NULL);

  // Record the message only if no earlier error has been recorded.
  if (re == NULL && error_ == &empty_string) {
    error_ = new string(compile_error);
  }
  return re;
}
|
||||
|
||||
/***** Matching interfaces *****/
|
||||
|
||||
bool RE::FullMatch(const StringPiece& text,
|
||||
const Arg& ptr1,
|
||||
const Arg& ptr2,
|
||||
const Arg& ptr3,
|
||||
const Arg& ptr4,
|
||||
const Arg& ptr5,
|
||||
const Arg& ptr6,
|
||||
const Arg& ptr7,
|
||||
const Arg& ptr8,
|
||||
const Arg& ptr9,
|
||||
const Arg& ptr10,
|
||||
const Arg& ptr11,
|
||||
const Arg& ptr12,
|
||||
const Arg& ptr13,
|
||||
const Arg& ptr14,
|
||||
const Arg& ptr15,
|
||||
const Arg& ptr16) const {
|
||||
const Arg* args[kMaxArgs];
|
||||
int n = 0;
|
||||
if (&ptr1 == &no_arg) goto done; args[n++] = &ptr1;
|
||||
if (&ptr2 == &no_arg) goto done; args[n++] = &ptr2;
|
||||
if (&ptr3 == &no_arg) goto done; args[n++] = &ptr3;
|
||||
if (&ptr4 == &no_arg) goto done; args[n++] = &ptr4;
|
||||
if (&ptr5 == &no_arg) goto done; args[n++] = &ptr5;
|
||||
if (&ptr6 == &no_arg) goto done; args[n++] = &ptr6;
|
||||
if (&ptr7 == &no_arg) goto done; args[n++] = &ptr7;
|
||||
if (&ptr8 == &no_arg) goto done; args[n++] = &ptr8;
|
||||
if (&ptr9 == &no_arg) goto done; args[n++] = &ptr9;
|
||||
if (&ptr10 == &no_arg) goto done; args[n++] = &ptr10;
|
||||
if (&ptr11 == &no_arg) goto done; args[n++] = &ptr11;
|
||||
if (&ptr12 == &no_arg) goto done; args[n++] = &ptr12;
|
||||
if (&ptr13 == &no_arg) goto done; args[n++] = &ptr13;
|
||||
if (&ptr14 == &no_arg) goto done; args[n++] = &ptr14;
|
||||
if (&ptr15 == &no_arg) goto done; args[n++] = &ptr15;
|
||||
if (&ptr16 == &no_arg) goto done; args[n++] = &ptr16;
|
||||
done:
|
||||
|
||||
int consumed;
|
||||
int vec[kVecSize];
|
||||
return DoMatchImpl(text, ANCHOR_BOTH, &consumed, args, n, vec, kVecSize);
|
||||
}
|
||||
|
||||
bool RE::PartialMatch(const StringPiece& text,
|
||||
const Arg& ptr1,
|
||||
const Arg& ptr2,
|
||||
const Arg& ptr3,
|
||||
const Arg& ptr4,
|
||||
const Arg& ptr5,
|
||||
const Arg& ptr6,
|
||||
const Arg& ptr7,
|
||||
const Arg& ptr8,
|
||||
const Arg& ptr9,
|
||||
const Arg& ptr10,
|
||||
const Arg& ptr11,
|
||||
const Arg& ptr12,
|
||||
const Arg& ptr13,
|
||||
const Arg& ptr14,
|
||||
const Arg& ptr15,
|
||||
const Arg& ptr16) const {
|
||||
const Arg* args[kMaxArgs];
|
||||
int n = 0;
|
||||
if (&ptr1 == &no_arg) goto done; args[n++] = &ptr1;
|
||||
if (&ptr2 == &no_arg) goto done; args[n++] = &ptr2;
|
||||
if (&ptr3 == &no_arg) goto done; args[n++] = &ptr3;
|
||||
if (&ptr4 == &no_arg) goto done; args[n++] = &ptr4;
|
||||
if (&ptr5 == &no_arg) goto done; args[n++] = &ptr5;
|
||||
if (&ptr6 == &no_arg) goto done; args[n++] = &ptr6;
|
||||
if (&ptr7 == &no_arg) goto done; args[n++] = &ptr7;
|
||||
if (&ptr8 == &no_arg) goto done; args[n++] = &ptr8;
|
||||
if (&ptr9 == &no_arg) goto done; args[n++] = &ptr9;
|
||||
if (&ptr10 == &no_arg) goto done; args[n++] = &ptr10;
|
||||
if (&ptr11 == &no_arg) goto done; args[n++] = &ptr11;
|
||||
if (&ptr12 == &no_arg) goto done; args[n++] = &ptr12;
|
||||
if (&ptr13 == &no_arg) goto done; args[n++] = &ptr13;
|
||||
if (&ptr14 == &no_arg) goto done; args[n++] = &ptr14;
|
||||
if (&ptr15 == &no_arg) goto done; args[n++] = &ptr15;
|
||||
if (&ptr16 == &no_arg) goto done; args[n++] = &ptr16;
|
||||
done:
|
||||
|
||||
int consumed;
|
||||
int vec[kVecSize];
|
||||
return DoMatchImpl(text, UNANCHORED, &consumed, args, n, vec, kVecSize);
|
||||
}
|
||||
|
||||
// Matches the pattern at the start of "*input" (ANCHOR_START).  On success
// the matched prefix is removed from "*input" and true is returned; on
// failure "*input" is left unchanged.  The arguments up to, but not
// including, the first no_arg sentinel are used as capture destinations.
bool RE::Consume(StringPiece* input,
                 const Arg& ptr1,  const Arg& ptr2,
                 const Arg& ptr3,  const Arg& ptr4,
                 const Arg& ptr5,  const Arg& ptr6,
                 const Arg& ptr7,  const Arg& ptr8,
                 const Arg& ptr9,  const Arg& ptr10,
                 const Arg& ptr11, const Arg& ptr12,
                 const Arg& ptr13, const Arg& ptr14,
                 const Arg& ptr15, const Arg& ptr16) const {
  const Arg* const supplied[] = {
    &ptr1,  &ptr2,  &ptr3,  &ptr4,  &ptr5,  &ptr6,  &ptr7,  &ptr8,
    &ptr9,  &ptr10, &ptr11, &ptr12, &ptr13, &ptr14, &ptr15, &ptr16
  };
  const int supplied_count =
      static_cast<int>(sizeof(supplied) / sizeof(supplied[0]));

  // Copy the leading run of real arguments; stop at the first sentinel.
  const Arg* args[kMaxArgs];
  int n = 0;
  while (n < supplied_count && supplied[n] != &no_arg) {
    args[n] = supplied[n];
    n++;
  }

  int consumed;
  int vec[kVecSize];
  if (!DoMatchImpl(*input, ANCHOR_START, &consumed,
                   args, n, vec, kVecSize)) {
    return false;
  }
  input->remove_prefix(consumed);
  return true;
}
|
||||
|
||||
// Searches "*input" for the pattern (UNANCHORED).  On success everything up
// to the end of the match is removed from "*input" and true is returned; on
// failure "*input" is left unchanged.  The arguments up to, but not
// including, the first no_arg sentinel are used as capture destinations.
bool RE::FindAndConsume(StringPiece* input,
                        const Arg& ptr1,  const Arg& ptr2,
                        const Arg& ptr3,  const Arg& ptr4,
                        const Arg& ptr5,  const Arg& ptr6,
                        const Arg& ptr7,  const Arg& ptr8,
                        const Arg& ptr9,  const Arg& ptr10,
                        const Arg& ptr11, const Arg& ptr12,
                        const Arg& ptr13, const Arg& ptr14,
                        const Arg& ptr15, const Arg& ptr16) const {
  const Arg* const supplied[] = {
    &ptr1,  &ptr2,  &ptr3,  &ptr4,  &ptr5,  &ptr6,  &ptr7,  &ptr8,
    &ptr9,  &ptr10, &ptr11, &ptr12, &ptr13, &ptr14, &ptr15, &ptr16
  };
  const int supplied_count =
      static_cast<int>(sizeof(supplied) / sizeof(supplied[0]));

  // Copy the leading run of real arguments; stop at the first sentinel.
  const Arg* args[kMaxArgs];
  int n = 0;
  while (n < supplied_count && supplied[n] != &no_arg) {
    args[n] = supplied[n];
    n++;
  }

  int consumed;
  int vec[kVecSize];
  if (!DoMatchImpl(*input, UNANCHORED, &consumed,
                   args, n, vec, kVecSize)) {
    return false;
  }
  input->remove_prefix(consumed);
  return true;
}
|
||||
|
||||
bool RE::Replace(const StringPiece& rewrite,
|
||||
string *str) const {
|
||||
int vec[kVecSize];
|
||||
int matches = TryMatch(*str, 0, UNANCHORED, vec, kVecSize);
|
||||
if (matches == 0)
|
||||
return false;
|
||||
|
||||
string s;
|
||||
if (!Rewrite(&s, rewrite, *str, vec, matches))
|
||||
return false;
|
||||
|
||||
assert(vec[0] >= 0);
|
||||
assert(vec[1] >= 0);
|
||||
str->replace(vec[0], vec[1] - vec[0], s);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Returns PCRE_NEWLINE_CRLF, PCRE_NEWLINE_CR, or PCRE_NEWLINE_LF.
|
||||
// Note that PCRE_NEWLINE_CRLF is defined to be P_N_CR | P_N_LF.
|
||||
// Modified by PH to add PCRE_NEWLINE_ANY and PCRE_NEWLINE_ANYCRLF.
|
||||
|
||||
static int NewlineMode(int pcre_options) {
|
||||
// TODO: if we can make it threadsafe, cache this var
|
||||
int newline_mode = 0;
|
||||
/* if (newline_mode) return newline_mode; */ // do this once it's cached
|
||||
if (pcre_options & (PCRE_NEWLINE_CRLF|PCRE_NEWLINE_CR|PCRE_NEWLINE_LF|
|
||||
PCRE_NEWLINE_ANY|PCRE_NEWLINE_ANYCRLF)) {
|
||||
newline_mode = (pcre_options &
|
||||
(PCRE_NEWLINE_CRLF|PCRE_NEWLINE_CR|PCRE_NEWLINE_LF|
|
||||
PCRE_NEWLINE_ANY|PCRE_NEWLINE_ANYCRLF));
|
||||
} else {
|
||||
int newline;
|
||||
pcre_config(PCRE_CONFIG_NEWLINE, &newline);
|
||||
if (newline == 10)
|
||||
newline_mode = PCRE_NEWLINE_LF;
|
||||
else if (newline == 13)
|
||||
newline_mode = PCRE_NEWLINE_CR;
|
||||
else if (newline == 3338)
|
||||
newline_mode = PCRE_NEWLINE_CRLF;
|
||||
else if (newline == -1)
|
||||
newline_mode = PCRE_NEWLINE_ANY;
|
||||
else if (newline == -2)
|
||||
newline_mode = PCRE_NEWLINE_ANYCRLF;
|
||||
else
|
||||
assert("" == "Unexpected return value from pcre_config(NEWLINE)");
|
||||
}
|
||||
return newline_mode;
|
||||
}
|
||||
|
||||
int RE::GlobalReplace(const StringPiece& rewrite,
|
||||
string *str) const {
|
||||
int count = 0;
|
||||
int vec[kVecSize];
|
||||
string out;
|
||||
int start = 0;
|
||||
int lastend = -1;
|
||||
|
||||
while (start <= static_cast<int>(str->length())) {
|
||||
int matches = TryMatch(*str, start, UNANCHORED, vec, kVecSize);
|
||||
if (matches <= 0)
|
||||
break;
|
||||
int matchstart = vec[0], matchend = vec[1];
|
||||
assert(matchstart >= start);
|
||||
assert(matchend >= matchstart);
|
||||
if (matchstart == matchend && matchstart == lastend) {
|
||||
// advance one character if we matched an empty string at the same
|
||||
// place as the last match occurred
|
||||
matchend = start + 1;
|
||||
// If the current char is CR and we're in CRLF mode, skip LF too.
|
||||
// Note it's better to call pcre_fullinfo() than to examine
|
||||
// all_options(), since options_ could have changed bewteen
|
||||
// compile-time and now, but this is simpler and safe enough.
|
||||
// Modified by PH to add ANY and ANYCRLF.
|
||||
if (start+1 < static_cast<int>(str->length()) &&
|
||||
(*str)[start] == '\r' && (*str)[start+1] == '\n' &&
|
||||
(NewlineMode(options_.all_options()) == PCRE_NEWLINE_CRLF ||
|
||||
NewlineMode(options_.all_options()) == PCRE_NEWLINE_ANY ||
|
||||
NewlineMode(options_.all_options()) == PCRE_NEWLINE_ANYCRLF)
|
||||
) {
|
||||
matchend++;
|
||||
}
|
||||
// We also need to advance more than one char if we're in utf8 mode.
|
||||
#ifdef SUPPORT_UTF8
|
||||
if (options_.utf8()) {
|
||||
while (matchend < static_cast<int>(str->length()) &&
|
||||
((*str)[matchend] & 0xc0) == 0x80)
|
||||
matchend++;
|
||||
}
|
||||
#endif
|
||||
if (matchend <= static_cast<int>(str->length()))
|
||||
out.append(*str, start, matchend - start);
|
||||
start = matchend;
|
||||
} else {
|
||||
out.append(*str, start, matchstart - start);
|
||||
Rewrite(&out, rewrite, *str, vec, matches);
|
||||
start = matchend;
|
||||
lastend = matchend;
|
||||
count++;
|
||||
}
|
||||
}
|
||||
|
||||
if (count == 0)
|
||||
return 0;
|
||||
|
||||
if (start < static_cast<int>(str->length()))
|
||||
out.append(*str, start, str->length() - start);
|
||||
swap(out, *str);
|
||||
return count;
|
||||
}
|
||||
|
||||
bool RE::Extract(const StringPiece& rewrite,
|
||||
const StringPiece& text,
|
||||
string *out) const {
|
||||
int vec[kVecSize];
|
||||
int matches = TryMatch(text, 0, UNANCHORED, vec, kVecSize);
|
||||
if (matches == 0)
|
||||
return false;
|
||||
out->erase();
|
||||
return Rewrite(out, rewrite, text, vec, matches);
|
||||
}
|
||||
|
||||
// Returns a copy of "unquoted" in which every character that could have a
// special meaning in a regular expression is escaped, so that the result
// matches the original text literally.
/*static*/ string RE::QuoteMeta(const StringPiece& unquoted) {
  string result;

  // Escape any ascii character not in [A-Za-z_0-9].
  //
  // Note that it's legal to escape a character even if it has no
  // special meaning in a regular expression -- so this function does
  // that.  (This also makes it identical to the perl function of the
  // same name; see `perldoc -f quotemeta`.)
  for (int ii = 0; ii < unquoted.size(); ++ii) {
    const char c = unquoted[ii];

    // A raw NUL cannot survive in the pattern: Compile() passes the
    // pattern to pcre_compile() as a C string, which stops at the first
    // NUL.  Spell it as a two-digit hex escape instead; unlike "\0" this
    // cannot absorb digits that happen to follow it.
    if (c == '\0') {
      result += "\\x00";
      continue;
    }

    // Note that using 'isalnum' here raises the benchmark time from
    // 32ns to 58ns:
    const bool is_word_char = (c >= 'a' && c <= 'z') ||
                              (c >= 'A' && c <= 'Z') ||
                              (c >= '0' && c <= '9') ||
                              c == '_';
    // If this is the part of a UTF8 or Latin1 character, we need
    // to copy this byte without escaping.  Experimentally this is
    // what works correctly with the regexp library.
    const bool is_high_bit = (c & 128) != 0;

    if (!is_word_char && !is_high_bit) {
      result += '\\';
    }
    result += c;
  }

  return result;
}
|
||||
|
||||
/***** Actual matching and rewriting code *****/
|
||||
|
||||
int RE::TryMatch(const StringPiece& text,
|
||||
int startpos,
|
||||
Anchor anchor,
|
||||
int *vec,
|
||||
int vecsize) const {
|
||||
pcre* re = (anchor == ANCHOR_BOTH) ? re_full_ : re_partial_;
|
||||
if (re == NULL) {
|
||||
//fprintf(stderr, "Matching against invalid re: %s\n", error_->c_str());
|
||||
return 0;
|
||||
}
|
||||
|
||||
pcre_extra extra = { 0, 0, 0, 0, 0, 0 };
|
||||
if (options_.match_limit() > 0) {
|
||||
extra.flags |= PCRE_EXTRA_MATCH_LIMIT;
|
||||
extra.match_limit = options_.match_limit();
|
||||
}
|
||||
if (options_.match_limit_recursion() > 0) {
|
||||
extra.flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
|
||||
extra.match_limit_recursion = options_.match_limit_recursion();
|
||||
}
|
||||
int rc = pcre_exec(re, // The regular expression object
|
||||
&extra,
|
||||
(text.data() == NULL) ? "" : text.data(),
|
||||
text.size(),
|
||||
startpos,
|
||||
(anchor == UNANCHORED) ? 0 : PCRE_ANCHORED,
|
||||
vec,
|
||||
vecsize);
|
||||
|
||||
// Handle errors
|
||||
if (rc == PCRE_ERROR_NOMATCH) {
|
||||
return 0;
|
||||
} else if (rc < 0) {
|
||||
//fprintf(stderr, "Unexpected return code: %d when matching '%s'\n",
|
||||
// re, pattern_.c_str());
|
||||
return 0;
|
||||
} else if (rc == 0) {
|
||||
// pcre_exec() returns 0 as a special case when the number of
|
||||
// capturing subpatterns exceeds the size of the vector.
|
||||
// When this happens, there is a match and the output vector
|
||||
// is filled, but we miss out on the positions of the extra subpatterns.
|
||||
rc = vecsize / 2;
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
bool RE::DoMatchImpl(const StringPiece& text,
|
||||
Anchor anchor,
|
||||
int* consumed,
|
||||
const Arg* const* args,
|
||||
int n,
|
||||
int* vec,
|
||||
int vecsize) const {
|
||||
assert((1 + n) * 3 <= vecsize); // results + PCRE workspace
|
||||
int matches = TryMatch(text, 0, anchor, vec, vecsize);
|
||||
assert(matches >= 0); // TryMatch never returns negatives
|
||||
if (matches == 0)
|
||||
return false;
|
||||
|
||||
*consumed = vec[1];
|
||||
|
||||
if (n == 0 || args == NULL) {
|
||||
// We are not interested in results
|
||||
return true;
|
||||
}
|
||||
|
||||
if (NumberOfCapturingGroups() < n) {
|
||||
// RE has fewer capturing groups than number of arg pointers passed in
|
||||
return false;
|
||||
}
|
||||
|
||||
// If we got here, we must have matched the whole pattern.
|
||||
// We do not need (can not do) any more checks on the value of 'matches' here
|
||||
// -- see the comment for TryMatch.
|
||||
for (int i = 0; i < n; i++) {
|
||||
const int start = vec[2*(i+1)];
|
||||
const int limit = vec[2*(i+1)+1];
|
||||
if (!args[i]->Parse(text.data() + start, limit-start)) {
|
||||
// TODO: Should we indicate what the error was?
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool RE::DoMatch(const StringPiece& text,
|
||||
Anchor anchor,
|
||||
int* consumed,
|
||||
const Arg* const args[],
|
||||
int n) const {
|
||||
assert(n >= 0);
|
||||
size_t const vecsize = (1 + n) * 3; // results + PCRE workspace
|
||||
// (as for kVecSize)
|
||||
int space[21]; // use stack allocation for small vecsize (common case)
|
||||
int* vec = vecsize <= 21 ? space : new int[vecsize];
|
||||
bool retval = DoMatchImpl(text, anchor, consumed, args, n, vec, vecsize);
|
||||
if (vec != space) delete [] vec;
|
||||
return retval;
|
||||
}
|
||||
|
||||
// Appends to "*out" the expansion of "rewrite" for a match on "text".
//
// In "rewrite", "\N" (N a single digit) inserts captured group N, where
// group 0 is the whole match, and "\\" inserts a literal backslash.  Every
// other character is copied as is.  "vec" holds the match offsets and
// "veclen" the number of groups it describes.
//
// Returns false if "rewrite" is malformed (a backslash followed by anything
// but a digit or a backslash, or a backslash at the very end) or if it
// refers to a group number >= veclen.  "*out" may have been partially
// appended to in that case.
bool RE::Rewrite(string *out, const StringPiece &rewrite,
                 const StringPiece &text, int *vec, int veclen) const {
  const char* s = rewrite.data();
  const char* const end = s + rewrite.size();
  for (; s < end; s++) {
    if (*s != '\\') {
      out->push_back(*s);
      continue;
    }

    // Step onto the character after the backslash.  A backslash that ends
    // the rewrite string has nothing to escape; reject it rather than
    // reading past the end of the buffer.
    if (++s == end) {
      return false;
    }

    // <ctype.h> functions need a value representable as unsigned char.
    const unsigned char c = static_cast<unsigned char>(*s);
    if (isdigit(c)) {
      const int n = c - '0';
      if (n >= veclen) {
        // The rewrite string asks for a group the match does not have.
        return false;
      }
      // A negative start offset marks a group that did not take part in
      // the match; it contributes nothing.
      const int start = vec[2 * n];
      if (start >= 0)
        out->append(text.data() + start, vec[2 * n + 1] - start);
    } else if (c == '\\') {
      out->push_back('\\');
    } else {
      // Invalid rewrite pattern: unknown escape.
      return false;
    }
  }
  return true;
}
|
||||
|
||||
// Return the number of capturing subpatterns, or -1 if the
|
||||
// regexp wasn't valid on construction.
|
||||
int RE::NumberOfCapturingGroups() const {
|
||||
if (re_partial_ == NULL) return -1;
|
||||
|
||||
int result;
|
||||
int pcre_retval = pcre_fullinfo(re_partial_, // The regular expression object
|
||||
NULL, // We did not study the pattern
|
||||
PCRE_INFO_CAPTURECOUNT,
|
||||
&result);
|
||||
assert(pcre_retval == 0);
|
||||
return result;
|
||||
}
|
||||
|
||||
/***** Parsers for various types *****/
|
||||
|
||||
// Parser used for a NULL destination: the matched text is ignored and the
// call succeeds only if there really is no destination to store into.
bool Arg::parse_null(const char* str, int n, void* dest) {
  // We fail if somebody asked us to store into a non-NULL void* pointer
  const bool nothing_to_store = (dest == NULL);
  return nothing_to_store;
}
|
||||
|
||||
// Copies the n matched characters at "str" into the string that "dest"
// points to.  A NULL "dest" is accepted and ignored.  Always succeeds.
bool Arg::parse_string(const char* str, int n, void* dest) {
  if (dest != NULL) {
    static_cast<string*>(dest)->assign(str, n);
  }
  return true;
}
|
||||
|
||||
bool Arg::parse_stringpiece(const char* str, int n, void* dest) {
|
||||
if (dest == NULL) return true;
|
||||
reinterpret_cast<StringPiece*>(dest)->set(str, n);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Arg::parse_char(const char* str, int n, void* dest) {
|
||||
if (n != 1) return false;
|
||||
if (dest == NULL) return true;
|
||||
*(reinterpret_cast<char*>(dest)) = str[0];
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Arg::parse_uchar(const char* str, int n, void* dest) {
|
||||
if (n != 1) return false;
|
||||
if (dest == NULL) return true;
|
||||
*(reinterpret_cast<unsigned char*>(dest)) = str[0];
|
||||
return true;
|
||||
}
|
||||
|
||||
// Largest number spec that we are willing to parse
static const int kMaxNumberLength = 32;

// REQUIRES "buf" must have length at least kMaxNumberLength+1
// REQUIRES "n > 0"
// Copies "str" into "buf" and null-terminates if necessary.
// Returns one of:
//      a. "str" if no termination is needed
//      b. "buf" if the string was copied and null-terminated
//      c. "" if the input was invalid and has no hope of being parsed
//
// Note that str[n], the character just past the number, is read.
static const char* TerminateNumber(char* buf, const char* str, int n) {
  // <ctype.h> classifiers are only defined for values representable as
  // unsigned char (or EOF), so bytes with the high bit set must be
  // converted before they are tested.
  if ((n > 0) && isspace(static_cast<unsigned char>(*str))) {
    // We are less forgiving than the strtoxxx() routines and do not
    // allow leading spaces.
    return "";
  }

  // See if the character right after the input text may potentially
  // look like a digit.  If it does, a strtoxxx() routine reading "str"
  // directly would run on into it, so a terminated copy is needed.
  const unsigned char next = static_cast<unsigned char>(str[n]);
  const bool next_is_digit_like = isdigit(next) ||
                                  ((next >= 'a') && (next <= 'f')) ||
                                  ((next >= 'A') && (next <= 'F'));
  if (!next_is_digit_like) {
    // We can parse right out of the supplied string, so return it.
    return str;
  }

  if (n > kMaxNumberLength) return "";  // Input too big to be a valid number
  memcpy(buf, str, n);
  buf[n] = '\0';
  return buf;
}
|
||||
|
||||
bool Arg::parse_long_radix(const char* str,
|
||||
int n,
|
||||
void* dest,
|
||||
int radix) {
|
||||
if (n == 0) return false;
|
||||
char buf[kMaxNumberLength+1];
|
||||
str = TerminateNumber(buf, str, n);
|
||||
char* end;
|
||||
errno = 0;
|
||||
long r = strtol(str, &end, radix);
|
||||
if (end != str + n) return false; // Leftover junk
|
||||
if (errno) return false;
|
||||
if (dest == NULL) return true;
|
||||
*(reinterpret_cast<long*>(dest)) = r;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Arg::parse_ulong_radix(const char* str,
|
||||
int n,
|
||||
void* dest,
|
||||
int radix) {
|
||||
if (n == 0) return false;
|
||||
char buf[kMaxNumberLength+1];
|
||||
str = TerminateNumber(buf, str, n);
|
||||
if (str[0] == '-') return false; // strtoul() on a negative number?!
|
||||
char* end;
|
||||
errno = 0;
|
||||
unsigned long r = strtoul(str, &end, radix);
|
||||
if (end != str + n) return false; // Leftover junk
|
||||
if (errno) return false;
|
||||
if (dest == NULL) return true;
|
||||
*(reinterpret_cast<unsigned long*>(dest)) = r;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Arg::parse_short_radix(const char* str,
|
||||
int n,
|
||||
void* dest,
|
||||
int radix) {
|
||||
long r;
|
||||
if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse
|
||||
if (r < SHRT_MIN || r > SHRT_MAX) return false; // Out of range
|
||||
if (dest == NULL) return true;
|
||||
*(reinterpret_cast<short*>(dest)) = static_cast<short>(r);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Arg::parse_ushort_radix(const char* str,
|
||||
int n,
|
||||
void* dest,
|
||||
int radix) {
|
||||
unsigned long r;
|
||||
if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse
|
||||
if (r > USHRT_MAX) return false; // Out of range
|
||||
if (dest == NULL) return true;
|
||||
*(reinterpret_cast<unsigned short*>(dest)) = static_cast<unsigned short>(r);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Arg::parse_int_radix(const char* str,
|
||||
int n,
|
||||
void* dest,
|
||||
int radix) {
|
||||
long r;
|
||||
if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse
|
||||
if (r < INT_MIN || r > INT_MAX) return false; // Out of range
|
||||
if (dest == NULL) return true;
|
||||
*(reinterpret_cast<int*>(dest)) = r;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Arg::parse_uint_radix(const char* str,
|
||||
int n,
|
||||
void* dest,
|
||||
int radix) {
|
||||
unsigned long r;
|
||||
if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse
|
||||
if (r > UINT_MAX) return false; // Out of range
|
||||
if (dest == NULL) return true;
|
||||
*(reinterpret_cast<unsigned int*>(dest)) = r;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Parses the n characters at "str" as a long long in the given radix and
// stores the value through "dest".  Always fails when HAVE_LONG_LONG is
// not defined.  Otherwise fails on empty input, leftover characters after
// the number, and when the conversion routine sets errno.  A NULL "dest"
// only validates.
bool Arg::parse_longlong_radix(const char* str,
                               int n,
                               void* dest,
                               int radix) {
#ifndef HAVE_LONG_LONG
  return false;
#else
  if (n == 0) return false;

  char buf[kMaxNumberLength+1];
  str = TerminateNumber(buf, str, n);

  char* end;
  errno = 0;
  long long value;
  // Use whichever 64-bit conversion routine the build configuration
  // says is available.
#if defined HAVE_STRTOQ
  value = strtoq(str, &end, radix);
#elif defined HAVE_STRTOLL
  value = strtoll(str, &end, radix);
#elif defined HAVE__STRTOI64
  value = _strtoi64(str, &end, radix);
#else
#error parse_longlong_radix: cannot convert input to a long-long
#endif

  const bool consumed_all = (end == str + n);  // otherwise: leftover junk
  if (!consumed_all || errno != 0) return false;

  if (dest != NULL) {
    *static_cast<long long*>(dest) = value;
  }
  return true;
#endif   /* HAVE_LONG_LONG */
}
|
||||
|
||||
// Parses the n characters at "str" as an unsigned long long in the given
// radix and stores the value through "dest".  Always fails when
// HAVE_UNSIGNED_LONG_LONG is not defined.  Otherwise fails on empty input,
// a leading '-', leftover characters after the number, and when the
// conversion routine sets errno.  A NULL "dest" only validates.
bool Arg::parse_ulonglong_radix(const char* str,
                                int n,
                                void* dest,
                                int radix) {
#ifndef HAVE_UNSIGNED_LONG_LONG
  return false;
#else
  if (n == 0) return false;

  char buf[kMaxNumberLength+1];
  str = TerminateNumber(buf, str, n);
  if (str[0] == '-') return false;  // strtoull() on a negative number?!

  char* end;
  errno = 0;
  unsigned long long value;
  // Use whichever 64-bit conversion routine the build configuration
  // says is available.
#if defined HAVE_STRTOQ
  value = strtouq(str, &end, radix);
#elif defined HAVE_STRTOLL
  value = strtoull(str, &end, radix);
#elif defined HAVE__STRTOI64
  value = _strtoui64(str, &end, radix);
#else
#error parse_ulonglong_radix: cannot convert input to a long-long
#endif

  const bool consumed_all = (end == str + n);  // otherwise: leftover junk
  if (!consumed_all || errno != 0) return false;

  if (dest != NULL) {
    *static_cast<unsigned long long*>(dest) = value;
  }
  return true;
#endif   /* HAVE_UNSIGNED_LONG_LONG */
}
|
||||
|
||||
// Parses the n characters at "str" as a double and stores the value
// through "dest".  The text is first copied into a local NUL-terminated
// buffer.  Fails on empty input, input of kMaxLength characters or more,
// leftover characters after the number, and when strtod() sets errno.
// A NULL "dest" only validates.
bool Arg::parse_double(const char* str, int n, void* dest) {
  static const int kMaxLength = 200;
  if (n == 0 || n >= kMaxLength) return false;

  char buf[kMaxLength];
  memcpy(buf, str, n);
  buf[n] = '\0';

  errno = 0;
  char* end;
  const double value = strtod(buf, &end);

  const bool consumed_all = (end == buf + n);  // otherwise: leftover junk
  if (!consumed_all || errno != 0) return false;

  if (dest != NULL) {
    *static_cast<double*>(dest) = value;
  }
  return true;
}
|
||||
|
||||
bool Arg::parse_float(const char* str, int n, void* dest) {
|
||||
double r;
|
||||
if (!parse_double(str, n, &r)) return false;
|
||||
if (dest == NULL) return true;
|
||||
*(reinterpret_cast<float*>(dest)) = static_cast<float>(r);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
#define DEFINE_INTEGER_PARSERS(name) \
|
||||
bool Arg::parse_##name(const char* str, int n, void* dest) { \
|
||||
return parse_##name##_radix(str, n, dest, 10); \
|
||||
} \
|
||||
bool Arg::parse_##name##_hex(const char* str, int n, void* dest) { \
|
||||
return parse_##name##_radix(str, n, dest, 16); \
|
||||
} \
|
||||
bool Arg::parse_##name##_octal(const char* str, int n, void* dest) { \
|
||||
return parse_##name##_radix(str, n, dest, 8); \
|
||||
} \
|
||||
bool Arg::parse_##name##_cradix(const char* str, int n, void* dest) { \
|
||||
return parse_##name##_radix(str, n, dest, 0); \
|
||||
}
|
||||
|
||||
DEFINE_INTEGER_PARSERS(short) /* */
|
||||
DEFINE_INTEGER_PARSERS(ushort) /* */
|
||||
DEFINE_INTEGER_PARSERS(int) /* Don't use semicolons after these */
|
||||
DEFINE_INTEGER_PARSERS(uint) /* statements because they can cause */
|
||||
DEFINE_INTEGER_PARSERS(long) /* compiler warnings if the checking */
|
||||
DEFINE_INTEGER_PARSERS(ulong) /* level is turned up high enough. */
|
||||
DEFINE_INTEGER_PARSERS(longlong) /* */
|
||||
DEFINE_INTEGER_PARSERS(ulonglong) /* */
|
||||
|
||||
#undef DEFINE_INTEGER_PARSERS
|
||||
|
||||
} // namespace pcrecpp
|
||||
700
Engine/lib/pcre/pcrecpp.h
Normal file
700
Engine/lib/pcre/pcrecpp.h
Normal file
|
|
@ -0,0 +1,700 @@
|
|||
// Copyright (c) 2005, Google Inc.
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following disclaimer
|
||||
// in the documentation and/or other materials provided with the
|
||||
// distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Author: Sanjay Ghemawat
|
||||
// Support for PCRE_XXX modifiers added by Giuseppe Maxia, July 2005
|
||||
|
||||
#ifndef _PCRECPP_H
|
||||
#define _PCRECPP_H
|
||||
|
||||
// C++ interface to the pcre regular-expression library. RE supports
|
||||
// Perl-style regular expressions (with extensions like \d, \w, \s,
|
||||
// ...).
|
||||
//
|
||||
// -----------------------------------------------------------------------
|
||||
// REGEXP SYNTAX:
|
||||
//
|
||||
// This module is part of the pcre library and hence supports its syntax
|
||||
// for regular expressions.
|
||||
//
|
||||
// The syntax is pretty similar to Perl's. For those not familiar
|
||||
// with Perl's regular expressions, here are some examples of the most
|
||||
// commonly used extensions:
|
||||
//
|
||||
// "hello (\\w+) world" -- \w matches a "word" character
|
||||
// "version (\\d+)" -- \d matches a digit
|
||||
// "hello\\s+world" -- \s matches any whitespace character
|
||||
// "\\b(\\w+)\\b" -- \b matches empty string at a word boundary
|
||||
// "(?i)hello" -- (?i) turns on case-insensitive matching
|
||||
// "/\\*(.*?)\\*/" -- .*? matches . minimum no. of times possible
|
||||
//
|
||||
// -----------------------------------------------------------------------
|
||||
// MATCHING INTERFACE:
|
||||
//
|
||||
// The "FullMatch" operation checks that supplied text matches a
|
||||
// supplied pattern exactly.
|
||||
//
|
||||
// Example: successful match
|
||||
// pcrecpp::RE re("h.*o");
|
||||
// re.FullMatch("hello");
|
||||
//
|
||||
// Example: unsuccessful match (requires full match):
|
||||
// pcrecpp::RE re("e");
|
||||
// !re.FullMatch("hello");
|
||||
//
|
||||
// Example: creating a temporary RE object:
|
||||
// pcrecpp::RE("h.*o").FullMatch("hello");
|
||||
//
|
||||
// You can pass in a "const char*" or a "string" for "text". The
|
||||
// examples below tend to use a const char*.
|
||||
//
|
||||
// You can, as in the different examples above, store the RE object
|
||||
// explicitly in a variable or use a temporary RE object. The
|
||||
// examples below use one mode or the other arbitrarily. Either
|
||||
// could correctly be used for any of these examples.
|
||||
//
|
||||
// -----------------------------------------------------------------------
|
||||
// MATCHING WITH SUB-STRING EXTRACTION:
|
||||
//
|
||||
// You can supply extra pointer arguments to extract matched subpieces.
|
||||
//
|
||||
// Example: extracts "ruby" into "s" and 1234 into "i"
|
||||
// int i;
|
||||
// string s;
|
||||
// pcrecpp::RE re("(\\w+):(\\d+)");
|
||||
// re.FullMatch("ruby:1234", &s, &i);
|
||||
//
|
||||
// Example: does not try to extract any extra sub-patterns
|
||||
// re.FullMatch("ruby:1234", &s);
|
||||
//
|
||||
// Example: does not try to extract into NULL
|
||||
// re.FullMatch("ruby:1234", NULL, &i);
|
||||
//
|
||||
// Example: integer overflow causes failure
|
||||
// !re.FullMatch("ruby:1234567891234", NULL, &i);
|
||||
//
|
||||
// Example: fails because there aren't enough sub-patterns:
|
||||
// !pcrecpp::RE("\\w+:\\d+").FullMatch("ruby:1234", &s);
|
||||
//
|
||||
// Example: fails because string cannot be stored in integer
|
||||
// !pcrecpp::RE("(.*)").FullMatch("ruby", &i);
|
||||
//
|
||||
// The provided pointer arguments can be pointers to any scalar numeric
|
||||
// type, or one of
|
||||
// string (matched piece is copied to string)
|
||||
// StringPiece (StringPiece is mutated to point to matched piece)
|
||||
// T (where "bool T::ParseFrom(const char*, int)" exists)
|
||||
// NULL (the corresponding matched sub-pattern is not copied)
|
||||
//
|
||||
// CAVEAT: An optional sub-pattern that does not exist in the matched
|
||||
// string is assigned the empty string. Therefore, the following will
|
||||
// return false (because the empty string is not a valid number):
|
||||
// int number;
|
||||
// pcrecpp::RE("[a-z]+(\\d+)?").FullMatch("abc", &number);
|
||||
//
|
||||
// -----------------------------------------------------------------------
|
||||
// DO_MATCH
|
||||
//
|
||||
// The matching interface supports at most 16 arguments per call.
|
||||
// If you need more, consider using the more general interface
|
||||
// pcrecpp::RE::DoMatch(). See pcrecpp.h for the signature for DoMatch.
|
||||
//
|
||||
// -----------------------------------------------------------------------
|
||||
// PARTIAL MATCHES
|
||||
//
|
||||
// You can use the "PartialMatch" operation when you want the pattern
|
||||
// to match any substring of the text.
|
||||
//
|
||||
// Example: simple search for a string:
|
||||
// pcrecpp::RE("ell").PartialMatch("hello");
|
||||
//
|
||||
// Example: find first number in a string:
|
||||
// int number;
|
||||
// pcrecpp::RE re("(\\d+)");
|
||||
// re.PartialMatch("x*100 + 20", &number);
|
||||
// assert(number == 100);
|
||||
//
|
||||
// -----------------------------------------------------------------------
|
||||
// UTF-8 AND THE MATCHING INTERFACE:
|
||||
//
|
||||
// By default, pattern and text are plain text, one byte per character.
|
||||
// The UTF8 flag, passed to the constructor, causes both pattern
|
||||
// and string to be treated as UTF-8 text, still a byte stream but
|
||||
// potentially multiple bytes per character. In practice, the text
|
||||
// is likelier to be UTF-8 than the pattern, but the match returned
|
||||
// may depend on the UTF8 flag, so always use it when matching
|
||||
// UTF8 text. E.g., "." will match one byte normally but with UTF8
|
||||
// set may match up to three bytes of a multi-byte character.
|
||||
//
|
||||
// Example:
|
||||
// pcrecpp::RE_Options options;
|
||||
// options.set_utf8();
|
||||
// pcrecpp::RE re(utf8_pattern, options);
|
||||
// re.FullMatch(utf8_string);
|
||||
//
|
||||
// Example: using the convenience function UTF8():
|
||||
// pcrecpp::RE re(utf8_pattern, pcrecpp::UTF8());
|
||||
// re.FullMatch(utf8_string);
|
||||
//
|
||||
// NOTE: The UTF8 option is ignored if pcre was not configured with the
|
||||
// --enable-utf8 flag.
|
||||
//
|
||||
// -----------------------------------------------------------------------
|
||||
// PASSING MODIFIERS TO THE REGULAR EXPRESSION ENGINE
|
||||
//
|
||||
// PCRE defines some modifiers to change the behavior of the regular
|
||||
// expression engine.
|
||||
// The C++ wrapper defines an auxiliary class, RE_Options, as a vehicle
|
||||
// to pass such modifiers to a RE class.
|
||||
//
|
||||
// Currently, the following modifiers are supported
|
||||
//
|
||||
// modifier description Perl corresponding
|
||||
//
|
||||
// PCRE_CASELESS case insensitive match /i
|
||||
// PCRE_MULTILINE multiple lines match /m
|
||||
// PCRE_DOTALL dot matches newlines /s
|
||||
// PCRE_DOLLAR_ENDONLY $ matches only at end N/A
|
||||
// PCRE_EXTRA strict escape parsing N/A
|
||||
// PCRE_EXTENDED ignore whitespaces /x
|
||||
// PCRE_UTF8 handles UTF8 chars built-in
|
||||
// PCRE_UNGREEDY reverses * and *? N/A
|
||||
// PCRE_NO_AUTO_CAPTURE disables matching parens N/A (*)
|
||||
//
|
||||
// (For a full account on how each modifier works, please check the
|
||||
// PCRE API reference manual).
|
||||
//
|
||||
// (*) Both Perl and PCRE allow non matching parentheses by means of the
|
||||
// "?:" modifier within the pattern itself. e.g. (?:ab|cd) does not
|
||||
// capture, while (ab|cd) does.
|
||||
//
|
||||
// For each modifier, there are two member functions whose name is made
|
||||
// out of the modifier in lowercase, without the "PCRE_" prefix. For
|
||||
// instance, PCRE_CASELESS is handled by
|
||||
// bool caseless(),
|
||||
// which returns true if the modifier is set, and
|
||||
// RE_Options & set_caseless(bool),
|
||||
// which sets or unsets the modifier.
|
||||
//
|
||||
// Moreover, PCRE_EXTRA_MATCH_LIMIT can be accessed through the
|
||||
// set_match_limit() and match_limit() member functions.
|
||||
// Setting match_limit to a non-zero value will limit the execution of
|
||||
// pcre to keep it from doing bad things like blowing the stack or taking
|
||||
// an eternity to return a result. A value of 5000 is good enough to stop
|
||||
// stack blowup in a 2MB thread stack. Setting match_limit to zero will
|
||||
// disable match limiting. Alternately, you can set match_limit_recursion()
|
||||
// which uses PCRE_EXTRA_MATCH_LIMIT_RECURSION to limit how much pcre
|
||||
// recurses. match_limit() caps the number of matches pcre does;
|
||||
// match_limit_recursion() caps the depth of recursion.
|
||||
//
|
||||
// Normally, to pass one or more modifiers to a RE class, you declare
|
||||
// a RE_Options object, set the appropriate options, and pass this
|
||||
// object to a RE constructor. Example:
|
||||
//
|
||||
//    RE_Options opt;
|
||||
// opt.set_caseless(true);
|
||||
//
|
||||
// if (RE("HELLO", opt).PartialMatch("hello world")) ...
|
||||
//
|
||||
// RE_Options has two constructors. The default constructor takes no
|
||||
// arguments and creates a set of flags that are off by default.
|
||||
//
|
||||
// The optional parameter 'option_flags' is to facilitate transfer
|
||||
// of legacy code from C programs. This lets you do
|
||||
// RE(pattern, RE_Options(PCRE_CASELESS|PCRE_MULTILINE)).PartialMatch(str);
|
||||
//
|
||||
// But new code is better off doing
|
||||
// RE(pattern,
|
||||
// RE_Options().set_caseless(true).set_multiline(true)).PartialMatch(str);
|
||||
// (See below)
|
||||
//
|
||||
// If you are going to pass one of the most used modifiers, there are some
|
||||
// convenience functions that return a RE_Options class with the
|
||||
// appropriate modifier already set:
|
||||
// CASELESS(), UTF8(), MULTILINE(), DOTALL(), EXTENDED()
|
||||
//
|
||||
// If you need to set several options at once, and you don't want to go
|
||||
// through the pains of declaring a RE_Options object and setting several
|
||||
// options, there is a parallel method that gives you such ability on the
|
||||
// fly. You can concatenate several set_xxxxx member functions, since each
|
||||
// of them returns a reference to its class object. e.g.: to pass
|
||||
// PCRE_CASELESS, PCRE_EXTENDED, and PCRE_MULTILINE to a RE with one
|
||||
// statement, you may write
|
||||
//
|
||||
// RE(" ^ xyz \\s+ .* blah$", RE_Options()
|
||||
// .set_caseless(true)
|
||||
// .set_extended(true)
|
||||
// .set_multiline(true)).PartialMatch(sometext);
|
||||
//
|
||||
// -----------------------------------------------------------------------
|
||||
// SCANNING TEXT INCREMENTALLY
|
||||
//
|
||||
// The "Consume" operation may be useful if you want to repeatedly
|
||||
// match regular expressions at the front of a string and skip over
|
||||
// them as they match. This requires use of the "StringPiece" type,
|
||||
// which represents a sub-range of a real string. Like RE, StringPiece
|
||||
// is defined in the pcrecpp namespace.
|
||||
//
|
||||
// Example: read lines of the form "var = value" from a string.
|
||||
// string contents = ...; // Fill string somehow
|
||||
// pcrecpp::StringPiece input(contents); // Wrap in a StringPiece
|
||||
//
|
||||
// string var;
|
||||
// int value;
|
||||
// pcrecpp::RE re("(\\w+) = (\\d+)\n");
|
||||
// while (re.Consume(&input, &var, &value)) {
|
||||
// ...;
|
||||
// }
|
||||
//
|
||||
// Each successful call to "Consume" will set "var/value", and also
|
||||
// advance "input" so it points past the matched text.
|
||||
//
|
||||
// The "FindAndConsume" operation is similar to "Consume" but does not
|
||||
// anchor your match at the beginning of the string. For example, you
|
||||
// could extract all words from a string by repeatedly calling
|
||||
// pcrecpp::RE("(\\w+)").FindAndConsume(&input, &word)
|
||||
//
|
||||
// -----------------------------------------------------------------------
|
||||
// PARSING HEX/OCTAL/C-RADIX NUMBERS
|
||||
//
|
||||
// By default, if you pass a pointer to a numeric value, the
|
||||
// corresponding text is interpreted as a base-10 number. You can
|
||||
// instead wrap the pointer with a call to one of the operators Hex(),
|
||||
// Octal(), or CRadix() to interpret the text in another base. The
|
||||
// CRadix operator interprets C-style "0" (base-8) and "0x" (base-16)
|
||||
// prefixes, but defaults to base-10.
|
||||
//
|
||||
// Example:
|
||||
// int a, b, c, d;
|
||||
// pcrecpp::RE re("(.*) (.*) (.*) (.*)");
|
||||
// re.FullMatch("100 40 0100 0x40",
|
||||
// pcrecpp::Octal(&a), pcrecpp::Hex(&b),
|
||||
// pcrecpp::CRadix(&c), pcrecpp::CRadix(&d));
|
||||
// will leave 64 in a, b, c, and d.
|
||||
//
|
||||
// -----------------------------------------------------------------------
|
||||
// REPLACING PARTS OF STRINGS
|
||||
//
|
||||
// You can replace the first match of "pattern" in "str" with
|
||||
// "rewrite". Within "rewrite", backslash-escaped digits (\1 to \9)
|
||||
// can be used to insert text matching corresponding parenthesized
|
||||
// group from the pattern. \0 in "rewrite" refers to the entire
|
||||
// matching text. E.g.,
|
||||
//
|
||||
// string s = "yabba dabba doo";
|
||||
// pcrecpp::RE("b+").Replace("d", &s);
|
||||
//
|
||||
// will leave "s" containing "yada dabba doo". The result is true if
|
||||
// the pattern matches and a replacement occurs, or false otherwise.
|
||||
//
|
||||
// GlobalReplace() is like Replace(), except that it replaces all
|
||||
// occurrences of the pattern in the string with the rewrite.
|
||||
// Replacements are not subject to re-matching. E.g.,
|
||||
//
|
||||
// string s = "yabba dabba doo";
|
||||
// pcrecpp::RE("b+").GlobalReplace("d", &s);
|
||||
//
|
||||
// will leave "s" containing "yada dada doo". It returns the number
|
||||
// of replacements made.
|
||||
//
|
||||
// Extract() is like Replace(), except that if the pattern matches,
|
||||
// "rewrite" is copied into "out" (an additional argument) with
|
||||
// substitutions. The non-matching portions of "text" are ignored.
|
||||
// Returns true iff a match occurred and the extraction happened
|
||||
// successfully. If no match occurs, the string is left unaffected.
|
||||
|
||||
|
||||
#include <string>
|
||||
#include <pcre.h>
|
||||
#include <pcrecpparg.h> // defines the Arg class
|
||||
// This isn't technically needed here, but we include it
|
||||
// anyway so folks who include pcrecpp.h don't have to.
|
||||
#include <pcre_stringpiece.h>
|
||||
|
||||
namespace pcrecpp {
|
||||
|
||||
// Body for the RE_Options::set_xxx() mutators: turns the option bit(s) "o"
// on when "b" is true and off otherwise, then returns *this so that calls
// can be chained.  The expansion ends in a "return" statement, so it is only
// usable as the final statement of a member function returning RE_Options&.
#define PCRE_SET_OR_CLEAR(b, o) \
    if (b) all_options_ |= (o); else all_options_ &= ~(o); \
    return *this

// True when every bit of "o" is present in all_options_.  Both the argument
// and the whole expansion are parenthesized so that the macro stays correct
// when "o" is a compound expression (e.g. A|B) or when the result is used
// inside a larger expression (e.g. !PCRE_IS_SET(x)).
#define PCRE_IS_SET(o)  \
    ((all_options_ & (o)) == (o))
|
||||
|
||||
/***** Compiling regular expressions: the RE class *****/
|
||||
|
||||
// RE_Options allow you to set options to be passed along to pcre,
|
||||
// along with other options we put on top of pcre.
|
||||
// Only 9 modifiers, plus match_limit and match_limit_recursion,
|
||||
// are supported now.
|
||||
class PCRECPP_EXP_DEFN RE_Options {
|
||||
public:
|
||||
// constructor
|
||||
RE_Options() : match_limit_(0), match_limit_recursion_(0), all_options_(0) {}
|
||||
|
||||
// alternative constructor.
|
||||
// To facilitate transfer of legacy code from C programs
|
||||
//
|
||||
// This lets you do
|
||||
// RE(pattern, RE_Options(PCRE_CASELESS|PCRE_MULTILINE)).PartialMatch(str);
|
||||
// But new code is better off doing
|
||||
// RE(pattern,
|
||||
// RE_Options().set_caseless(true).set_multiline(true)).PartialMatch(str);
|
||||
RE_Options(int option_flags) : match_limit_(0), match_limit_recursion_(0),
|
||||
all_options_(option_flags) {}
|
||||
// we're fine with the default destructor, copy constructor, etc.
|
||||
|
||||
// accessors and mutators
|
||||
int match_limit() const { return match_limit_; };
|
||||
RE_Options &set_match_limit(int limit) {
|
||||
match_limit_ = limit;
|
||||
return *this;
|
||||
}
|
||||
|
||||
int match_limit_recursion() const { return match_limit_recursion_; };
|
||||
RE_Options &set_match_limit_recursion(int limit) {
|
||||
match_limit_recursion_ = limit;
|
||||
return *this;
|
||||
}
|
||||
|
||||
bool caseless() const {
|
||||
return PCRE_IS_SET(PCRE_CASELESS);
|
||||
}
|
||||
RE_Options &set_caseless(bool x) {
|
||||
PCRE_SET_OR_CLEAR(x, PCRE_CASELESS);
|
||||
}
|
||||
|
||||
bool multiline() const {
|
||||
return PCRE_IS_SET(PCRE_MULTILINE);
|
||||
}
|
||||
RE_Options &set_multiline(bool x) {
|
||||
PCRE_SET_OR_CLEAR(x, PCRE_MULTILINE);
|
||||
}
|
||||
|
||||
bool dotall() const {
|
||||
return PCRE_IS_SET(PCRE_DOTALL);
|
||||
}
|
||||
RE_Options &set_dotall(bool x) {
|
||||
PCRE_SET_OR_CLEAR(x, PCRE_DOTALL);
|
||||
}
|
||||
|
||||
bool extended() const {
|
||||
return PCRE_IS_SET(PCRE_EXTENDED);
|
||||
}
|
||||
RE_Options &set_extended(bool x) {
|
||||
PCRE_SET_OR_CLEAR(x, PCRE_EXTENDED);
|
||||
}
|
||||
|
||||
bool dollar_endonly() const {
|
||||
return PCRE_IS_SET(PCRE_DOLLAR_ENDONLY);
|
||||
}
|
||||
RE_Options &set_dollar_endonly(bool x) {
|
||||
PCRE_SET_OR_CLEAR(x, PCRE_DOLLAR_ENDONLY);
|
||||
}
|
||||
|
||||
bool extra() const {
|
||||
return PCRE_IS_SET(PCRE_EXTRA);
|
||||
}
|
||||
RE_Options &set_extra(bool x) {
|
||||
PCRE_SET_OR_CLEAR(x, PCRE_EXTRA);
|
||||
}
|
||||
|
||||
bool ungreedy() const {
|
||||
return PCRE_IS_SET(PCRE_UNGREEDY);
|
||||
}
|
||||
RE_Options &set_ungreedy(bool x) {
|
||||
PCRE_SET_OR_CLEAR(x, PCRE_UNGREEDY);
|
||||
}
|
||||
|
||||
bool utf8() const {
|
||||
return PCRE_IS_SET(PCRE_UTF8);
|
||||
}
|
||||
RE_Options &set_utf8(bool x) {
|
||||
PCRE_SET_OR_CLEAR(x, PCRE_UTF8);
|
||||
}
|
||||
|
||||
bool no_auto_capture() const {
|
||||
return PCRE_IS_SET(PCRE_NO_AUTO_CAPTURE);
|
||||
}
|
||||
RE_Options &set_no_auto_capture(bool x) {
|
||||
PCRE_SET_OR_CLEAR(x, PCRE_NO_AUTO_CAPTURE);
|
||||
}
|
||||
|
||||
RE_Options &set_all_options(int opt) {
|
||||
all_options_ = opt;
|
||||
return *this;
|
||||
}
|
||||
int all_options() const {
|
||||
return all_options_ ;
|
||||
}
|
||||
|
||||
// TODO: add other pcre flags
|
||||
|
||||
private:
|
||||
int match_limit_;
|
||||
int match_limit_recursion_;
|
||||
int all_options_;
|
||||
};
|
||||
|
||||
// These functions return some common RE_Options
|
||||
// Convenience factory: an RE_Options value with only the utf8 bit turned on.
static inline RE_Options UTF8() {
  RE_Options opts;
  opts.set_utf8(true);
  return opts;
}
|
||||
|
||||
// Convenience factory: an RE_Options value with only the caseless bit
// turned on.
static inline RE_Options CASELESS() {
  RE_Options opts;
  opts.set_caseless(true);
  return opts;
}
|
||||
// Convenience factory: an RE_Options value with only the multiline bit
// turned on.
static inline RE_Options MULTILINE() {
  RE_Options opts;
  opts.set_multiline(true);
  return opts;
}
|
||||
|
||||
// Convenience factory: an RE_Options value with only the dotall bit
// turned on.
static inline RE_Options DOTALL() {
  RE_Options opts;
  opts.set_dotall(true);
  return opts;
}
|
||||
|
||||
// Convenience factory: an RE_Options value with only the extended bit
// turned on.
static inline RE_Options EXTENDED() {
  RE_Options opts;
  opts.set_extended(true);
  return opts;
}
|
||||
|
||||
// Interface for regular expression matching. Also corresponds to a
|
||||
// pre-compiled regular expression. An "RE" object is safe for
|
||||
// concurrent use by multiple threads.
|
||||
class PCRECPP_EXP_DEFN RE {
|
||||
public:
|
||||
// We provide implicit conversions from strings so that users can
|
||||
// pass in a string or a "const char*" wherever an "RE" is expected.
|
||||
RE(const string& pat) { Init(pat, NULL); }
|
||||
RE(const string& pat, const RE_Options& option) { Init(pat, &option); }
|
||||
RE(const char* pat) { Init(pat, NULL); }
|
||||
RE(const char* pat, const RE_Options& option) { Init(pat, &option); }
|
||||
RE(const unsigned char* pat) {
|
||||
Init(reinterpret_cast<const char*>(pat), NULL);
|
||||
}
|
||||
RE(const unsigned char* pat, const RE_Options& option) {
|
||||
Init(reinterpret_cast<const char*>(pat), &option);
|
||||
}
|
||||
|
||||
// Copy constructor & assignment - note that these are expensive
|
||||
// because they recompile the expression.
|
||||
RE(const RE& re) { Init(re.pattern_, &re.options_); }
|
||||
const RE& operator=(const RE& re) {
|
||||
if (this != &re) {
|
||||
Cleanup();
|
||||
|
||||
// This is the code that originally came from Google
|
||||
// Init(re.pattern_.c_str(), &re.options_);
|
||||
|
||||
// This is the replacement from Ari Pollak
|
||||
Init(re.pattern_, &re.options_);
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
||||
~RE();
|
||||
|
||||
// The string specification for this RE. E.g.
|
||||
// RE re("ab*c?d+");
|
||||
// re.pattern(); // "ab*c?d+"
|
||||
const string& pattern() const { return pattern_; }
|
||||
|
||||
// If RE could not be created properly, returns an error string.
|
||||
// Else returns the empty string.
|
||||
const string& error() const { return *error_; }
|
||||
|
||||
/***** The useful part: the matching interface *****/
|
||||
|
||||
// This is provided so one can do pattern.ReplaceAll() just as
|
||||
// easily as ReplaceAll(pattern-text, ....)
|
||||
|
||||
bool FullMatch(const StringPiece& text,
|
||||
const Arg& ptr1 = no_arg,
|
||||
const Arg& ptr2 = no_arg,
|
||||
const Arg& ptr3 = no_arg,
|
||||
const Arg& ptr4 = no_arg,
|
||||
const Arg& ptr5 = no_arg,
|
||||
const Arg& ptr6 = no_arg,
|
||||
const Arg& ptr7 = no_arg,
|
||||
const Arg& ptr8 = no_arg,
|
||||
const Arg& ptr9 = no_arg,
|
||||
const Arg& ptr10 = no_arg,
|
||||
const Arg& ptr11 = no_arg,
|
||||
const Arg& ptr12 = no_arg,
|
||||
const Arg& ptr13 = no_arg,
|
||||
const Arg& ptr14 = no_arg,
|
||||
const Arg& ptr15 = no_arg,
|
||||
const Arg& ptr16 = no_arg) const;
|
||||
|
||||
bool PartialMatch(const StringPiece& text,
|
||||
const Arg& ptr1 = no_arg,
|
||||
const Arg& ptr2 = no_arg,
|
||||
const Arg& ptr3 = no_arg,
|
||||
const Arg& ptr4 = no_arg,
|
||||
const Arg& ptr5 = no_arg,
|
||||
const Arg& ptr6 = no_arg,
|
||||
const Arg& ptr7 = no_arg,
|
||||
const Arg& ptr8 = no_arg,
|
||||
const Arg& ptr9 = no_arg,
|
||||
const Arg& ptr10 = no_arg,
|
||||
const Arg& ptr11 = no_arg,
|
||||
const Arg& ptr12 = no_arg,
|
||||
const Arg& ptr13 = no_arg,
|
||||
const Arg& ptr14 = no_arg,
|
||||
const Arg& ptr15 = no_arg,
|
||||
const Arg& ptr16 = no_arg) const;
|
||||
|
||||
bool Consume(StringPiece* input,
|
||||
const Arg& ptr1 = no_arg,
|
||||
const Arg& ptr2 = no_arg,
|
||||
const Arg& ptr3 = no_arg,
|
||||
const Arg& ptr4 = no_arg,
|
||||
const Arg& ptr5 = no_arg,
|
||||
const Arg& ptr6 = no_arg,
|
||||
const Arg& ptr7 = no_arg,
|
||||
const Arg& ptr8 = no_arg,
|
||||
const Arg& ptr9 = no_arg,
|
||||
const Arg& ptr10 = no_arg,
|
||||
const Arg& ptr11 = no_arg,
|
||||
const Arg& ptr12 = no_arg,
|
||||
const Arg& ptr13 = no_arg,
|
||||
const Arg& ptr14 = no_arg,
|
||||
const Arg& ptr15 = no_arg,
|
||||
const Arg& ptr16 = no_arg) const;
|
||||
|
||||
bool FindAndConsume(StringPiece* input,
|
||||
const Arg& ptr1 = no_arg,
|
||||
const Arg& ptr2 = no_arg,
|
||||
const Arg& ptr3 = no_arg,
|
||||
const Arg& ptr4 = no_arg,
|
||||
const Arg& ptr5 = no_arg,
|
||||
const Arg& ptr6 = no_arg,
|
||||
const Arg& ptr7 = no_arg,
|
||||
const Arg& ptr8 = no_arg,
|
||||
const Arg& ptr9 = no_arg,
|
||||
const Arg& ptr10 = no_arg,
|
||||
const Arg& ptr11 = no_arg,
|
||||
const Arg& ptr12 = no_arg,
|
||||
const Arg& ptr13 = no_arg,
|
||||
const Arg& ptr14 = no_arg,
|
||||
const Arg& ptr15 = no_arg,
|
||||
const Arg& ptr16 = no_arg) const;
|
||||
|
||||
bool Replace(const StringPiece& rewrite,
|
||||
string *str) const;
|
||||
|
||||
int GlobalReplace(const StringPiece& rewrite,
|
||||
string *str) const;
|
||||
|
||||
bool Extract(const StringPiece &rewrite,
|
||||
const StringPiece &text,
|
||||
string *out) const;
|
||||
|
||||
// Escapes all potentially meaningful regexp characters in
|
||||
// 'unquoted'. The returned string, used as a regular expression,
|
||||
// will exactly match the original string. For example,
|
||||
// 1.5-2.0?
|
||||
// may become:
|
||||
// 1\.5\-2\.0\?
|
||||
static string QuoteMeta(const StringPiece& unquoted);
|
||||
|
||||
|
||||
/***** Generic matching interface *****/
|
||||
|
||||
// Type of match (TODO: Should be restructured as part of RE_Options)
|
||||
enum Anchor {
|
||||
UNANCHORED, // No anchoring
|
||||
ANCHOR_START, // Anchor at start only
|
||||
ANCHOR_BOTH // Anchor at start and end
|
||||
};
|
||||
|
||||
// General matching routine. Stores the length of the match in
|
||||
// "*consumed" if successful.
|
||||
bool DoMatch(const StringPiece& text,
|
||||
Anchor anchor,
|
||||
int* consumed,
|
||||
const Arg* const* args, int n) const;
|
||||
|
||||
// Return the number of capturing subpatterns, or -1 if the
|
||||
// regexp wasn't valid on construction.
|
||||
int NumberOfCapturingGroups() const;
|
||||
|
||||
// The default value for an argument, to indicate no arg was passed in
|
||||
static Arg no_arg;
|
||||
|
||||
private:
|
||||
|
||||
void Init(const string& pattern, const RE_Options* options);
|
||||
void Cleanup();
|
||||
|
||||
// Match against "text", filling in "vec" (up to "vecsize" * 2/3) with
|
||||
// pairs of integers for the beginning and end positions of matched
|
||||
// text. The first pair corresponds to the entire matched text;
|
||||
// subsequent pairs correspond, in order, to parentheses-captured
|
||||
// matches. Returns the number of pairs (one more than the number of
|
||||
// the last subpattern with a match) if matching was successful
|
||||
// and zero if the match failed.
|
||||
// I.e. for RE("(foo)|(bar)|(baz)") it will return 2, 3, and 4 when matching
|
||||
// against "foo", "bar", and "baz" respectively.
|
||||
// When matching RE("(foo)|hello") against "hello", it will return 1.
|
||||
// But the values for all subpattern are filled in into "vec".
|
||||
int TryMatch(const StringPiece& text,
|
||||
int startpos,
|
||||
Anchor anchor,
|
||||
int *vec,
|
||||
int vecsize) const;
|
||||
|
||||
// Append the "rewrite" string, with backslash subsitutions from "text"
|
||||
// and "vec", to string "out".
|
||||
bool Rewrite(string *out,
|
||||
const StringPiece& rewrite,
|
||||
const StringPiece& text,
|
||||
int *vec,
|
||||
int veclen) const;
|
||||
|
||||
// internal implementation for DoMatch
|
||||
bool DoMatchImpl(const StringPiece& text,
|
||||
Anchor anchor,
|
||||
int* consumed,
|
||||
const Arg* const args[],
|
||||
int n,
|
||||
int* vec,
|
||||
int vecsize) const;
|
||||
|
||||
// Compile the regexp for the specified anchoring mode
|
||||
pcre* Compile(Anchor anchor);
|
||||
|
||||
string pattern_;
|
||||
RE_Options options_;
|
||||
pcre* re_full_; // For full matches
|
||||
pcre* re_partial_; // For partial matches
|
||||
const string* error_; // Error indicator (or points to empty string)
|
||||
};
|
||||
|
||||
} // namespace pcrecpp
|
||||
|
||||
#endif /* _PCRECPP_H */
|
||||
68
Engine/lib/pcre/pcrecpp_internal.h
Normal file
68
Engine/lib/pcre/pcrecpp_internal.h
Normal file
|
|
@ -0,0 +1,68 @@
|
|||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/*
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
#ifndef PCRECPP_INTERNAL_H
|
||||
#define PCRECPP_INTERNAL_H
|
||||
|
||||
/* When compiling a DLL for Windows, the exported symbols have to be declared
|
||||
using some MS magic. I found some useful information on this web page:
|
||||
http://msdn2.microsoft.com/en-us/library/y4h7bcy6(VS.80).aspx. According to the
|
||||
information there, using __declspec(dllexport) without "extern" we have a
|
||||
definition; with "extern" we have a declaration. The settings here override the
|
||||
setting in pcre.h. We use:
|
||||
|
||||
PCRECPP_EXP_DECL for declarations
|
||||
PCRECPP_EXP_DEFN for definitions of exported functions
|
||||
|
||||
*/
|
||||
|
||||
/* Choose the linkage decoration once, unless the build already supplied
   PCRECPP_EXP_DECL.  Only a Windows build without PCRE_STATIC gets
   __declspec(dllexport); every other configuration uses a plain "extern"
   for declarations and no decoration for definitions. */
#ifndef PCRECPP_EXP_DECL
#  if defined(_WIN32) && !defined(PCRE_STATIC)
#    define PCRECPP_EXP_DECL  extern __declspec(dllexport)
#    define PCRECPP_EXP_DEFN  __declspec(dllexport)
#  else
#    define PCRECPP_EXP_DECL  extern
#    define PCRECPP_EXP_DEFN
#  endif
#endif
|
||||
|
||||
#endif /* PCRECPP_INTERNAL_H */
|
||||
|
||||
/* End of pcrecpp_internal.h */
|
||||
174
Engine/lib/pcre/pcrecpparg.h
Normal file
174
Engine/lib/pcre/pcrecpparg.h
Normal file
|
|
@ -0,0 +1,174 @@
|
|||
// Copyright (c) 2005, Google Inc.
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following disclaimer
|
||||
// in the documentation and/or other materials provided with the
|
||||
// distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Author: Sanjay Ghemawat
|
||||
|
||||
#ifndef _PCRECPPARG_H
|
||||
#define _PCRECPPARG_H
|
||||
|
||||
#include <stdlib.h> // for NULL
|
||||
#include <string>
|
||||
|
||||
#include <pcre.h>
|
||||
|
||||
namespace pcrecpp {
|
||||
|
||||
class StringPiece;
|
||||
|
||||
// Hex/Octal/Binary?
|
||||
|
||||
// Special class for parsing into objects that define a ParseFrom() method
|
||||
// Adapter that gives any type T with a
// "bool ParseFrom(const char*, int)" member the plain-function parser
// signature used by Arg.
template <class T>
class _RE_MatchObject {
 public:
  // Forwards (str, n) to dest->ParseFrom() and returns its result.  A NULL
  // destination is not parsed into at all and counts as success.
  static inline bool Parse(const char* str, int n, void* dest) {
    if (dest != NULL) {
      T* target = reinterpret_cast<T*>(dest);
      return target->ParseFrom(str, n);
    }
    return true;
  }
};
|
||||
|
||||
class PCRECPP_EXP_DEFN Arg {
|
||||
public:
|
||||
// Empty constructor so we can declare arrays of Arg
|
||||
Arg();
|
||||
|
||||
// Constructor specially designed for NULL arguments
|
||||
Arg(void*);
|
||||
|
||||
typedef bool (*Parser)(const char* str, int n, void* dest);
|
||||
|
||||
// Type-specific parsers
|
||||
#define PCRE_MAKE_PARSER(type,name) \
|
||||
Arg(type* p) : arg_(p), parser_(name) { } \
|
||||
Arg(type* p, Parser parser) : arg_(p), parser_(parser) { }
|
||||
|
||||
|
||||
PCRE_MAKE_PARSER(char, parse_char);
|
||||
PCRE_MAKE_PARSER(unsigned char, parse_uchar);
|
||||
PCRE_MAKE_PARSER(short, parse_short);
|
||||
PCRE_MAKE_PARSER(unsigned short, parse_ushort);
|
||||
PCRE_MAKE_PARSER(int, parse_int);
|
||||
PCRE_MAKE_PARSER(unsigned int, parse_uint);
|
||||
PCRE_MAKE_PARSER(long, parse_long);
|
||||
PCRE_MAKE_PARSER(unsigned long, parse_ulong);
|
||||
#if 1
|
||||
PCRE_MAKE_PARSER(long long, parse_longlong);
|
||||
#endif
|
||||
#if 1
|
||||
PCRE_MAKE_PARSER(unsigned long long, parse_ulonglong);
|
||||
#endif
|
||||
PCRE_MAKE_PARSER(float, parse_float);
|
||||
PCRE_MAKE_PARSER(double, parse_double);
|
||||
PCRE_MAKE_PARSER(std::string, parse_string);
|
||||
PCRE_MAKE_PARSER(StringPiece, parse_stringpiece);
|
||||
|
||||
#undef PCRE_MAKE_PARSER
|
||||
|
||||
// Generic constructor
|
||||
template <class T> Arg(T*, Parser parser);
|
||||
// Generic constructor template
|
||||
template <class T> Arg(T* p)
|
||||
: arg_(p), parser_(_RE_MatchObject<T>::Parse) {
|
||||
}
|
||||
|
||||
// Parse the data
|
||||
bool Parse(const char* str, int n) const;
|
||||
|
||||
private:
|
||||
void* arg_;
|
||||
Parser parser_;
|
||||
|
||||
static bool parse_null (const char* str, int n, void* dest);
|
||||
static bool parse_char (const char* str, int n, void* dest);
|
||||
static bool parse_uchar (const char* str, int n, void* dest);
|
||||
static bool parse_float (const char* str, int n, void* dest);
|
||||
static bool parse_double (const char* str, int n, void* dest);
|
||||
static bool parse_string (const char* str, int n, void* dest);
|
||||
static bool parse_stringpiece (const char* str, int n, void* dest);
|
||||
|
||||
#define PCRE_DECLARE_INTEGER_PARSER(name) \
|
||||
private: \
|
||||
static bool parse_ ## name(const char* str, int n, void* dest); \
|
||||
static bool parse_ ## name ## _radix( \
|
||||
const char* str, int n, void* dest, int radix); \
|
||||
public: \
|
||||
static bool parse_ ## name ## _hex(const char* str, int n, void* dest); \
|
||||
static bool parse_ ## name ## _octal(const char* str, int n, void* dest); \
|
||||
static bool parse_ ## name ## _cradix(const char* str, int n, void* dest)
|
||||
|
||||
PCRE_DECLARE_INTEGER_PARSER(short);
|
||||
PCRE_DECLARE_INTEGER_PARSER(ushort);
|
||||
PCRE_DECLARE_INTEGER_PARSER(int);
|
||||
PCRE_DECLARE_INTEGER_PARSER(uint);
|
||||
PCRE_DECLARE_INTEGER_PARSER(long);
|
||||
PCRE_DECLARE_INTEGER_PARSER(ulong);
|
||||
PCRE_DECLARE_INTEGER_PARSER(longlong);
|
||||
PCRE_DECLARE_INTEGER_PARSER(ulonglong);
|
||||
|
||||
#undef PCRE_DECLARE_INTEGER_PARSER
|
||||
};
|
||||
|
||||
// Default Arg: no destination (NULL) and parse_null installed as the parser.
inline Arg::Arg() : arg_(NULL), parser_(parse_null) { }
|
||||
// Arg built from an untyped pointer: stores "p" and installs parse_null.
inline Arg::Arg(void* p) : arg_(p), parser_(parse_null) { }
|
||||
|
||||
inline bool Arg::Parse(const char* str, int n) const {
|
||||
return (*parser_)(str, n, arg_);
|
||||
}
|
||||
|
||||
// This part of the parser, appropriate only for ints, deals with bases
|
||||
#define MAKE_INTEGER_PARSER(type, name) \
|
||||
inline Arg Hex(type* ptr) { \
|
||||
return Arg(ptr, Arg::parse_ ## name ## _hex); } \
|
||||
inline Arg Octal(type* ptr) { \
|
||||
return Arg(ptr, Arg::parse_ ## name ## _octal); } \
|
||||
inline Arg CRadix(type* ptr) { \
|
||||
return Arg(ptr, Arg::parse_ ## name ## _cradix); }
|
||||
|
||||
MAKE_INTEGER_PARSER(short, short) /* */
|
||||
MAKE_INTEGER_PARSER(unsigned short, ushort) /* */
|
||||
MAKE_INTEGER_PARSER(int, int) /* Don't use semicolons */
|
||||
MAKE_INTEGER_PARSER(unsigned int, uint) /* after these statement */
|
||||
MAKE_INTEGER_PARSER(long, long) /* because they can cause */
|
||||
MAKE_INTEGER_PARSER(unsigned long, ulong) /* compiler warnings if */
|
||||
#if 1 /* the checking level is */
|
||||
MAKE_INTEGER_PARSER(long long, longlong) /* turned up high enough. */
|
||||
#endif /* */
|
||||
#if 1 /* */
|
||||
MAKE_INTEGER_PARSER(unsigned long long, ulonglong) /* */
|
||||
#endif
|
||||
|
||||
#undef PCRE_IS_SET
|
||||
#undef PCRE_SET_OR_CLEAR
|
||||
#undef MAKE_INTEGER_PARSER
|
||||
|
||||
} // namespace pcrecpp
|
||||
|
||||
|
||||
#endif /* _PCRECPPARG_H */
|
||||
142
Engine/lib/pcre/pcreposix.h
Normal file
142
Engine/lib/pcre/pcreposix.h
Normal file
|
|
@ -0,0 +1,142 @@
|
|||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
#ifndef _PCREPOSIX_H
|
||||
#define _PCREPOSIX_H
|
||||
|
||||
/* This is the header for the POSIX wrapper interface to the PCRE Perl-
|
||||
Compatible Regular Expression library. It defines the things POSIX says should
|
||||
be there. I hope.
|
||||
|
||||
Copyright (c) 1997-2008 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/* Have to include stdlib.h in order to ensure that size_t is defined. */
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
/* Allow for C++ users */
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Options, mostly defined by POSIX, but with a couple of extras. Each option
is a distinct single bit, so options can be combined with bitwise OR. */

#define REG_ICASE     0x0001
#define REG_NEWLINE   0x0002
#define REG_NOTBOL    0x0004
#define REG_NOTEOL    0x0008
#define REG_DOTALL    0x0010   /* NOT defined by POSIX. */
#define REG_NOSUB     0x0020
#define REG_UTF8      0x0040   /* NOT defined by POSIX. */

/* This is not used by PCRE, but by defining it we make it easier
to slot PCRE into existing programs that make POSIX calls. */

#define REG_EXTENDED  0
|
||||
|
||||
/* Error values. Not all these are relevant or used by the wrapper. Numbering
starts at 1, so none of these codes is equal to zero. */

enum {
  REG_ASSERT = 1,  /* internal error ? */
  REG_BADBR,       /* invalid repeat counts in {} */
  REG_BADPAT,      /* pattern error */
  REG_BADRPT,      /* ? * + invalid */
  REG_EBRACE,      /* unbalanced {} */
  REG_EBRACK,      /* unbalanced [] */
  REG_ECOLLATE,    /* collation error - not relevant */
  REG_ECTYPE,      /* bad class */
  REG_EESCAPE,     /* bad escape sequence */
  REG_EMPTY,       /* empty expression */
  REG_EPAREN,      /* unbalanced () */
  REG_ERANGE,      /* bad range inside [] */
  REG_ESIZE,       /* expression too big */
  REG_ESPACE,      /* failed to get memory */
  REG_ESUBREG,     /* bad back reference */
  REG_INVARG,      /* bad argument */
  REG_NOMATCH      /* match failed */
};
|
||||
|
||||
|
||||
/* The structure representing a compiled regular expression. */

typedef struct {
  void *re_pcre;        /* opaque pointer; presumably the compiled PCRE pattern - confirm in pcreposix.c */
  size_t re_nsub;       /* POSIX name for the number of parenthesized subexpressions */
  size_t re_erroffset;  /* NOTE(review): looks like the pattern offset of a compile error - confirm */
} regex_t;
|
||||
|
||||
/* The type used for an offset into the subject string. */

typedef int regoff_t;

/* The structure in which a captured offset is returned. The field names are
the POSIX ones: rm_so is the start offset and rm_eo the end offset. */

typedef struct {
  regoff_t rm_so;
  regoff_t rm_eo;
} regmatch_t;
|
||||
|
||||
/* When an application links to a PCRE DLL in Windows, the symbols that are
imported have to be identified as such. When building PCRE, the appropriate
export settings are needed, and are set in pcreposix.c before including this
file. The block below therefore only applies when PCREPOSIX_EXP_DECL has not
already been defined and PCRE_STATIC is not set. */

#if defined(_WIN32) && !defined(PCRE_STATIC) && !defined(PCREPOSIX_EXP_DECL)
#  define PCREPOSIX_EXP_DECL  extern __declspec(dllimport)
#  define PCREPOSIX_EXP_DEFN  __declspec(dllimport)
#endif

/* By default, we use the standard "extern" declarations. */

#ifndef PCREPOSIX_EXP_DECL
#  ifdef __cplusplus
#    define PCREPOSIX_EXP_DECL  extern "C"
#    define PCREPOSIX_EXP_DEFN  extern "C"
#  else
#    define PCREPOSIX_EXP_DECL  extern
#    define PCREPOSIX_EXP_DEFN  extern
#  endif
#endif
|
||||
|
||||
/* The functions */
|
||||
|
||||
PCREPOSIX_EXP_DECL int regcomp(regex_t *, const char *, int);
|
||||
PCREPOSIX_EXP_DECL int regexec(const regex_t *, const char *, size_t,
|
||||
regmatch_t *, int);
|
||||
PCREPOSIX_EXP_DECL size_t regerror(int, const regex_t *, char *, size_t);
|
||||
PCREPOSIX_EXP_DECL void regfree(regex_t *);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* End of pcreposix.h */
|
||||
133
Engine/lib/pcre/ucp.h
Normal file
133
Engine/lib/pcre/ucp.h
Normal file
|
|
@ -0,0 +1,133 @@
|
|||
/*************************************************
|
||||
* Unicode Property Table handler *
|
||||
*************************************************/
|
||||
|
||||
#ifndef _UCP_H
|
||||
#define _UCP_H
|
||||
|
||||
/* This file contains definitions of the property values that are returned by
|
||||
the function _pcre_ucp_findprop(). New values that are added for new releases
|
||||
of Unicode should always be at the end of each enum, for backwards
|
||||
compatibility. */
|
||||
|
||||
/* These are the general character categories. The enumerators take the
default values 0..6 in the order listed. */

enum {
  ucp_C,     /* Other */
  ucp_L,     /* Letter */
  ucp_M,     /* Mark */
  ucp_N,     /* Number */
  ucp_P,     /* Punctuation */
  ucp_S,     /* Symbol */
  ucp_Z      /* Separator */
};
|
||||
|
||||
/* These are the particular character types. The enumerators take the default
values 0..29 in the order listed; the first letter of each name matches one
of the general category names above. */

enum {
  ucp_Cc,    /* Control */
  ucp_Cf,    /* Format */
  ucp_Cn,    /* Unassigned */
  ucp_Co,    /* Private use */
  ucp_Cs,    /* Surrogate */
  ucp_Ll,    /* Lower case letter */
  ucp_Lm,    /* Modifier letter */
  ucp_Lo,    /* Other letter */
  ucp_Lt,    /* Title case letter */
  ucp_Lu,    /* Upper case letter */
  ucp_Mc,    /* Spacing mark */
  ucp_Me,    /* Enclosing mark */
  ucp_Mn,    /* Non-spacing mark */
  ucp_Nd,    /* Decimal number */
  ucp_Nl,    /* Letter number */
  ucp_No,    /* Other number */
  ucp_Pc,    /* Connector punctuation */
  ucp_Pd,    /* Dash punctuation */
  ucp_Pe,    /* Close punctuation */
  ucp_Pf,    /* Final punctuation */
  ucp_Pi,    /* Initial punctuation */
  ucp_Po,    /* Other punctuation */
  ucp_Ps,    /* Open punctuation */
  ucp_Sc,    /* Currency symbol */
  ucp_Sk,    /* Modifier symbol */
  ucp_Sm,    /* Mathematical symbol */
  ucp_So,    /* Other symbol */
  ucp_Zl,    /* Line separator */
  ucp_Zp,    /* Paragraph separator */
  ucp_Zs     /* Space separator */
};
|
||||
|
||||
/* These are the script identifications. The enumerators take the default
values 0..65 in the order listed. New scripts are appended at the end so that
the values of the existing ones do not change. */

enum {
  ucp_Arabic,
  ucp_Armenian,
  ucp_Bengali,
  ucp_Bopomofo,
  ucp_Braille,
  ucp_Buginese,
  ucp_Buhid,
  ucp_Canadian_Aboriginal,
  ucp_Cherokee,
  ucp_Common,
  ucp_Coptic,
  ucp_Cypriot,
  ucp_Cyrillic,
  ucp_Deseret,
  ucp_Devanagari,
  ucp_Ethiopic,
  ucp_Georgian,
  ucp_Glagolitic,
  ucp_Gothic,
  ucp_Greek,
  ucp_Gujarati,
  ucp_Gurmukhi,
  ucp_Han,
  ucp_Hangul,
  ucp_Hanunoo,
  ucp_Hebrew,
  ucp_Hiragana,
  ucp_Inherited,
  ucp_Kannada,
  ucp_Katakana,
  ucp_Kharoshthi,
  ucp_Khmer,
  ucp_Lao,
  ucp_Latin,
  ucp_Limbu,
  ucp_Linear_B,
  ucp_Malayalam,
  ucp_Mongolian,
  ucp_Myanmar,
  ucp_New_Tai_Lue,
  ucp_Ogham,
  ucp_Old_Italic,
  ucp_Old_Persian,
  ucp_Oriya,
  ucp_Osmanya,
  ucp_Runic,
  ucp_Shavian,
  ucp_Sinhala,
  ucp_Syloti_Nagri,
  ucp_Syriac,
  ucp_Tagalog,
  ucp_Tagbanwa,
  ucp_Tai_Le,
  ucp_Tamil,
  ucp_Telugu,
  ucp_Thaana,
  ucp_Thai,
  ucp_Tibetan,
  ucp_Tifinagh,
  ucp_Ugaritic,
  ucp_Yi,
  ucp_Balinese,      /* New for Unicode 5.0.0 */
  ucp_Cuneiform,     /* New for Unicode 5.0.0 */
  ucp_Nko,           /* New for Unicode 5.0.0 */
  ucp_Phags_Pa,      /* New for Unicode 5.0.0 */
  ucp_Phoenician     /* New for Unicode 5.0.0 */
};
|
||||
|
||||
#endif
|
||||
|
||||
/* End of ucp.h */
|
||||
92
Engine/lib/pcre/ucpinternal.h
Normal file
92
Engine/lib/pcre/ucpinternal.h
Normal file
|
|
@ -0,0 +1,92 @@
|
|||
/*************************************************
|
||||
* Unicode Property Table handler *
|
||||
*************************************************/
|
||||
|
||||
#ifndef _UCPINTERNAL_H
|
||||
#define _UCPINTERNAL_H
|
||||
|
||||
/* Internal header file defining the layout of the bits in each pair of 32-bit
|
||||
words that form a data item in the table. */
|
||||
|
||||
typedef struct cnode {
|
||||
pcre_uint32 f0;
|
||||
pcre_uint32 f1;
|
||||
} cnode;
|
||||
|
||||
/* Things for the f0 field. The three masks are disjoint: the script number is
the top byte, the range flag is the single 0x00800000 bit, and the code point
is the low 21 bits. The 0x00600000 bits are spare. */

#define f0_scriptmask   0xff000000  /* Mask for script field */
#define f0_scriptshift          24  /* Shift for script value */
/* The flag must be exactly the 0x00800000 bit. A wider mask such as
0x00f00000 shares bit 0x00100000 with f0_charmask, so testing it against an
entry whose code point is 0x100000 or above would see the flag as set. */
#define f0_rangeflag    0x00800000  /* Flag for a range item */
#define f0_charmask     0x001fffff  /* Mask for code point value */
|
||||
|
||||
/* Things for the f1 field. The character type is the top 6 bits. The low 16
bits hold an offset, which is read through f1_rangemask or f1_casemask (the
same bits) depending on the kind of entry. f1_caseneg has bit 0x8000 and every
bit above it set. */

#define f1_typemask     0xfc000000  /* Mask for char type field */
#define f1_typeshift            26  /* Shift for the type field */
#define f1_rangemask    0x0000ffff  /* Mask for a range offset */
#define f1_casemask     0x0000ffff  /* Mask for a case offset */
#define f1_caseneg      0xffff8000  /* Bits for negation */
|
||||
|
||||
/* The data consists of a vector of structures of type cnode. The two unsigned
|
||||
32-bit integers are used as follows:
|
||||
|
||||
(f0) (1) The most significant byte holds the script number. The numbers are
|
||||
defined by the enum in ucp.h.
|
||||
|
||||
(2) The 0x00800000 bit is set if this entry defines a range of characters.
|
||||
It is not set if this entry defines a single character
|
||||
|
||||
(3) The 0x00600000 bits are spare.
|
||||
|
||||
(4) The 0x001fffff bits contain the code point. No Unicode code point will
|
||||
ever be greater than 0x0010ffff, so this should be OK for ever.
|
||||
|
||||
(f1) (1) The 0xfc000000 bits contain the character type number. The numbers are
|
||||
defined by an enum in ucp.h.
|
||||
|
||||
(2) The 0x03ff0000 bits are spare.
|
||||
|
||||
(3) The 0x0000ffff bits contain EITHER the unsigned offset to the top of
|
||||
range if this entry defines a range, OR the *signed* offset to the
|
||||
character's "other case" partner if this entry defines a single
|
||||
character. There is no partner if the value is zero.
|
||||
|
||||
-------------------------------------------------------------------------------
|
||||
| script (8) |.|.|.| codepoint (21) || type (6) |.|.| spare (8) | offset (16) |
|
||||
-------------------------------------------------------------------------------
|
||||
| | | | |
|
||||
| | |-> spare | |-> spare
|
||||
| | |
|
||||
| |-> spare |-> spare
|
||||
|
|
||||
|-> range flag
|
||||
|
||||
The upper/lower casing information is set only for characters that come in
|
||||
pairs. The non-one-to-one mappings in the Unicode data are ignored.
|
||||
|
||||
When searching the data, proceed as follows:
|
||||
|
||||
(1) Set up for a binary chop search.
|
||||
|
||||
(2) If the top is not greater than the bottom, the character is not in the
|
||||
table. Its type must therefore be "Cn" ("Unassigned").
|
||||
|
||||
(3) Find the middle vector element.
|
||||
|
||||
(4) Extract the code point and compare. If equal, we are done.
|
||||
|
||||
(5) If the test character is smaller, set the top to the current point, and
|
||||
goto (2).
|
||||
|
||||
(6) If the current entry defines a range, compute the last character by adding
|
||||
the offset, and see if the test character is within the range. If it is,
|
||||
we are done.
|
||||
|
||||
(7) Otherwise, set the bottom to one element past the current point and goto
|
||||
(2).
|
||||
*/
|
||||
|
||||
#endif /* _UCPINTERNAL_H */
|
||||
|
||||
/* End of ucpinternal.h */
|
||||
3088
Engine/lib/pcre/ucptable.h
Normal file
3088
Engine/lib/pcre/ucptable.h
Normal file
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue