123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231 |
- /*
- tre-internal.h - TRE internal definitions
- Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi>
- All rights reserved.
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions
- are met:
- 1. Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- 2. Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
- ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
#include <regex.h>
#include <stdint.h>
#include <wchar.h>
#include <wctype.h>
/* Build configuration: TRE_MBSTATE is explicitly undefined and NDEBUG is
   defined from this point on. */
#undef TRE_MBSTATE
#define NDEBUG

/* TRE_REGEX_T_FIELD expands to the member name `__opaque'. */
#define TRE_REGEX_T_FIELD __opaque

/* Error codes are plain ints; pattern characters are wide characters. */
typedef int reg_errcode_t;
typedef wchar_t tre_char_t;

/* Debug output hook.  Expands to an empty statement; `msg' is discarded. */
#define DPRINT(msg) do { } while (0)

/* Number of elements in the array `x'.  Only meaningful for true arrays,
   not for pointers.  The argument is parenthesized before it is indexed so
   that any expression designating an array can be passed safely. */
#define elementsof(x) (sizeof(x) / sizeof((x)[0]))

/* Multibyte to wide character conversion done with mbtowc(); the
   conversion state argument `ps' is ignored. */
#define tre_mbrtowc(pwc, s, n, ps) (mbtowc((pwc), (s), (n)))
/* Wide characters: each tre_* character helper below is an alias for the
   corresponding <wctype.h> / <wchar.h> facility. */
typedef wint_t tre_cint_t;

/* 0x10ffff is the highest Unicode code point. */
#define TRE_CHAR_MAX 0x10ffff

/* Character classification. */
#define tre_isalnum   iswalnum
#define tre_isalpha   iswalpha
#define tre_isblank   iswblank
#define tre_iscntrl   iswcntrl
#define tre_isdigit   iswdigit
#define tre_isgraph   iswgraph
#define tre_islower   iswlower
#define tre_isprint   iswprint
#define tre_ispunct   iswpunct
#define tre_isspace   iswspace
#define tre_isupper   iswupper
#define tre_isxdigit  iswxdigit

/* Case conversion. */
#define tre_tolower   towlower
#define tre_toupper   towupper

/* Length of a wide character string. */
#define tre_strlen    wcslen

/* Character classes come straight from the system: iswctype() and
   wctype() are used as they are. */
typedef wctype_t tre_ctype_t;
#define tre_isctype   iswctype
#define tre_ctype     wctype
/* Returns the number of bytes to add to (char *)ptr to make it properly
   aligned for `type'; the result is 0 when the address is already
   aligned.

   The address is converted through uintptr_t so that no address bits are
   lost on platforms where long is narrower than a pointer, and `ptr' is
   parenthesized so that a pointer expression such as `p + 1' is evaluated
   as a whole before it is converted to an integer.  Both arguments are
   expanded more than once, so they must be free of side effects. */
#define ALIGN(ptr, type) \
  (((uintptr_t)(ptr) % sizeof(type)) \
   ? (sizeof(type) - ((uintptr_t)(ptr) % sizeof(type))) \
   : 0)
/* Discard any MAX/MIN that is already defined and supply our own.  Both
   are plain macros, so each argument may be evaluated twice.  When the two
   arguments compare equal, the first one is selected. */
#undef MAX
#undef MIN
#define MAX(a, b) ((a) >= (b) ? (a) : (b))
#define MIN(a, b) ((a) <= (b) ? (a) : (b))
- /* TNFA transition type. A TNFA state is an array of transitions,
- the terminator is a transition with NULL `state'. */
- typedef struct tnfa_transition tre_tnfa_transition_t;
- struct tnfa_transition {
- /* Range of accepted characters. */
- tre_cint_t code_min;
- tre_cint_t code_max;
- /* Pointer to the destination state. */
- tre_tnfa_transition_t *state;
- /* ID number of the destination state. */
- int state_id;
- /* -1 terminated array of tags (or NULL). */
- int *tags;
- /* Assertion bitmap. */
- int assertions;
- /* Assertion parameters. */
- union {
- /* Character class assertion. */
- tre_ctype_t class;
- /* Back reference assertion. */
- int backref;
- } u;
- /* Negative character class assertions. */
- tre_ctype_t *neg_classes;
- };
/* Assertion flags; every flag is a distinct single bit. */
#define ASSERT_AT_BOL          0x001  /* Beginning of line. */
#define ASSERT_AT_EOL          0x002  /* End of line. */
#define ASSERT_CHAR_CLASS      0x004  /* Character class in `class'. */
#define ASSERT_CHAR_CLASS_NEG  0x008  /* Character classes in `neg_classes'. */
#define ASSERT_AT_BOW          0x010  /* Beginning of word. */
#define ASSERT_AT_EOW          0x020  /* End of word. */
#define ASSERT_AT_WB           0x040  /* Word boundary. */
#define ASSERT_AT_WB_NEG       0x080  /* Not a word boundary. */
#define ASSERT_BACKREF         0x100  /* A back reference in `backref'. */
#define ASSERT_LAST            0x100  /* Same value as ASSERT_BACKREF. */
/* Tag directions. */
typedef enum {
  TRE_TAG_MINIMIZE,   /* = 0 */
  TRE_TAG_MAXIMIZE    /* = 1 */
} tre_tag_direction_t;
/* Describes how the register values of one submatch are computed from tag
   values once a match has succeeded. */
typedef struct tre_submatch_data {
  int so_tag;     /* Tag giving rm_so, the submatch start offset. */
  int eo_tag;     /* Tag giving rm_eo, the submatch end offset. */
  int *parents;   /* List of the submatches that contain this submatch. */
} tre_submatch_data_t;
- /* TNFA definition. */
- typedef struct tnfa tre_tnfa_t;
- struct tnfa {
- tre_tnfa_transition_t *transitions;
- unsigned int num_transitions;
- tre_tnfa_transition_t *initial;
- tre_tnfa_transition_t *final;
- tre_submatch_data_t *submatch_data;
- char *firstpos_chars;
- int first_char;
- unsigned int num_submatches;
- tre_tag_direction_t *tag_directions;
- int *minimal_tags;
- int num_tags;
- int num_minimals;
- int end_tag;
- int num_states;
- int cflags;
- int have_backrefs;
- int have_approx;
- };
/* from tre-mem.h: */

/* Block size constant of the allocator. */
#define TRE_MEM_BLOCK_SIZE 1024

/* Singly linked list node holding an untyped data pointer. */
struct tre_list {
  void *data;
  struct tre_list *next;
};
typedef struct tre_list tre_list_t;

/* Allocator state.  Note that tre_mem_t is a POINTER to this structure.
   NOTE(review): the members are not documented in this header; `n' is
   compared against the requested size by tre_mem_alloca(), so it is
   presumably the number of bytes still available at `ptr' -- confirm
   against the allocator implementation. */
struct tre_mem_struct {
  tre_list_t *blocks;
  tre_list_t *current;
  char *ptr;
  size_t n;
  int failed;
  void **provided;
};
typedef struct tre_mem_struct *tre_mem_t;
- #define tre_mem_new_impl __tre_mem_new_impl
- #define tre_mem_alloc_impl __tre_mem_alloc_impl
- #define tre_mem_destroy __tre_mem_destroy
- hidden tre_mem_t tre_mem_new_impl(int provided, void *provided_block);
- hidden void *tre_mem_alloc_impl(tre_mem_t mem, int provided, void *provided_block,
- int zero, size_t size);
- /* Returns a new memory allocator or NULL if out of memory. */
- #define tre_mem_new() tre_mem_new_impl(0, NULL)
- /* Allocates a block of `size' bytes from `mem'. Returns a pointer to the
- allocated block or NULL if an underlying malloc() failed. */
- #define tre_mem_alloc(mem, size) tre_mem_alloc_impl(mem, 0, NULL, 0, size)
- /* Allocates a block of `size' bytes from `mem'. Returns a pointer to the
- allocated block or NULL if an underlying malloc() failed. The memory
- is set to zero. */
- #define tre_mem_calloc(mem, size) tre_mem_alloc_impl(mem, 0, NULL, 1, size)
#ifdef TRE_USE_ALLOCA

/* alloca() variants of the allocator macros: the memory comes from
   alloca() rather than malloc().  They have to stay macros, because
   alloca() is invoked in the expansion and therefore in the caller.
   `mem' and `size' are expanded more than once. */
#define tre_mem_newa() \
  tre_mem_new_impl(1, alloca(sizeof(struct tre_mem_struct)))

/* A fresh alloca(TRE_MEM_BLOCK_SIZE) block is passed along only when
   (mem)->n is smaller than the requested size. */
#define tre_mem_alloca(mem, size) \
  ((mem)->n < (size) \
   ? tre_mem_alloc_impl((mem), 1, alloca(TRE_MEM_BLOCK_SIZE), 0, (size)) \
   : tre_mem_alloc_impl((mem), 1, NULL, 0, (size)))

#endif /* TRE_USE_ALLOCA */
- /* Frees the memory allocator and all memory allocated with it. */
- hidden void tre_mem_destroy(tre_mem_t mem);
/* The x-prefixed allocation names are direct aliases for the C library
   allocation functions. */
#define xmalloc   malloc
#define xcalloc   calloc
#define xrealloc  realloc
#define xfree     free
|