Changeset 488
- Timestamp:
- 12/27/08 21:49:48 (3 years ago)
- Location:
- microregex/trunk
- Files:
-
- 4 edited
-
ChangeLog (modified) (1 diff)
-
microregex_internal.h (modified) (1 diff)
-
microregex_matcher.c (modified) (2 diffs)
-
microregex_parser.c (modified) (15 diffs)
Legend:
- Unmodified
- Added
- Removed
-
microregex/trunk/ChangeLog
r485 r488 1 1 12/27/2008: 2 2 * Fixed extreme memory usage during run. 3 * Support \c, \l, \u, \L, \U, \E, \Q, \B, \A, \Z and \z escapes. 3 4 4 5 12/20/2008: -
microregex/trunk/microregex_internal.h
r437 r488 70 70 char *err_string; 71 71 int case_insensitive; 72 int parser_flags; 72 73 microregex_nfa_t nfa; 73 74 microregex_nfa_t nlist; -
microregex/trunk/microregex_matcher.c
r485 r488 503 503 const char *str, const char *c) 504 504 { 505 int left = 0, right = 0 ;505 int left = 0, right = 0, res; 506 506 (void)st_lst; 507 507 … … 528 528 } 529 529 530 if (cur->state->next1 && (left && right)) 530 if (cur->state->invert) 531 res = !(left && right); 532 else 533 res = (left && right); 534 535 if (cur->state->next1 && res) 531 536 { 532 537 microregex_state_t newst = microregex_state_clone(cur); -
microregex/trunk/microregex_parser.c
r484 r488 33 33 #include "microregex_internal.h" 34 34 35 #define LOWERCASE_NEXT_LETTER (1 << 0) 36 #define LOWERCASE_UNTIL_END (1 << 1) 37 #define UPPERCASE_NEXT_LETTER (1 << 2) 38 #define UPPERCASE_UNTIL_END (1 << 3) 39 #define DISABLE_METACHARACTERS (1 << 4) 40 #define SET_REGEX_PARSER_FLAG(re, fl) re->parser_flags |= fl 41 #define CLEAR_REGEX_PARSER_FLAG(re, fl) re->parser_flags &= ~fl 42 #define IS_REGEX_PARSER_FLAG_SET(re, fl) (re->parser_flags & fl) 43 35 44 static microregex_nfa_t microregex_parse_regex(microregex_t, const char *, char **, int, microregex_nfa_t *); 36 45 … … 270 279 271 280 static const char * 272 microregex_nfa_charliteral_process(const char *regex, char *buf)281 microregex_nfa_charliteral_process(const microregex_t rgx_obj, const char *regex, char *buf) 273 282 { 274 283 int i, j; … … 276 285 memset(buf, 0, 256); 277 286 287 if (IS_REGEX_PARSER_FLAG_SET(rgx_obj, DISABLE_METACHARACTERS)) 288 { 289 goto make_literal; 290 } 291 278 292 if (*regex == '.') 279 293 { … … 288 302 switch(*regex) 289 303 { 304 case 'c': 305 { 306 char esc_type = tolower(*(regex + 2)); 307 if (esc_type >= 'a' && esc_type <= 'z') 308 { 309 buf[0] = 1 + (esc_type - 'a'); 310 } 311 else if (esc_type == '[') 312 { 313 buf[0] = '\x1b'; 314 } 315 else if (esc_type == '\\') 316 { 317 buf[0] = '\x1c'; 318 } 319 else if (esc_type == ']') 320 { 321 buf[0] = '\x1d'; 322 } 323 else if (esc_type == '^') 324 { 325 buf[0] = '\x1e'; 326 } 327 else if (esc_type == '_') 328 { 329 buf[0] = '\x1f'; 330 } 331 332 if (*buf) regex++; 333 else buf[0] = 'c'; 334 break; 335 } 290 336 case 't': 291 337 { … … 410 456 else 411 457 { 412 buf[0] = *regex; 458 make_literal: 459 if (IS_REGEX_PARSER_FLAG_SET(rgx_obj, LOWERCASE_NEXT_LETTER)) 460 { 461 buf[0] = tolower(*regex); 462 CLEAR_REGEX_PARSER_FLAG(rgx_obj, LOWERCASE_NEXT_LETTER); 463 } 464 else if (IS_REGEX_PARSER_FLAG_SET(rgx_obj, UPPERCASE_NEXT_LETTER)) 465 { 466 buf[0] = toupper(*regex); 467 CLEAR_REGEX_PARSER_FLAG(rgx_obj, 
UPPERCASE_NEXT_LETTER); 468 } 469 else if (IS_REGEX_PARSER_FLAG_SET(rgx_obj, LOWERCASE_UNTIL_END)) 470 { 471 buf[0] = tolower(*regex); 472 } 473 else if (IS_REGEX_PARSER_FLAG_SET(rgx_obj, UPPERCASE_UNTIL_END)) 474 { 475 buf[0] = toupper(*regex); 476 } 477 else 478 { 479 buf[0] = *regex; 480 } 413 481 } 414 482 … … 417 485 418 486 static microregex_nfa_t 419 microregex_nfa_charliteral(const char *regex, const char **end_location, microregex_nfa_t *lst)487 microregex_nfa_charliteral(const microregex_t rgx_obj, const char *regex, const char **end_location, microregex_nfa_t *lst) 420 488 { 421 489 microregex_nfa_t ret = (microregex_nfa_t)malloc(sizeof(struct microregex_nfa_t)); … … 435 503 ret->character_class[0] = 0; 436 504 505 try_another: 506 if (!*regex) 507 { 508 free(ret->character_class); 509 free(ret); 510 return NULL; 511 } 512 513 if (IS_REGEX_PARSER_FLAG_SET(rgx_obj, DISABLE_METACHARACTERS)) 514 { 515 goto literal_chars; 516 } 517 437 518 if (*regex == '[') 438 519 { … … 450 531 char from = tmp_buf[0]; 451 532 char b[2]; 452 tmp = microregex_nfa_charliteral_process( tmp + 1, tmp_buf);533 tmp = microregex_nfa_charliteral_process(rgx_obj, tmp + 1, tmp_buf); 453 534 if (strlen(tmp_buf) == 1 && tmp_buf[0] > from) 454 535 { … … 491 572 } 492 573 493 tmp = microregex_nfa_charliteral_process( tmp, tmp_buf);574 tmp = microregex_nfa_charliteral_process(rgx_obj, tmp, tmp_buf); 494 575 if (!*tmp) 495 576 { … … 517 598 else 518 599 { 519 if (*regex == '\\' && *(regex+1) == 'b') 600 literal_chars: 601 if (IS_REGEX_PARSER_FLAG_SET(rgx_obj, DISABLE_METACHARACTERS) && *regex == '\\' && *(regex+1) == 'E') 602 { 603 rgx_obj->parser_flags = 0; 604 regex += 2; 605 goto try_another; 606 } 607 else if (IS_REGEX_PARSER_FLAG_SET(rgx_obj, DISABLE_METACHARACTERS)) 608 { 609 goto default_literal_chars; 610 } 611 612 if (*regex == '\\' && *(regex+1) == 'l') 613 { 614 SET_REGEX_PARSER_FLAG(rgx_obj, LOWERCASE_NEXT_LETTER); 615 CLEAR_REGEX_PARSER_FLAG(rgx_obj, UPPERCASE_NEXT_LETTER); 616 
regex += 2; 617 goto try_another; 618 } 619 else if (*regex == '\\' && *(regex+1) == 'u') 620 { 621 SET_REGEX_PARSER_FLAG(rgx_obj, UPPERCASE_NEXT_LETTER); 622 CLEAR_REGEX_PARSER_FLAG(rgx_obj, LOWERCASE_NEXT_LETTER); 623 regex += 2; 624 goto try_another; 625 } 626 else if (*regex == '\\' && *(regex+1) == 'L') 627 { 628 SET_REGEX_PARSER_FLAG(rgx_obj, LOWERCASE_UNTIL_END); 629 CLEAR_REGEX_PARSER_FLAG(rgx_obj, UPPERCASE_UNTIL_END); 630 regex += 2; 631 goto try_another; 632 } 633 else if (*regex == '\\' && *(regex+1) == 'U') 634 { 635 SET_REGEX_PARSER_FLAG(rgx_obj, UPPERCASE_UNTIL_END); 636 CLEAR_REGEX_PARSER_FLAG(rgx_obj, LOWERCASE_UNTIL_END); 637 regex += 2; 638 goto try_another; 639 } 640 else if (*regex == '\\' && *(regex+1) == 'Q') 641 { 642 rgx_obj->parser_flags = 0; 643 SET_REGEX_PARSER_FLAG(rgx_obj, DISABLE_METACHARACTERS); 644 regex += 2; 645 goto try_another; 646 } 647 else if (*regex == '\\' && *(regex+1) == 'E') 648 { 649 rgx_obj->parser_flags = 0; 650 regex += 2; 651 goto try_another; 652 } 653 else if (*regex == '\\' && *(regex+1) == 'b') 520 654 { 521 655 /* word boundary */ 522 656 regex += 2; 523 657 ret->state_type = WORD_BOUNDARY; 658 } 659 else if (*regex == '\\' && *(regex+1) == 'B') 660 { 661 /* word boundary */ 662 regex += 2; 663 ret->state_type = WORD_BOUNDARY; 664 ret->invert = 1; 665 } 666 else if (*regex == '\\' && *(regex+1) == 'A') 667 { 668 /* Beginning of string */ 669 regex += 2; 670 ret->state_type = ASSERT_BEGINNING; 671 } 672 else if (*regex == '\\' && *(regex+1) == 'Z') 673 { 674 /* End of string */ 675 regex += 2; 676 ret->state_type = ASSERT_END; 677 } 678 else if (*regex == '\\' && *(regex+1) == 'z') 679 { 680 /* End of string */ 681 regex += 2; 682 ret->state_type = ASSERT_END; 524 683 } 525 684 else if (*regex == '\\' && *(regex+1) >= '1' && *(regex+1) <= '9') … … 538 697 else 539 698 { 540 regex = microregex_nfa_charliteral_process(regex, tmp_buf); 699 default_literal_chars: 700 regex = 
microregex_nfa_charliteral_process(rgx_obj, regex, tmp_buf); 541 701 ret->character_class = 542 702 realloc( … … 567 727 ret->err_string = NULL; 568 728 ret->nlist = NULL; 729 ret->parser_flags = 0; 569 730 ret->nfa = microregex_parse_regex(ret, regex, NULL, 0, &ret->nlist); 570 731 microregex_nfa_append(&ret->nfa, microregex_nfa_end(&ret->nlist)); … … 631 792 microregex_nfa_append(&nfa, prev); 632 793 prev = NULL; 633 } 634 794 } 795 else if (prev && IS_REGEX_PARSER_FLAG_SET(regex_obj, DISABLE_METACHARACTERS)) 796 { 797 microregex_nfa_append(&nfa, prev); 798 prev = NULL; 799 } 800 801 if (IS_REGEX_PARSER_FLAG_SET(regex_obj, DISABLE_METACHARACTERS)) 802 { 803 goto default_character; 804 } 805 635 806 switch(*regex) 636 807 { … … 816 987 /* Character class/literal */ 817 988 prev = microregex_nfa_charliteral( 989 regex_obj, 818 990 regex, 819 991 (const char **)(&end_location), … … 829 1001 if (prev) microregex_nfa_append(&nfa, prev); 830 1002 if (end_char) *end_char = (char *)regex; 1003 if (!*regex) regex_obj->parser_flags = 0; 831 1004 return nfa; 832 1005 }
Note: See TracChangeset for help on using the changeset viewer.
