regex.cpp 2.4 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273
  1. #include <cstdio>
  2. #include "tool.hpp"
  3. #define PCRE2_CODE_UNIT_WIDTH 8
  4. #include "pcre2.h"
  5. #include "regex.hpp"
  6. using namespace aFuntool;
  7. aFuntool::Regex::Regex(const std::string &pattern_) : pattern {pattern_} {
  8. if (!isCharUTF8(pattern))
  9. throw RegexException("Pattern not utf-8");
  10. int error_code;
  11. size_t erroroffset;
  12. char regex_error[REGEX_ERROR_SIZE];
  13. this->re = pcre2_compile((PCRE2_SPTR)pattern.c_str(), PCRE2_ZERO_TERMINATED, 0, &error_code, &erroroffset, nullptr);
  14. if (re == nullptr) {
  15. PCRE2_UCHAR buffer[256];
  16. pcre2_get_error_message(error_code, buffer, sizeof(buffer));
  17. snprintf(regex_error, sizeof(regex_error), "R%d: %s\n", (int) erroroffset, buffer);
  18. throw RegexException(regex_error);
  19. }
  20. }
  21. aFuntool::Regex::~Regex() {
  22. if (re != nullptr)
  23. pcre2_code_free(re);
  24. }
  25. /*
  26. * 函数名: matchRegex
  27. * 目标: 检查一个字符串是否可被完全匹配一个正则表达式
  28. * 返回 (1) - 可完全匹配
  29. * 返回 (0) - 不可完全匹配或不可匹配
  30. * 返回 (>0) - 失败
  31. */
  32. int aFuntool::Regex::match(const char *subject) {
  33. if (!isCharUTF8(subject))
  34. throw RegexException("Subject not utf-8");
  35. char regex_error[REGEX_ERROR_SIZE];
  36. PCRE2_SIZE sub_len = strlen(subject);
  37. pcre2_match_data *match_data = pcre2_match_data_create_from_pattern(re, nullptr);
  38. int rc = pcre2_match(re, (PCRE2_SPTR)subject, sub_len, 0, 0, match_data, nullptr);
  39. if (rc < 0) {
  40. pcre2_match_data_free(match_data);
  41. if (rc == PCRE2_ERROR_NOMATCH)
  42. return 0;
  43. else {
  44. snprintf(regex_error, sizeof(regex_error),
  45. "Regex match '%s' failed by '%s'\n", subject, pattern.c_str());
  46. throw RegexException(regex_error);
  47. }
  48. }
  49. PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(match_data);
  50. if (ovector[0] > ovector[1]) {
  51. snprintf(regex_error, sizeof(regex_error),
  52. "\\K was used in an assertion to set the match start after its end.\n"
  53. "From end to start the match was: %.*s\n",
  54. (int) (ovector[0] - ovector[1]), (char *) (subject + ovector[1]));
  55. pcre2_match_data_free(match_data);
  56. throw RegexException(regex_error);
  57. }
  58. int ret = 0;
  59. if (ovector[0] == 0 && ovector[1] == sub_len) // 完全匹配
  60. ret = 1;
  61. pcre2_match_data_free(match_data);
  62. return ret;
  63. }