Selaa lähdekoodia

refactor & feat: regex使用pcre2

SongZihuan 3 vuotta sitten
vanhempi
sitoutus
bb8eae0f31
3 muutettua tiedostoa jossa 82 lisäystä ja 21 poistoa
  1. 7 6
      include/tool/tool-regex.h
  2. 6 15
      include/tool/tool-regex.inline.h
  3. 69 0
      src/tool/regex.cpp

+ 7 - 6
include/tool/tool-regex.h

@@ -1,20 +1,21 @@
 #ifndef AFUN_TOOL_REGEX
 #define AFUN_TOOL_REGEX
-#include <regex>
+#define PCRE2_CODE_UNIT_WIDTH 8
+#include "pcre2.h"
 
 namespace aFuntool {
-    class Regex {  // 整个对象都是inline的, 不需要Export符号
-        std::regex re;  // 正则表达式
+    class AFUN_TOOL_EXPORT Regex {  // Regular-expression wrapper holding a PCRE2 compiled pattern; the pattern constructor and match() are no longer inline (they are defined in src/tool/regex.cpp)
+        pcre2_code *re;  // Compiled PCRE2 pattern; set to nullptr in an object that has been moved from, freed by the destructor otherwise
         std::string pattern;  // Source text of the regular expression
     public:
-        inline explicit Regex(const std::string &pattern_) noexcept(false);
+        explicit Regex(std::string pattern_) noexcept(false);  // Compiles pattern_; throws RegexException when it is not UTF-8 or does not compile
         inline Regex(const Regex &regex) noexcept;
         inline Regex(Regex &&regex) noexcept;
+        inline ~Regex() noexcept;  // Frees the compiled pattern, if there is one
         Regex &operator=(const Regex &regex)=delete;
         Regex &operator=(Regex &&regex)=delete;
 
-        [[nodiscard]] inline bool match(const char *subject) const;
-        [[nodiscard]] inline bool match(const std::string &subject) const;
+        [[nodiscard]] bool match(const std::string &subject) const;  // True only when the reported match spans the whole of subject; throws RegexException for a non-UTF-8 subject or a PCRE2 matching error
     };
 }
 

+ 6 - 15
include/tool/tool-regex.inline.h

@@ -1,28 +1,19 @@
 #ifndef AFUN_TOOL_REGEX_INLINE_H
 #define AFUN_TOOL_REGEX_INLINE_H
-
 #include "tool-regex.h"
 
 namespace aFuntool {
-    inline Regex::Regex(const std::string &pattern_) noexcept(false) : re{pattern_}, pattern{pattern_} {
-        if (!isCharUTF8(pattern))
-            throw RegexException("Pattern not utf-8");
-    }
-
-    inline Regex::Regex(const Regex &regex) noexcept: re{regex.pattern}, pattern{regex.pattern}{
-
-    }
-
-    inline Regex::Regex(Regex &&regex) noexcept : pattern {std::move(regex.pattern)}, re {std::move(regex.re)} {
+    inline Regex::Regex(const Regex &regex) noexcept: Regex(regex.pattern) {  // Copy: delegates to the pattern-string constructor, which compiles the pattern again. NOTE(review): that constructor is declared noexcept(false) while this one is noexcept - confirm a throw cannot happen here
 
     }
 
-    inline bool Regex::match(const char *subject) const{
-        return std::regex_match(subject, re);
+    inline Regex::Regex(Regex &&regex) noexcept : pattern {std::move(regex.pattern)}, re {regex.re} {  // Move: takes over the source object's pattern text and compiled pattern
+        regex.re = nullptr;  // The source gives up the compiled pattern so that its destructor does not free it
     }
 
-    inline bool Regex::match(const std::string &subject) const {
-        return std::regex_match(subject, re);
+    Regex::~Regex() noexcept {  // Releases the compiled pattern held by this object
+        if (re != nullptr)  // re is nullptr in an object that has been moved from
+            pcre2_code_free(re);
     }
 }
 

+ 69 - 0
src/tool/regex.cpp

@@ -0,0 +1,69 @@
+#include "tool-exception.h"
+#include "tool-regex.h"
+#include "file.h"
+#include "string"
+
+namespace aFuntool {
+    /**
+     * Compile a regular expression with PCRE2.
+     *
+     * @param pattern_ Text of the regular expression; it is stored in the object.
+     * @throws RegexException when the pattern is not valid UTF-8 or PCRE2 cannot compile it.
+     *         A compile error message carries the offset of the error within the pattern.
+     */
+    Regex::Regex(std::string pattern_) noexcept(false): re{nullptr}, pattern{std::move(pattern_)} {
+        // Validate first: nothing has been compiled yet, so this error path has nothing to release.
+        if (!isCharUTF8(pattern))
+            throw RegexException("Pattern not utf-8");
+
+        int error_code = 0;
+        PCRE2_SIZE error_offset = 0;
+        // The explicit length is passed instead of PCRE2_ZERO_TERMINATED so that a pattern
+        // containing a NUL character is not silently cut short.
+        // NOTE(review): PCRE2_UTF is not set, so matching works on bytes - confirm this is intended.
+        re = pcre2_compile(reinterpret_cast<PCRE2_SPTR>(pattern.c_str()), pattern.size(), 0,
+                           &error_code, &error_offset, nullptr);
+        if (re != nullptr)
+            return;
+
+        if (error_code == 0)
+            throw RegexException("Regex failed: unknown");
+
+        // Zero-filled so the text is an empty string if PCRE2 does not know the error code.
+        PCRE2_UCHAR buffer[256] = {0};
+        pcre2_get_error_message(error_code, buffer, sizeof(buffer) / sizeof(buffer[0]));
+        const std::string regex_error = "Regex failed: " + std::to_string(error_offset) + ": " +
+                                        reinterpret_cast<const char *>(buffer) + "\n";
+        throw RegexException(regex_error.c_str());
+    }
+
+    /**
+     * Test whether the whole of subject matches the pattern.
+     *
+     * The match is anchored at both ends, so alternatives and lazy quantifiers are
+     * explored until one of them covers the entire subject (for example `a|ab`
+     * matches "ab"). A match whose reported start was moved by \K is not a full match.
+     *
+     * @param subject Text to test.
+     * @return true when the pattern matches subject from its first to its last byte.
+     * @throws RegexException when subject is not valid UTF-8, when there is no compiled
+     *         pattern, or when PCRE2 reports an error other than "no match".
+     */
+    bool Regex::match(const std::string &subject) const{
+        // Frees the match data on every way out of the function, including the throwing ones.
+        struct MatchDataGuard {
+            pcre2_match_data *data;
+            ~MatchDataGuard() { pcre2_match_data_free(data); }  // a null pointer is accepted
+        };
+
+        if (!isCharUTF8(subject))
+            throw RegexException("Subject not utf-8");
+
+        // Built only on failure; the text is the one callers have always received.
+        const auto describe_failure = [&]() {
+            return "Regex match '" + subject + "' failed by '" + pattern + "'\n";
+        };
+
+        if (re == nullptr)  // e.g. an object that has been moved from
+            throw RegexException(describe_failure().c_str());
+
+        const MatchDataGuard guard{pcre2_match_data_create_from_pattern(re, nullptr)};
+        if (guard.data == nullptr)  // allocation failed
+            throw RegexException(describe_failure().c_str());
+
+        const PCRE2_SIZE sub_len = subject.size();
+        // Anchoring at both ends makes PCRE2 itself look for a match of the whole subject,
+        // instead of taking the first match found anywhere and comparing its bounds afterwards.
+        // NOTE(review): PCRE2_ENDANCHORED needs PCRE2 10.30 or later - confirm the minimum version.
+        const int rc = pcre2_match(re, reinterpret_cast<PCRE2_SPTR>(subject.c_str()), sub_len, 0,
+                                   PCRE2_ANCHORED | PCRE2_ENDANCHORED, guard.data, nullptr);
+        if (rc == PCRE2_ERROR_NOMATCH)
+            return false;
+        if (rc < 0)
+            throw RegexException(describe_failure().c_str());
+
+        const PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(guard.data);
+        if (ovector[0] > ovector[1]) {
+            const std::string regex_error =
+                    "\\K was used in an assertion to set the match start after its end.\n"
+                    "From end to start the match was: " +
+                    subject.substr(ovector[1], ovector[0] - ovector[1]) + "\n";
+            throw RegexException(regex_error.c_str());
+        }
+
+        // The start can still differ from 0 when \K moved it.
+        return ovector[0] == 0 && ovector[1] == sub_len;
+    }
+}