Преглед изворни кода

feat: 添加语法分析模块

SongZihuan пре 3 година
родитељ
комит
56e184bf5f

+ 3 - 3
include/code.h

@@ -7,9 +7,9 @@ typedef struct af_Code af_Code;
 
 
 /* 括号类型 */
 /* 括号类型 */
 enum af_BlockType {
 enum af_BlockType {
-    parentheses = 0,  // 小括号
-    brackets,  // 中括号
-    curly,  // 大括号
+    parentheses = '(',  // 小括号
+    brackets = '[',  // 中括号
+    curly = '{',  // 大括号
 };
 };
 
 
 /* 代码块创建函数 */
 /* 代码块创建函数 */

+ 0 - 17
include/lexical_warning_error.h

@@ -1,17 +0,0 @@
-/*
- * 文件名: lexical_warning_error.h
- * 目标: 记录lexical的警告和错误信息
- */
-#ifndef AFUN_LEXICAL_WARNING_ERROR_H
-#define AFUN_LEXICAL_WARNING_ERROR_H
-
-#define LEXICAL_ERROR(status, info) ("status: " #status " " #info)
-
-#define SYS_ILLEGAL_CHAR(status) LEXICAL_ERROR(status, "System error to obtain illegal characters") /* switch分支获得了不可能的字符 */
-#define ILLEGAL_CHAR(status) LEXICAL_ERROR(status, "Illegal characters") /* 输入了非法字符 */
-
-#define SYS_ERROR_STATUS(status) LEXICAL_ERROR(status, "System error to jump status") /* 状态跳转错误 */
-#define INCOMPLETE_FILE(status) LEXICAL_ERROR(status, "Incomplete file") /* 文件不完整 */
-#define INCULDE_CONTROL(status) LEXICAL_ERROR(status, "Include control characters in the text (not recommended)") /* 文本中包含控制符 */
-
-#endif //AFUN_LEXICAL_WARNING_ERROR_H

+ 1 - 1
include/macro.h

@@ -9,7 +9,7 @@
 #include <inttypes.h>
 #include <inttypes.h>
 #include <stdarg.h>
 #include <stdarg.h>
 #include "mem.h"
 #include "mem.h"
-#include "error_macro.h"
+#include "runtime_error.h"
 
 
 #ifndef __bool_true_false_are_defined
 #ifndef __bool_true_false_are_defined
 #define bool int
 #define bool int

+ 2 - 0
include/parser.h

@@ -4,6 +4,7 @@
 #include "token.h"
 #include "token.h"
 #include "reader.h"
 #include "reader.h"
 
 
+#define SYNTACTIC_MAX_DEEP (1000)
 typedef struct af_Parser af_Parser;
 typedef struct af_Parser af_Parser;
 
 
 /* Parser 创建与释放 */
 /* Parser 创建与释放 */
@@ -13,6 +14,7 @@ void freeParser(af_Parser *parser);
 af_Parser *makeParserByString(char *str, bool free_str, FILE *error);
 af_Parser *makeParserByString(char *str, bool free_str, FILE *error);
 
 
 /* Parser 操作函数 */
 /* Parser 操作函数 */
+af_Code *parserCode(af_Parser *parser);
 af_TokenType getTokenFromLexical(char **text, af_Parser *parser);
 af_TokenType getTokenFromLexical(char **text, af_Parser *parser);
 void *getParserData(af_Parser *parser);
 void *getParserData(af_Parser *parser);
 void initParser(af_Parser *parser);
 void initParser(af_Parser *parser);

+ 33 - 0
include/parserl_warning_error.h

@@ -0,0 +1,33 @@
+/*
+ * 文件名: parser_warning_error.h
+ * 目标: 记录parser的警告和错误信息
+ */
+#ifndef AFUN_PARSERL_WARNING_ERROR_H
+#define AFUN_PARSERL_WARNING_ERROR_H
+
+/* 词法分析器错误和警告信息 */
+#define LEXICAL_ERROR(status, info) ("status: " #status " " info)
+
+#define SYS_ILLEGAL_CHAR(status) LEXICAL_ERROR(status, "System error to obtain illegal characters") /* switch分支获得了不可能的字符 */
+#define ILLEGAL_CHAR(status) LEXICAL_ERROR(status, "Illegal characters") /* 输入了非法字符 */
+
+#define SYS_ERROR_STATUS(status) LEXICAL_ERROR(status, "System error to jump status") /* 状态跳转错误 */
+#define INCOMPLETE_FILE(status) LEXICAL_ERROR(status, "Incomplete file") /* 文件不完整 */
+#define INCULDE_CONTROL(status) LEXICAL_ERROR(status, "Include control characters in the text (not recommended)") /* 文本中包含控制符 */
+
+/* 语法分析器错误和经过信息 */
+#define SYNTACTIC_ERROR(status, info) (#status ": " info)
+
+#define CodeListStartError() SYNTACTIC_ERROR(CodeList, "CodeList did not get a suitable start symbol")
+#define CodeListEndError() SYNTACTIC_ERROR(CodeList, "CodeList did not get EOF/NUL with end")
+
+#define CodeStartError() SYNTACTIC_ERROR(Code, "Code did not get a suitable start symbol")
+#define CodeEndError(p) SYNTACTIC_ERROR(Code, "Code-Block did not get " p " with end")
+
+#define MakeCodeFail() SYNTACTIC_ERROR(Code, "Make code fail (Maybe by prefix)")
+
+#define SYNTACTIC_TOO_DEEP() SYNTACTIC_ERROR(Syntactic, "Recursion too deep")
+
+#define PREFIX_ERROR(satus) SYNTACTIC_ERROR(status, "The system gets the prefix error")
+
+#endif //AFUN_PARSERL_WARNING_ERROR_H

+ 5 - 5
include/error_macro.h → include/runtime_error.h

@@ -1,10 +1,10 @@
 /*
 /*
- * 文件名: error_macro.h
- * 目标: 定义错误信息
+ * 文件名: runtime_error.h
+ * 目标: 定义aFunlang运行时错误信息
  */
  */
 
 
-#ifndef AFUN_ERROR_MACRO_H
-#define AFUN_ERROR_MACRO_H
+#ifndef AFUN_RUNTIME_ERROR_H
+#define AFUN_RUNTIME_ERROR_H
 
 
 #define SYNTAX_ERROR "Syntax-Error"
 #define SYNTAX_ERROR "Syntax-Error"
 #define SYNTAX_ERROR_INFO "Block syntax errors." /* block元素不足 */
 #define SYNTAX_ERROR_INFO "Block syntax errors." /* block元素不足 */
@@ -33,4 +33,4 @@
 #define API_RUN_ERROR "API-Run-Error"
 #define API_RUN_ERROR "API-Run-Error"
 #define API_DONOT_GIVE(name) ("Object API don't give: " #name)
 #define API_DONOT_GIVE(name) ("Object API don't give: " #name)
 
 
-#endif //AFUN_ERROR_MACRO_H
+#endif //AFUN_RUNTIME_ERROR_H

+ 10 - 0
src/core/__parser.h

@@ -33,10 +33,13 @@ enum af_LexicalStatus {
 
 
 typedef enum af_LexicalStatus af_LexicalStatus;
 typedef enum af_LexicalStatus af_LexicalStatus;
 typedef struct af_Lexical af_Lexical;
 typedef struct af_Lexical af_Lexical;
+typedef struct af_Syntactic af_Syntactic;
 
 
 struct af_Parser {
 struct af_Parser {
     struct af_Reader *reader;
     struct af_Reader *reader;
     struct af_Lexical *lexical;
     struct af_Lexical *lexical;
+    struct af_Syntactic *syntactic;
+
     FILE *error;
     FILE *error;
     bool is_error;  // Parser遇到错误
     bool is_error;  // Parser遇到错误
 };
 };
@@ -48,6 +51,13 @@ struct af_Lexical {  // 词法匹配器的状态机
 
 
     size_t mutli_comment;  // 多行注释嵌套等级
     size_t mutli_comment;  // 多行注释嵌套等级
     bool is_end;
     bool is_end;
+    bool is_error;
+};
+
+struct af_Syntactic {
+    bool back;
+    enum af_TokenType token;
+    char *text;
 };
 };
 
 
 #endif //AFUN__PARSER_H
 #endif //AFUN__PARSER_H

+ 13 - 4
src/core/code.c

@@ -387,17 +387,26 @@ char *codeToStr(af_Code *code, int n) {
     return re;
     return re;
 }
 }
 
 
+static void printLayerSpace(size_t layer) {
+    for (size_t i = 0; i < layer; i++)
+        printf("    ");
+}
+
 void printCode(af_Code *bt) {
 void printCode(af_Code *bt) {
-    for (NULL; bt != NULL; bt = bt->next) {
+    size_t layer = 0;
+    for (NULL; bt != NULL || layer < 0; bt = bt->next) {
+        printLayerSpace(layer);
+        layer = layer - bt->code_end;
         switch (bt->type) {
         switch (bt->type) {
             case code_element:
             case code_element:
-                printf("code_element: %s prefix: %d\n", bt->element.data, bt->prefix);
+                printf("element: [prefix (%c)] [end %d] [data '%s']\n", bt->prefix, bt->code_end, bt->element.data);
                 break;
                 break;
             case code_block:
             case code_block:
-                printf("code_block: %d %d prefix: %d\n", bt->block.elements, bt->block.type, bt->prefix);
+                layer++;
+                printf("code: [prefix (%c)] [end %d] [type %c] [elements %d]\n", bt->prefix, bt->code_end, bt->block.type, bt->block.elements);
                 break;
                 break;
             default:
             default:
-                printf("Unknow: %d prefix: %d\n", bt->type, bt->prefix);
+                printf("Unknown: [prefix (%c)] [end %d] [type %d]\n", bt->prefix, bt->code_end, bt->type);
                 break;
                 break;
         }
         }
     }
     }

+ 9 - 4
src/core/lexical.c

@@ -5,12 +5,13 @@
 #include <ctype.h>
 #include <ctype.h>
 #include "aFun.h"
 #include "aFun.h"
 #include "__parser.h"
 #include "__parser.h"
-#include "lexical_warning_error.h"
+#include "parserl_warning_error.h"
 
 
 static void printLexicalError(char *info, af_Parser *parser) {
 static void printLexicalError(char *info, af_Parser *parser) {
     if (parser->error == NULL)
     if (parser->error == NULL)
         return;
         return;
     fprintf(parser->error, "[Lexical-Error] %s\n", info);
     fprintf(parser->error, "[Lexical-Error] %s\n", info);
+    parser->is_error = true;
 }
 }
 
 
 static void printLexicalWarning(char *info, af_Parser *parser) {
 static void printLexicalWarning(char *info, af_Parser *parser) {
@@ -327,11 +328,14 @@ af_TokenType getTokenFromLexical(char **text, af_Parser *parser) {
     if (parser->lexical->is_end) {
     if (parser->lexical->is_end) {
         *text = NULL;
         *text = NULL;
         return TK_EOF;
         return TK_EOF;
+    } else if (parser->lexical->is_error) {
+        *text = NULL;
+        return TK_ERROR;
     }
     }
 
 
     while (1) {
     while (1) {
         char ch = getChar(parser->reader);
         char ch = getChar(parser->reader);
-        if (iscntrl(ch) && !isspace(ch))
+        if (iscntrl(ch) && !isspace(ch) && ch != NUL)
             printLexicalWarning(INCULDE_CONTROL(base), parser);
             printLexicalWarning(INCULDE_CONTROL(base), parser);
 
 
         switch (parser->lexical->status) {
         switch (parser->lexical->status) {
@@ -377,7 +381,7 @@ af_TokenType getTokenFromLexical(char **text, af_Parser *parser) {
             char *word = readWord(parser->lexical->last, parser->reader);
             char *word = readWord(parser->lexical->last, parser->reader);
             tt = parser->lexical->token;
             tt = parser->lexical->token;
 
 
-            if (tt == TK_ELEMENT_SHORT)
+            if (tt == TK_ELEMENT_SHORT || tt == TK_PREFIX)
                 *text = word;
                 *text = word;
             else if (tt == TK_ELEMENT_LONG) {
             else if (tt == TK_ELEMENT_LONG) {
                 char *new = NEW_STR(STR_LEN(word) - 2);  // 去除收尾|
                 char *new = NEW_STR(STR_LEN(word) - 2);  // 去除收尾|
@@ -398,6 +402,7 @@ af_TokenType getTokenFromLexical(char **text, af_Parser *parser) {
                 }
                 }
 
 
                 *text = strCopy(new);
                 *text = strCopy(new);
+                free(word);
                 free(new);
                 free(new);
             } else
             } else
                 free(word);
                 free(word);
@@ -415,11 +420,11 @@ af_TokenType getTokenFromLexical(char **text, af_Parser *parser) {
             free(word);
             free(word);
             parser->lexical->status = lex_begin;
             parser->lexical->status = lex_begin;
             parser->lexical->last = 0;
             parser->lexical->last = 0;
-            parser->is_error = true;
             continue;
             continue;
         } else if (re == -2 || re == -3) {
         } else if (re == -2 || re == -3) {
             tt = TK_ERROR;
             tt = TK_ERROR;
             *text = NULL;
             *text = NULL;
+            parser->lexical->is_error = true;
             break;
             break;
         }
         }
     }
     }

+ 14 - 0
src/core/parser.c

@@ -8,12 +8,15 @@
 
 
 static af_Lexical *makeLexical(void);
 static af_Lexical *makeLexical(void);
 static void freeLexical(af_Lexical *lex);
 static void freeLexical(af_Lexical *lex);
+static af_Syntactic *makeSyntactic(void);
+static void freeSyntactic(af_Syntactic *syntactic);
 
 
 af_Parser *makeParser(DLC_SYMBOL(readerFunc) read_func, DLC_SYMBOL(destructReaderFunc) destruct_func, size_t data_size,
 af_Parser *makeParser(DLC_SYMBOL(readerFunc) read_func, DLC_SYMBOL(destructReaderFunc) destruct_func, size_t data_size,
                       FILE *error) {
                       FILE *error) {
     af_Parser *parser = calloc(1, sizeof(af_Parser));
     af_Parser *parser = calloc(1, sizeof(af_Parser));
     parser->reader = makeReader(read_func, destruct_func, data_size);
     parser->reader = makeReader(read_func, destruct_func, data_size);
     parser->lexical = makeLexical();
     parser->lexical = makeLexical();
+    parser->syntactic = makeSyntactic();
     parser->error = error;
     parser->error = error;
     return parser;
     return parser;
 }
 }
@@ -21,6 +24,7 @@ af_Parser *makeParser(DLC_SYMBOL(readerFunc) read_func, DLC_SYMBOL(destructReade
 void freeParser(af_Parser *parser) {
 void freeParser(af_Parser *parser) {
     freeReader(parser->reader);
     freeReader(parser->reader);
     freeLexical(parser->lexical);
     freeLexical(parser->lexical);
+    freeSyntactic(parser->syntactic);
     free(parser);
     free(parser);
 }
 }
 
 
@@ -42,7 +46,17 @@ static void freeLexical(af_Lexical *lex) {
     free(lex);
     free(lex);
 }
 }
 
 
+static af_Syntactic *makeSyntactic(void) {
+    af_Syntactic *syntactic = calloc(1, sizeof(af_Syntactic));
+    return syntactic;
+}
+
+static void freeSyntactic(af_Syntactic *syntactic) {
+    free(syntactic->text);
+    free(syntactic);
+}
 
 
+/* makeParser函数封装 */
 struct readerDataString {
 struct readerDataString {
     char *str;
     char *str;
     bool free_str;
     bool free_str;

+ 228 - 0
src/core/syntactic.c

@@ -0,0 +1,228 @@
+#include <ctype.h>
+#include "aFun.h"
+#include "__code.h"
+#include "__parser.h"
+#include "parserl_warning_error.h"
+
+static void printSyntacticError(char *info, af_Parser *parser) {
+    if (parser->error == NULL)
+        return;
+    fprintf(parser->error, "[Syntactic-Error] %s\n", info);
+    parser->is_error = true;
+}
+
+static void printSyntacticWarning(char *info, af_Parser *parser) {
+    if (parser->error == NULL)
+        return;
+    fprintf(parser->error, "[Syntactic-Warning] %s\n", info);
+}
+
+static bool getToken(af_Parser *parser) {
+    if (parser->syntactic->back) {
+        parser->syntactic->back = false;
+        return true;
+    }
+
+    parser->syntactic->token = getTokenFromLexical(&parser->syntactic->text, parser);
+    return parser->syntactic->token != TK_ERROR;  // 非错误则返回true, 遇到错误则返回false
+}
+
+static bool goBackToken(af_Parser *parser) {
+    if (parser->syntactic->back)
+        return false;  // 已经有一个回退
+    parser->syntactic->back = true;
+    return true;
+}
+
+static af_Code *codeList(size_t deep, af_Parser *parser);
+
+static af_Code *code(size_t deep, char prefix, af_Parser *parser) {
+    af_Code *re;
+    af_Code *code_list = NULL;
+    deep++;
+
+    getToken(parser);
+    switch (parser->syntactic->token) {
+        case TK_ELEMENT_SHORT:
+        case TK_ELEMENT_LONG:
+            re = makeElementCode(parser->syntactic->text, prefix, 0, NULL);
+            free(parser->syntactic->text);
+            break;
+        case TK_LP:
+            if (deep <= SYNTACTIC_MAX_DEEP)
+                code_list = codeList(deep, parser);
+            else
+                printSyntacticError(SYNTACTIC_TOO_DEEP(), parser);
+
+            getToken(parser);
+            switch (parser->syntactic->token) {
+                case TK_RP:
+                    break;
+                case TK_ERROR:
+                    freeAllCode(code_list);
+                    return NULL;
+                default:
+                    goBackToken(parser);
+                    printSyntacticError(CodeEndError(") or !)"), parser);
+                    break;
+            }
+
+            re = makeBlockCode(parentheses, code_list, prefix, 0, NULL, NULL);
+            break;
+        case TK_LB:
+            if (deep <= SYNTACTIC_MAX_DEEP)
+                code_list = codeList(deep, parser);
+            else
+                printSyntacticError(SYNTACTIC_TOO_DEEP(), parser);
+
+            getToken(parser);
+            switch (parser->syntactic->token) {
+                case TK_RB:
+                    break;
+                case TK_ERROR:
+                    freeAllCode(code_list);
+                    return NULL;
+                default:
+                    goBackToken(parser);
+                    printSyntacticError(CodeEndError("] or @)"), parser);
+                    break;
+            }
+
+            re = makeBlockCode(brackets, code_list, prefix, 0, NULL, NULL);
+            break;
+        case TK_LC:
+            if (deep <= SYNTACTIC_MAX_DEEP)
+                code_list = codeList(deep, parser);
+            else
+                printSyntacticError(SYNTACTIC_TOO_DEEP(), parser);
+
+            getToken(parser);
+            switch (parser->syntactic->token) {
+                case TK_RC:
+                    break;
+                case TK_ERROR:
+                    freeAllCode(code_list);
+                    return NULL;
+                default:
+                    goBackToken(parser);
+                    printSyntacticError(CodeEndError("} or #)"), parser);
+                    break;
+            }
+
+            re = makeBlockCode(curly, code_list, prefix, 0, NULL, NULL);
+            break;
+        case TK_ERROR:
+            return NULL;
+        default:
+            printSyntacticError(CodeStartError(), parser);
+            return NULL;
+    }
+
+    if (re == NULL)
+        printSyntacticError(MakeCodeFail(), parser);
+    return re;
+}
+
+static af_Code *codePrefix(size_t deep, af_Parser *parser) {
+    char ch = NUL;
+    getToken(parser);
+    if (parser->syntactic->token != TK_PREFIX) {
+        goBackToken(parser);
+        printSyntacticError(PREFIX_ERROR(codePrefix), parser);
+    } else if (STR_LEN( parser->syntactic->text) != 1) {
+        printSyntacticError(PREFIX_ERROR(codePrefix), parser);
+        free(parser->syntactic->text);
+    } else {
+        ch = *(parser->syntactic->text);
+        free(parser->syntactic->text);
+    }
+
+    return code(deep, ch, parser);
+}
+
+static af_Code *codeList(size_t deep, af_Parser *parser) {
+    af_Code *re = NULL;
+    af_Code **pre = &re;
+    af_Code *code_list;
+
+    while (1) {
+        getToken(parser);
+        switch (parser->syntactic->token) {
+            case TK_PREFIX:
+                goBackToken(parser);
+                code_list = codePrefix(deep, parser);
+                if (code_list != NULL)
+                    pre = &(connectCode(pre, code_list)->next);
+                break;
+
+            case TK_ELEMENT_SHORT:
+            case TK_ELEMENT_LONG:
+            case TK_LP:
+            case TK_LB:
+            case TK_LC:
+                goBackToken(parser);
+                code_list = code(deep, NUL, parser);
+                if (code_list != NULL)
+                    pre = &(connectCode(pre, code_list)->next);
+                break;
+            case TK_ERROR:
+                freeAllCode(re);
+                return NULL;
+            default: /* 结束 */
+                goBackToken(parser);
+                return re;
+        }
+    }
+}
+
+static af_Code *codeListEnd(af_Parser *parser) {
+    af_Code *re = NULL;
+    af_Code **pre = &re;
+    af_Code *code_list;
+
+    getToken(parser);
+    switch (parser->syntactic->token) {
+        case TK_EOF:
+            break;  // 结束
+        case TK_PREFIX:
+        case TK_ELEMENT_SHORT:
+        case TK_ELEMENT_LONG:
+        case TK_LP:
+        case TK_LB:
+        case TK_LC:
+            goBackToken(parser);
+            code_list = codeList(0, parser);
+            connectCode(pre, code_list);
+
+            getToken(parser);
+            switch (parser->syntactic->token) {
+                case TK_EOF:
+                    break;  // 正常结束
+                case TK_ERROR:
+                    freeAllCode(re);
+                    return NULL;
+                default:
+                    printSyntacticError(CodeListEndError(), parser);
+                    freeAllCode(re);
+                    return NULL;
+            }
+            break;
+        case TK_ERROR:
+            return NULL;
+        default:
+            printSyntacticError(CodeListStartError(), parser);
+            return NULL;
+    }
+
+    return re;
+}
+
+af_Code *parserCode(af_Parser *parser) {
+    af_Code *code = codeListEnd(parser);
+    if (parser->is_error) {
+        freeAllCode(code);
+        return NULL;
+    }
+
+    return code;
+}

+ 2 - 16
test/CMakeLists.txt

@@ -32,23 +32,9 @@ ADD_aFunTest(regex test_regex.c)
 ADD_aFunTest(run test_run.c)
 ADD_aFunTest(run test_run.c)
 ADD_aFunTest(reader test_reader.c)
 ADD_aFunTest(reader test_reader.c)
 ADD_aFunTest(lexical test_lexcial.c)
 ADD_aFunTest(lexical test_lexcial.c)
+ADD_aFunTest(syntactic test_syntactic.c)
 
 
 SET_LINK(lib lib_Test1)  # 链接测试程序需要的动态库
 SET_LINK(lib lib_Test1)  # 链接测试程序需要的动态库
 
 
 SET_PASS(lib "num = 100 test = 110")
 SET_PASS(lib "num = 100 test = 110")
-SET_PASS(dlc "a = 100, test = 110")
-
-SET_PASS(byte_code
-"out:
-code_element: data prefix: 44
-code_element: var1 prefix: 0
-code_block: 2 0 prefix: 0
-code_element: data2 prefix: 0
-code_element: var2 prefix: 0
-in:
-code_element: data prefix: 44
-code_element: var1 prefix: 0
-code_block: 2 0 prefix: 0
-code_element: data2 prefix: 0
-code_element: var2 prefix: 0"
-        )
+SET_PASS(dlc "a = 100, test = 110")

+ 19 - 0
test/test_syntactic.c

@@ -0,0 +1,19 @@
+#include <stdio.h>
+#include "aFun.h"
+
+char *str = "10 '20.32 100var\n"
+            "|10||20.32|int->num\n"
+            "{if true 10}\n"
+            "of(HelloWorld)\n"
+            "[Hello]\n"
+            ",[Hello]\n"
+;
+
+int main() {
+    af_Parser *parser = makeParserByString(str, false, stderr);
+    af_Code *code = parserCode(parser);
+    printCode(code);
+    freeParser(parser);
+    freeAllCode(code);
+    return 0;
+}