Преглед изворни кода

feat & fix: lexical基础匹配器设计和token流操控

内存检查已经通过
能正常匹配代码为token
设置了一个用于test的函数位于main.c,并且使用printToken函数可视化token(主要是处理\n)
修复了严重bug: 位于mem.c文件, 将memStrcpy中,释放str的步骤放到了函数末尾
tokenMessage涵盖file、tokenStream等结构体,并且可以一次生成,一次释放
在main.c文件中,准备了一个测试函数testMain2、testMain3和tokenStream可视化函数

需要传入一个命令行参数, 即vm文件的位置。
SongZihuan пре 4 година
родитељ
комит
864a97127f
10 измењених фајлова са 942 додато и 8 уклоњено
  1. 5 2
      CMakeLists.txt
  2. 13 0
      include/__virtualmath.h
  3. 49 0
      include/lexical.h
  4. 14 0
      include/syntax.h
  5. 136 0
      include/token.h
  6. 115 3
      main.c
  7. 3 3
      memory/mem.c
  8. 149 0
      parser/lexical.c
  9. 307 0
      parser/syntax.c
  10. 151 0
      parser/token.c

+ 5 - 2
CMakeLists.txt

@@ -3,5 +3,8 @@ PROJECT(VirtualMath C)
 SET(CMAKE_C_STANDARD 11)
 SET(CMAKE_C_STANDARD 11)
 
 
 INCLUDE_DIRECTORIES(${PROJECT_SOURCE_DIR}/include)
 INCLUDE_DIRECTORIES(${PROJECT_SOURCE_DIR}/include)
-AUX_SOURCE_DIRECTORY(memory PASER_LIST)
-ADD_EXECUTABLE(VirtualMath main.c ${PASER_LIST})
+AUX_SOURCE_DIRECTORY(${PROJECT_SOURCE_DIR}/memory MEM_LIST)
+AUX_SOURCE_DIRECTORY(${PROJECT_SOURCE_DIR}/parser PASER_LIST)
+
+message("project dir is ${PROJECT_SOURCE_DIR}")
+ADD_EXECUTABLE(VirtualMath main.c ${PASER_LIST} ${MEM_LIST})

+ 13 - 0
include/__virtualmath.h

@@ -0,0 +1,13 @@
+#ifndef VIRTUALMATH___VIRTUALMATH_H
+#define VIRTUALMATH___VIRTUALMATH_H
+
+/* Common prelude: standard I/O, string and allocation headers plus the project's mem.h. */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include "mem.h"
+
+/* Boolean vocabulary used across the project: bool is a plain int, true/false are 1/0. */
+#define bool int
+#define true 1
+#define false 0
+
+#endif //VIRTUALMATH___VIRTUALMATH_H

+ 49 - 0
include/lexical.h

@@ -0,0 +1,49 @@
+#ifndef VIRTUALMATH_LEXICAL_H
+#define VIRTUALMATH_LEXICAL_H
+#include "__virtualmath.h"
+
+/**
+ * Character source for the lexer: a stdio stream plus a one-character push-back slot.
+ */
+typedef struct lexFile{
+    FILE *file;  // stream opened by makeLexFile and closed by freeLexFile
+    struct back{
+        bool is_back;  // set by backChar; the next readChar then returns p again instead of reading
+        char p;  // most recent character stored by readChar
+    } back;
+} lexFile;
+
+/**
+ * State of a single matcher while it is fed the characters of one candidate token.
+ */
+typedef struct lexMather{
+    int len;  // incremented each time a character is appended to str
+    int string_type;  // quote character that opened a string literal (used by stringMather)
+    char *str;  // primary matched text; NULL until the first character is appended
+    char *second_str;  // trailing suffix text (e.g. the "j" in 123.4j); NULL if none
+    enum status{
+        LEXMATHER_START=1,  // freshly reset by setupMather, nothing consumed yet
+        LEXMATHER_ING,  // matching in progress
+        LEXMATHER_INGPOINT,  // numberMather: a '.' has been consumed
+        LEXMATHER_INGSECOND,  // collecting second_str
+        LEXMATHER_PASS,  // stringMather: closing quote consumed, one more character is inspected
+        LEXMATHER_END,  // finished successfully
+        LEXMATHER_END_SECOND,  // finished successfully, considered by checkoutMather after END
+        LEXMATHER_MISTAKE,  // the input does not match this matcher
+    } status;
+} lexMather;
+
+/**
+ * Fixed-size collection of matchers; makeMathers allocates one lexMather per slot.
+ */
+typedef struct lexMathers{
+    int size;  // number of entries in mathers
+    struct lexMather **mathers;  // array of size pointers, indexed by the MATHER_* constants
+} lexMathers;
+
+// Character-level reading with one character of push-back.
+char readChar(lexFile *file);
+void backChar(lexFile *file);
+
+// lexFile lifetime; self selects whether the struct itself is released as well.
+lexFile *makeLexFile(char *dir);
+void freeLexFile(lexFile *file, bool self);
+
+// Single-matcher lifetime and reset.
+void setupMather(lexMather *mather);
+lexMather *makeMather();
+void freeMather(lexMather *mather, bool self);
+
+// Matcher-collection lifetime, reset and result inspection.
+lexMathers *makeMathers(int size);
+void freeMathers(lexMathers *mathers, bool self);
+void setupMathers(lexMathers *mathers);
+int checkoutMather(lexMathers *mathers, int max);
+#endif //VIRTUALMATH_LEXICAL_H

+ 14 - 0
include/syntax.h

@@ -0,0 +1,14 @@
+#ifndef VIRTUALMATH_SYNTAX_H
+#define VIRTUALMATH_SYNTAX_H
+#include "lexical.h"
+#include "token.h"
+
+// Each matcher consumes one character p and advances the state stored in mather.
+void numberMather(char p, lexMather *mather);
+void varMather(char p, lexMather *mather);
+void stringMather(char p, lexMather *mather);
+void strMather(char p, lexMather *mather, const char *dest_p);
+void charMather(char p, lexMather *mather, char dest_p);
+
+#define strMatherMacro(n, word) strMather(p, mathers->mathers[n], word) /* only usable inside getMatherStatus: relies on its locals p and mathers */
+#define charMatherMacro(n, word) charMather(p, mathers->mathers[n], word) /* only usable inside getMatherStatus: relies on its locals p and mathers */
+#endif //VIRTUALMATH_SYNTAX_H

+ 136 - 0
include/token.h

@@ -0,0 +1,136 @@
+#ifndef VIRTUALMATH_TOKEN_H
+#define VIRTUALMATH_TOKEN_H
+#include "__virtualmath.h"
+
+// Matcher indices. Each value is the slot of its matcher in lexMathers.mathers and is
+// also used as the token_type of tokens produced by getToken.
+
+// Literals and identifiers
+#define MATHER_NUMBER 0
+#define MATHER_STRING 1
+#define MATHER_VAR 2
+
+// End of file, newline and space
+#define MATHER_EOF 3
+#define MATHER_ENTER 4
+#define MATHER_SPACE 5
+
+// Keywords
+#define MATHER_IF 6
+#define MATHER_ELIF 7
+#define MATHER_WHILE 8
+#define MATHER_FOR 9
+#define MATHER_IN 10
+#define MATHER_TRY 11
+#define MATHER_EXCEPT 12
+#define MATHER_AS 13
+#define MATHER_WITH 14
+#define MATHER_DO 15
+#define MATHER_ELSE 16
+#define MATHER_FINALLY 17
+#define MATHER_DEFAULT 18
+#define MATHER_GLOBAL 19
+#define MATHER_NONLOCAL 20
+#define MATHER_PUBLIC 21
+#define MATHER_PROTECT 22
+#define MATHER_PRIVATE 23
+#define MATHER_TRUE 24
+#define MATHER_FALSE 25
+#define MATHER_NULL 26
+#define MATHER_DEF 27
+#define MATHER_CLASS 28
+#define MATHER_BLOCK 29
+#define MATHER_BREAK 30
+#define MATHER_CONTINUE 31
+#define MATHER_REGO 32
+#define MATHER_RESTART 33
+#define MATHER_RETURN 34
+#define MATHER_YIELD 35
+#define MATHER_IMPORT 36
+#define MATHER_INCLUDE 37
+
+// Arithmetic and comparison operators
+#define MATHER_ADD 38
+#define MATHER_SUB 39
+#define MATHER_MUL 40
+#define MATHER_DIV 41
+#define MATHER_INTDIV 42
+#define MATHER_PER 43
+#define MATHER_POW 44
+#define MATHER_LESS 45
+#define MATHER_LESSEQ 46
+#define MATHER_MORE 47
+#define MATHER_MOREEQ 48
+#define MATHER_EQ 49
+#define MATHER_NOTEQ 50
+
+// Bitwise operators
+#define MATHER_BITAND 51
+#define MATHER_BITOR 52
+#define MATHER_BITXOR 53
+#define MATHER_BITNOT 54
+#define MATHER_BITLEFT 55
+#define MATHER_BITRIGHT 56
+
+// Boolean operators
+#define MATHER_BOOLAND 57
+#define MATHER_BOOLOR 58
+#define MATHER_BOOLNOT 59
+
+// Assignment and other single-character symbols ('=', '.', '@', '$')
+#define MATHER_ASSIGNMENT 60
+#define MATHER_POINT 61
+#define MATHER_AT 62
+#define MATHER_SVAR 63
+
+// Brackets, separators and "->"
+#define MATHER_LP 64
+#define MATHER_RP 65
+#define MATHER_LB 66
+#define MATHER_RB 67
+#define MATHER_LC 68
+#define MATHER_RC 69
+#define MATHER_COMMA 70
+#define MATHER_COLON 71
+#define MATHER_SEMICOLON 72
+#define MATHER_Link 73
+
+// Number of matcher slots; passed to makeMathers and checkoutMather.
+#define MATHER_MAX 74
+
+// Forward declarations of types that this header only uses through pointers
+struct statement;
+struct lexFile;
+struct lexMathers;
+
+/**
+ * One token: its type plus either matched text or an attached statement.
+ */
+typedef struct token{
+    int token_type;  // token type: values matched by the lexer are MATHER_* indices, values below 0 are meant for the syntax parser. NOTE(review): the original comment said "greater than 0", but MATHER_NUMBER is 0
+    struct data{
+        char *str;  // matched text
+        char *second_str;  // suffix of forms such as 123.4j, where second_str holds "j"
+        struct statement *st;  // statement payload set by makeStatementToken
+    } data;
+} token;
+
+/**
+ * Two token arrays: the tokens already consumed and the tokens pushed back by backToken.
+ */
+typedef struct tokenStream{
+    token **token_list;  // tokens currently in the stream
+    token **token_ahead;  // tokens moved out of token_list by backToken, re-taken by forwardToken
+    int size;  // number of entries in token_list
+    int ahead;  // number of entries in token_ahead
+} tokenStream;
+
+/**
+ * Bundle of everything needed to pull tokens from one file; built by makeTokenMessage
+ * and released by freeTokenMessage.
+ */
+typedef struct tokenMessage{
+    tokenStream *ts;  // token storage
+    struct lexFile *file;  // character source
+    struct lexMathers *mathers;  // matcher set handed to getToken
+} tokenMessage;
+
+// Token construction and release.
+token *makeToken();
+token *makeLexToken(int type, char *str, char *second_str);
+token *makeStatementToken(int type, struct statement *st);
+void freeToken(token *tk, bool self);
+
+// Lexer entry point, implemented in the parser sources.
+extern token *getToken(struct lexFile *file, struct lexMathers *mathers);
+
+// Re-declared here so token.c can build a tokenMessage without including lexical.h.
+extern struct lexFile *makeLexFile(char *dir);
+extern void freeLexFile(struct lexFile *file, bool self);
+
+extern struct lexMathers *makeMathers(int size);
+extern void freeMathers(struct lexMathers *mathers, bool self);
+
+// Token-stream operations.
+int safeGetToken(tokenMessage *tm);
+token *forwardToken(tokenStream *ts);
+token *backToken(tokenStream *ts);
+void addToken(tokenStream *ts, token *new_tk);
+token *popToken(tokenStream *ts);
+
+// tokenMessage lifetime.
+tokenMessage *makeTokenMessage(char *file_dir);
+void freeTokenMessage(tokenMessage *tm, bool self);
+#endif //VIRTUALMATH_TOKEN_H

+ 115 - 3
main.c

@@ -1,6 +1,118 @@
-#include <stdio.h>
+#include "__virtualmath.h"
+#include "lexical.h"
+#include "token.h"
 
 
-int main() {
-    printf("Hello, World!\n");
+/* Test helper: prints a caption, then the token_list and token_ahead arrays of tm->ts.
+ * Arguments are parenthesized so that any expression can be passed safely. */
+#define testMain3macro(tm, message) do{ \
+printf("message: %s\n", (message)); \
+printf("token stream: \n"); \
+printTokenStream((tm)->ts->token_list, (tm)->ts->size); \
+printf("token ahead: \n"); \
+printTokenStream((tm)->ts->token_ahead, (tm)->ts->ahead); \
+printf("end\n"); \
+}while(0)
+
+// Test drivers; each expects exactly one command-line argument, the path of the file to lex.
+int testMain(int argc, char *argv[]);
+int testMain2(int argc, char *argv[]);
+int testMain3(int argc, char *argv[]);
+// Token visualisation helpers.
+void printToken(token *tk);
+void printTokenStream(token **tk, int max);
+
+/**
+ * Program entry point: runs the token-stream test driver and uses its return
+ * value as the process exit status.
+ */
+int main(int argc, char *argv[]) {
+    return testMain3(argc, argv);
+}
+
+/**
+ * Manual test of the token-stream operations: reads three tokens, moves two back,
+ * moves one forward, pops and re-adds one, then reads one more, printing both
+ * token arrays after every step.
+ * @param argc must be 2
+ * @param argv argv[1] is the path of the file to lex
+ * @return 0 on success, 1 when the argument count is wrong
+ */
+int testMain3(int argc, char *argv[]) {
+    if (argc != 2) {
+        printf("Too many or little argc\n");
+        return 1;  // argv[1] is not a usable path here; do not pass it on
+    }
+    tokenMessage *tm = makeTokenMessage(argv[1]);
+    safeGetToken(tm);
+    safeGetToken(tm);
+    safeGetToken(tm);
+    testMain3macro(tm, "3 times safeGetToken test");
+    backToken(tm->ts);
+    backToken(tm->ts);
+    testMain3macro(tm, "2 times backToken test");
+
+    forwardToken(tm->ts);
+    testMain3macro(tm, "1 times forwardToken test");
+
+    token *tmp = popToken(tm->ts);
+    printToken(tmp);
+    testMain3macro(tm, "1 times popToken test");
+
+    addToken(tm->ts, tmp);
+    testMain3macro(tm, "1 times addToken test");
+
+    safeGetToken(tm);
+    testMain3macro(tm, "1 times safeGetToken test");
+
+    freeTokenMessage(tm, true);
     return 0;
 }
+
+/**
+ * Test driver: pulls tokens through safeGetToken until an EOF-typed token is
+ * returned, then prints the whole token list.
+ * @param argc must be 2
+ * @param argv argv[1] is the path of the file to lex
+ * @return 0 on success, 1 when the argument count is wrong
+ */
+int testMain2(int argc, char *argv[]) {
+    if (argc != 2) {
+        printf("Too many or little argc\n");
+        return 1;  // argv[1] is not a usable path here; do not pass it on
+    }
+    tokenMessage *tm = makeTokenMessage(argv[1]);
+    while (safeGetToken(tm) != MATHER_EOF){
+        // every token is stored in tm->ts by safeGetToken; nothing else to do per token
+    }
+    printTokenStream(tm->ts->token_list, tm->ts->size);
+    freeTokenMessage(tm, true);
+    return 0;
+}
+/**
+ * Test driver working directly on the lexer: prints every token returned by
+ * getToken until an EOF-typed token is seen.
+ * Requires one command-line argument naming the vm file to parse.
+ * @param argc must be 2
+ * @param argv argv[1] is the path of the file to lex
+ * @return 0 on success, 1 when the argument count is wrong
+ */
+int testMain(int argc, char *argv[]){
+    if (argc != 2){
+        printf("Too many or little argc\n");
+        return 1;  // argv[1] is not a usable path here; do not pass it on
+    }
+
+    lexFile *file = makeLexFile(argv[1]);
+    lexMathers *mathers = makeMathers(MATHER_MAX);
+    while (true){
+       token *tmp = getToken(file, mathers);
+       bool at_end = (tmp->token_type == MATHER_EOF);
+       if (!at_end){
+           printToken(tmp);
+       }
+       freeToken(tmp, true);  // each token is owned by this loop
+       if (at_end){
+           break;
+       }
+    }
+    freeMathers(mathers, true);
+    freeLexFile(file, true);
+    return 0;
+}
+
+/**
+ * Prints one token as "<token str = ('STR','SECOND'), type = N>".
+ * A string consisting of a single newline is shown as \n, a string starting with
+ * the stored EOF character is shown as (EOF), and a NULL string is shown as empty.
+ * @param tk token to print
+ */
+void printToken(token *tk){
+    const char *tmp = tk->data.str, *second_tmp = tk->data.second_str;
+    // Tokens built by makeStatementToken carry no strings; never hand NULL to strcmp/printf.
+    if (tmp == NULL){
+        tmp = "";
+    }
+    if (second_tmp == NULL){
+        second_tmp = "";
+    }
+    if (!strcmp(tmp, "\n")){
+        tmp = "\\n";
+    }
+    if (!strcmp(second_tmp, "\n")){
+        second_tmp = "\\n";
+    }
+    // readChar stores (char)EOF, so compare against the same narrowed value;
+    // a plain comparison with EOF can never be true where char is unsigned.
+    if (tmp[0] == (char)EOF){
+        tmp = "(EOF)";
+    }
+    printf("<token str = ('%s','%s'), type = %d>\n", tmp, second_tmp, tk->token_type);
+}
+
+/**
+ * Prints the first max tokens of the array tk, one per line, via printToken.
+ * @param tk array of token pointers
+ * @param max number of entries to print
+ */
+void printTokenStream(token **tk, int max){
+    int index = 0;
+    while (index < max){
+        printToken(tk[index]);
+        index++;
+    }
+}

+ 3 - 3
memory/mem.c

@@ -47,9 +47,6 @@ char *memStrcpy(size_t nsize, int free_old, char *str, int write, ...) {  // 复
     if (str != NULL){
     if (str != NULL){
         strcpy(tmp, str);
         strcpy(tmp, str);
         tmp[memStrlen(str)] = (char)0;  // 去除多余的\0
         tmp[memStrlen(str)] = (char)0;  // 去除多余的\0
-        if (free_old){
-            memFree(str);
-        }
     }
     }
     if (write){
     if (write){
         va_list argp;
         va_list argp;
@@ -59,5 +56,8 @@ char *memStrcpy(size_t nsize, int free_old, char *str, int write, ...) {  // 复
         }
         }
         va_end(argp);
         va_end(argp);
     }
     }
+    if (free_old){
+        memFree(str);
+    }
     return tmp;
     return tmp;
 }
 }

+ 149 - 0
parser/lexical.c

@@ -0,0 +1,149 @@
+#include "lexical.h"
+
+/**
+ * Reads one character, honouring a pending push-back.
+ * Every character read from the stream is remembered in back.p, so backChar
+ * does not need to be told which character to return.
+ * A lexFile whose stream is NULL (the open failed) behaves like an empty file.
+ * @param file character source
+ * @return the character; end of file is reported as (char)EOF
+ */
+char readChar(lexFile *file){
+    if (file->back.is_back){
+        // Replay the remembered character instead of touching the stream.
+        file->back.is_back = false;
+        return file->back.p;
+    }
+    if (file->file == NULL)
+        file->back.p = (char)EOF;  // fgetc must not be called on a NULL stream
+    else
+        file->back.p = (char)fgetc(file->file);
+    return file->back.p;
+}
+
+/**
+ * Pushes back one character: the next readChar returns the character stored
+ * in back.p again instead of reading from the stream.
+ * @param file character source
+ */
+void backChar(lexFile *file){
+    file->back.is_back = true;
+}
+
+/**
+ * Allocates a lexFile and opens dir for reading.
+ * @param dir path of the file to read
+ * @return the new lexFile; its file member is NULL (and a message is written to
+ *         stderr) when dir is NULL or cannot be opened, so callers should check
+ *         it before using the stream
+ */
+lexFile *makeLexFile(char *dir){
+    lexFile *tmp = memCalloc(1, sizeof(lexFile));
+    tmp->file = NULL;
+    if (dir != NULL){
+        tmp->file = fopen(dir, "r");
+    }
+    if (tmp->file == NULL){
+        // Report the failure instead of leaving it to surface later as a crash.
+        fprintf(stderr, "makeLexFile: cannot open '%s'\n", dir != NULL ? dir : "(null)");
+    }
+    tmp->back.is_back = false;
+    tmp->back.p = EOF;
+    return tmp;
+}
+
+/**
+ * Closes the stream of a lexFile and optionally releases the struct.
+ * A NULL file argument or a stream that was never opened is tolerated.
+ * @param file lexFile to release
+ * @param self when true, the lexFile struct itself is freed as well
+ */
+void freeLexFile(lexFile *file, bool self){
+    if (file == NULL){
+        return;
+    }
+    if (file->file != NULL){  // fclose(NULL) is undefined behaviour
+        fclose(file->file);
+        file->file = NULL;
+    }
+    if (self){
+        memFree(file);
+    }
+}
+
+/**
+ * Puts a matcher into its initial state (shared by makeMather and setupMathers).
+ * Pointers are overwritten, not freed; the caller releases old strings first.
+ * @param mather matcher to reset
+ */
+void setupMather(lexMather *mather){
+    mather->status = LEXMATHER_START;
+    mather->string_type = '"';
+    mather->second_str = NULL;
+    mather->str = NULL;
+    mather->len = 0;
+}
+
+/**
+ * Allocates one matcher and puts it into its initial state.
+ * @return the new matcher
+ */
+lexMather *makeMather(){
+    lexMather *fresh = memCalloc(1, sizeof(*fresh));
+    setupMather(fresh);
+    return fresh;
+}
+
+/**
+ * Releases the strings held by a matcher and optionally the matcher itself.
+ * @param mather matcher to release
+ * @param self when true, the lexMather struct itself is freed as well
+ */
+void freeMather(lexMather *mather, bool self){
+    memFree(mather->str);
+    memFree(mather->second_str);
+    // When the struct survives (self == false) it must not keep dangling pointers.
+    mather->str = NULL;
+    mather->second_str = NULL;
+    mather->len = 0;
+    if (self){
+        memFree(mather);
+    }
+}
+
+/**
+ * Allocates a matcher collection with size freshly initialised matchers.
+ * @param size number of matcher slots
+ * @return the new collection
+ */
+lexMathers *makeMathers(int size){
+    lexMathers *group = memCalloc(1, sizeof(*group));
+    group->size = size;
+    group->mathers = memCalloc(size, sizeof(*group->mathers));
+    int slot = 0;
+    while (slot < size){
+        group->mathers[slot] = makeMather();
+        slot++;
+    }
+    return group;
+}
+
+/**
+ * Releases every matcher of a collection, the slot array, and optionally the
+ * collection struct.
+ * @param mathers collection to release
+ * @param self when true, the lexMathers struct itself is freed as well
+ */
+void freeMathers(lexMathers *mathers, bool self){
+    for(int i=0;i < mathers->size; i++){
+        freeMather(mathers->mathers[i], true);
+    }
+    memFree(mathers->mathers);
+    mathers->mathers = NULL;  // keep a surviving struct (self == false) free of dangling pointers
+    mathers->size = 0;
+    if (self){
+        memFree(mathers);
+    }
+}
+
+/**
+ * Resets every matcher of the collection to its initial state, releasing the
+ * strings collected during the previous match.
+ * @param mathers collection to reset
+ */
+void setupMathers(lexMathers *mathers){
+    for (int i=0;i < mathers->size;i++){
+        // Free both strings unconditionally: second_str can be set while str is
+        // still NULL (an empty string literal followed by a suffix), and tying its
+        // release to str would leak it.
+        memFree(mathers->mathers[i]->str);
+        memFree(mathers->mathers[i]->second_str);
+        setupMather(mathers->mathers[i]);
+    }
+}
+
+/**
+ * Inspects the status of every matcher and decides whether a token is complete.
+ * - Any matcher still in START or ING: not finished, returns -1.
+ * - The number of MISTAKE matchers equals max: nothing matched, returns -2.
+ * - Exactly one matcher in END: returns its index.
+ * - Otherwise exactly one matcher in END_SECOND: returns its index.
+ * - Anything else: not finished, returns -1.
+ * @param mathers collection to inspect
+ * @param max number of MISTAKE matchers that means total failure
+ * @return index of the winning matcher, -1 (keep reading) or -2 (no match)
+ */
+int checkoutMather(lexMathers *mathers, int max) {
+    int failed = 0;
+    int ended = 0, ended_at = -1;
+    int ended_second = 0, ended_second_at = -1;
+    for (int slot = 0; slot < mathers->size; slot++){
+        switch (mathers->mathers[slot]->status){
+            case LEXMATHER_END:
+                ended += 1;
+                ended_at = slot;
+                break;
+            case LEXMATHER_END_SECOND:
+                ended_second += 1;
+                ended_second_at = slot;
+                break;
+            case LEXMATHER_MISTAKE:
+                failed += 1;
+                break;
+            case LEXMATHER_ING:
+            case LEXMATHER_START:
+                return -1;
+            default:
+                // remaining in-progress states are not counted
+                break;
+        }
+    }
+    if (failed == max){
+        return -2;
+    }
+    if (ended == 1){
+        return ended_at;
+    }
+    if (ended_second == 1){
+        return ended_second_at;
+    }
+    return -1;
+}

+ 307 - 0
parser/syntax.c

@@ -0,0 +1,307 @@
+#include "syntax.h"
+
+/**
+ * Matches a numeric literal with an optional trailing suffix.
+ * Matcher rules:
+ * START: a digit is appended to str and the matcher enters ING; anything else is a MISTAKE.
+ * ING: digits are appended; a '.' is appended and the matcher enters INGPOINT.
+ * INGPOINT: digits are appended; a further '.' is not accepted.
+ * In ING/INGPOINT a letter or underscore starts second_str and enters INGSECOND;
+ * any other character ends the match (END).
+ * INGSECOND: letters, underscores and digits are appended to second_str; anything else ends the match (END).
+ * END: the match is over; checkoutMather decides whether this matcher is used, after
+ * which the token is produced and the reader backs up one character.
+ * MISTAKE: no match.
+ * Example: 12.3jk_2, where 12.3 is stored in str and jk_2 in second_str.
+ * @param p next input character
+ * @param mather matcher state to advance
+ */
+void numberMather(char p, lexMather *mather){
+    if (mather->status == LEXMATHER_START || mather->status == LEXMATHER_ING || mather->status == LEXMATHER_INGPOINT){
+        // a digit is always accepted; a '.' only while still in ING (before any decimal point)
+        if ('0'<= p && '9' >= p || '.' == p && mather->status == LEXMATHER_ING){
+            mather->str = memStrcpy(1, true, mather->str, true, p);
+            mather->len += 1;
+            if ('.' == p)
+                mather->status = LEXMATHER_INGPOINT;
+            else if (mather->status == LEXMATHER_START)
+                mather->status = LEXMATHER_ING;
+        }
+        else if(mather->status == LEXMATHER_ING || mather->status == LEXMATHER_INGPOINT){
+            // the numeric part is over: either a suffix starts or the literal ends
+            if ('A'<= p && 'Z' >= p ||'a'<= p && 'z' >= p ||'_' == p){
+                mather->second_str = memStrcpy(1, true, mather->second_str, true, p);
+                mather->status = LEXMATHER_INGSECOND;
+            }
+            else{
+                mather->status = LEXMATHER_END;
+            }
+        }
+        else{
+            // still in START and the first character is not a digit
+            mather->status = LEXMATHER_MISTAKE;
+        }
+    }
+    else if (mather->status == LEXMATHER_INGSECOND){
+        if ('A'<= p && 'Z' >= p ||'a'<= p && 'z' >= p ||'_' == p ||
+            '0'<= p && '9' >= p){
+            mather->second_str = memStrcpy(1, true, mather->second_str, true, p);
+        }
+        else{
+            mather->status = LEXMATHER_END;
+        }
+    }
+    else{
+        mather->status = LEXMATHER_MISTAKE;
+    }
+}
+
+/**
+ * Matches a variable name: a letter or underscore followed by letters,
+ * underscores and digits.
+ * The matcher finishes in END_SECOND, which checkoutMather considers only after
+ * END. This resolves the conflict with keywords: "if" also satisfies varMather,
+ * but it is not a variable and has its own matcher (strMather).
+ * Examples: a, a_123
+ * @param p next input character
+ * @param mather matcher state to advance
+ */
+void varMather(char p, lexMather *mather){
+    const bool starting = (mather->status == LEXMATHER_START);
+    const bool running = (mather->status == LEXMATHER_ING);
+    if (!starting && !running){
+        mather->status = LEXMATHER_MISTAKE;
+        return;
+    }
+
+    const bool name_start = ('A' <= p && p <= 'Z') || ('a' <= p && p <= 'z') || (p == '_');
+    const bool digit = ('0' <= p && p <= '9');
+    if (name_start || (digit && running)){  // digits are not allowed as the first character
+        mather->str = memStrcpy(1, true, mather->str, true, p);
+        mather->len ++;
+        mather->status = LEXMATHER_ING;
+        return;
+    }
+    mather->status = running ? LEXMATHER_END_SECOND : LEXMATHER_MISTAKE;
+}
+
+/**
+ * Matches a string literal with an optional trailing suffix.
+ * string_type records the quote character (' or ") that must close the string.
+ * LEXMATHER_PASS exists so that one more character is read after the closing
+ * quote; every matcher then backs up by one character in the same way.
+ * A string that is still open when the end of file is reached is a MISTAKE.
+ * Examples: '134', "123"
+ * @param p next input character
+ * @param mather matcher state to advance
+ */
+void stringMather(char p, lexMather *mather){
+    if (mather->status == LEXMATHER_START){
+        if ('\"' == p || '\'' == p){
+            mather->status = LEXMATHER_ING;
+            mather->string_type = p;
+        }
+        else{
+            mather->status = LEXMATHER_MISTAKE;
+        }
+    }
+    else if (mather->status == LEXMATHER_ING){
+        if (mather->string_type == p){
+            mather->status = LEXMATHER_PASS;
+        }
+        else if (p == (char)EOF){
+            // Unterminated literal: readChar keeps returning (char)EOF, so without
+            // this check the matcher would append it forever and never finish.
+            mather->status = LEXMATHER_MISTAKE;
+        }
+        else{
+            mather->str = memStrcpy(1, true, mather->str, true, p);
+            mather->len ++;
+            mather->status = LEXMATHER_ING;
+        }
+    }
+    else if (mather->status == LEXMATHER_INGSECOND){
+        if (('A'<= p && 'Z' >= p) || ('a'<= p && 'z' >= p) || '_' == p ||
+            ('0'<= p && '9' >= p)){
+            mather->second_str = memStrcpy(1, true, mather->second_str, true, p);
+        }
+        else{
+            mather->status = LEXMATHER_END;
+        }
+    }
+    else if(mather->status == LEXMATHER_PASS){
+        // First character after the closing quote: a letter or underscore starts a suffix.
+        if (('A'<= p && 'Z' >= p) || ('a'<= p && 'z' >= p) || '_' == p){
+            mather->second_str = memStrcpy(1, true, mather->second_str, true, p);
+            mather->status = LEXMATHER_INGSECOND;
+        }
+        else{
+            mather->status = LEXMATHER_END;
+        }
+    }
+    else{
+        mather->status = LEXMATHER_MISTAKE;
+    }
+}
+
+/**
+ * Matches the fixed word dest_p (a keyword or multi-character operator).
+ * Characters equal to the next expected character of dest_p are appended to
+ * str; once the whole word has been consumed, the first non-matching character
+ * ends the match (END). Any other mismatch is a MISTAKE.
+ * @param p next input character
+ * @param mather matcher state to advance
+ * @param dest_p NUL-terminated word to match
+ */
+void strMather(char p, lexMather *mather, const char *dest_p){
+    if (mather->status == LEXMATHER_START || mather->status == LEXMATHER_ING){
+        // A NUL input byte must never match the terminator of dest_p, otherwise
+        // len would run past the end of the word.
+        if (p != '\0' && p == dest_p[mather->len]){
+            mather->str = memStrcpy(1, true, mather->str, true, p);
+            mather->len ++;
+            mather->status = LEXMATHER_ING;
+        }
+        else if(mather->status == LEXMATHER_ING && mather->len == memStrlen((char *)dest_p)){
+            mather->status = LEXMATHER_END;
+        }
+        else{
+            mather->status = LEXMATHER_MISTAKE;
+        }
+    }
+    else{
+        mather->status = LEXMATHER_MISTAKE;
+    }
+}
+
+/**
+ * Matches the single character dest_p.
+ * START: p equal to dest_p is appended to str and the matcher enters ING,
+ * anything else is a MISTAKE. ING: the following character ends the match (END).
+ * Every other status becomes MISTAKE.
+ * @param p next input character
+ * @param mather matcher state to advance
+ * @param dest_p character to match
+ */
+void charMather(char p, lexMather *mather, char dest_p){
+    switch (mather->status){
+        case LEXMATHER_START:
+            if ((int)p == (int)dest_p){
+                mather->str = memStrcpy(1, true, mather->str, true, p);
+                mather->len ++;
+                mather->status = LEXMATHER_ING;
+            }
+            else{
+                mather->status = LEXMATHER_MISTAKE;
+            }
+            break;
+        case LEXMATHER_ING:
+            mather->status = LEXMATHER_END;
+            break;
+        default:
+            mather->status = LEXMATHER_MISTAKE;
+            break;
+    }
+}
+
+/**
+ * Tells whether character p occurs in the string list.
+ * @param p character to look for
+ * @param list string to search; its length is taken with memStrlen
+ * @return true if p was found, false otherwise
+ */
+bool isIn(char p, char *list){
+    int remaining = memStrlen(list);
+    for (char *cursor = list; remaining > 0; cursor++, remaining--){
+        if (*cursor == p){
+            return true;
+        }
+    }
+    return false;
+}
+
+/**
+ * Runs one match: resets all matchers, then feeds every character read from
+ * file to every matcher until checkoutMather returns something other than -1.
+ * The last character read is pushed back with backChar before returning.
+ * @param file character source
+ * @param mathers matcher collection, indexed by the MATHER_* constants
+ * @return the value returned by checkoutMather: the index of the successful
+ *         matcher, or -2 when nothing matched
+ */
+int getMatherStatus(lexFile *file, lexMathers *mathers){
+    setupMathers(mathers);
+    int status = -1;
+    while (status == -1){
+        char p = readChar(file);
+        // literals and identifiers
+        numberMather(p ,mathers->mathers[MATHER_NUMBER]);
+        stringMather(p ,mathers->mathers[MATHER_STRING]);
+        varMather(p ,mathers->mathers[MATHER_VAR]);
+        // end of file, newline, space
+        charMatherMacro(MATHER_EOF, EOF);
+        charMatherMacro(MATHER_ENTER, '\n');
+        charMatherMacro(MATHER_SPACE, ' ');
+
+        // keywords
+        strMatherMacro(MATHER_IF, "if");
+        strMatherMacro(MATHER_ELIF, "elif");
+        strMatherMacro(MATHER_WHILE, "while");
+        strMatherMacro(MATHER_FOR, "for");
+        strMatherMacro(MATHER_IN, "in");
+        strMatherMacro(MATHER_TRY, "try");
+        strMatherMacro(MATHER_EXCEPT, "except");
+        strMatherMacro(MATHER_AS, "as");
+        strMatherMacro(MATHER_WITH, "with");
+        strMatherMacro(MATHER_DO, "do");
+        strMatherMacro(MATHER_ELSE, "else");
+        strMatherMacro(MATHER_FINALLY, "finally");
+        strMatherMacro(MATHER_DEFAULT, "default");
+        strMatherMacro(MATHER_GLOBAL, "global");
+        strMatherMacro(MATHER_NONLOCAL, "nonlocal");
+
+        strMatherMacro(MATHER_PUBLIC, "public");
+        strMatherMacro(MATHER_PROTECT, "protect");
+        strMatherMacro(MATHER_PRIVATE, "private");
+
+        strMatherMacro(MATHER_TRUE, "true");
+        strMatherMacro(MATHER_FALSE, "false");
+        strMatherMacro(MATHER_NULL, "null");
+
+        strMatherMacro(MATHER_DEF, "def");
+        strMatherMacro(MATHER_CLASS, "class");
+        strMatherMacro(MATHER_BLOCK, "block");
+        strMatherMacro(MATHER_BREAK, "break");
+        strMatherMacro(MATHER_CONTINUE, "continue");
+        strMatherMacro(MATHER_REGO, "rego");
+        strMatherMacro(MATHER_RESTART, "restart");
+        strMatherMacro(MATHER_RETURN, "return");
+        strMatherMacro(MATHER_YIELD, "yield");
+        strMatherMacro(MATHER_IMPORT, "import");
+        strMatherMacro(MATHER_INCLUDE, "include");
+
+        // arithmetic operators
+        charMatherMacro(MATHER_ADD, '+');
+        charMatherMacro(MATHER_SUB, '-');
+        charMatherMacro(MATHER_MUL, '*');
+        charMatherMacro(MATHER_DIV, '/');
+        strMatherMacro(MATHER_INTDIV, "//");
+        charMatherMacro(MATHER_PER, '%');
+        strMatherMacro(MATHER_POW, "**");
+
+        // comparison operators
+        strMatherMacro(MATHER_EQ, "==");
+        strMatherMacro(MATHER_MOREEQ, ">=");
+        strMatherMacro(MATHER_LESSEQ, "<=");
+        charMatherMacro(MATHER_MORE, '>');
+        charMatherMacro(MATHER_LESS, '<');
+        strMatherMacro(MATHER_NOTEQ, "!=");
+
+        // bitwise operators
+        charMatherMacro(MATHER_BITAND, '&');
+        charMatherMacro(MATHER_BITOR, '|');
+        charMatherMacro(MATHER_BITXOR, '^');
+        charMatherMacro(MATHER_BITNOT, '~');
+        strMatherMacro(MATHER_BITLEFT, "<<");
+        strMatherMacro(MATHER_BITRIGHT, ">>");
+
+        // boolean operators
+        strMatherMacro(MATHER_BOOLAND, "&&");
+        strMatherMacro(MATHER_BOOLOR, "||");
+        charMatherMacro(MATHER_BOOLNOT, '!');
+
+        // assignment and other single-character symbols
+        charMatherMacro(MATHER_ASSIGNMENT, '=');
+        charMatherMacro(MATHER_POINT, '.');
+        charMatherMacro(MATHER_AT, '@');
+        charMatherMacro(MATHER_SVAR, '$');
+
+        // brackets
+        charMatherMacro(MATHER_LP, '(');
+        charMatherMacro(MATHER_RP, ')');
+        charMatherMacro(MATHER_LB, '[');
+        charMatherMacro(MATHER_RB, ']');
+        charMatherMacro(MATHER_LC, '{');
+        charMatherMacro(MATHER_RC, '}');
+
+        // separators
+        charMatherMacro(MATHER_COMMA, ',');
+        charMatherMacro(MATHER_COLON, ':');
+        charMatherMacro(MATHER_SEMICOLON, ';');
+
+        strMatherMacro(MATHER_Link, "->");
+
+        status = checkoutMather(mathers, MATHER_MAX);
+    }
+    // the character that ended the match belongs to the next token
+    backChar(file);
+    return status;
+}
+
+/**
+ * Higher-level wrapper around getMatherStatus: space matches are skipped by
+ * matching again, and a failed match (-2) is reported and turned into an
+ * EOF-typed token.
+ * @param file character source
+ * @param mathers matcher collection
+ * @return a new token built from the text of the winning matcher
+ */
+token *getToken(lexFile *file, lexMathers *mathers){
+    int status;
+    do {
+        status = getMatherStatus(file, mathers);
+    } while (status == MATHER_SPACE);
+
+    if (status == -2){
+        status = MATHER_EOF;
+        printf("lexical ERROR\n");
+    }
+    lexMather *winner = mathers->mathers[status];
+    return makeLexToken(status, winner->str, winner->second_str);
+}

+ 151 - 0
parser/token.c

@@ -0,0 +1,151 @@
+#include "token.h"
+
+/**
+ * Allocates an empty token: type 0 and no strings or statement attached.
+ * @return the new token
+ */
+token *makeToken(){
+    token *fresh = memCalloc(1, sizeof(*fresh));
+    fresh->data.second_str = NULL;
+    fresh->data.st = NULL;
+    fresh->data.str = NULL;
+    fresh->token_type = 0;
+    return fresh;
+}
+
+/**
+ * Builds a lexer token. Both strings are passed through memStrcpy with
+ * free_old == false, so the arguments themselves are not freed.
+ * @param type token type
+ * @param str primary text
+ * @param second_str suffix text
+ * @return the new token
+ */
+token *makeLexToken(int type, char *str, char *second_str) {
+    token *lexed = makeToken();
+    lexed->token_type = type;
+    char *text = memStrcpy(0, false, str, false);
+    lexed->data.str = text;
+    char *suffix = memStrcpy(0, false, second_str, false);
+    lexed->data.second_str = suffix;
+    return lexed;
+}
+
+/**
+ * Builds a token that carries a statement; the pointer is stored as given.
+ * @param type token type
+ * @param st statement to attach
+ * @return the new token
+ */
+token *makeStatementToken(int type, struct statement *st){
+    token *wrapped = makeToken();
+    wrapped->data.st = st;
+    wrapped->token_type = type;
+    return wrapped;
+}
+
+/**
+ * Releases the strings of a token and optionally the token itself.
+ * The attached statement (data.st) is not released here.
+ * @param tk token to release
+ * @param self when true, the token struct itself is freed as well
+ */
+void freeToken(token *tk, bool self){
+    memFree(tk->data.str);
+    memFree(tk->data.second_str);
+    // A token that survives (self == false) must not keep dangling pointers.
+    tk->data.str = NULL;
+    tk->data.second_str = NULL;
+    if (self){
+        memFree(tk);
+    }
+}
+
+/**
+ * Allocates an empty token stream: both arrays are NULL and both counts are 0.
+ * @return the new stream
+ */
+tokenStream *makeTokenStream(){
+    tokenStream *stream = memCalloc(1, sizeof(*stream));
+    stream->token_list = NULL;
+    stream->token_ahead = NULL;
+    stream->size = 0;
+    stream->ahead = 0;
+    return stream;
+}
+
+/**
+ * Releases every token held in both arrays of a stream, the arrays themselves,
+ * and optionally the stream struct.
+ * @param ts stream to release
+ * @param self when true, the tokenStream struct itself is freed as well
+ */
+void freeToekStream(tokenStream *ts, bool self){
+    int index = 0;
+    while (index < ts->size){
+        freeToken(ts->token_list[index], true);
+        index++;
+    }
+    index = 0;
+    while (index < ts->ahead){
+        freeToken(ts->token_ahead[index], true);
+        index++;
+    }
+    memFree(ts->token_list);
+    memFree(ts->token_ahead);
+    if (self){
+        memFree(ts);
+    }
+}
+
+/**
+ * Creates everything needed to read tokens from one file in a single call:
+ * the lexFile, a matcher collection of MATHER_MAX slots and an empty token stream.
+ * @param file_dir path of the file to lex
+ * @return the new tokenMessage
+ */
+tokenMessage *makeTokenMessage(char *file_dir){
+    tokenMessage *bundle = memCalloc(1, sizeof(*bundle));
+    bundle->file = makeLexFile(file_dir);
+    bundle->mathers = makeMathers(MATHER_MAX);
+    bundle->ts = makeTokenStream();
+    return bundle;
+}
+
+/**
+ * Releases the file, token stream and matchers of a tokenMessage, and
+ * optionally the tokenMessage struct.
+ * @param tm tokenMessage to release
+ * @param self when true, the tokenMessage struct itself is freed as well
+ */
+void freeTokenMessage(tokenMessage *tm, bool self){
+    freeLexFile(tm->file, true);
+    freeToekStream(tm->ts, true);
+    freeMathers(tm->mathers, true);
+    if (self){
+        // tm comes from memCalloc, so release it through the matching memFree.
+        memFree(tm);
+    }
+}
+
+/**
+ * Appends new_tk to the end of token_list by building a list one entry longer
+ * and releasing the old one.
+ * @param ts stream to extend
+ * @param new_tk token to append; the stream stores the pointer as given
+ */
+void addToken(tokenStream *ts, token *new_tk){
+    const int old_size = ts->size;
+    token **grown = memCalloc(old_size + 1, sizeof(token *));
+    int index = 0;
+    while (index < old_size){
+        grown[index] = ts->token_list[index];
+        index++;
+    }
+    grown[old_size] = new_tk;
+    memFree(ts->token_list);
+    ts->token_list = grown;
+    ts->size = old_size + 1;
+}
+
+/**
+ * Removes the last token of token_list and hands it to the caller.
+ * @param ts stream to shrink
+ * @return the removed token, or NULL when token_list is empty
+ */
+token *popToken(tokenStream *ts){
+    if (ts->size <= 0){
+        return NULL;  // nothing to pop; avoids a negative allocation size and index -1
+    }
+    int remain = ts->size - 1;
+    token *tmp = ts->token_list[remain];
+    token **new_list = NULL;  // an emptied list is NULL, exactly as makeTokenStream leaves it
+    if (remain > 0){
+        new_list = memCalloc(remain, sizeof(token *));
+        for (int i=0; i < remain; i++){
+            new_list[i] = ts->token_list[i];
+        }
+    }
+    memFree(ts->token_list);
+    ts->token_list = new_list;
+    ts->size = remain;
+    return tmp;
+}
+
+/**
+ * Moves the last token of token_list to the end of token_ahead.
+ * @param ts stream to modify
+ * @return the moved token, or NULL when token_list is empty
+ */
+token *backToken(tokenStream *ts){
+    if (ts->size <= 0){
+        return NULL;  // nothing to move; avoids a negative allocation size and index -1
+    }
+    int remain = ts->size - 1;
+    token *moved = ts->token_list[remain];
+    token **new_list = NULL;  // an emptied list is NULL, exactly as makeTokenStream leaves it
+    if (remain > 0){
+        new_list = memCalloc(remain, sizeof(token *));
+        for (int i=0; i < remain; i++){
+            new_list[i] = ts->token_list[i];
+        }
+    }
+    token **new_ahead = memCalloc(ts->ahead + 1, sizeof(token *));
+    for (int i=0; i < ts->ahead; i++){
+        new_ahead[i] = ts->token_ahead[i];
+    }
+    new_ahead[ts->ahead] = moved;
+    memFree(ts->token_list);
+    memFree(ts->token_ahead);
+    ts->token_ahead = new_ahead;
+    ts->token_list = new_list;
+    ts->size = remain;
+    ts->ahead ++;
+    return moved;
+}
+
+/**
+ * Moves the last token of token_ahead back to the end of token_list.
+ * @param ts stream to modify
+ * @return the moved token, or NULL when token_ahead is empty
+ */
+token *forwardToken(tokenStream *ts){
+    if (ts->ahead <= 0){
+        return NULL;  // nothing to move; avoids a negative allocation size and index -1
+    }
+    int remain = ts->ahead - 1;
+    token *moved = ts->token_ahead[remain];
+    token **new_ahead = NULL;  // an emptied list is NULL, exactly as makeTokenStream leaves it
+    if (remain > 0){
+        new_ahead = memCalloc(remain, sizeof(token *));
+        for (int i=0; i < remain; i++){
+            new_ahead[i] = ts->token_ahead[i];
+        }
+    }
+    token **new_list = memCalloc(ts->size + 1, sizeof(token *));
+    for (int i=0; i < ts->size; i++){
+        new_list[i] = ts->token_list[i];
+    }
+    new_list[ts->size] = moved;
+    memFree(ts->token_list);
+    memFree(ts->token_ahead);
+    ts->token_ahead = new_ahead;
+    ts->token_list = new_list;
+    ts->size ++;
+    ts->ahead = remain;
+    return moved;
+}
+
+/**
+ * Fetches the next token: a pushed-back token is taken from token_ahead when
+ * one exists, otherwise a new token is lexed and appended to token_list.
+ * @param tm token source
+ * @return the token_type of the token that was fetched
+ */
+int safeGetToken(tokenMessage *tm){
+    if (tm->ts->ahead != 0){
+        token *replayed = forwardToken(tm->ts);
+        return replayed->token_type;
+    }
+    token *lexed = getToken(tm->file, tm->mathers);
+    addToken(tm->ts, lexed);
+    return lexed->token_type;
+}