/*
 * File: lexical
 * Purpose: lexical analysis for aFunlang
 */
#include <ctype.h>
#include "aFunCore.hpp"
#include "__parser.hpp"
#include "parserl_warning_error.h"

#ifndef isascii
#define isascii(c) (((c) & ~0x7f) == 0)
#endif
#define isignore(ch) (isascii(ch) && (iscntrl(ch) || isspace(ch) || (ch) == ',')) /* characters that are ignored */
#define iselement(ch) (!isascii(ch) || isgraph(ch)) /* characters that may appear in an element */
#define DEL_TOKEN (0)
#define FINISH_TOKEN (-1)
#define CONTINUE_TOKEN (1)
#define ERROR_TOKEN (-2)
#define printLexicalError(info, parser) do { \
    writeErrorLog(aFunCoreLogger, "[Lexical] %s:%d %s", (parser)->reader->file, (parser)->reader->line, (info ## Log)); \
    printf_stderr(0, "[%s] %s:%d : %s\n", HT_aFunGetText(lexical_n, "Lexical"), (parser)->reader->file, \
                  (parser)->reader->line, info ## Console); \
    (parser)->is_error = true; /* The error flag lives on the Parser, not the Lexical; a Lexical error means the lexer stops running */ \
} while(0)

#define printLexicalWarning(info, parser) do { \
    writeWarningLog(aFunCoreLogger, "[Lexical] %s:%d %s", (parser)->reader->file, (parser)->reader->line, (info ## Log)); \
    printf_stderr(0, "[%s] %s:%d : %s\n", HT_aFunGetText(lexical_n, "Lexical"), (parser)->reader->file, \
                  (parser)->reader->line, info ## Console); \
} while(0)
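/* Note: `info ## Log` and `info ## Console` paste a suffix onto the macro argument, so a call such as
 * printLexicalError(IllegalChar, parser) expects identifiers named IllegalCharLog and IllegalCharConsole
 * to exist (presumably declared in parserl_warning_error.h). */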
static void setLexicalLast(af_LexicalStatus status, af_TokenType token, af_Parser *parser) {
    parser->lexical->status = status;
    parser->lexical->last = parser->reader->read - parser->reader->buf;
    parser->lexical->token = token;
}
/*
 * Function family: the done series (doneXXX)
 * Purpose: transition from state xxx to another state
 * Return value:  1  success
 * Return value:  0  error encountered, can still continue
 * Return value: -1  success, cannot continue -> setLexicalLast must have been called
 * Return value: -2  error encountered, cannot continue
 * Note: these functions do not re-check that `status` is correct before use
 */
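/* These return values correspond to the macros defined above:
 * 1 = CONTINUE_TOKEN, 0 = DEL_TOKEN, -1 = FINISH_TOKEN, -2 = ERROR_TOKEN. */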
/*
 * State machine diagram:
 * [lex_begin]
 *     -> NUL -> (lex_nul)
 *     -> ALL_PREFIX -> [lex_prefix] # return FINISH_TOKEN
 *     -> ! -> (lex_prefix_block_p)
 *     -> @ -> (lex_prefix_block_b)
 *     -> # -> (lex_prefix_block_c)
 *     -> ( -> [lex_lp] # return FINISH_TOKEN
 *     -> [ -> [lex_lb] # return FINISH_TOKEN
 *     -> { -> [lex_lc] # return FINISH_TOKEN
 *     -> ) -> [lex_rp] # return FINISH_TOKEN
 *     -> ] -> [lex_rb] # return FINISH_TOKEN
 *     -> } -> [lex_rc] # return FINISH_TOKEN
 *     -> ; -> (lex_comment_before)
 *     -> isignore(ch) -> [lex_space]
 *     -> | -> (lex_element_long)
 *     -> iselement(ch) -> [lex_element_short]
 */
static int doneBegin(char ch, af_Parser *parser) {
    if (ch == NUL) {
        setLexicalLast(lex_nul, TK_EOF, parser);
        return FINISH_TOKEN;
    } else if (strchr(ALL_PREFIX, ch)) { /* prefix character */
        setLexicalLast(lex_prefix, TK_PREFIX, parser);
        return FINISH_TOKEN;
    } else if (strchr("!@#", ch)) {
        switch (ch) {
            case '!':
                parser->lexical->status = lex_prefix_block_p;
                return 1;
            case '@':
                parser->lexical->status = lex_prefix_block_b;
                return 1;
            case '#':
                parser->lexical->status = lex_prefix_block_c;
                return 1;
            default:
                writeFatalErrorLog(aFunCoreLogger, EXIT_FAILURE, "Switch illegal characters");
                return ERROR_TOKEN;
        }
    } else if (strchr("([{)]}", ch)) { /* brackets */
        switch (ch) {
            case '(':
                setLexicalLast(lex_lp, TK_LP, parser);
                return FINISH_TOKEN;
            case '[':
                setLexicalLast(lex_lb, TK_LB, parser);
                return FINISH_TOKEN;
            case '{':
                setLexicalLast(lex_lc, TK_LC, parser);
                return FINISH_TOKEN;
            case ')':
                setLexicalLast(lex_rp, TK_RP, parser);
                return FINISH_TOKEN;
            case ']':
                setLexicalLast(lex_rb, TK_RB, parser);
                return FINISH_TOKEN;
            case '}':
                setLexicalLast(lex_rc, TK_RC, parser);
                return FINISH_TOKEN;
            default:
                writeFatalErrorLog(aFunCoreLogger, EXIT_FAILURE, "Switch illegal characters");
                return ERROR_TOKEN;
        }
    } else if (ch == ';') {
        parser->lexical->status = lex_comment_before;
        return 1;
    } else if (isignore(ch)) {  // whitespace and control characters are ignored
        setLexicalLast(lex_space, TK_SPACE, parser);
        return 1;
    } else if (ch == '|') {
        parser->lexical->status = lex_element_long;
        return 1;
    } else if (iselement(ch)) {  // visible characters other than spaces
        setLexicalLast(lex_element_short, TK_ELEMENT_SHORT, parser);
        return 1;
    }

    printLexicalError(IllegalChar, parser);
    return DEL_TOKEN;
}
/*
 * State machine diagram:
 * [lex_prefix_block_p] -> ( -> [lex_lp] # return FINISH_TOKEN
 * [lex_prefix_block_b] -> ( -> [lex_lb] # return FINISH_TOKEN
 * [lex_prefix_block_c] -> ( -> [lex_lc] # return FINISH_TOKEN
 * [lex_prefix_block_p] -> ) -> [lex_rp] # return FINISH_TOKEN
 * [lex_prefix_block_b] -> ) -> [lex_rb] # return FINISH_TOKEN
 * [lex_prefix_block_c] -> ) -> [lex_rc] # return FINISH_TOKEN
 */
static int donePrefixBlock(char ch, af_Parser *parser) {
    if (ch == '(') {
        switch (parser->lexical->status) {
            case lex_prefix_block_p:
                setLexicalLast(lex_lp, TK_LP, parser);
                return FINISH_TOKEN;
            case lex_prefix_block_b:
                setLexicalLast(lex_lb, TK_LB, parser);
                return FINISH_TOKEN;
            case lex_prefix_block_c:
                setLexicalLast(lex_lc, TK_LC, parser);
                return FINISH_TOKEN;
            default:
                writeFatalErrorLog(aFunCoreLogger, EXIT_FAILURE, "Switch illegal characters");
                return ERROR_TOKEN;
        }
    } else if (ch == ')') {
        switch (parser->lexical->status) {
            case lex_prefix_block_p:
                setLexicalLast(lex_rp, TK_RP, parser);
                return FINISH_TOKEN;
            case lex_prefix_block_b:
                setLexicalLast(lex_rb, TK_RB, parser);
                return FINISH_TOKEN;
            case lex_prefix_block_c:
                setLexicalLast(lex_rc, TK_RC, parser);
                return FINISH_TOKEN;
            default:
                writeFatalErrorLog(aFunCoreLogger, EXIT_FAILURE, "Switch illegal characters");
                return ERROR_TOKEN;
        }
    }

    printLexicalError(IllegalChar, parser);
    return DEL_TOKEN;
}
/*
 * State machine diagram:
 * [lex_comment_before]
 *     -> '\n' || NUL -> [lex_uni_comment_end] # return FINISH_TOKEN
 *     -> ; -> (lex_mutli_comment) # mutli_comment = 0
 *     -> other -> (lex_uni_comment)
 */
static int doneCommentBefore(char ch, af_Parser *parser) {
    if (ch == '\n' || ch == NUL) {
        setLexicalLast(lex_uni_comment_end, TK_COMMENT, parser);
        return FINISH_TOKEN;
    } else if (ch == ';') {  // multi-line comment
        parser->lexical->status = lex_mutli_comment;
        parser->lexical->mutli_comment = 0;
        return 1;
    }

    parser->lexical->status = lex_uni_comment;
    return 1;
}
/*
 * State machine diagram:
 * [lex_uni_comment]
 *     -> '\n' || NUL -> [lex_uni_comment_end] # return FINISH_TOKEN
 *     -> other -> (lex_uni_comment)
 */
static int doneUniComment(char ch, af_Parser *parser) {
    if (ch == '\n' || ch == NUL) {
        setLexicalLast(lex_uni_comment_end, TK_COMMENT, parser);
        return FINISH_TOKEN;
    }

    parser->lexical->status = lex_uni_comment;
    return 1;
}
/*
 * State machine diagram:
 * [lex_mutli_comment]
 *     -> NUL -> [lex_mutli_comment_end] # return FINISH_TOKEN; [warning]
 *     -> ; -> (lex_mutli_comment_end_before)
 *     -> other -> (lex_mutli_comment)
 */
static int doneMutliComment(char ch, af_Parser *parser) {
    if (ch == NUL) {
        setLexicalLast(lex_mutli_comment_end, TK_COMMENT, parser);  // end of file inside a multi-line comment
        printLexicalWarning(IncompleteFile, parser);
        return FINISH_TOKEN;
    } else if (ch == ';')
        parser->lexical->status = lex_mutli_comment_end_before;
    else
        parser->lexical->status = lex_mutli_comment;
    return 1;
}
/*
 * State machine diagram:
 * [lex_mutli_comment_end_before]
 *     -> NUL -> [lex_mutli_comment_end] # return FINISH_TOKEN; [warning]
 *     -> ; -> (lex_mutli_comment) # mutli_comment++;
 *     -> = ->
 *         mutli_comment == 0 -> [lex_mutli_comment_end] # return FINISH_TOKEN
 *         else -> (lex_mutli_comment) # mutli_comment--;
 */
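/* For example (inferred from the transitions above): in a comment such as
 * `;; outer ;; nested ;= still outer ;=`, each `;;` inside the comment raises the mutli_comment
 * counter and each `;=` lowers it; only a `;=` seen at depth 0 ends the comment. */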
static int doneMutliCommentBeforeEnd(char ch, af_Parser *parser) {
    if (ch == NUL) {
        printLexicalWarning(IncompleteFile, parser);
        setLexicalLast(lex_mutli_comment_end, TK_COMMENT, parser);
        return FINISH_TOKEN;
    } else if (ch == ';') {
        /* nested comment */
        parser->lexical->mutli_comment++;
        parser->lexical->status = lex_mutli_comment;
    } else if (ch == '=') {
        if (parser->lexical->mutli_comment == 0) {
            /* end of comment */
            setLexicalLast(lex_mutli_comment_end, TK_COMMENT, parser);
            return FINISH_TOKEN;
        } else {
            /* nested comment */
            parser->lexical->mutli_comment--;
            parser->lexical->status = lex_mutli_comment;
        }
    }

    parser->lexical->status = lex_mutli_comment;
    return 1;
}
/*
 * State machine diagram:
 * [lex_element_long]
 *     -> NUL -> error
 *     -> | -> [lex_element_long_end]
 *     -> other -> (lex_element_long)
 */
static int doneElementLong(char ch, af_Parser *parser) {
    if (ch == '|') {  // terminator
        setLexicalLast(lex_element_long_end, TK_ELEMENT_LONG, parser);
        return 1;
    } else if (ch == NUL) {
        printLexicalError(IncompleteFile, parser);
        return ERROR_TOKEN;
    }

    parser->lexical->status = lex_element_long;
    return 1;
}
/*
 * State machine diagram:
 * [lex_element_long_end]
 *     -> | -> (lex_element_long)
 *     -> other -> [lex_element_long_end] # return FINISH_TOKEN
 */
static int doneElementLongEnd(char ch, af_Parser *parser) {
    if (ch == '|') {  // || does not end the element
        parser->lexical->status = lex_element_long;
        return 1;
    }

    parser->lexical->status = lex_element_long_end;
    return FINISH_TOKEN;
}
/*
 * State machine diagram:
 * [lex_element_short]
 *     -> !strchr("!@#([{}]);,", ch) && iselement(ch) -> (lex_element_short)
 *     -> other -> (lex_element_short) # return FINISH_TOKEN
 */
static int doneElementShort(char ch, af_Parser *parser) {
    if (!strchr("!@#([{}]);,", ch) && iselement(ch)) {  // visible characters other than spaces (excluding NUL)
        setLexicalLast(lex_element_short, TK_ELEMENT_SHORT, parser);
        return 1;
    }

    parser->lexical->status = lex_element_short;
    return FINISH_TOKEN;
}
/*
 * State machine diagram:
 * [lex_space]
 *     -> ch != NUL && isignore(ch) -> (lex_space)
 *     -> other -> (lex_space) # return FINISH_TOKEN
 */
static int doneSpace(char ch, af_Parser *parser) {
    if (ch != NUL && isignore(ch)) {
        setLexicalLast(lex_space, TK_SPACE, parser);
        return 1;
    }

    parser->lexical->status = lex_space;
    return FINISH_TOKEN;
}
/*
 * Function: getTokenFromLexical
 * Purpose: obtain the Lexical's TokenType and the associated text
 */
af_TokenType getTokenFromLexical(char **text, af_Parser *parser) {
    af_TokenType tt;
    int re;
    parser->lexical->status = lex_begin;
    parser->lexical->last = 0;

    if (parser->lexical->is_end) {
        *text = nullptr;
        return TK_EOF;
    } else if (parser->lexical->is_error || parser->reader->read_error) { /* once the lexical or the reader hits an error, stop running */
        *text = nullptr;
        return TK_ERROR;
    }

    while (true) {
        char ch = getChar(parser->reader);
        if (parser->reader->read_error) {
            *text = nullptr;
            return TK_ERROR;
        }

        if (isascii(ch) && iscntrl(ch) && !isspace(ch) && ch != NUL)  // ASCII control character
            printLexicalWarning(IncludeControlChar, parser);

        switch (parser->lexical->status) {
            case lex_begin:
                re = doneBegin(ch, parser);
                break;
            case lex_prefix_block_p:
            case lex_prefix_block_b:
            case lex_prefix_block_c:
                re = donePrefixBlock(ch, parser);
                break;
            case lex_comment_before:
                re = doneCommentBefore(ch, parser);
                break;
            case lex_element_long:
                re = doneElementLong(ch, parser);
                break;
            case lex_mutli_comment:
                re = doneMutliComment(ch, parser);
                break;
            case lex_uni_comment:
                re = doneUniComment(ch, parser);
                break;
            case lex_mutli_comment_end_before:
                re = doneMutliCommentBeforeEnd(ch, parser);
                break;
            case lex_space:
                re = doneSpace(ch, parser);
                break;
            case lex_element_short:
                re = doneElementShort(ch, parser);
                break;
            case lex_element_long_end:
                re = doneElementLongEnd(ch, parser);
                break;
            default:
                writeFatalErrorLog(aFunCoreLogger, EXIT_FAILURE, "Switch illegal characters");
                re = ERROR_TOKEN;
                break;
        }

        if (re == ERROR_TOKEN) {
            tt = TK_ERROR;
            *text = nullptr;
            break;
        } else if (re == DEL_TOKEN) {  // drop this token and keep going
            char *word = readWord(parser->lexical->last, parser->reader);
            free(word);
            parser->lexical->status = lex_begin;
            parser->lexical->last = 0;
            continue;
        } else if (re == FINISH_TOKEN) {
            char *word = readWord(parser->lexical->last, parser->reader);
            if (word == nullptr) {
                tt = TK_ERROR;
                *text = nullptr;
                break;
            }
            tt = parser->lexical->token;
            if (tt == TK_ELEMENT_SHORT || tt == TK_PREFIX)
                *text = word;
            else if (tt == TK_ELEMENT_LONG) {
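                /* A long element is written as |...|; inside it, a doubled || stands for a literal |.
                 * For example (inferred from the loop below), the source text |a||b| yields the element a|b. */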
                char *new_str = NEW_STR(STR_LEN(word) - 2);  // strip the leading and trailing |
                bool flat = false;
                char *p = word + 1;
                size_t count = 0;
                for (; *p != NUL; p++) {
                    if (*p == '|' && !flat) {  // skip the first `|`: a trailing | is simply dropped, otherwise the data is written when the second `|` is met
                        flat = true;  /* count does not need to be incremented */
                        continue;
                    } else if (*p != '|' && flat)  // error encountered
                        break;
                    else
                        flat = false;
                    new_str[count] = *p;
                    count++;
                }
                *text = strCopy(new_str);
                free(word);
                free(new_str);
            } else
                free(word);

            if (tt == TK_SPACE || tt == TK_COMMENT) {
                parser->lexical->status = lex_begin;
                parser->lexical->last = 0;
                continue;
            } else if (tt == TK_EOF)
                parser->lexical->is_end = true;
            break;
        }
    }
    return tt;
}