lexical.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429
  1. /*
  2. * 文件名: lexical
  3. * 目标: aFunlang词法分析
  4. */
  5. #include <ctype.h>
  6. #include "aFunCore.h"
  7. #include "__parser.h"
  8. #include "parserl_warning_error.h"
  9. static void printLexicalError(char *info, af_Parser *parser) {
  10. writeErrorLog(aFunCoreLogger, "[Lexical] %s", info);
  11. parser->is_error = true;
  12. }
  13. static void printLexicalWarning(char *info, af_Parser *parser) {
  14. writeWarningLog(aFunCoreLogger, "[Lexical] %s", info);
  15. }
  16. static void setLexicalLast(af_LexicalStatus status, af_TokenType token, af_Parser *parser) {
  17. parser->lexical->status = status;
  18. parser->lexical->last = parser->reader->read - parser->reader->buf;
  19. parser->lexical->token = token;
  20. }
/*
 * 函数族: done系列 (doneXXX)
 * 目标: 用于把状态xxx转换为其他状态
 * 返回值: 1 正常
 * 返回值: 0 遇到错误, 仍可继续
 * 返回值: -1 正常, 不可继续 -> 必须设置 setLexicalLast
 * 返回值: -2 遇到错误, 不可继续
 * 注意: 函数使用前不再检查`status`是否正确
 */
/*
 * 状态机图:
 * [lex_begin]
 * -> NUL -> (lex_nul)
 * -> ALL_PREFIX -> [lex_prefix] # return -1
 * -> ! -> (lex_prefix_block_p)
 * -> @ -> (lex_prefix_block_b)
 * -> # -> (lex_prefix_block_c)
 * -> ( -> [lex_lp] # return -1
 * -> [ -> [lex_lb] # return -1
 * -> { -> [lex_lc] # return -1
 * -> ) -> [lex_rp] # return -1
 * -> ] -> [lex_rb] # return -1
 * -> } -> [lex_rc] # return -1
 * -> ; -> (lex_comment_before)
 * -> iscntrl(ch) || isspace(ch) || , -> [lex_space]
 * -> | -> (lex_element_long)
 * -> isgraph(ch) -> [lex_element_short]
 */
  49. static int doneBegin(char ch, af_Parser *parser) {
  50. if (ch == NUL) {
  51. setLexicalLast(lex_nul, TK_EOF, parser);
  52. return -1;
  53. } else if (strchr(ALL_PREFIX, ch)) { /* 属于前缀 */
  54. setLexicalLast(lex_prefix, TK_PREFIX, parser);
  55. return -1;
  56. } else if (strchr("!@#", ch)) {
  57. switch (ch) {
  58. case '!':
  59. parser->lexical->status = lex_prefix_block_p;
  60. return 1;
  61. case '@':
  62. parser->lexical->status = lex_prefix_block_b;
  63. return 1;
  64. case '#':
  65. parser->lexical->status = lex_prefix_block_c;
  66. return 1;
  67. default:
  68. printLexicalError(SYS_ILLEGAL_CHAR(lex_beging), parser);
  69. return -2;
  70. }
  71. } else if (strchr("([{)]}", ch)) { /* 括号 */
  72. switch (ch) {
  73. case '(':
  74. setLexicalLast(lex_lp, TK_LP, parser);
  75. return -1;
  76. case '[':
  77. setLexicalLast(lex_lb, TK_LB, parser);
  78. return -1;
  79. case '{':
  80. setLexicalLast(lex_lc, TK_LC, parser);
  81. return -1;
  82. case ')':
  83. setLexicalLast(lex_rp, TK_RP, parser);
  84. return -1;
  85. case ']':
  86. setLexicalLast(lex_rb, TK_RB, parser);
  87. return -1;
  88. case '}':
  89. setLexicalLast(lex_rc, TK_RC, parser);
  90. return -1;
  91. default:
  92. printLexicalError(SYS_ILLEGAL_CHAR(lex_beging), parser);
  93. return -2;
  94. }
  95. } else if (ch == ';') {
  96. parser->lexical->status = lex_comment_before;
  97. return 1;
  98. } else if (iscntrl(ch) || isspace(ch) || ch == ',') { // 空白符或控制字符被忽略
  99. setLexicalLast(lex_space, TK_SPACE, parser);
  100. return 1;
  101. } else if (ch == '|') {
  102. parser->lexical->status = lex_element_long;
  103. return 1;
  104. } else if (isgraph(ch)) { // 除空格外的可见字符
  105. setLexicalLast(lex_element_short, TK_ELEMENT_SHORT, parser);
  106. return 1;
  107. }
  108. printLexicalError(ILLEGAL_CHAR(lex_beging), parser);
  109. return 0;
  110. }
  111. /*
  112. * 状态机图:
  113. * [lex_prefix_block_p] -> ( -> [lex_lp] # return -1
  114. * [lex_prefix_block_b] -> ( -> [lex_lb] # return -1
  115. * [lex_prefix_block_c] -> ( -> [lex_lc] # return -1
  116. * [lex_prefix_block_p] -> ) -> [lex_rp] # return -1
  117. * [lex_prefix_block_b] -> ) -> [lex_rb] # return -1
  118. * [lex_prefix_block_c] -> ) -> [lex_rc] # return -1
  119. */
  120. static int donePrefixBlock(char ch, af_Parser *parser) {
  121. if (ch == '(') {
  122. switch (parser->lexical->status) {
  123. case lex_prefix_block_p:
  124. setLexicalLast(lex_lp, TK_LP, parser);
  125. return -1;
  126. case lex_prefix_block_b:
  127. setLexicalLast(lex_lb, TK_LB, parser);
  128. return -1;
  129. case lex_prefix_block_c:
  130. setLexicalLast(lex_lc, TK_LC, parser);
  131. return -1;
  132. default:
  133. printLexicalError(SYS_ERROR_STATUS(lex_prefix_block), parser);
  134. return -2;
  135. }
  136. } else if (ch == ')') {
  137. switch (parser->lexical->status) {
  138. case lex_prefix_block_p:
  139. setLexicalLast(lex_rp, TK_RP, parser);
  140. return -1;
  141. case lex_prefix_block_b:
  142. setLexicalLast(lex_rb, TK_RB, parser);
  143. return -1;
  144. case lex_prefix_block_c:
  145. setLexicalLast(lex_rc, TK_RC, parser);
  146. return -1;
  147. default:
  148. printLexicalError(SYS_ERROR_STATUS(lex_prefix_block), parser);
  149. return -2;
  150. }
  151. }
  152. printLexicalError(ILLEGAL_CHAR(lex_prefix_block), parser);
  153. return 0;
  154. }
  155. /*
  156. * 状态机图:
  157. * [lex_comment_before]
  158. * -> '\n' || NUL -> [lex_uni_comment_end] # return -1
  159. * -> ; -> (lex_mutli_comment) # mutli_comment = 0
  160. * -> other -> (lex_uni_comment)
  161. */
  162. static int doneCommentBefore(char ch, af_Parser *parser) {
  163. if (ch == '\n' || ch == NUL) {
  164. setLexicalLast(lex_uni_comment_end, TK_COMMENT, parser);
  165. return -1;
  166. } else if (ch == ';') { // 多行注释
  167. parser->lexical->status = lex_mutli_comment;
  168. parser->lexical->mutli_comment = 0;
  169. return 1;
  170. }
  171. parser->lexical->status = lex_uni_comment;
  172. return 1;
  173. }
  174. /*
  175. * 状态机图:
  176. * [lex_uni_comment]
  177. * -> '\n' || NUL -> [lex_uni_comment_end] # return -1
  178. * -> other -> (lex_uni_comment)
  179. */
  180. static int doneUniComment(char ch, af_Parser *parser) {
  181. if (ch == '\n' || ch == NUL) {
  182. setLexicalLast(lex_uni_comment_end, TK_COMMENT, parser);
  183. return -1;
  184. }
  185. parser->lexical->status = lex_uni_comment;
  186. return 1;
  187. }
  188. /*
  189. * 状态机图:
  190. * [lex_mutli_comment]
  191. * -> NUL -> [lex_mutli_comment_end] # return -1; [warning]
  192. * -> ; -> (lex_mutli_comment_end_before)
  193. * -> other -> (lex_mutli_comment)
  194. */
  195. static int doneMutliComment(char ch, af_Parser *parser) {
  196. if (ch == NUL) {
  197. parser->lexical->status = lex_mutli_comment_end;
  198. printLexicalWarning(INCOMPLETE_FILE(lex_mutli_comment), parser);
  199. return -1;
  200. } else if (ch == ';')
  201. parser->lexical->status = lex_mutli_comment_end_before;
  202. else
  203. parser->lexical->status = lex_mutli_comment;
  204. return 1;
  205. }
  206. /*
  207. * 状态机图:
  208. * [lex_mutli_comment_end_before]
  209. * -> NUL -> [lex_mutli_comment_end] # return -1; [warning]
  210. * -> ; -> (lex_mutli_comment) # mutli_comment++;
  211. * -> = ->
  212. * mutli_comment == 0 -> [lex_mutli_comment_end] # return -1
  213. * else -> (lex_mutli_comment)# mutli_comment--;
  214. */
  215. static int doneMutliCommentBeforeEnd(char ch, af_Parser *parser) {
  216. if (ch == NUL) {
  217. printLexicalWarning(INCOMPLETE_FILE(lex_mutli_comment_end_before), parser);
  218. setLexicalLast(lex_mutli_comment_end, TK_COMMENT, parser);
  219. return -1;
  220. } else if (ch == ';') {
  221. /* 嵌套注释 */
  222. parser->lexical->mutli_comment++;
  223. parser->lexical->status = lex_mutli_comment;
  224. } else if (ch == '=') {
  225. if (parser->lexical->mutli_comment == 0) {
  226. /* 注释结束 */
  227. setLexicalLast(lex_mutli_comment_end, TK_COMMENT, parser);
  228. return -1;
  229. } else {
  230. /* 嵌套注释 */
  231. parser->lexical->mutli_comment--;
  232. parser->lexical->status = lex_mutli_comment;
  233. }
  234. }
  235. parser->lexical->status = lex_mutli_comment;
  236. return 1;
  237. }
  238. /*
  239. * 状态机图:
  240. * [lex_element_long]
  241. * -> NUL -> error
  242. * -> | -> [lex_element_long_end]
  243. * -> other -> (lex_element_long)
  244. */
  245. static int doneElementLong(char ch, af_Parser *parser) {
  246. if (ch == '|') { // 结束符
  247. setLexicalLast(lex_element_long_end, TK_ELEMENT_LONG, parser);
  248. return 1;
  249. } else if (ch == NUL) {
  250. printLexicalError(INCOMPLETE_FILE(lex_element_long), parser);
  251. return -2;
  252. }
  253. parser->lexical->status = lex_element_long;
  254. return 1;
  255. }
/*
 * 状态机图:
 * [lex_element_long_end]
 * -> | -> (lex_element_long)
 * -> other -> [lex_element_long_end] # return -1
 */
  262. static int doneElementLongEnd(char ch, af_Parser *parser) {
  263. if (ch == '|') { // ||表示非结束
  264. parser->lexical->status = lex_element_long;
  265. return 1;
  266. }
  267. parser->lexical->status = lex_element_long_end;
  268. return -1;
  269. }
  270. /*
  271. * 状态机图:
  272. * [lex_element_short]
  273. * -> !strchr("!@#([{}]);,", ch) && isgraph(ch) -> (lex_element_short)
  274. * -> other -> (lex_element_short) # return -1
  275. */
  276. static int doneElementShort(char ch, af_Parser *parser) {
  277. if (!strchr("!@#([{}]);,", ch) && isgraph(ch)) { // 除空格外的可见字符 (不包括NUL)
  278. setLexicalLast(lex_element_short, TK_ELEMENT_SHORT, parser);
  279. return 1;
  280. }
  281. parser->lexical->status = lex_element_short;
  282. return -1;
  283. }
  284. /*
  285. * 状态机图:
  286. * [lex_space]
  287. * -> ch != NUL && (iscntrl(ch) || isspace(ch)) || , -> (lex_space)
  288. * -> other -> (lex_space) # return -1
  289. */
  290. static int doneSpace(char ch, af_Parser *parser) {
  291. if (ch != NUL && (iscntrl(ch) || isspace(ch)) || ch == ',') {
  292. setLexicalLast(lex_space, TK_SPACE, parser);
  293. return 1;
  294. }
  295. parser->lexical->status = lex_space;
  296. return -1;
  297. }
  298. /*
  299. * 函数名: getTokenFromLexical
  300. * 目标: 获取Lexical的TokenType以及相关值
  301. */
  302. af_TokenType getTokenFromLexical(char **text, af_Parser *parser) {
  303. af_TokenType tt;
  304. int re;
  305. parser->lexical->status = lex_begin;
  306. parser->lexical->last = 0;
  307. if (parser->lexical->is_end) {
  308. *text = NULL;
  309. return TK_EOF;
  310. } else if (parser->lexical->is_error) {
  311. *text = NULL;
  312. return TK_ERROR;
  313. }
  314. while (1) {
  315. char ch = getChar(parser->reader);
  316. if (iscntrl(ch) && !isspace(ch) && ch != NUL)
  317. printLexicalWarning(INCULDE_CONTROL(base), parser);
  318. switch (parser->lexical->status) {
  319. case lex_begin:
  320. re = doneBegin(ch, parser);
  321. break;
  322. case lex_prefix_block_p:
  323. case lex_prefix_block_b:
  324. case lex_prefix_block_c:
  325. re = donePrefixBlock(ch, parser);
  326. break;
  327. case lex_comment_before:
  328. re = doneCommentBefore(ch, parser);
  329. break;
  330. case lex_element_long:
  331. re = doneElementLong(ch, parser);
  332. break;
  333. case lex_mutli_comment:
  334. re = doneMutliComment(ch, parser);
  335. break;
  336. case lex_uni_comment:
  337. re = doneUniComment(ch, parser);
  338. break;
  339. case lex_mutli_comment_end_before:
  340. re = doneMutliCommentBeforeEnd(ch, parser);
  341. break;
  342. case lex_space:
  343. re = doneSpace(ch, parser);
  344. break;
  345. case lex_element_short:
  346. re = doneElementShort(ch, parser);
  347. break;
  348. case lex_element_long_end:
  349. re = doneElementLongEnd(ch, parser);
  350. break;
  351. default:
  352. printLexicalError(SYS_ERROR_STATUS(base), parser);
  353. re = -3;
  354. break;
  355. }
  356. if (re == -1) {
  357. char *word = readWord(parser->lexical->last, parser->reader);
  358. tt = parser->lexical->token;
  359. if (tt == TK_ELEMENT_SHORT || tt == TK_PREFIX)
  360. *text = word;
  361. else if (tt == TK_ELEMENT_LONG) {
  362. char *new = NEW_STR(STR_LEN(word) - 2); // 去除收尾|
  363. bool flat = false;
  364. char *p = word + 1;
  365. size_t count = 0;
  366. for(NULL; *p != NUL; p++) {
  367. if (*p == '|' && !flat) { // 跳过第一个 `|`, 如果是末尾|则自然跳过, 若不是则在遇到第二个`|`时写入数据
  368. flat = true; /* count不需要递增 */
  369. continue;
  370. } else if (*p != '|' && flat) // 遇到错误
  371. break;
  372. else
  373. flat = false;
  374. new[count] = *p;
  375. count++;
  376. }
  377. *text = strCopy(new);
  378. free(word);
  379. free(new);
  380. } else
  381. free(word);
  382. if (tt == TK_SPACE || tt == TK_COMMENT) {
  383. parser->lexical->status = lex_begin;
  384. parser->lexical->last = 0;
  385. continue;
  386. } else if (tt == TK_EOF)
  387. parser->lexical->is_end = true;
  388. break;
  389. } else if (re == 0) { // 删除该token, 继续执行
  390. char *word = readWord(parser->lexical->last, parser->reader);
  391. free(word);
  392. parser->lexical->status = lex_begin;
  393. parser->lexical->last = 0;
  394. continue;
  395. } else if (re == -2 || re == -3) {
  396. tt = TK_ERROR;
  397. *text = NULL;
  398. parser->lexical->is_error = true;
  399. break;
  400. }
  401. }
  402. return tt;
  403. }