lexical.c 13 KB


  1. /*
  2. * 文件名: lexical
  3. * 目标: aFunlang词法分析
  4. */
  5. #include <ctype.h>
  6. #include "aFun.h"
  7. #include "__parser.h"
  8. #include "lexical_warning_error.h"
  9. static void printLexicalError(char *info, af_Parser *parser) {
  10. if (parser->error == NULL)
  11. return;
  12. fprintf(parser->error, "[Lexical-Error] %s\n", info);
  13. }
  14. static void printLexicalWarning(char *info, af_Parser *parser) {
  15. if (parser->error == NULL)
  16. return;
  17. fprintf(parser->error, "[Lexical-Warning] %s\n", info);
  18. }
  19. static void setLexicalLast(af_LexicalStatus status, af_TokenType token, af_Parser *parser) {
  20. parser->lexical->status = status;
  21. parser->lexical->last = parser->reader->read - parser->reader->buf;
  22. parser->lexical->token = token;
  23. }
  24. /*
  25. * 函数族: done系列 (doneXXX)
  26. * 目标: 用于把状态xxx转换为其他状态
  27. * 返回值: 1 正常
  28. * 返回值: 0 遇到错误, 仍可继续
  29. * 返回值: -1 正常, 不可继续 -> 必须设置 setLexicalLast
  30. * 返回值: -2 遇到错误, 不可继续
  31. * 注意: 函数使用前不再检查`status`是否正确
  32. */
  33. /*
  34. * 状态机图:
  35. * [lex_begin]
  36. * -> NUL -> (lex_nul)
  37. * -> ALL_PREFIX -> [lex_prefix] # return -1
  38. * -> ! -> (lex_prefix_block_p)
  39. * -> @ -> (lex_prefix_block_b)
  40. * -> # -> (lex_prefix_block_c)
  41. * -> ( -> [lex_lp] # return -1
  42. * -> [ -> [lex_lb] # return -1
  43. * -> { -> [lex_lc] # return -1
  44. * -> ) -> [lex_rp] # return -1
  45. * -> ] -> [lex_rb] # return -1
  46. * -> } -> [lex_rc] # return -1
  47. * -> ; -> (lex_comment_before)
  48. * -> iscntrl(ch) || isspace(ch) -> [lex_space]
  49. * -> | -> (lex_element_long)
  50. * -> isgraph(ch) -> [lex_element_short]
  51. */
  52. static int doneBegin(char ch, af_Parser *parser) {
  53. if (ch == NUL) {
  54. setLexicalLast(lex_nul, TK_EOF, parser);
  55. return -1;
  56. } else if (strchr(ALL_PREFIX, ch)) { /* 属于前缀 */
  57. setLexicalLast(lex_prefix, TK_PREFIX, parser);
  58. return -1;
  59. } else if (strchr("!@#", ch)) {
  60. switch (ch) {
  61. case '!':
  62. parser->lexical->status = lex_prefix_block_p;
  63. return 1;
  64. case '@':
  65. parser->lexical->status = lex_prefix_block_b;
  66. return 1;
  67. case '#':
  68. parser->lexical->status = lex_prefix_block_c;
  69. return 1;
  70. default:
  71. printLexicalError(SYS_ILLEGAL_CHAR(lex_beging), parser);
  72. return -2;
  73. }
  74. } else if (strchr("([{)]}", ch)) { /* 括号 */
  75. switch (ch) {
  76. case '(':
  77. setLexicalLast(lex_lp, TK_LP, parser);
  78. return -1;
  79. case '[':
  80. setLexicalLast(lex_lb, TK_LB, parser);
  81. return -1;
  82. case '{':
  83. setLexicalLast(lex_lc, TK_LC, parser);
  84. return -1;
  85. case ')':
  86. setLexicalLast(lex_rp, TK_RP, parser);
  87. return -1;
  88. case ']':
  89. setLexicalLast(lex_rb, TK_RB, parser);
  90. return -1;
  91. case '}':
  92. setLexicalLast(lex_rc, TK_RC, parser);
  93. return -1;
  94. default:
  95. printLexicalError(SYS_ILLEGAL_CHAR(lex_beging), parser);
  96. return -2;
  97. }
  98. } else if (ch == ';') {
  99. parser->lexical->status = lex_comment_before;
  100. return 1;
  101. } else if (iscntrl(ch) || isspace(ch)) { // 空白符或控制字符被忽略
  102. setLexicalLast(lex_space, TK_SPACE, parser);
  103. return 1;
  104. } else if (ch == '|') {
  105. parser->lexical->status = lex_element_long;
  106. return 1;
  107. } else if (isgraph(ch)) { // 除空格外的可见字符
  108. setLexicalLast(lex_element_short, TK_ELEMENT_SHORT, parser);
  109. return 1;
  110. }
  111. printLexicalError(ILLEGAL_CHAR(lex_beging), parser);
  112. return -2;
  113. }
  114. /*
  115. * 状态机图:
  116. * [lex_prefix_block_p] -> ( -> [lex_lp] # return -1
  117. * [lex_prefix_block_b] -> ( -> [lex_lb] # return -1
  118. * [lex_prefix_block_c] -> ( -> [lex_lc] # return -1
  119. * [lex_prefix_block_p] -> ) -> [lex_rp] # return -1
  120. * [lex_prefix_block_b] -> ) -> [lex_rb] # return -1
  121. * [lex_prefix_block_c] -> ) -> [lex_rc] # return -1
  122. */
  123. static int donePrefixBlock(char ch, af_Parser *parser) {
  124. if (ch == '(') {
  125. switch (parser->lexical->status) {
  126. case lex_prefix_block_p:
  127. setLexicalLast(lex_lp, TK_LP, parser);
  128. return -1;
  129. case lex_prefix_block_b:
  130. setLexicalLast(lex_lb, TK_LB, parser);
  131. return -1;
  132. case lex_prefix_block_c:
  133. setLexicalLast(lex_lc, TK_LC, parser);
  134. return -1;
  135. default:
  136. printLexicalError(SYS_ERROR_STATUS(lex_prefix_block), parser);
  137. return -2;
  138. }
  139. } else if (ch == ')') {
  140. switch (parser->lexical->status) {
  141. case lex_prefix_block_p:
  142. setLexicalLast(lex_rp, TK_RP, parser);
  143. return -1;
  144. case lex_prefix_block_b:
  145. setLexicalLast(lex_rb, TK_RB, parser);
  146. return -1;
  147. case lex_prefix_block_c:
  148. setLexicalLast(lex_rc, TK_RC, parser);
  149. return -1;
  150. default:
  151. printLexicalError(SYS_ERROR_STATUS(lex_prefix_block), parser);
  152. return -2;
  153. }
  154. }
  155. printLexicalError(ILLEGAL_CHAR(lex_prefix_block), parser);
  156. return -2;
  157. }
  158. /*
  159. * 状态机图:
  160. * [lex_comment_before]
  161. * -> '\n' || NUL -> [lex_uni_comment_end] # return -1
  162. * -> ; -> (lex_mutli_comment) # mutli_comment = 0
  163. * -> other -> (lex_uni_comment)
  164. */
  165. static int doneCommentBefore(char ch, af_Parser *parser) {
  166. if (ch == '\n' || ch == NUL) {
  167. setLexicalLast(lex_uni_comment_end, TK_COMMENT, parser);
  168. return -1;
  169. } else if (ch == ';') { // 多行注释
  170. parser->lexical->status = lex_mutli_comment;
  171. parser->lexical->mutli_comment = 0;
  172. return 1;
  173. }
  174. parser->lexical->status = lex_uni_comment;
  175. return 1;
  176. }
  177. /*
  178. * 状态机图:
  179. * [lex_uni_comment]
  180. * -> '\n' || NUL -> [lex_uni_comment_end] # return -1
  181. * -> other -> (lex_uni_comment)
  182. */
  183. static int doneUniComment(char ch, af_Parser *parser) {
  184. if (ch == '\n' || ch == NUL) {
  185. setLexicalLast(lex_uni_comment_end, TK_COMMENT, parser);
  186. return -1;
  187. }
  188. parser->lexical->status = lex_uni_comment;
  189. return 1;
  190. }
  191. /*
  192. * 状态机图:
  193. * [lex_mutli_comment]
  194. * -> NUL -> [lex_mutli_comment_end] # return -1; [warning]
  195. * -> ; -> (lex_mutli_comment_end_before)
  196. * -> other -> (lex_mutli_comment)
  197. */
  198. static int doneMutliComment(char ch, af_Parser *parser) {
  199. if (ch == NUL) {
  200. parser->lexical->status = lex_mutli_comment_end;
  201. printLexicalWarning(INCOMPLETE_FILE(lex_mutli_comment), parser);
  202. return -1;
  203. } else if (ch == ';')
  204. parser->lexical->status = lex_mutli_comment_end_before;
  205. else
  206. parser->lexical->status = lex_mutli_comment;
  207. return 1;
  208. }
  209. /*
  210. * 状态机图:
  211. * [lex_mutli_comment_end_before]
  212. * -> NUL -> [lex_mutli_comment_end] # return -1; [warning]
  213. * -> ; -> (lex_mutli_comment) # mutli_comment++;
  214. * -> = ->
  215. * mutli_comment == 0 -> [lex_mutli_comment_end] # return -1
  216. * else -> (lex_mutli_comment)# mutli_comment--;
  217. */
  218. static int doneMutliCommentBeforeEnd(char ch, af_Parser *parser) {
  219. if (ch == NUL) {
  220. printLexicalWarning(INCOMPLETE_FILE(lex_mutli_comment_end_before), parser);
  221. setLexicalLast(lex_mutli_comment_end, TK_COMMENT, parser);
  222. return -1;
  223. } else if (ch == ';') {
  224. /* 嵌套注释 */
  225. parser->lexical->mutli_comment++;
  226. parser->lexical->status = lex_mutli_comment;
  227. } else if (ch == '=') {
  228. if (parser->lexical->mutli_comment == 0) {
  229. /* 注释结束 */
  230. setLexicalLast(lex_mutli_comment_end, TK_COMMENT, parser);
  231. return -1;
  232. } else {
  233. /* 嵌套注释 */
  234. parser->lexical->mutli_comment--;
  235. parser->lexical->status = lex_mutli_comment;
  236. }
  237. }
  238. parser->lexical->status = lex_mutli_comment;
  239. return 1;
  240. }
  241. /*
  242. * 状态机图:
  243. * [lex_element_long]
  244. * -> NUL -> error
  245. * -> | -> [lex_element_long_end]
  246. * -> other -> (lex_element_long)
  247. */
  248. static int doneElementLong(char ch, af_Parser *parser) {
  249. if (ch == '|') { // 结束符
  250. setLexicalLast(lex_element_long_end, TK_ELEMENT_LONG, parser);
  251. return 1;
  252. } else if (ch == NUL) {
  253. printLexicalError(INCOMPLETE_FILE(lex_element_long), parser);
  254. return -2;
  255. }
  256. parser->lexical->status = lex_element_long;
  257. return 1;
  258. }
  259. /*
  260. * 状态机图:
  261. * [lex_element_long_end]
  262. * -> | -> (lex_element_long)
  263. * -> other -> [lex_element_long_end] # return -1
  264. */
  265. static int doneElementLongEnd(char ch, af_Parser *parser) {
  266. if (ch == '|') { // ||表示非结束
  267. parser->lexical->status = lex_element_long;
  268. return 1;
  269. }
  270. parser->lexical->status = lex_element_long_end;
  271. return -1;
  272. }
  273. /*
  274. * 状态机图:
  275. * [lex_element_short]
  276. * -> !strchr("!@#([{}]);", ch) && isgraph(ch) -> (lex_element_short)
  277. * -> other -> (lex_element_short) # return -1
  278. */
  279. static int doneElementShort(char ch, af_Parser *parser) {
  280. if (!strchr("!@#([{}]);", ch) && isgraph(ch)) { // 除空格外的可见字符 (不包括NUL)
  281. setLexicalLast(lex_element_short, TK_ELEMENT_SHORT, parser);
  282. return 1;
  283. }
  284. parser->lexical->status = lex_element_short;
  285. return -1;
  286. }
  287. /*
  288. * 状态机图:
  289. * [lex_space]
  290. * -> ch != NUL && (iscntrl(ch) || isspace(ch)) -> (lex_space)
  291. * -> other -> (lex_space) # return -1
  292. */
  293. static int doneSpace(char ch, af_Parser *parser) {
  294. if (ch != NUL && (iscntrl(ch) || isspace(ch))) {
  295. setLexicalLast(lex_space, TK_SPACE, parser);
  296. return 1;
  297. }
  298. parser->lexical->status = lex_space;
  299. return -1;
  300. }
  301. /*
  302. * 函数名: getTokenFromLexical
  303. * 目标: 获取Lexical的TokenType以及相关值
  304. */
  305. af_TokenType getTokenFromLexical(char **text, af_Parser *parser) {
  306. af_TokenType tt;
  307. int re;
  308. parser->lexical->status = lex_begin;
  309. parser->lexical->last = 0;
  310. if (parser->lexical->is_end) {
  311. *text = NULL;
  312. return TK_EOF;
  313. }
  314. while (1) {
  315. char ch = getChar(parser->reader);
  316. if (iscntrl(ch) && !isspace(ch))
  317. printLexicalWarning(INCULDE_CONTROL(base), parser);
  318. switch (parser->lexical->status) {
  319. case lex_begin:
  320. re = doneBegin(ch, parser);
  321. break;
  322. case lex_prefix_block_p:
  323. case lex_prefix_block_b:
  324. case lex_prefix_block_c:
  325. re = donePrefixBlock(ch, parser);
  326. break;
  327. case lex_comment_before:
  328. re = doneCommentBefore(ch, parser);
  329. break;
  330. case lex_element_long:
  331. re = doneElementLong(ch, parser);
  332. break;
  333. case lex_mutli_comment:
  334. re = doneMutliComment(ch, parser);
  335. break;
  336. case lex_uni_comment:
  337. re = doneUniComment(ch, parser);
  338. break;
  339. case lex_mutli_comment_end_before:
  340. re = doneMutliCommentBeforeEnd(ch, parser);
  341. break;
  342. case lex_space:
  343. re = doneSpace(ch, parser);
  344. break;
  345. case lex_element_short:
  346. re = doneElementShort(ch, parser);
  347. break;
  348. case lex_element_long_end:
  349. re = doneElementLongEnd(ch, parser);
  350. break;
  351. default:
  352. printLexicalError(SYS_ERROR_STATUS(base), parser);
  353. re = -3;
  354. break;
  355. }
  356. if (re == -1) {
  357. char *word = readWord(parser->lexical->last, parser->reader);
  358. tt = parser->lexical->token;
  359. if (tt == TK_ELEMENT_SHORT)
  360. *text = word;
  361. else if (tt == TK_ELEMENT_LONG) {
  362. char *new = NEW_STR(STR_LEN(word) - 2); // 去除收尾|
  363. bool flat = false;
  364. char *p = word + 1;
  365. size_t count = 0;
  366. for(NULL; *p != NUL; p++) {
  367. if (*p == '|' && !flat) { // 跳过第一个 `|`, 如果是末尾|则自然跳过, 若不是则在遇到第二个`|`时写入数据
  368. flat = true; /* count不需要递增 */
  369. continue;
  370. } else if (*p != '|' && flat) // 遇到错误
  371. break;
  372. else
  373. flat = false;
  374. new[count] = *p;
  375. count++;
  376. }
  377. *text = strCopy(new);
  378. free(new);
  379. } else
  380. free(word);
  381. if (tt == TK_SPACE || tt == TK_COMMENT) {
  382. parser->lexical->status = lex_begin;
  383. parser->lexical->last = 0;
  384. continue;
  385. } else if (tt == TK_EOF)
  386. parser->lexical->is_end = true;
  387. break;
  388. } else if (re == -2 || re == -3) {
  389. tt = TK_ERROR;
  390. *text = NULL;
  391. break;
  392. }
  393. }
  394. return tt;
  395. }