scanner.go 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667
  1. package scanner
  2. import (
  3. "bytes"
  4. "errors"
  5. "fmt"
  6. "log"
  7. "os"
  8. "path/filepath"
  9. "strings"
  10. "github.com/wuntsong-org/go-zero-plus/tools/goctlwt/pkg/parser/api/token"
  11. )
  12. const (
  13. initMode mode = iota
  14. // document mode bg
  15. documentHalfOpen
  16. documentOpen
  17. documentHalfClose
  18. documentClose
  19. // document mode end
  20. // string mode bg
  21. stringOpen
  22. stringClose
  23. // string mode end
  24. )
  25. var missingInput = errors.New("missing input")
  26. type mode int
  27. // Scanner is a lexical scanner.
  28. type Scanner struct {
  29. filename string
  30. size int
  31. data []rune
  32. position int // current position in input (points to current char)
  33. readPosition int // current reading position in input (after current char)
  34. ch rune
  35. lines []int
  36. }
  37. // NextToken returns the next token.
  38. func (s *Scanner) NextToken() (token.Token, error) {
  39. s.skipWhiteSpace()
  40. switch s.ch {
  41. case '/':
  42. peekOne := s.peekRune()
  43. switch peekOne {
  44. case '/':
  45. return s.scanLineComment(), nil
  46. case '*':
  47. return s.scanDocument()
  48. default:
  49. return s.newToken(token.QUO), nil
  50. }
  51. case '-':
  52. return s.newToken(token.SUB), nil
  53. case '*':
  54. return s.newToken(token.MUL), nil
  55. case '(':
  56. return s.newToken(token.LPAREN), nil
  57. case '[':
  58. return s.newToken(token.LBRACK), nil
  59. case '{':
  60. return s.newToken(token.LBRACE), nil
  61. case ',':
  62. return s.newToken(token.COMMA), nil
  63. case '.':
  64. position := s.position
  65. peekOne := s.peekRune()
  66. if peekOne != '.' {
  67. return s.newToken(token.DOT), nil
  68. }
  69. s.readRune()
  70. peekOne = s.peekRune()
  71. if peekOne != '.' {
  72. return s.newToken(token.DOT), nil
  73. }
  74. s.readRune()
  75. s.readRune()
  76. return token.Token{
  77. Type: token.ELLIPSIS,
  78. Text: "...",
  79. Position: s.newPosition(position),
  80. }, nil
  81. case ')':
  82. return s.newToken(token.RPAREN), nil
  83. case ']':
  84. return s.newToken(token.RBRACK), nil
  85. case '}':
  86. return s.newToken(token.RBRACE), nil
  87. case ';':
  88. return s.newToken(token.SEMICOLON), nil
  89. case ':':
  90. return s.newToken(token.COLON), nil
  91. case '=':
  92. return s.newToken(token.ASSIGN), nil
  93. case '@':
  94. return s.scanAt()
  95. case '"':
  96. return s.scanString('"', token.STRING)
  97. case '`':
  98. return s.scanString('`', token.RAW_STRING)
  99. case 0:
  100. return token.EofToken, nil
  101. default:
  102. if s.isIdentifierLetter(s.ch) {
  103. return s.scanIdent(), nil
  104. }
  105. if s.isDigit(s.ch) {
  106. return s.scanIntOrDuration(), nil
  107. }
  108. tok := token.NewIllegalToken(s.ch, s.newPosition(s.position))
  109. s.readRune()
  110. return tok, nil
  111. }
  112. }
  113. func (s *Scanner) newToken(tp token.Type) token.Token {
  114. tok := token.Token{
  115. Type: tp,
  116. Text: string(s.ch),
  117. Position: s.positionAt(),
  118. }
  119. s.readRune()
  120. return tok
  121. }
  122. func (s *Scanner) readRune() {
  123. if s.readPosition >= s.size {
  124. s.ch = 0
  125. } else {
  126. s.ch = s.data[s.readPosition]
  127. }
  128. s.position = s.readPosition
  129. s.readPosition += 1
  130. }
  131. func (s *Scanner) peekRune() rune {
  132. if s.readPosition >= s.size {
  133. return 0
  134. }
  135. return s.data[s.readPosition]
  136. }
  137. func (s *Scanner) scanString(delim rune, tp token.Type) (token.Token, error) {
  138. position := s.position
  139. var stringMode = initMode
  140. for {
  141. switch s.ch {
  142. case delim:
  143. switch stringMode {
  144. case initMode:
  145. stringMode = stringOpen
  146. case stringOpen:
  147. stringMode = stringClose
  148. s.readRune()
  149. return token.Token{
  150. Type: tp,
  151. Text: string(s.data[position:s.position]),
  152. Position: s.newPosition(position),
  153. }, nil
  154. }
  155. case 0:
  156. switch stringMode {
  157. case initMode: // assert: dead code
  158. return token.ErrorToken, s.assertExpected(token.EOF, tp)
  159. case stringOpen:
  160. return token.ErrorToken, s.assertExpectedString(token.EOF.String(), string(delim))
  161. case stringClose: // assert: dead code
  162. return token.Token{
  163. Type: tp,
  164. Text: string(s.data[position:s.position]),
  165. Position: s.newPosition(position),
  166. }, nil
  167. }
  168. }
  169. s.readRune()
  170. }
  171. }
  172. func (s *Scanner) scanAt() (token.Token, error) {
  173. position := s.position
  174. peek := s.peekRune()
  175. if !s.isLetter(peek) {
  176. if peek == 0 {
  177. return token.NewIllegalToken(s.ch, s.positionAt()), nil
  178. }
  179. return token.ErrorToken, s.assertExpectedString(string(peek), token.IDENT.String())
  180. }
  181. s.readRune()
  182. letters := s.scanLetterSet()
  183. switch letters {
  184. case "handler":
  185. return token.Token{
  186. Type: token.AT_HANDLER,
  187. Text: "@handler",
  188. Position: s.newPosition(position),
  189. }, nil
  190. case "server":
  191. return token.Token{
  192. Type: token.AT_SERVER,
  193. Text: "@server",
  194. Position: s.newPosition(position),
  195. }, nil
  196. case "doc":
  197. return token.Token{
  198. Type: token.AT_DOC,
  199. Text: "@doc",
  200. Position: s.newPosition(position),
  201. }, nil
  202. default:
  203. return token.ErrorToken, s.assertExpectedString(
  204. "@"+letters,
  205. token.AT_DOC.String(),
  206. token.AT_HANDLER.String(),
  207. token.AT_SERVER.String())
  208. }
  209. }
  210. func (s *Scanner) scanIntOrDuration() token.Token {
  211. position := s.position
  212. for s.isDigit(s.ch) {
  213. s.readRune()
  214. }
  215. switch s.ch {
  216. case 'n', 'µ', 'm', 's', 'h':
  217. return s.scanDuration(position)
  218. default:
  219. return token.Token{
  220. Type: token.INT,
  221. Text: string(s.data[position:s.position]),
  222. Position: s.newPosition(position),
  223. }
  224. }
  225. }
  226. // scanDuration scans a duration literal, for example "1ns", "1µs", "1ms", "1s", "1m", "1h".
  227. func (s *Scanner) scanDuration(bgPos int) token.Token {
  228. switch s.ch {
  229. case 'n':
  230. return s.scanNanosecond(bgPos)
  231. case 'µ':
  232. return s.scanMicrosecond(bgPos)
  233. case 'm':
  234. return s.scanMillisecondOrMinute(bgPos)
  235. case 's':
  236. return s.scanSecond(bgPos)
  237. case 'h':
  238. return s.scanHour(bgPos)
  239. default:
  240. return s.illegalToken()
  241. }
  242. }
  243. func (s *Scanner) scanNanosecond(bgPos int) token.Token {
  244. s.readRune()
  245. if s.ch != 's' {
  246. return s.illegalToken()
  247. }
  248. s.readRune()
  249. return token.Token{
  250. Type: token.DURATION,
  251. Text: string(s.data[bgPos:s.position]),
  252. Position: s.newPosition(bgPos),
  253. }
  254. }
  255. func (s *Scanner) scanMicrosecond(bgPos int) token.Token {
  256. s.readRune()
  257. if s.ch != 's' {
  258. return s.illegalToken()
  259. }
  260. s.readRune()
  261. if !s.isDigit(s.ch) {
  262. return token.Token{
  263. Type: token.DURATION,
  264. Text: string(s.data[bgPos:s.position]),
  265. Position: s.newPosition(bgPos),
  266. }
  267. }
  268. for s.isDigit(s.ch) {
  269. s.readRune()
  270. }
  271. if s.ch != 'n' {
  272. return s.illegalToken()
  273. }
  274. return s.scanNanosecond(bgPos)
  275. }
  276. func (s *Scanner) scanMillisecondOrMinute(bgPos int) token.Token {
  277. s.readRune()
  278. if s.ch != 's' { // minute
  279. if s.ch == 0 || !s.isDigit(s.ch) {
  280. return token.Token{
  281. Type: token.DURATION,
  282. Text: string(s.data[bgPos:s.position]),
  283. Position: s.newPosition(bgPos),
  284. }
  285. }
  286. return s.scanMinute(bgPos)
  287. }
  288. return s.scanMillisecond(bgPos)
  289. }
  290. func (s *Scanner) scanMillisecond(bgPos int) token.Token {
  291. s.readRune()
  292. if !s.isDigit(s.ch) {
  293. return token.Token{
  294. Type: token.DURATION,
  295. Text: string(s.data[bgPos:s.position]),
  296. Position: s.newPosition(bgPos),
  297. }
  298. }
  299. for s.isDigit(s.ch) {
  300. s.readRune()
  301. }
  302. switch s.ch {
  303. case 'n':
  304. return s.scanNanosecond(bgPos)
  305. case 'µ':
  306. return s.scanMicrosecond(bgPos)
  307. default:
  308. return s.illegalToken()
  309. }
  310. }
  311. func (s *Scanner) scanSecond(bgPos int) token.Token {
  312. s.readRune()
  313. if !s.isDigit(s.ch) {
  314. return token.Token{
  315. Type: token.DURATION,
  316. Text: string(s.data[bgPos:s.position]),
  317. Position: s.newPosition(bgPos),
  318. }
  319. }
  320. for s.isDigit(s.ch) {
  321. s.readRune()
  322. }
  323. switch s.ch {
  324. case 'n':
  325. return s.scanNanosecond(bgPos)
  326. case 'µ':
  327. return s.scanMicrosecond(bgPos)
  328. case 'm':
  329. s.readRune()
  330. if s.ch != 's' {
  331. return s.illegalToken()
  332. }
  333. return s.scanMillisecond(bgPos)
  334. default:
  335. return s.illegalToken()
  336. }
  337. }
  338. func (s *Scanner) scanMinute(bgPos int) token.Token {
  339. if !s.isDigit(s.ch) {
  340. return token.Token{
  341. Type: token.DURATION,
  342. Text: string(s.data[bgPos:s.position]),
  343. Position: s.newPosition(bgPos),
  344. }
  345. }
  346. for s.isDigit(s.ch) {
  347. s.readRune()
  348. }
  349. switch s.ch {
  350. case 'n':
  351. return s.scanNanosecond(bgPos)
  352. case 'µ':
  353. return s.scanMicrosecond(bgPos)
  354. case 'm':
  355. s.readRune()
  356. if s.ch != 's' {
  357. return s.illegalToken()
  358. }
  359. return s.scanMillisecond(bgPos)
  360. case 's':
  361. return s.scanSecond(bgPos)
  362. default:
  363. return s.illegalToken()
  364. }
  365. }
  366. func (s *Scanner) scanHour(bgPos int) token.Token {
  367. s.readRune()
  368. if !s.isDigit(s.ch) {
  369. return token.Token{
  370. Type: token.DURATION,
  371. Text: string(s.data[bgPos:s.position]),
  372. Position: s.newPosition(bgPos),
  373. }
  374. }
  375. for s.isDigit(s.ch) {
  376. s.readRune()
  377. }
  378. switch s.ch {
  379. case 'n':
  380. return s.scanNanosecond(bgPos)
  381. case 'µ':
  382. return s.scanMicrosecond(bgPos)
  383. case 'm':
  384. return s.scanMillisecondOrMinute(bgPos)
  385. case 's':
  386. return s.scanSecond(bgPos)
  387. default:
  388. return s.illegalToken()
  389. }
  390. }
  391. func (s *Scanner) illegalToken() token.Token {
  392. tok := token.NewIllegalToken(s.ch, s.newPosition(s.position))
  393. s.readRune()
  394. return tok
  395. }
  396. func (s *Scanner) scanIdent() token.Token {
  397. position := s.position
  398. for s.isIdentifierLetter(s.ch) || s.isDigit(s.ch) {
  399. s.readRune()
  400. }
  401. ident := string(s.data[position:s.position])
  402. if s.ch == ':' {
  403. s.readRune()
  404. return token.Token{
  405. Type: token.KEY,
  406. Text: string(s.data[position:s.position]),
  407. Position: s.newPosition(position),
  408. }
  409. }
  410. if ident == "interface" && s.ch == '{' && s.peekRune() == '}' {
  411. s.readRune()
  412. s.readRune()
  413. return token.Token{
  414. Type: token.ANY,
  415. Text: string(s.data[position:s.position]),
  416. Position: s.newPosition(position),
  417. }
  418. }
  419. return token.Token{
  420. Type: token.IDENT,
  421. Text: ident,
  422. Position: s.newPosition(position),
  423. }
  424. }
  425. func (s *Scanner) scanLetterSet() string {
  426. position := s.position
  427. for s.isLetter(s.ch) {
  428. s.readRune()
  429. }
  430. return string(s.data[position:s.position])
  431. }
  432. func (s *Scanner) scanLineComment() token.Token {
  433. position := s.position
  434. for s.ch != '\n' && s.ch != 0 {
  435. s.readRune()
  436. }
  437. return token.Token{
  438. Type: token.COMMENT,
  439. Text: string(s.data[position:s.position]),
  440. Position: s.newPosition(position),
  441. }
  442. }
  443. func (s *Scanner) scanDocument() (token.Token, error) {
  444. position := s.position
  445. var documentMode = initMode
  446. for {
  447. switch s.ch {
  448. case '*':
  449. switch documentMode {
  450. case documentHalfOpen:
  451. documentMode = documentOpen // /*
  452. case documentOpen, documentHalfClose:
  453. documentMode = documentHalfClose // (?m)\/\*\*+
  454. }
  455. case 0:
  456. switch documentMode {
  457. case initMode, documentHalfOpen: // assert: dead code
  458. return token.ErrorToken, s.assertExpected(token.EOF, token.MUL)
  459. case documentOpen:
  460. return token.ErrorToken, s.assertExpected(token.EOF, token.MUL)
  461. case documentHalfClose:
  462. return token.ErrorToken, s.assertExpected(token.EOF, token.QUO)
  463. }
  464. case '/':
  465. switch documentMode {
  466. case initMode: // /
  467. documentMode = documentHalfOpen
  468. case documentHalfOpen: // assert: dead code
  469. return token.ErrorToken, s.assertExpected(token.QUO, token.MUL)
  470. case documentHalfClose:
  471. documentMode = documentClose // /*\*+*/
  472. s.readRune()
  473. tok := token.Token{
  474. Type: token.DOCUMENT,
  475. Text: string(s.data[position:s.position]),
  476. Position: s.newPosition(position),
  477. }
  478. return tok, nil
  479. }
  480. }
  481. s.readRune()
  482. }
  483. }
  484. func (s *Scanner) assertExpected(actual token.Type, expected ...token.Type) error {
  485. var expects []string
  486. for _, v := range expected {
  487. expects = append(expects, fmt.Sprintf("'%s'", v.String()))
  488. }
  489. text := fmt.Sprint(s.positionAt().String(), " ", fmt.Sprintf(
  490. "expected %s, got '%s'",
  491. strings.Join(expects, " | "),
  492. actual.String(),
  493. ))
  494. return errors.New(text)
  495. }
  496. func (s *Scanner) assertExpectedString(actual string, expected ...string) error {
  497. var expects []string
  498. for _, v := range expected {
  499. expects = append(expects, fmt.Sprintf("'%s'", v))
  500. }
  501. text := fmt.Sprint(s.positionAt().String(), " ", fmt.Sprintf(
  502. "expected %s, got '%s'",
  503. strings.Join(expects, " | "),
  504. actual,
  505. ))
  506. return errors.New(text)
  507. }
  508. func (s *Scanner) positionAt() token.Position {
  509. return s.newPosition(s.position)
  510. }
  511. func (s *Scanner) newPosition(position int) token.Position {
  512. line := s.lineCount()
  513. return token.Position{
  514. Filename: s.filename,
  515. Line: line,
  516. Column: position - s.lines[line-1],
  517. }
  518. }
  519. func (s *Scanner) lineCount() int {
  520. return len(s.lines)
  521. }
  522. func (s *Scanner) skipWhiteSpace() {
  523. for s.isWhiteSpace(s.ch) {
  524. s.readRune()
  525. }
  526. }
  527. func (s *Scanner) isDigit(b rune) bool {
  528. return b >= '0' && b <= '9'
  529. }
  530. func (s *Scanner) isLetter(b rune) bool {
  531. return (b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z')
  532. }
  533. func (s *Scanner) isIdentifierLetter(b rune) bool {
  534. if s.isLetter(b) {
  535. return true
  536. }
  537. return b == '_'
  538. }
  539. func (s *Scanner) isWhiteSpace(b rune) bool {
  540. if b == '\n' {
  541. s.lines = append(s.lines, s.position)
  542. }
  543. return b == ' ' || b == '\t' || b == '\r' || b == '\f' || b == '\v' || b == '\n'
  544. }
  545. // MustNewScanner returns a new scanner for the given filename and data.
  546. func MustNewScanner(filename string, src interface{}) *Scanner {
  547. sc, err := NewScanner(filename, src)
  548. if err != nil {
  549. log.Fatalln(err)
  550. }
  551. return sc
  552. }
  553. // NewScanner returns a new scanner for the given filename and data.
  554. func NewScanner(filename string, src interface{}) (*Scanner, error) {
  555. data, err := readData(filename, src)
  556. if err != nil {
  557. return nil, err
  558. }
  559. if len(data) == 0 {
  560. return nil, missingInput
  561. }
  562. var runeList []rune
  563. for _, r := range string(data) {
  564. runeList = append(runeList, r)
  565. }
  566. filename = filepath.Base(filename)
  567. s := &Scanner{
  568. filename: filename,
  569. size: len(runeList),
  570. data: runeList,
  571. lines: []int{-1},
  572. readPosition: 0,
  573. }
  574. s.readRune()
  575. return s, nil
  576. }
  577. func readData(filename string, src interface{}) ([]byte, error) {
  578. data, err := os.ReadFile(filename)
  579. if err == nil {
  580. return data, nil
  581. }
  582. switch v := src.(type) {
  583. case []byte:
  584. data = append(data, v...)
  585. case *bytes.Buffer:
  586. data = v.Bytes()
  587. case string:
  588. data = []byte(v)
  589. default:
  590. return nil, fmt.Errorf("unsupported type: %T", src)
  591. }
  592. return data, nil
  593. }