tokenize.js 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682
  1. import {isIdentifierStart, isIdentifierChar} from "./identifier"
  2. import {types as tt, keywords as keywordTypes} from "./tokentype"
  3. import {Parser} from "./state"
  4. import {SourceLocation} from "./locutil"
  5. import {lineBreak, lineBreakG, isNewLine, nonASCIIwhitespace} from "./whitespace"
  6. // Object type used to represent tokens. Note that normally, tokens
  7. // simply exist as properties on the parser object. This is only
  8. // used for the onToken callback and the external tokenizer.
  9. export class Token {
  10. constructor(p) {
  11. this.type = p.type
  12. this.value = p.value
  13. this.start = p.start
  14. this.end = p.end
  15. if (p.options.locations)
  16. this.loc = new SourceLocation(p, p.startLoc, p.endLoc)
  17. if (p.options.ranges)
  18. this.range = [p.start, p.end]
  19. }
  20. }
  21. // ## Tokenizer
  22. const pp = Parser.prototype
  23. // Are we running under Rhino?
  24. const isRhino = typeof Packages == "object" && Object.prototype.toString.call(Packages) == "[object JavaPackage]"
  25. // Move to the next token
  26. pp.next = function() {
  27. if (this.options.onToken)
  28. this.options.onToken(new Token(this))
  29. this.lastTokEnd = this.end
  30. this.lastTokStart = this.start
  31. this.lastTokEndLoc = this.endLoc
  32. this.lastTokStartLoc = this.startLoc
  33. this.nextToken()
  34. }
  35. pp.getToken = function() {
  36. this.next()
  37. return new Token(this)
  38. }
  39. // If we're in an ES6 environment, make parsers iterable
  40. if (typeof Symbol !== "undefined")
  41. pp[Symbol.iterator] = function () {
  42. let self = this
  43. return {next: function () {
  44. let token = self.getToken()
  45. return {
  46. done: token.type === tt.eof,
  47. value: token
  48. }
  49. }}
  50. }
  51. // Toggle strict mode. Re-reads the next number or string to please
  52. // pedantic tests (`"use strict"; 010;` should fail).
  53. pp.setStrict = function(strict) {
  54. this.strict = strict
  55. if (this.type !== tt.num && this.type !== tt.string) return
  56. this.pos = this.start
  57. if (this.options.locations) {
  58. while (this.pos < this.lineStart) {
  59. this.lineStart = this.input.lastIndexOf("\n", this.lineStart - 2) + 1
  60. --this.curLine
  61. }
  62. }
  63. this.nextToken()
  64. }
  65. pp.curContext = function() {
  66. return this.context[this.context.length - 1]
  67. }
  68. // Read a single token, updating the parser object's token-related
  69. // properties.
  70. pp.nextToken = function() {
  71. let curContext = this.curContext()
  72. if (!curContext || !curContext.preserveSpace) this.skipSpace()
  73. this.start = this.pos
  74. if (this.options.locations) this.startLoc = this.curPosition()
  75. if (this.pos >= this.input.length) return this.finishToken(tt.eof)
  76. if (curContext.override) return curContext.override(this)
  77. else this.readToken(this.fullCharCodeAtPos())
  78. }
  79. pp.readToken = function(code) {
  80. // Identifier or keyword. '\uXXXX' sequences are allowed in
  81. // identifiers, so '\' also dispatches to that.
  82. if (isIdentifierStart(code, this.options.ecmaVersion >= 6) || code === 92 /* '\' */)
  83. return this.readWord()
  84. return this.getTokenFromCode(code)
  85. }
  86. pp.fullCharCodeAtPos = function() {
  87. let code = this.input.charCodeAt(this.pos)
  88. if (code <= 0xd7ff || code >= 0xe000) return code
  89. let next = this.input.charCodeAt(this.pos + 1)
  90. return (code << 10) + next - 0x35fdc00
  91. }
  92. pp.skipBlockComment = function() {
  93. let startLoc = this.options.onComment && this.curPosition()
  94. let start = this.pos, end = this.input.indexOf("*/", this.pos += 2)
  95. if (end === -1) this.raise(this.pos - 2, "Unterminated comment")
  96. this.pos = end + 2
  97. if (this.options.locations) {
  98. lineBreakG.lastIndex = start
  99. let match
  100. while ((match = lineBreakG.exec(this.input)) && match.index < this.pos) {
  101. ++this.curLine
  102. this.lineStart = match.index + match[0].length
  103. }
  104. }
  105. if (this.options.onComment)
  106. this.options.onComment(true, this.input.slice(start + 2, end), start, this.pos,
  107. startLoc, this.curPosition())
  108. }
  109. pp.skipLineComment = function(startSkip) {
  110. let start = this.pos
  111. let startLoc = this.options.onComment && this.curPosition()
  112. let ch = this.input.charCodeAt(this.pos+=startSkip)
  113. while (this.pos < this.input.length && ch !== 10 && ch !== 13 && ch !== 8232 && ch !== 8233) {
  114. ++this.pos
  115. ch = this.input.charCodeAt(this.pos)
  116. }
  117. if (this.options.onComment)
  118. this.options.onComment(false, this.input.slice(start + startSkip, this.pos), start, this.pos,
  119. startLoc, this.curPosition())
  120. }
  121. // Called at the start of the parse and after every token. Skips
  122. // whitespace and comments, and.
  123. pp.skipSpace = function() {
  124. loop: while (this.pos < this.input.length) {
  125. let ch = this.input.charCodeAt(this.pos)
  126. switch (ch) {
  127. case 32: case 160: // ' '
  128. ++this.pos
  129. break
  130. case 13:
  131. if (this.input.charCodeAt(this.pos + 1) === 10) {
  132. ++this.pos
  133. }
  134. case 10: case 8232: case 8233:
  135. ++this.pos
  136. if (this.options.locations) {
  137. ++this.curLine
  138. this.lineStart = this.pos
  139. }
  140. break
  141. case 47: // '/'
  142. switch (this.input.charCodeAt(this.pos + 1)) {
  143. case 42: // '*'
  144. this.skipBlockComment()
  145. break
  146. case 47:
  147. this.skipLineComment(2)
  148. break
  149. default:
  150. break loop
  151. }
  152. break
  153. default:
  154. if (ch > 8 && ch < 14 || ch >= 5760 && nonASCIIwhitespace.test(String.fromCharCode(ch))) {
  155. ++this.pos
  156. } else {
  157. break loop
  158. }
  159. }
  160. }
  161. }
  162. // Called at the end of every token. Sets `end`, `val`, and
  163. // maintains `context` and `exprAllowed`, and skips the space after
  164. // the token, so that the next one's `start` will point at the
  165. // right position.
  166. pp.finishToken = function(type, val) {
  167. this.end = this.pos
  168. if (this.options.locations) this.endLoc = this.curPosition()
  169. let prevType = this.type
  170. this.type = type
  171. this.value = val
  172. this.updateContext(prevType)
  173. }
  174. // ### Token reading
  175. // This is the function that is called to fetch the next token. It
  176. // is somewhat obscure, because it works in character codes rather
  177. // than characters, and because operator parsing has been inlined
  178. // into it.
  179. //
  180. // All in the name of speed.
  181. //
  182. pp.readToken_dot = function() {
  183. let next = this.input.charCodeAt(this.pos + 1)
  184. if (next >= 48 && next <= 57) return this.readNumber(true)
  185. let next2 = this.input.charCodeAt(this.pos + 2)
  186. if (this.options.ecmaVersion >= 6 && next === 46 && next2 === 46) { // 46 = dot '.'
  187. this.pos += 3
  188. return this.finishToken(tt.ellipsis)
  189. } else {
  190. ++this.pos
  191. return this.finishToken(tt.dot)
  192. }
  193. }
  194. pp.readToken_slash = function() { // '/'
  195. let next = this.input.charCodeAt(this.pos + 1)
  196. if (this.exprAllowed) {++this.pos; return this.readRegexp();}
  197. if (next === 61) return this.finishOp(tt.assign, 2)
  198. return this.finishOp(tt.slash, 1)
  199. }
  200. pp.readToken_mult_modulo = function(code) { // '%*'
  201. let next = this.input.charCodeAt(this.pos + 1)
  202. if (next === 61) return this.finishOp(tt.assign, 2)
  203. return this.finishOp(code === 42 ? tt.star : tt.modulo, 1)
  204. }
  205. pp.readToken_pipe_amp = function(code) { // '|&'
  206. let next = this.input.charCodeAt(this.pos + 1)
  207. if (next === code) return this.finishOp(code === 124 ? tt.logicalOR : tt.logicalAND, 2)
  208. if (next === 61) return this.finishOp(tt.assign, 2)
  209. return this.finishOp(code === 124 ? tt.bitwiseOR : tt.bitwiseAND, 1)
  210. }
  211. pp.readToken_caret = function() { // '^'
  212. let next = this.input.charCodeAt(this.pos + 1)
  213. if (next === 61) return this.finishOp(tt.assign, 2)
  214. return this.finishOp(tt.bitwiseXOR, 1)
  215. }
  216. pp.readToken_plus_min = function(code) { // '+-'
  217. let next = this.input.charCodeAt(this.pos + 1)
  218. if (next === code) {
  219. if (next == 45 && this.input.charCodeAt(this.pos + 2) == 62 &&
  220. lineBreak.test(this.input.slice(this.lastTokEnd, this.pos))) {
  221. // A `-->` line comment
  222. this.skipLineComment(3)
  223. this.skipSpace()
  224. return this.nextToken()
  225. }
  226. return this.finishOp(tt.incDec, 2)
  227. }
  228. if (next === 61) return this.finishOp(tt.assign, 2)
  229. return this.finishOp(tt.plusMin, 1)
  230. }
  231. pp.readToken_lt_gt = function(code) { // '<>'
  232. let next = this.input.charCodeAt(this.pos + 1)
  233. let size = 1
  234. if (next === code) {
  235. size = code === 62 && this.input.charCodeAt(this.pos + 2) === 62 ? 3 : 2
  236. if (this.input.charCodeAt(this.pos + size) === 61) return this.finishOp(tt.assign, size + 1)
  237. return this.finishOp(tt.bitShift, size)
  238. }
  239. if (next == 33 && code == 60 && this.input.charCodeAt(this.pos + 2) == 45 &&
  240. this.input.charCodeAt(this.pos + 3) == 45) {
  241. if (this.inModule) this.unexpected()
  242. // `<!--`, an XML-style comment that should be interpreted as a line comment
  243. this.skipLineComment(4)
  244. this.skipSpace()
  245. return this.nextToken()
  246. }
  247. if (next === 61)
  248. size = this.input.charCodeAt(this.pos + 2) === 61 ? 3 : 2
  249. return this.finishOp(tt.relational, size)
  250. }
  251. pp.readToken_eq_excl = function(code) { // '=!'
  252. let next = this.input.charCodeAt(this.pos + 1)
  253. if (next === 61) return this.finishOp(tt.equality, this.input.charCodeAt(this.pos + 2) === 61 ? 3 : 2)
  254. if (code === 61 && next === 62 && this.options.ecmaVersion >= 6) { // '=>'
  255. this.pos += 2
  256. return this.finishToken(tt.arrow)
  257. }
  258. return this.finishOp(code === 61 ? tt.eq : tt.prefix, 1)
  259. }
  260. pp.getTokenFromCode = function(code) {
  261. switch (code) {
  262. // The interpretation of a dot depends on whether it is followed
  263. // by a digit or another two dots.
  264. case 46: // '.'
  265. return this.readToken_dot()
  266. // Punctuation tokens.
  267. case 40: ++this.pos; return this.finishToken(tt.parenL)
  268. case 41: ++this.pos; return this.finishToken(tt.parenR)
  269. case 59: ++this.pos; return this.finishToken(tt.semi)
  270. case 44: ++this.pos; return this.finishToken(tt.comma)
  271. case 91: ++this.pos; return this.finishToken(tt.bracketL)
  272. case 93: ++this.pos; return this.finishToken(tt.bracketR)
  273. case 123: ++this.pos; return this.finishToken(tt.braceL)
  274. case 125: ++this.pos; return this.finishToken(tt.braceR)
  275. case 58: ++this.pos; return this.finishToken(tt.colon)
  276. case 63: ++this.pos; return this.finishToken(tt.question)
  277. case 96: // '`'
  278. if (this.options.ecmaVersion < 6) break
  279. ++this.pos
  280. return this.finishToken(tt.backQuote)
  281. case 48: // '0'
  282. let next = this.input.charCodeAt(this.pos + 1)
  283. if (next === 120 || next === 88) return this.readRadixNumber(16); // '0x', '0X' - hex number
  284. if (this.options.ecmaVersion >= 6) {
  285. if (next === 111 || next === 79) return this.readRadixNumber(8); // '0o', '0O' - octal number
  286. if (next === 98 || next === 66) return this.readRadixNumber(2); // '0b', '0B' - binary number
  287. }
  288. // Anything else beginning with a digit is an integer, octal
  289. // number, or float.
  290. case 49: case 50: case 51: case 52: case 53: case 54: case 55: case 56: case 57: // 1-9
  291. return this.readNumber(false)
  292. // Quotes produce strings.
  293. case 34: case 39: // '"', "'"
  294. return this.readString(code)
  295. // Operators are parsed inline in tiny state machines. '=' (61) is
  296. // often referred to. `finishOp` simply skips the amount of
  297. // characters it is given as second argument, and returns a token
  298. // of the type given by its first argument.
  299. case 47: // '/'
  300. return this.readToken_slash()
  301. case 37: case 42: // '%*'
  302. return this.readToken_mult_modulo(code)
  303. case 124: case 38: // '|&'
  304. return this.readToken_pipe_amp(code)
  305. case 94: // '^'
  306. return this.readToken_caret()
  307. case 43: case 45: // '+-'
  308. return this.readToken_plus_min(code)
  309. case 60: case 62: // '<>'
  310. return this.readToken_lt_gt(code)
  311. case 61: case 33: // '=!'
  312. return this.readToken_eq_excl(code)
  313. case 126: // '~'
  314. return this.finishOp(tt.prefix, 1)
  315. }
  316. this.raise(this.pos, "Unexpected character '" + codePointToString(code) + "'")
  317. }
  318. pp.finishOp = function(type, size) {
  319. let str = this.input.slice(this.pos, this.pos + size)
  320. this.pos += size
  321. return this.finishToken(type, str)
  322. }
// ### Regular expressions
// Reading a regular expression literal needs some context-awareness,
// since a '/' inside a '[]' set does not end the expression.
  325. function tryCreateRegexp(src, flags, throwErrorAt, parser) {
  326. try {
  327. return new RegExp(src, flags);
  328. } catch (e) {
  329. if (throwErrorAt !== undefined) {
  330. if (e instanceof SyntaxError) parser.raise(throwErrorAt, "Error parsing regular expression: " + e.message)
  331. throw e
  332. }
  333. }
  334. }
  335. var regexpUnicodeSupport = !!tryCreateRegexp("\uffff", "u");
  336. pp.readRegexp = function() {
  337. let escaped, inClass, start = this.pos
  338. for (;;) {
  339. if (this.pos >= this.input.length) this.raise(start, "Unterminated regular expression")
  340. let ch = this.input.charAt(this.pos)
  341. if (lineBreak.test(ch)) this.raise(start, "Unterminated regular expression")
  342. if (!escaped) {
  343. if (ch === "[") inClass = true
  344. else if (ch === "]" && inClass) inClass = false
  345. else if (ch === "/" && !inClass) break
  346. escaped = ch === "\\"
  347. } else escaped = false
  348. ++this.pos
  349. }
  350. let content = this.input.slice(start, this.pos)
  351. ++this.pos
  352. // Need to use `readWord1` because '\uXXXX' sequences are allowed
  353. // here (don't ask).
  354. let mods = this.readWord1()
  355. let tmp = content
  356. if (mods) {
  357. let validFlags = /^[gim]*$/
  358. if (this.options.ecmaVersion >= 6) validFlags = /^[gimuy]*$/
  359. if (!validFlags.test(mods)) this.raise(start, "Invalid regular expression flag")
  360. if (mods.indexOf('u') >= 0 && !regexpUnicodeSupport) {
  361. // Replace each astral symbol and every Unicode escape sequence that
  362. // possibly represents an astral symbol or a paired surrogate with a
  363. // single ASCII symbol to avoid throwing on regular expressions that
  364. // are only valid in combination with the `/u` flag.
  365. // Note: replacing with the ASCII symbol `x` might cause false
  366. // negatives in unlikely scenarios. For example, `[\u{61}-b]` is a
  367. // perfectly valid pattern that is equivalent to `[a-b]`, but it would
  368. // be replaced by `[x-b]` which throws an error.
  369. tmp = tmp.replace(/\\u\{([0-9a-fA-F]+)\}/g, (_match, code, offset) => {
  370. code = Number("0x" + code)
  371. if (code > 0x10FFFF) this.raise(start + offset + 3, "Code point out of bounds")
  372. return "x"
  373. });
  374. tmp = tmp.replace(/\\u([a-fA-F0-9]{4})|[\uD800-\uDBFF][\uDC00-\uDFFF]/g, "x")
  375. }
  376. }
  377. // Detect invalid regular expressions.
  378. let value = null
  379. // Rhino's regular expression parser is flaky and throws uncatchable exceptions,
  380. // so don't do detection if we are running under Rhino
  381. if (!isRhino) {
  382. tryCreateRegexp(tmp, undefined, start, this);
  383. // Get a regular expression object for this pattern-flag pair, or `null` in
  384. // case the current environment doesn't support the flags it uses.
  385. value = tryCreateRegexp(content, mods)
  386. }
  387. return this.finishToken(tt.regexp, {pattern: content, flags: mods, value: value})
  388. }
  389. // Read an integer in the given radix. Return null if zero digits
  390. // were read, the integer value otherwise. When `len` is given, this
  391. // will return `null` unless the integer has exactly `len` digits.
  392. pp.readInt = function(radix, len) {
  393. let start = this.pos, total = 0
  394. for (let i = 0, e = len == null ? Infinity : len; i < e; ++i) {
  395. let code = this.input.charCodeAt(this.pos), val
  396. if (code >= 97) val = code - 97 + 10; // a
  397. else if (code >= 65) val = code - 65 + 10; // A
  398. else if (code >= 48 && code <= 57) val = code - 48; // 0-9
  399. else val = Infinity
  400. if (val >= radix) break
  401. ++this.pos
  402. total = total * radix + val
  403. }
  404. if (this.pos === start || len != null && this.pos - start !== len) return null
  405. return total
  406. }
  407. pp.readRadixNumber = function(radix) {
  408. this.pos += 2; // 0x
  409. let val = this.readInt(radix)
  410. if (val == null) this.raise(this.start + 2, "Expected number in radix " + radix)
  411. if (isIdentifierStart(this.fullCharCodeAtPos())) this.raise(this.pos, "Identifier directly after number")
  412. return this.finishToken(tt.num, val)
  413. }
  414. // Read an integer, octal integer, or floating-point number.
  415. pp.readNumber = function(startsWithDot) {
  416. let start = this.pos, isFloat = false, octal = this.input.charCodeAt(this.pos) === 48
  417. if (!startsWithDot && this.readInt(10) === null) this.raise(start, "Invalid number")
  418. let next = this.input.charCodeAt(this.pos)
  419. if (next === 46) { // '.'
  420. ++this.pos
  421. this.readInt(10)
  422. isFloat = true
  423. next = this.input.charCodeAt(this.pos)
  424. }
  425. if (next === 69 || next === 101) { // 'eE'
  426. next = this.input.charCodeAt(++this.pos)
  427. if (next === 43 || next === 45) ++this.pos; // '+-'
  428. if (this.readInt(10) === null) this.raise(start, "Invalid number")
  429. isFloat = true
  430. }
  431. if (isIdentifierStart(this.fullCharCodeAtPos())) this.raise(this.pos, "Identifier directly after number")
  432. let str = this.input.slice(start, this.pos), val
  433. if (isFloat) val = parseFloat(str)
  434. else if (!octal || str.length === 1) val = parseInt(str, 10)
  435. else if (/[89]/.test(str) || this.strict) this.raise(start, "Invalid number")
  436. else val = parseInt(str, 8)
  437. return this.finishToken(tt.num, val)
  438. }
// ### Strings and escapes
// Helpers for reading string values and interpreting backslash-escapes.
  440. pp.readCodePoint = function() {
  441. let ch = this.input.charCodeAt(this.pos), code
  442. if (ch === 123) {
  443. if (this.options.ecmaVersion < 6) this.unexpected()
  444. let codePos = ++this.pos
  445. code = this.readHexChar(this.input.indexOf('}', this.pos) - this.pos)
  446. ++this.pos
  447. if (code > 0x10FFFF) this.raise(codePos, "Code point out of bounds")
  448. } else {
  449. code = this.readHexChar(4)
  450. }
  451. return code
  452. }
  453. function codePointToString(code) {
  454. // UTF-16 Decoding
  455. if (code <= 0xFFFF) return String.fromCharCode(code)
  456. code -= 0x10000
  457. return String.fromCharCode((code >> 10) + 0xD800, (code & 1023) + 0xDC00)
  458. }
  459. pp.readString = function(quote) {
  460. let out = "", chunkStart = ++this.pos
  461. for (;;) {
  462. if (this.pos >= this.input.length) this.raise(this.start, "Unterminated string constant")
  463. let ch = this.input.charCodeAt(this.pos)
  464. if (ch === quote) break
  465. if (ch === 92) { // '\'
  466. out += this.input.slice(chunkStart, this.pos)
  467. out += this.readEscapedChar(false)
  468. chunkStart = this.pos
  469. } else {
  470. if (isNewLine(ch)) this.raise(this.start, "Unterminated string constant")
  471. ++this.pos
  472. }
  473. }
  474. out += this.input.slice(chunkStart, this.pos++)
  475. return this.finishToken(tt.string, out)
  476. }
  477. // Reads template string tokens.
  478. pp.readTmplToken = function() {
  479. let out = "", chunkStart = this.pos
  480. for (;;) {
  481. if (this.pos >= this.input.length) this.raise(this.start, "Unterminated template")
  482. let ch = this.input.charCodeAt(this.pos)
  483. if (ch === 96 || ch === 36 && this.input.charCodeAt(this.pos + 1) === 123) { // '`', '${'
  484. if (this.pos === this.start && this.type === tt.template) {
  485. if (ch === 36) {
  486. this.pos += 2
  487. return this.finishToken(tt.dollarBraceL)
  488. } else {
  489. ++this.pos
  490. return this.finishToken(tt.backQuote)
  491. }
  492. }
  493. out += this.input.slice(chunkStart, this.pos)
  494. return this.finishToken(tt.template, out)
  495. }
  496. if (ch === 92) { // '\'
  497. out += this.input.slice(chunkStart, this.pos)
  498. out += this.readEscapedChar(true)
  499. chunkStart = this.pos
  500. } else if (isNewLine(ch)) {
  501. out += this.input.slice(chunkStart, this.pos)
  502. ++this.pos
  503. switch (ch) {
  504. case 13:
  505. if (this.input.charCodeAt(this.pos) === 10) ++this.pos;
  506. case 10:
  507. out += "\n";
  508. break;
  509. default:
  510. out += String.fromCharCode(ch);
  511. break;
  512. }
  513. if (this.options.locations) {
  514. ++this.curLine
  515. this.lineStart = this.pos
  516. }
  517. chunkStart = this.pos
  518. } else {
  519. ++this.pos
  520. }
  521. }
  522. }
  523. // Used to read escaped characters
  524. pp.readEscapedChar = function(inTemplate) {
  525. let ch = this.input.charCodeAt(++this.pos)
  526. ++this.pos
  527. switch (ch) {
  528. case 110: return "\n"; // 'n' -> '\n'
  529. case 114: return "\r"; // 'r' -> '\r'
  530. case 120: return String.fromCharCode(this.readHexChar(2)); // 'x'
  531. case 117: return codePointToString(this.readCodePoint()); // 'u'
  532. case 116: return "\t"; // 't' -> '\t'
  533. case 98: return "\b"; // 'b' -> '\b'
  534. case 118: return "\u000b"; // 'v' -> '\u000b'
  535. case 102: return "\f"; // 'f' -> '\f'
  536. case 13: if (this.input.charCodeAt(this.pos) === 10) ++this.pos; // '\r\n'
  537. case 10: // ' \n'
  538. if (this.options.locations) { this.lineStart = this.pos; ++this.curLine }
  539. return ""
  540. default:
  541. if (ch >= 48 && ch <= 55) {
  542. let octalStr = this.input.substr(this.pos - 1, 3).match(/^[0-7]+/)[0]
  543. let octal = parseInt(octalStr, 8)
  544. if (octal > 255) {
  545. octalStr = octalStr.slice(0, -1)
  546. octal = parseInt(octalStr, 8)
  547. }
  548. if (octalStr !== "0" && (this.strict || inTemplate)) {
  549. this.raise(this.pos - 2, "Octal literal in strict mode")
  550. }
  551. this.pos += octalStr.length - 1
  552. return String.fromCharCode(octal)
  553. }
  554. return String.fromCharCode(ch)
  555. }
  556. }
  557. // Used to read character escape sequences ('\x', '\u', '\U').
  558. pp.readHexChar = function(len) {
  559. let codePos = this.pos
  560. let n = this.readInt(16, len)
  561. if (n === null) this.raise(codePos, "Bad character escape sequence")
  562. return n
  563. }
  564. // Read an identifier, and return it as a string. Sets `this.containsEsc`
  565. // to whether the word contained a '\u' escape.
  566. //
  567. // Incrementally adds only escaped chars, adding other chunks as-is
  568. // as a micro-optimization.
  569. pp.readWord1 = function() {
  570. this.containsEsc = false
  571. let word = "", first = true, chunkStart = this.pos
  572. let astral = this.options.ecmaVersion >= 6
  573. while (this.pos < this.input.length) {
  574. let ch = this.fullCharCodeAtPos()
  575. if (isIdentifierChar(ch, astral)) {
  576. this.pos += ch <= 0xffff ? 1 : 2
  577. } else if (ch === 92) { // "\"
  578. this.containsEsc = true
  579. word += this.input.slice(chunkStart, this.pos)
  580. let escStart = this.pos
  581. if (this.input.charCodeAt(++this.pos) != 117) // "u"
  582. this.raise(this.pos, "Expecting Unicode escape sequence \\uXXXX")
  583. ++this.pos
  584. let esc = this.readCodePoint()
  585. if (!(first ? isIdentifierStart : isIdentifierChar)(esc, astral))
  586. this.raise(escStart, "Invalid Unicode escape")
  587. word += codePointToString(esc)
  588. chunkStart = this.pos
  589. } else {
  590. break
  591. }
  592. first = false
  593. }
  594. return word + this.input.slice(chunkStart, this.pos)
  595. }
  596. // Read an identifier or keyword token. Will check for reserved
  597. // words when necessary.
  598. pp.readWord = function() {
  599. let word = this.readWord1()
  600. let type = tt.name
  601. if ((this.options.ecmaVersion >= 6 || !this.containsEsc) && this.keywords.test(word))
  602. type = keywordTypes[word]
  603. return this.finishToken(type, word)
  604. }