JavaScriptで字句解析

関数の再帰呼び出しを使った簡易的な字句解析器です。対応している字句は下記のとおりです。

〈対応している字句〉


- 空白文字
- セパレータ(,.:;(){}[])
- 演算子(= < > ! & | + - * / %)※演算子文字が2つ連続する場合は、組み合わせを問わず1つの演算子として扱います(例:+=, +>, %&)
- 数値(12, 34.56, 78.)
- 文字列("abcde")
- 識別子(先頭は英字:x1, abc123)
上記以外の文字、および閉じの " がない文字列はエラーになります。

 

lexer.js


/**
 * Reads the run of whitespace characters at the start of the input.
 *
 * @param {string} string - Remaining source text.
 * @returns {string} The leading whitespace, or "" when the input is empty
 *   or does not start with whitespace.
 */
function getSpace(string) {
  // An exhausted input or a non-whitespace head ends the run.
  if (string === "" || !/[\s]/.test(string[0])) {
    return "";
  }
  const head = string[0];
  const rest = string.slice(1);
  return head + getSpace(rest);
}
/**
 * Reads a single separator character (one of , . : ; ( ) { } [ ]) from the
 * start of the input.
 *
 * @param {string} string - Remaining source text.
 * @returns {string} The separator character, or "" when the input does not
 *   start with one.
 */
function getSeparator(string) {
  // charAt(0) yields "" for an empty input, which the pattern never matches.
  const head = string.charAt(0);
  return /[,.:;(){}\[\]]/.test(head) ? head : "";
}
/**
 * Reads an operator of one or two characters from the start of the input.
 * Any two consecutive operator characters are combined into one token
 * (e.g. "+=", "+>", "%&").
 *
 * @param {string} string - Remaining source text.
 * @returns {string} The operator, or "" when the input does not start with
 *   an operator character.
 */
function getOperator(string) {
  const operatorChar = /[=<>!&|+\-*/%]/;

  // Returns the first character of `text` when it is an operator character.
  const takeOne = (text) => {
    if (text !== "" && operatorChar.test(text[0])) {
      return text[0];
    }
    return "";
  };

  const first = takeOne(string);
  if (first === "") {
    return "";
  }
  // At most one more operator character is folded into the token.
  return first + takeOne(string.slice(1));
}
/**
 * Reads a numeric literal from the start of the input: one or more digits,
 * optionally followed by "." and zero or more digits (so "12", "34.56" and
 * "78." are all accepted).
 *
 * @param {string} string - Remaining source text.
 * @returns {string} The numeric literal, or "" when the input does not start
 *   with a digit.
 */
function getNumber(string) {
  const digit = /[0-9]/;
  const dot = /[.]/;

  // Digits that follow the decimal point.
  const fraction = (text) => {
    if (text === "" || !digit.test(text[0])) {
      return "";
    }
    return text[0] + fraction(text.slice(1));
  };

  // Remaining integer digits, optionally continued by "." and a fraction.
  const integerTail = (text) => {
    if (text === "") {
      return "";
    }
    const head = text[0];
    const rest = text.slice(1);
    if (digit.test(head)) {
      return head + integerTail(rest);
    }
    if (dot.test(head)) {
      return head + fraction(rest);
    }
    return "";
  };

  if (string === "" || !digit.test(string[0])) {
    return "";
  }
  return string[0] + integerTail(string.slice(1));
}
/**
 * Reads a double-quoted string literal, including both quotes, from the
 * start of the input. No escape sequences are recognised: the literal ends
 * at the next double quote.
 *
 * @param {string} string - Remaining source text.
 * @returns {string} The string literal, or "" when the input does not start
 *   with a double quote.
 * @throws {string} A "syntax error" message (thrown as a plain string) when
 *   the closing double quote is missing.
 */
function getString(string) {
  const quote = /["]/;

  // Consumes characters up to and including the closing quote.
  const body = (rest) => {
    if (rest === "") {
      // Report the whole input that was being scanned, not just the tail.
      throw `syntax error: " is missing in ${string}.`;
    }
    const head = rest[0];
    return quote.test(head) ? head : head + body(rest.slice(1));
  };

  if (string === "" || !quote.test(string[0])) {
    return "";
  }
  return string[0] + body(string.slice(1));
}
/**
 * Reads an identifier from the start of the input: an ASCII letter followed
 * by any number of ASCII letters or digits (e.g. "x1", "abc123").
 *
 * @param {string} string - Remaining source text.
 * @returns {string} The identifier, or "" when the input does not start with
 *   a letter.
 */
function getIdentifier(string) {
  // Letters and digits that follow the leading letter.
  const tail = (text) => {
    if (text === "" || !/[0-9A-Za-z]/.test(text[0])) {
      return "";
    }
    return text[0] + tail(text.slice(1));
  };

  if (string === "" || !/[A-Za-z]/.test(string[0])) {
    return "";
  }
  return string[0] + tail(string.slice(1));
}
/**
 * Splits the input into a list of tokens.
 *
 * Whitespace is skipped. At each position the token readers are tried in
 * this order: separator, operator, number, string, identifier. The first
 * reader that returns a non-empty string supplies the token.
 *
 * Errors are handled once, at this entry point. Catching inside the
 * recursion would swallow the error in an inner call and let the outer
 * calls append `undefined` to a partial token list.
 *
 * @param {string} string - Source text to tokenize.
 * @returns {string[]|undefined} The tokens in source order, or `undefined`
 *   when a syntax error was found (the error is written to console.error).
 */
function getTokens(string) {
  // Order matters: it decides which reader wins when several could match.
  const readers = [getSeparator, getOperator, getNumber, getString, getIdentifier];

  // Recursive worker; lets syntax errors propagate to the single catch below.
  function tokenize(rest) {
    if (rest === "") {
      return [];
    }
    const space = getSpace(rest);
    if (space !== "") {
      return tokenize(rest.slice(space.length));
    }
    for (const read of readers) {
      // Each reader is called once and its result reused for the slice.
      const token = read(rest);
      if (token !== "") {
        return [token].concat(tokenize(rest.slice(token.length)));
      }
    }
    // Thrown as a plain string so console.error prints just the message.
    throw `syntax error: ${rest} contains one or more unknown characters.`;
  }

  try {
    return tokenize(string);
  } catch (error) {
    console.error(error);
    return undefined;
  }
}
// Demo inputs: a valid expression mix, an unterminated string literal, and
// text containing characters that no token rule accepts.
const samples = [
  "y0 += 12.34 * x1 + x1y2z3 / 5678. i++ a&&b \"abcde\"",
  "\"abcde",
  "abc@#xyz",
];
for (const sample of samples) {
  console.log(getTokens(sample));
}
 

 

結果


['y0', '+=', '12.34', '*', 'x1', '+', 'x1y2z3', '/', '5678.', 'i', '++', 'a', '&&', 'b', '"abcde"']
syntax error: " is missing in "abcde.
undefined
syntax error: @#xyz contains one or more unknown characters.
['abc', undefined]