JavaScriptで字句解析
関数の再帰呼び出しを使った簡易の字句解析器です。対応している字句は下記のようになります。
〈対応している字句〉
- 空白文字
- セパレータ(,.:;(){}[])
- 演算子(=<>!&|+-*/%)※2文字連続も可(例: +=, +>, %&)
- 数値(12, 34.56, 78.)
- 文字列("abcde")
- 識別子(先頭は英字: x1, abc123)

上記以外はエラーになります。
lexer.js
/**
 * Reads the run of whitespace characters at the start of the input.
 *
 * @param {string} string - Text to scan from its first character.
 * @returns {string} The leading whitespace, or "" when the input does not
 *   start with whitespace (or is empty).
 */
function getSpace(string) {
  let end = 0;
  // Advance past every consecutive whitespace character.
  while (end < string.length && /[\s]/.test(string[end])) {
    end += 1;
  }
  return string.slice(0, end);
}
/**
 * Reads a single separator character at the start of the input.
 *
 * Separators are one of: , . : ; ( ) { } [ ]
 *
 * @param {string} string - Text to scan from its first character.
 * @returns {string} The one-character separator, or "" when the input does
 *   not start with one (or is empty).
 */
function getSeparator(string) {
  // charAt yields "" for an empty input, which never matches the class.
  const head = string.charAt(0);
  return /[,.:;(){}\[\]]/.test(head) ? head : "";
}
/**
 * Reads an operator of one or two characters at the start of the input.
 *
 * Any of = < > ! & | + - * / % counts as an operator character, and any two
 * of them in a row are accepted as a single operator (e.g. "+=", "&&", "+>").
 *
 * @param {string} string - Text to scan from its first character.
 * @returns {string} The operator (length 1 or 2), or "" when the input does
 *   not start with an operator character (or is empty).
 */
function getOperator(string) {
  const isOperatorChar = (ch) => /[=<>!&|+\-*/%]/.test(ch);

  const first = string.charAt(0);
  if (!isOperatorChar(first)) {
    return "";
  }

  // At most one more operator character is consumed.
  const second = string.charAt(1);
  return isOperatorChar(second) ? first + second : first;
}
/**
 * Reads a numeric literal at the start of the input.
 *
 * The accepted shape is one or more digits, optionally followed by a decimal
 * point and zero or more digits ("12", "34.56", "78."). A second decimal
 * point ends the literal.
 *
 * @param {string} string - Text to scan from its first character.
 * @returns {string} The numeric literal, or "" when the input does not start
 *   with a digit (or is empty).
 */
function getNumber(string) {
  // charAt yields "" past the end of the input, which is never a digit.
  const isDigit = (ch) => /[0-9]/.test(ch);

  if (!isDigit(string.charAt(0))) {
    return "";
  }

  // Integer part.
  let end = 1;
  while (isDigit(string.charAt(end))) {
    end += 1;
  }

  // Optional decimal point and fraction digits.
  if (/[.]/.test(string.charAt(end))) {
    end += 1;
    while (isDigit(string.charAt(end))) {
      end += 1;
    }
  }

  return string.slice(0, end);
}
/**
 * Reads a double-quoted string literal at the start of the input.
 *
 * The literal runs from the opening quote to the next double quote; there is
 * no escape-sequence handling, so the first following `"` always closes it.
 *
 * @param {string} string - Text to scan from its first character.
 * @returns {string} The literal including both quotes, or "" when the input
 *   does not start with a double quote (or is empty).
 * @throws {string} A "syntax error" message (thrown as a plain string) when
 *   the opening quote has no matching closing quote.
 */
function getString(string) {
  if (!/["]/.test(string.charAt(0))) {
    return "";
  }

  // Search for the closing quote after the opening one.
  const closing = string.indexOf('"', 1);
  if (closing === -1) {
    throw `syntax error: " is missing in ${string}.`;
  }

  return string.slice(0, closing + 1);
}
/**
 * Reads an identifier at the start of the input.
 *
 * An identifier is an ASCII letter followed by any number of ASCII letters
 * or digits ("x1", "abc123").
 *
 * @param {string} string - Text to scan from its first character.
 * @returns {string} The identifier, or "" when the input does not start with
 *   a letter (or is empty).
 */
function getIdentifier(string) {
  if (!/[A-Za-z]/.test(string.charAt(0))) {
    return "";
  }

  // charAt yields "" past the end of the input, which stops the scan.
  let end = 1;
  while (/[0-9A-Za-z]/.test(string.charAt(end))) {
    end += 1;
  }
  return string.slice(0, end);
}
/**
 * Splits the input into a flat list of lexemes.
 *
 * Whitespace is skipped; at each position the sub-lexers are tried in the
 * order separator, operator, number, string, identifier, and the first one
 * that yields a non-empty lexeme wins.
 *
 * Errors are handled once, at this entry point: the message is written with
 * console.error and the whole call returns undefined. Tokens read before the
 * error are discarded, so the result is never a partial list with an
 * undefined entry in it.
 *
 * @param {string} string - Source text to tokenise.
 * @returns {string[]|undefined} The lexemes in source order ([] for empty
 *   input), or undefined when a syntax error was reported.
 */
function getTokens(string) {
  // Token-producing lexers, in priority order.
  const lexers = [getSeparator, getOperator, getNumber, getString, getIdentifier];

  // Scans the whole input; throws (a string message) on the first error.
  function tokenize(source) {
    const tokens = [];
    let rest = source;

    while (rest !== "") {
      const space = getSpace(rest);
      if (space !== "") {
        rest = rest.slice(space.length);
        continue;
      }

      // Call each lexer once and keep the first non-empty lexeme.
      let lexeme = "";
      for (const lexer of lexers) {
        lexeme = lexer(rest);
        if (lexeme !== "") {
          break;
        }
      }

      if (lexeme === "") {
        // Thrown as a plain string, like the unterminated-string error from
        // getString, so both are logged in the same form.
        throw `syntax error: ${rest} contains one or more unknown characters.`;
      }

      tokens.push(lexeme);
      rest = rest.slice(lexeme.length);
    }

    return tokens;
  }

  try {
    return tokenize(string);
  } catch (error) {
    console.error(error);
    return undefined;
  }
}
// Demo run: one well-formed input, one unterminated string literal, and one
// input containing characters no lexer accepts.
const sampleInputs = [
  "y0 += 12.34 * x1 + x1y2z3 / 5678. i++ a&&b \"abcde\"",
  "\"abcde",
  "abc@#xyz",
];
for (const sampleInput of sampleInputs) {
  console.log(getTokens(sampleInput));
}
結果
['y0', '+=', '12.34', '*', 'x1', '+', 'x1y2z3', '/', '5678.', 'i', '++', 'a', '&&', 'b', '"abcde"']
syntax error: " is missing in "abcde.
undefined
syntax error: @#xyz contains one or more unknown characters.
['abc', undefined]