// Forked from tools/josh (611 lines, 20 KiB, TypeScript).
import {Environment} from "./Environment";
import {Directory, File, FileSystem, Path} from "./FileSystem";
import {InputArgs} from "./InputArgs";
import {IllegalArgumentError} from "./Shared";


/**
 * A parser for input strings.
 *
 * The input is tokenized, split into commands on `;` tokens, and each command's tokens are then expanded and
 * interpreted as a command name, options, arguments, and output redirections.
 */
export class InputParser {
    /**
     * The tokenizer to turn the input into tokens with.
     */
    private readonly tokenizer: Tokenizer;
    /**
     * The expander to expand tokens with.
     */
    private readonly expander: Expander;


    /**
     * Constructs a new input parser.
     *
     * Usually, you'll want to use the static `InputParser#create` method instead.
     *
     * @param tokenizer the tokenizer to turn the input into tokens with
     * @param expander the expander to expand tokens with
     */
    constructor(tokenizer: Tokenizer, expander: Expander) {
        this.tokenizer = tokenizer;
        this.expander = expander;
    }

    /**
     * Constructs a new input parser that uses a fresh tokenizer and an expander whose globber works relative to
     * the `cwd` variable of the given environment.
     *
     * @param environment the environment containing the variables to substitute
     * @param fileSystem the file system describing the valid paths to glob
     * @return the new input parser
     */
    static create(environment: Environment, fileSystem: FileSystem): InputParser {
        const globber = new Globber(fileSystem, environment.get("cwd"));

        return new InputParser(new Tokenizer(), new Expander(environment, globber));
    }


    /**
     * Parses the given input string to an array of input arguments to execute.
     *
     * The input is escaped and tokenized, after which every `;` token starts a new command. Commands without
     * any tokens are dropped.
     *
     * @param input the string to parse
     * @return one set of input arguments for each non-empty command in the input
     */
    parseCommands(input: string): InputArgs[] {
        const commands: string[][] = [[]];

        for (const token of this.tokenizer.tokenize(escape(input))) {
            if (token === ";")
                commands.push([]);
            else
                commands[commands.length - 1].push(token);
        }

        return commands
            .filter(tokens => tokens.length !== 0)
            .map(tokens => this.parseCommand(tokens));
    }

    /**
     * Turns a set of tokens into input arguments to execute.
     *
     * Tokens that start with an optional stream number followed by `>` are redirections; all other tokens are
     * expanded and form the command name, the options, and the arguments, in that order.
     *
     * @param tokens the tokens to interpret as a command
     * @return the input arguments described by the tokens
     */
    parseCommand(tokens: string[]): InputArgs {
        const isRedirect = (token: string): boolean => /^[0-9]*>/.test(token);

        const textTokens = tokens
            .filter(it => !isRedirect(it))
            .reduce((acc, it) => acc.concat(this.expander.expand(it)), <string[]> [])
            .map(it => unescape(it));
        // Only real redirections may reach `getRedirectTargets`; quoted or escaped text that merely contains a
        // `>` is ordinary text
        const redirectTokens = tokens
            .filter(it => isRedirect(it))
            .map(it => unescape(it));

        // The command is the first expanded text token, so quotes are stripped and a leading redirection is
        // never mistaken for the command
        const command = textTokens[0] ?? "";
        const [options, args] = this.parseOpts(textTokens.slice(1));
        const outTargets = this.getRedirectTargets(redirectTokens);

        return new InputArgs(command, options, args, outTargets);
    }


    /**
     * Returns the redirect targets described by the given tokens, indexed by output stream number.
     *
     * A token without a stream number describes stream 1. If multiple tokens describe the same stream, the last
     * one wins. Streams that are not redirected have no entry in the returned array. Tokens that do not describe
     * a redirection are ignored.
     *
     * @param tokens an array of tokens of which some tokens may describe a redirect target
     * @return the redirect targets, indexed by stream number
     */
    private getRedirectTargets(tokens: string[]): InputArgs.RedirectTarget[] {
        const targets: InputArgs.RedirectTarget[] = [];

        for (const token of tokens) {
            const arrowIndex = token.indexOf(">");
            if (arrowIndex < 0)
                continue;

            const stream = arrowIndex === 0 ? 1 : parseInt(token.slice(0, arrowIndex), 10);
            if (isNaN(stream))
                continue;

            const target = token.slice(arrowIndex);
            if (target.startsWith(">>"))
                targets[stream] = {type: "append", target: target.slice(2)};
            else
                targets[stream] = {type: "write", target: target.slice(1)};
        }

        return targets;
    }

    /**
     * Parses options and arguments.
     *
     * Leading tokens that start with `-` are options; parsing of options stops at the first token that is not an
     * option, after which all remaining tokens are arguments. The token `--` ends the options and is itself
     * discarded. Tokens such as `-5` or `--`-prefixed tokens whose name contains a digit or a space are treated
     * as arguments. A value can be assigned with `=`; options without a value map to `null`.
     *
     * @param tokens the tokens that form the options and arguments
     * @return the parsed options, and the remaining tokens that form the arguments
     * @throws IllegalArgumentError if a value is assigned to a group of multiple short options
     */
    private parseOpts(tokens: string[]): [InputArgs.Options, string[]] {
        const options: { [key: string]: string | null } = {};

        let i = 0;
        for (; i < tokens.length; i++) {
            const arg = tokens[i];

            if (!arg.startsWith("-"))
                break;
            if (arg === "--") {
                i++;
                break;
            }

            // Everything after the first `=` is the value, including any further `=`s
            const equalsIndex = arg.indexOf("=");
            const name = equalsIndex < 0 ? arg : arg.slice(0, equalsIndex);
            const value = equalsIndex < 0 ? null : arg.slice(equalsIndex + 1);

            if (name.includes(" ") || /[0-9]/.test(name))
                break;

            if (name.startsWith("--")) {
                if (name.slice(2) === "")
                    break;

                options[name] = value;
            } else {
                const keys = name.slice(1);
                if (keys === "")
                    break;

                if (keys.length === 1) {
                    options[name] = value;
                } else {
                    if (value !== null)
                        throw new IllegalArgumentError("Cannot assign value to multiple short options.");

                    for (const key of keys)
                        options[`-${key}`] = value;
                }
            }
        }

        return [options, tokens.slice(i)];
    }
}

/**
 * Turns an input string into a series of tokens.
 */
export class Tokenizer {
    /**
     * Separates the input string into a series of tokens, respecting the semantics of grouping, redirection
     * parameters, etc.
     *
     * Tokens are separated by spaces and by `;`, except inside single quotes, double quotes, or curly braces. A
     * `;` separator is emitted as a token of its own, but never as the first token and never twice in a row. A
     * `>` or `>>` outside a group starts a new token, keeping a directly preceding all-digit token as its stream
     * number, and is joined with the text that follows it, skipping spaces in between.
     *
     * Joining the returned array with spaces in between will give back the input string, disregarding extra
     * whitespaces in between tokens. That is, no bytes are added, removed, or escaped inside tokens.
     *
     * @param input the string to tokenize
     * @return the tokens
     * @throws IllegalArgumentError if the input ends with a lone `\`, contains a `}` without a matching `{`, or
     * ends inside quotes or curly braces
     */
    tokenize(input: string): string[] {
        const tokens: string[] = [];

        let token = "";
        let isInSingleQuotes = false;
        let isInDoubleQuotes = false;
        let isInCurlyBraces = 0;
        // While grouped, separators and redirection characters are plain text
        const isGrouped = (): boolean => isInSingleQuotes || isInDoubleQuotes || isInCurlyBraces > 0;

        for (let i = 0; i < input.length; i++) {
            const char = input[i];
            switch (char) {
                // Escape character
                case "\\": {
                    i++;
                    const nextChar = input[i];
                    if (nextChar === undefined)
                        throw new IllegalArgumentError(
                            "Unexpected end of input. '\\' was used but there was nothing to escape.");

                    // Both characters are kept so that the escape can be interpreted later on
                    token += char + nextChar;
                    break;
                }
                // Grouping
                case "'":
                    if (!isInDoubleQuotes)
                        isInSingleQuotes = !isInSingleQuotes;

                    token += char;
                    break;
                case "\"":
                    if (!isInSingleQuotes)
                        isInDoubleQuotes = !isInDoubleQuotes;

                    token += char;
                    break;
                case "{":
                    if (!isInSingleQuotes && !isInDoubleQuotes)
                        isInCurlyBraces++;

                    token += char;
                    break;
                case "}":
                    if (!isInSingleQuotes && !isInDoubleQuotes) {
                        isInCurlyBraces--;

                        if (isInCurlyBraces < 0)
                            throw new IllegalArgumentError("Unexpected closing '}' without corresponding '{'.");
                    }

                    token += char;
                    break;
                // Separators
                case " ":
                    if (isGrouped()) {
                        token += char;
                        break;
                    }

                    if (token !== "")
                        tokens.push(token);
                    token = "";
                    break;
                case ";":
                    if (isGrouped()) {
                        token += char;
                        break;
                    }

                    if (token !== "")
                        tokens.push(token);
                    // Leading and repeated separators are dropped
                    if (tokens.length !== 0 && tokens[tokens.length - 1] !== ";")
                        tokens.push(char);
                    token = "";
                    break;
                // Redirection
                case ">":
                    if (isGrouped()) {
                        token += char;
                        break;
                    }

                    // An all-digit token right before the `>` is the stream number and stays attached
                    if (token !== "" && !/^[0-9]+$/.test(token)) {
                        tokens.push(token);
                        token = "";
                    }

                    token += ">";
                    if (input[i + 1] === ">") {
                        token += ">";
                        i++;
                    }
                    // Spaces between the operator and its target are skipped
                    while (input[i + 1] === " ")
                        i++;
                    break;
                // Miscellaneous character
                default:
                    token += char;
                    break;
            }
        }
        if (token !== "")
            tokens.push(token);

        if (isInSingleQuotes)
            throw new IllegalArgumentError("Unexpected end of input. Missing closing '.");
        if (isInDoubleQuotes)
            throw new IllegalArgumentError("Unexpected end of input. Missing closing \".");
        if (isInCurlyBraces > 0)
            throw new IllegalArgumentError("Unexpected end of input. Missing closing }.");

        return tokens;
    }
}

/**
 * Expands individual tokens.
 */
export class Expander {
    /**
     * The environment containing the variables to substitute.
     */
    private readonly environment: Environment;
    /**
     * The globber to expand glob patterns with.
     */
    private readonly globber: Globber;


    /**
     * Constructs a new expander.
     *
     * @param environment the environment containing the variables to substitute
     * @param globber the globber to expand glob patterns with
     */
    constructor(environment: Environment, globber: Globber) {
        this.environment = environment;
        this.globber = globber;
    }


    /**
     * Expands environment variables and glob patterns in the given token.
     *
     * In order, the token is processed as follows:
     * * Quotes and curly braces that group text are removed.
     * * Outside quotes, a `\` is removed if it precedes one of `\`, space, `;`, `~`, `$`, `>`, `?`, `*`, `'`,
     *   `"`, `{`, or `}`. Inside quotes, a `\` is removed only if it precedes the kind of quote it is in.
     *   Otherwise, the `\` is kept.
     * * Outside single quotes, `$name` is replaced with the value of the environment variable `name`, or with
     *   the empty string if that variable is not set.
     * * Outside quotes, `*` and `?` are prefixed with `InputParser.EscapeChar` to mark them as glob characters.
     * * An unquoted `~` at the start of the token that is followed by `/` or by nothing is replaced with the
     *   `home` environment variable.
     * * The result is passed to the globber.
     *
     * It is assumed that the given token is valid; for example, its quotes and brackets should match.
     *
     * @param token the valid token to expand
     * @return the tokens that the given token expands to
     * @throws IllegalArgumentError if the token ends with a lone `\`, if a `$` is not followed by a variable
     * name, or if the globber returns no tokens
     */
    expand(token: string): string[] {
        let expandedToken = "";

        let isInSingleQuotes = false;
        let isInDoubleQuotes = false;
        let isInCurlyBraces = 0;
        for (let i = 0; i < token.length; i++) {
            const char = token[i];
            switch (char) {
                // Escape character
                case "\\": {
                    i++;
                    const nextChar = token[i];
                    // Without this check, the literal text "undefined" would end up in the output
                    if (nextChar === undefined)
                        throw new IllegalArgumentError(
                            "Unexpected end of input. '\\' was used but there was nothing to escape.");

                    if (isInSingleQuotes || isInDoubleQuotes) {
                        // Inside quotes, only the surrounding kind of quote can be escaped
                        const escapesQuote =
                            (isInSingleQuotes && nextChar === "'") || (isInDoubleQuotes && nextChar === "\"");
                        expandedToken += escapesQuote ? nextChar : char + nextChar;
                        break;
                    }

                    switch (nextChar) {
                        case "\\":
                        case " ":
                        case ";":
                        case "~":
                        case "$":
                        case ">":
                        case "?":
                        case "*":
                        case "'":
                        case "\"":
                        case "{":
                        case "}":
                            expandedToken += nextChar;
                            break;
                        default:
                            expandedToken += char + nextChar;
                            break;
                    }
                    break;
                }
                // Grouping
                case "'":
                    if (!isInDoubleQuotes)
                        isInSingleQuotes = !isInSingleQuotes;
                    else
                        expandedToken += char;
                    break;
                case "\"":
                    if (!isInSingleQuotes)
                        isInDoubleQuotes = !isInDoubleQuotes;
                    else
                        expandedToken += char;
                    break;
                case "{":
                    if (!isInSingleQuotes && !isInDoubleQuotes)
                        isInCurlyBraces++;
                    else
                        expandedToken += char;
                    break;
                case "}":
                    if (!isInSingleQuotes && !isInDoubleQuotes)
                        isInCurlyBraces--;
                    else
                        expandedToken += char;
                    break;
                // Environment variable
                case "$": {
                    if (isInSingleQuotes) {
                        expandedToken += char;
                        break;
                    }

                    // Consume the longest run of name characters directly after the `$`
                    let key = "";
                    for (; i + 1 < token.length; i++) {
                        const nameChar = token[i + 1];
                        if (!/^[0-9a-z_]+$/i.test(nameChar))
                            break;

                        key += nameChar;
                    }
                    if (key === "")
                        throw new IllegalArgumentError("Missing variable name after '$'.");

                    expandedToken += this.environment.getOrDefault(key, "");
                    break;
                }
                // Glob characters
                case "*":
                case "?":
                    if (isInSingleQuotes || isInDoubleQuotes)
                        expandedToken += char;
                    else
                        expandedToken += InputParser.EscapeChar + char;
                    break;
                // Home directory
                case "~": {
                    const isLeading =
                        !isInSingleQuotes && !isInDoubleQuotes && isInCurlyBraces <= 0 && expandedToken === "";
                    const isWholeSegment = token[i + 1] === undefined || token[i + 1] === "/";

                    expandedToken += isLeading && isWholeSegment ? this.environment.get("home") : char;
                    break;
                }
                // Miscellaneous character
                default:
                    expandedToken += char;
                    break;
            }
        }

        const tokens = this.globber.glob(expandedToken);
        if (tokens.length === 0)
            throw new IllegalArgumentError(`Token '${unescape(expandedToken)}' does not match any files.`);

        return tokens;
    }
}

/**
 * Globs file paths in tokens.
 *
 * Glob characters are recognized only if they are preceded by `InputParser.EscapeChar`: the escape character
 * followed by `?` matches any single character, and the escape character followed by `*` matches any number of
 * characters other than `/`.
 */
export class Globber {
    /**
     * The file system describing the valid paths to glob.
     */
    private readonly fileSystem: FileSystem;
    /**
     * The path to the current working directory to which globbing is relative.
     */
    private readonly cwd: Path;


    /**
     * Constructs a new globber.
     *
     * @param fileSystem the file system describing the valid paths to glob
     * @param cwd the path to the current working directory to which globbing is relative
     */
    constructor(fileSystem: FileSystem, cwd: string) {
        this.fileSystem = fileSystem;
        this.cwd = new Path(cwd);
    }


    /**
     * Returns globbed tokens.
     *
     * A token without glob characters is returned as is. Otherwise, the token is matched against the file
     * system, starting at the root if the token starts with `/` and at the current working directory otherwise.
     *
     * @param token the token to glob
     * @return the given token if it is not a glob pattern, or all paths matching the pattern otherwise, which may
     * be none
     */
    glob(token: string): string[] {
        if (!this.isGlob(token))
            return [token];

        return token.startsWith("/")
            ? this.glob2("/", token.slice(1), new Path("/"))
            : this.glob2("", token, this.cwd);
    }


    /**
     * Recursively traverses the given path according to the glob pattern provided, keeping track of file system
     * location with the given path, and returns all paths that match the glob pattern.
     *
     * Entries whose name starts with a `.` only match pattern parts that also start with a `.`, and vice versa.
     * Returned names have their escape characters escaped, so that the output uses the same escaping as the
     * pattern does.
     *
     * @param history the "de-globbed" pattern until now; must end with a slash in between recursive calls
     * @param glob the glob pattern that is still to be traversed
     * @param path the current location in the file system
     * @return all paths that match the pattern, each prefixed with `history`
     */
    private glob2(history: string, glob: string, path: Path): string[] {
        const dir = this.fileSystem.get(path);
        if (!(dir instanceof Directory))
            return [];

        const slashIndex = glob.indexOf("/");
        const hasSlash = slashIndex >= 0;
        const nextPart = hasSlash ? glob.substring(0, slashIndex) : glob; // excluding /
        const remainder = hasSlash ? glob.substring(slashIndex + 1) : ""; // excluding /

        if (nextPart === ".")
            return this.glob2(history + nextPart + "/", remainder, path);
        if (nextPart === "..")
            return this.glob2(history + nextPart + "/", remainder, path.parent);

        // Compiled once per directory rather than once per entry
        const pattern = this.glob2regex(nextPart);
        const patternIsHidden = nextPart.startsWith(".");

        return Object.keys(dir.nodes)
            .filter(name => pattern.test(name) && name.startsWith(".") === patternIsHidden)
            .map((name): string[] => {
                // The raw name addresses the file system; the escaped name is used for output only
                const escapedName = escape(name);

                if (dir.nodes[name] instanceof File) {
                    // Only match files if there are no more /s to match
                    return hasSlash ? [] : [history + escapedName];
                }

                // Only recurse if there is still recurring to do
                if (remainder !== "")
                    return this.glob2(`${history}${escapedName}/`, remainder, path.getChild(name));

                // Add / depending on user input
                return [history + escapedName + (hasSlash ? "/" : "")];
            })
            .reduce((acc, it) => acc.concat(it), <string[]> []);
    }


    /**
     * Returns `true` if and only if the given glob string uses any special glob characters.
     *
     * That is, if and only if the string contains an escape character that is not itself escaped and that is
     * directly followed by `?` or `*`.
     *
     * @param glob the string to check for globness
     * @return `true` if and only if the given glob string uses any special glob characters
     */
    private isGlob(glob: string): boolean {
        for (let i = 0; i < glob.length; i++) {
            if (glob[i] !== InputParser.EscapeChar)
                continue;

            // Skip over the escaped character so that an escaped escape character is not read as a marker
            i++;
            const nextChar = glob[i];
            if (nextChar === "?" || nextChar === "*")
                return true;
        }

        return false;
    }

    /**
     * Converts a glob string to a regular expression that must match an entire file name.
     *
     * Characters that are special in regular expressions are matched literally, unless they are glob characters
     * marked by a preceding escape character.
     *
     * @param glob the glob string to convert
     * @return the regular expression that matches exactly the names described by the glob string
     * @throws IllegalArgumentError if the glob string ends with an unescaped escape character
     */
    private glob2regex(glob: string): RegExp {
        const regexSpecialChars = "-/\\^$*+?.()|[]{}";
        let regex = "";

        for (let i = 0; i < glob.length; i++) {
            const char = glob[i];
            if (char !== InputParser.EscapeChar) {
                regex += regexSpecialChars.includes(char) ? "\\" + char : char;
                continue;
            }

            i++;
            const nextChar = glob[i];
            if (nextChar === undefined)
                throw new IllegalArgumentError("Unescaped escape character inside input parser.");

            if (nextChar === "?")
                regex += ".";
            else if (nextChar === "*")
                regex += "[^/]*";
            else
                regex += nextChar;
        }

        return new RegExp(`^${regex}$`);
    }
}

/**
 * Constants that belong to the input parser.
 */
export namespace InputParser {
    /**
     * The token used to internally escape characters in the input parser.
     *
     * This is the ASCII "escape" control character.
     */
    export const EscapeChar = "\u001b";
}

/**
 * Escapes all occurrences of the input parser's escape character.
 *
 * Every escape character in the given string is replaced with two escape characters. Note that this function
 * shadows the deprecated global JavaScript function of the same name within this file.
 *
 * @param string the string to escape in
 * @return the given string with each escape character doubled
 */
function escape(string: string): string {
    // Splitting and joining treats the escape character literally, so no regular expression has to be built
    return string.split(InputParser.EscapeChar).join(InputParser.EscapeChar + InputParser.EscapeChar);
}

/**
 * Unescapes all occurrences of the input parser's escape character.
 *
 * Going from left to right, every pair of escape characters in the given string is replaced with a single
 * escape character. Note that this function shadows the deprecated global JavaScript function of the same name
 * within this file.
 *
 * @param string the string to unescape in
 * @return the given string with each pair of escape characters replaced with a single escape character
 */
function unescape(string: string): string {
    // Splitting and joining treats the escape character literally, so no regular expression has to be built
    return string.split(InputParser.EscapeChar + InputParser.EscapeChar).join(InputParser.EscapeChar);
}