forked from tools/josh

Transparently use escape characters in parser

Fixes #68
Florine W. Dekker 2019-11-11 22:37:34 +01:00
parent fea0235d4a
commit a02130585e
Signed by: FWDekker
GPG Key ID: B1B567AF58D6EE0F
3 changed files with 160 additions and 40 deletions

View File

@ -1,6 +1,6 @@
{
"name": "fwdekker.com",
"version": "1.9.2",
"version": "1.9.3",
"description": "The source code of [my personal website](https://fwdekker.com/).",
"author": "Felix W. Dekker",
"repository": {

View File

@ -2,7 +2,6 @@ import {Environment} from "./Environment";
import {Directory, File, FileSystem, Path} from "./FileSystem";
import {IllegalArgumentError} from "./Shared";
import {InputArgs} from "./Shell";
import {EscapeCharacters} from "./Terminal";
/**
@ -49,9 +48,13 @@ export class InputParser {
* @param input the string to parse
*/
parse(input: string): InputArgs {
const tokens = this.tokenizer.tokenize(input);
const textTokens = this.globber.glob(tokens.filter(it => it instanceof InputParser.TextToken));
const redirectTokens = tokens.filter(it => it instanceof InputParser.RedirectToken);
const tokens = this.tokenizer.tokenize(escape(input));
const textTokens = this.globber.glob(tokens.filter(it => it instanceof InputParser.TextToken))
.map(it => new InputParser.TextToken(unescape(it.contents)));
const redirectTokens = tokens
.filter(it => it instanceof InputParser.RedirectToken)
.map(it => new InputParser.RedirectToken(unescape(it.contents)));
const command = tokens[0]?.contents ?? "";
const [options, args] = this.parseOpts(textTokens.slice(1));
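The rewritten `parse` above is the heart of the change: the raw input is escaped once, every later stage works on the escaped form, and only the final token contents are unescaped. A hedged summary of that round trip, using the `escape` and `unescape` helpers added at the bottom of this file (illustrative comments, not part of the commit):
// input          --escape()-->    user-typed escape characters are doubled, so they can never be
//                                 mistaken for escapes inserted by the tokenizer
// escaped input  --tokenize()-->  the tokenizer may insert single escape characters before ? and *
// text tokens    --glob()-->      the globber expands only those internally escaped ? and *
// token contents --unescape()-->  doubled escape characters collapse back to the user's original text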
@ -256,10 +259,13 @@ export class Tokenizer {
break;
case "*":
case "?":
if (token instanceof InputParser.RedirectToken)
throw new IllegalArgumentError(`Invalid token '${char}' in redirect target.`);
if (isInSingleQuotes || isInDoubleQuotes)
token.contents += char;
else
token.contents += EscapeCharacters.Escape + char;
token.contents += InputParser.EscapeChar + char;
break;
case "~":
if (isInSingleQuotes || isInDoubleQuotes || isInCurlyBraces || token.contents !== "")
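For illustration of the `*`/`?` handling above: an unquoted glob character leaves the tokenizer prefixed with the internal escape character, a user-escaped one becomes a plain literal, and a glob in a redirect target is rejected. A sketch in the style of the test suite below, with ESC standing in for `InputParser.EscapeChar` and a `tokenizer` set up as in those tests (not part of the commit):
// expect(tokenizer.tokenize("a b*")).to.have.deep.members(tokens("a", `b${ESC}*`));
// expect(tokenizer.tokenize("a b\\*")).to.have.deep.members(tokens("a", "b*"));
// expect(() => tokenizer.tokenize("a >a*")).to.throw();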
@ -319,12 +325,20 @@ export class Globber {
return tokens
.map(it => it.contents)
.map(token => {
if (token.startsWith("/"))
return this.glob2("/", token.slice(1), new Path("/"));
else
return this.glob2("", token, this.cwd);
}
)
if (!this.isGlob(token))
return [token];
let tokens: string[];
if (token.startsWith("/"))
tokens = this.glob2("/", token.slice(1), new Path("/"));
else
tokens = this.glob2("", token, this.cwd);
if (tokens.length === 0)
throw new IllegalArgumentError(`Token '${unescape(token)}' does not match any files.`);
return tokens;
})
.reduce((acc, tokens) => acc.concat(tokens), [])
.map(it => new InputParser.TextToken(it));
}
@ -338,9 +352,6 @@ export class Globber {
* @param path the current location in the file system
*/
private glob2(history: string, glob: string, path: Path): string[] {
if (!glob.includes(EscapeCharacters.Escape + "?") && !glob.includes(EscapeCharacters.Escape + "*"))
return [history + glob];
const dir = this.fileSystem.get(path);
if (!(dir instanceof Directory))
return [history + glob];
@ -353,13 +364,9 @@ export class Globber {
if (nextPart === "..")
return this.glob2(history + nextPart + "/", remainder, path.parent);
const pattern = nextPart
.replace(/[-\/\\^$*+?.()|[\]{}]/g, "\\$&") // Escape regex from user input
.replaceAll(new RegExp(`${EscapeCharacters.Escape}\\\\\\?`), ".")
.replaceAll(new RegExp(`${EscapeCharacters.Escape}\\\\\\*`), "[^/]*");
return Object.keys(dir.nodes)
.filter(fileName => fileName.match(new RegExp(`^${pattern}$`)))
.filter(it => it.match(this.glob2regex(nextPart)))
.map(it => escape(it))
.map(fileName => {
if (dir.nodes[fileName] instanceof File) {
// Only match files if there are no more /s to match
@ -380,6 +387,62 @@ export class Globber {
})
.reduce((acc, it) => acc.concat(it), []);
}
/**
* Returns `true` if and only if the given glob string uses any special glob characters.
*
* @param glob the string to check for globness
*/
private isGlob(glob: string): boolean {
for (let i = 0; i < glob.length; i++) {
const char = glob[i];
if (char !== InputParser.EscapeChar)
continue;
i++;
const nextChar = glob[i];
if (nextChar === "?" || nextChar === "*")
return true;
}
return false;
}
/**
* Converts a glob string to a regular expression.
*
* @param glob the glob string to convert
*/
private glob2regex(glob: string): RegExp {
let regex = "";
for (let i = 0; i < glob.length; i++) {
const char = glob[i];
if (char !== InputParser.EscapeChar) {
if ("-\/\\^$*+?.()|[\]{}".includes(char))
regex += "\\" + char;
else
regex += char;
continue;
}
i++;
const nextChar = glob[i];
if (nextChar === undefined)
throw new IllegalArgumentError("Unescaped escape character inside input parser.");
if (nextChar === "?")
regex += ".";
else if (nextChar === "*")
regex += "[^/]*";
else
regex += nextChar;
}
return new RegExp(`^${regex}$`);
}
}
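As a worked example of the two helpers above (illustrative, not part of the commit; ESC stands in for `InputParser.EscapeChar`, whose actual value is not shown in this diff): only an internally escaped `?` or `*` counts as a glob marker, and every regex-special character is neutralised before matching.
// isGlob("b.txt")                       === false  // no ESC-prefixed ? or *
// isGlob(ESC + ESC + "1")               === false  // a doubled ESC is a literal escape character, not a glob
// isGlob("b" + ESC + "*")               === true   // the single ESC inserted by the tokenizer marks a real glob
//
// glob2regex("b" + ESC + "*" + ".txt")  behaves like /^b[^/]*\.txt$/
// glob2regex(ESC + "?" + "x")           behaves like /^.x$/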
@ -420,3 +483,23 @@ export module InputParser {
readonly type: string = "redirect";
}
}
/**
* Escapes all occurrences of the input parser's escape character.
*
* @param string the string to escape in
*/
function escape(string: string): string {
return string.replace(new RegExp(InputParser.EscapeChar, "g"), InputParser.EscapeChar + InputParser.EscapeChar);
}
/**
* Unescapes all occurrences of the input parser's escape character.
*
* @param string the string to unescape in
*/
function unescape(string: string): string {
return string.replace(new RegExp(InputParser.EscapeChar + InputParser.EscapeChar, "g"), InputParser.EscapeChar);
}
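A brief sketch of why this pair keeps the internal escape character transparent to the user, assuming `InputParser.EscapeChar` is a single character with no special meaning in regular expressions (an assumption the `new RegExp` calls above already rely on):
// Illustration only; ESC stands in for InputParser.EscapeChar.
//
// escape("a" + ESC + "b")            === "a" + ESC + ESC + "b"  // a user-typed ESC can never be confused with
//                                                               // a single tokenizer-inserted glob marker
// unescape(escape("a" + ESC + "b"))  === "a" + ESC + "b"        // the round trip restores the original input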

View File

@ -4,11 +4,20 @@ import {expect} from "chai";
import {Environment} from "../main/js/Environment";
import {Globber, InputParser, Tokenizer} from "../main/js/InputParser";
import {Directory, File, FileSystem, Node, Path} from "../main/js/FileSystem";
import {EscapeCharacters} from "../main/js/Terminal";
import TextToken = InputParser.TextToken;
import RedirectToken = InputParser.RedirectToken;
/**
* Shorthand for the escape character used internally in the input parser.
*/
const escape = InputParser.EscapeChar;
/**
* Converts the given strings to text tokens.
*
* @param strings the strings to convert to text tokens
*/
function tokens(...strings: string[]): InputParser.TextToken[] {
return strings.map(it => new InputParser.TextToken(it));
}
@ -71,7 +80,7 @@ describe("input parser", () => {
});
it("does not assign a value to grouped short options", () => {
expect(() => parser.parse("command -opq=arg -r")).to.throw;
expect(() => parser.parse("command -opq=arg -r")).to.throw();
});
it("stops parsing options if a short option name contains a space", () => {
@ -252,7 +261,6 @@ describe("input parser", () => {
});
describe("tokenizer", () => {
const escape = EscapeCharacters.Escape;
let tokenizer: Tokenizer;
@ -276,7 +284,7 @@ describe("tokenizer", () => {
});
});
describe("escape characters", () => {
describe("input escape characters", () => {
it("includes escaped spaces into the token", () => {
expect(tokenizer.tokenize("com\\ mand")).to.have.deep.members(tokens("com mand"));
});
@ -296,7 +304,7 @@ describe("tokenizer", () => {
});
it("throws an error if an escape occurs but no character follows", () => {
expect(() => tokenizer.tokenize("\\")).to.throw;
expect(() => tokenizer.tokenize("\\")).to.throw();
});
});
@ -311,11 +319,11 @@ describe("tokenizer", () => {
});
it("throws an error if single quotes are not closed", () => {
expect(() => tokenizer.tokenize("a'ba")).to.throw;
expect(() => tokenizer.tokenize("a'ba")).to.throw();
});
it("throws an error if double quotes are not closed", () => {
expect(() => tokenizer.tokenize(`a"ba`)).to.throw;
expect(() => tokenizer.tokenize(`a"ba`)).to.throw();
});
it("does not group double quotes within single quotes", () => {
@ -337,15 +345,15 @@ describe("tokenizer", () => {
});
it("throws an error if curly braces are not closed", () => {
expect(() => tokenizer.tokenize("a{ba")).to.throw;
expect(() => tokenizer.tokenize("a{ba")).to.throw();
});
it("throws an error if curly braces are not opened", () => {
expect(() => tokenizer.tokenize("a}ba")).to.throw;
expect(() => tokenizer.tokenize("a}ba")).to.throw();
});
it("throws an error if nested curly braces are not closed", () => {
expect(() => tokenizer.tokenize("a{{b}a")).to.throw;
expect(() => tokenizer.tokenize("a{{b}a")).to.throw();
});
it("does not group curly braces within single quotes", () => {
@ -378,7 +386,7 @@ describe("tokenizer", () => {
});
it("throws an error for nameless environment variables", () => {
expect(() => tokenizer.tokenize("$")).to.throw;
expect(() => tokenizer.tokenize("$")).to.throw();
});
it("does not substitute environment variables in the middle of a single-quoted string", () => {
@ -425,35 +433,45 @@ describe("tokenizer", () => {
});
});
describe("escapes", () => {
it("escapes output target characters", () => {
describe("internal escape characters", () => {
it("puts redirect targets in redirect tokens", () => {
expect(tokenizer.tokenize("a >b")).to.have.deep.members([new TextToken("a"), new RedirectToken(">b")]);
expect(tokenizer.tokenize("a >>b")).to.have.deep.members([new TextToken("a"), new RedirectToken(">>b")]);
});
it("does not escape escaped target characters", () => {
it("does not put escaped redirect targets in redirect tokens", () => {
expect(tokenizer.tokenize("a \\>b"))
.to.have.deep.members([new TextToken("a"), new TextToken(">b")]);
expect(tokenizer.tokenize("a \\>>b"))
.to.have.deep.members([new TextToken("a"), new TextToken(">"), new RedirectToken(">b")]);
});
it("throws an error if a glob character is used in the redirect target", () => {
expect(() => tokenizer.tokenize("a >a?")).to.throw();
expect(() => tokenizer.tokenize("a >a*")).to.throw();
});
it("retains the escape character in a redirect target", () => {
expect(tokenizer.tokenize(`>${escape}`)[0]).to.deep.equal(new RedirectToken(`>${escape}`));
});
it("escapes glob characters", () => {
expect(tokenizer.tokenize("a b?")).to.have.deep.members(tokens("a", `b${escape}?`));
expect(tokenizer.tokenize("a b*")).to.have.deep.members(tokens("a", `b${escape}*`));
});
it("does not escape escaped glob characters", () => {
it("does not escape user-escaped glob characters", () => {
expect(tokenizer.tokenize("a b\\?")).to.have.deep.members(tokens("a", "b?"));
expect(tokenizer.tokenize("a b\\*")).to.have.deep.members(tokens("a", "b*"));
});
it("does not escape internally-escaped glob characters", () => {
expect(tokenizer.tokenize(`a ${escape}\\?`)).to.have.deep.members(tokens("a", `${escape}?`));
});
});
});
describe("globber", () => {
const escape = EscapeCharacters.Escape;
const createGlobber = function(nodes: { [path: string]: Node } = {}, cwd: string = "/"): Globber {
const fs = new FileSystem(new Directory());
for (const path of Object.getOwnPropertyNames(nodes))
@ -464,6 +482,13 @@ describe("globber", () => {
describe("?", () => {
it("does not remove internal escape characters from the output", () => {
const globber = createGlobber({[`/${escape}1`]: new File()});
expect(globber.glob(tokens(`${escape}${escape}${escape}?`)))
.to.have.deep.members(tokens(`${escape}${escape}1`));
});
it("does not expand unescaped ?s", () => {
const globber = createGlobber({"/ab": new File()});
@ -544,6 +569,13 @@ describe("globber", () => {
});
describe("*", () => {
it("does not remove internal escape characters from the output", () => {
const globber = createGlobber({[`/${escape}1`]: new File()});
expect(globber.glob(tokens(`${escape}${escape}${escape}*`)))
.to.have.deep.members(tokens(`${escape}${escape}1`));
});
it("does not process unescaped *s", () => {
const globber = createGlobber({"/ab": new File()});
@ -637,13 +669,18 @@ describe("globber", () => {
describe("shared edge cases", () => {
it("throws an error if no matches are found", () => {
expect(() => createGlobber().glob(tokens(`x${escape}?`))).to.throw;
expect(() => createGlobber().glob(tokens(`x${escape}?`))).to.throw();
});
it("returns an empty token without change", () => {
expect(createGlobber().glob(tokens(""))).to.have.deep.members(tokens(""));
});
it("does not remove escape characters from glob-less inputs", () => {
expect(createGlobber().glob(tokens(`${escape}${escape}`)))
.to.have.deep.members(tokens(`${escape}${escape}`));
});
it("returns a glob-less token without change", () => {
expect(createGlobber().glob(tokens("abc"))).to.have.deep.members(tokens("abc"));
});