forked from tools/josh

Transparently use escape characters in parser

Fixes #68
Florine W. Dekker 2019-11-11 22:37:34 +01:00
parent fea0235d4a
commit a02130585e
Signed by: FWDekker
GPG Key ID: B1B567AF58D6EE0F
3 changed files with 160 additions and 40 deletions

View File

@ -1,6 +1,6 @@
{
"name": "fwdekker.com",
"version": "1.9.2",
"version": "1.9.3",
"description": "The source code of [my personal website](https://fwdekker.com/).",
"author": "Felix W. Dekker",
"repository": {

View File

@ -2,7 +2,6 @@ import {Environment} from "./Environment";
import {Directory, File, FileSystem, Path} from "./FileSystem";
import {IllegalArgumentError} from "./Shared";
import {InputArgs} from "./Shell";
import {EscapeCharacters} from "./Terminal";
/**
@ -49,9 +48,13 @@ export class InputParser {
* @param input the string to parse
*/
parse(input: string): InputArgs {
const tokens = this.tokenizer.tokenize(input);
const textTokens = this.globber.glob(tokens.filter(it => it instanceof InputParser.TextToken));
const redirectTokens = tokens.filter(it => it instanceof InputParser.RedirectToken);
const tokens = this.tokenizer.tokenize(escape(input));
const textTokens = this.globber.glob(tokens.filter(it => it instanceof InputParser.TextToken))
.map(it => new InputParser.TextToken(unescape(it.contents)));
const redirectTokens = tokens
.filter(it => it instanceof InputParser.RedirectToken)
.map(it => new InputParser.RedirectToken(unescape(it.contents)));
const command = tokens[0]?.contents ?? "";
const [options, args] = this.parseOpts(textTokens.slice(1));
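The rewritten `parse` above is the heart of the change: the raw input is escaped once, every later stage works on the escaped form, and only the final token contents are unescaped. A hedged summary of that round trip, using the `escape` and `unescape` helpers added at the bottom of this file (illustrative comments, not part of the commit):
// input          --escape()-->    user-typed escape characters are doubled, so they can never be
//                                 mistaken for escapes inserted by the tokenizer
// escaped input  --tokenize()-->  the tokenizer may insert single escape characters before ? and *
// text tokens    --glob()-->      the globber expands only those internally escaped ? and *
// token contents --unescape()-->  doubled escape characters collapse back to the user's original text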
@ -256,10 +259,13 @@ export class Tokenizer {
break;
case "*":
case "?":
if (token instanceof InputParser.RedirectToken)
throw new IllegalArgumentError(`Invalid token '${char}' in redirect target.`);
if (isInSingleQuotes || isInDoubleQuotes)
token.contents += char;
else
token.contents += EscapeCharacters.Escape + char;
token.contents += InputParser.EscapeChar + char;
break;
case "~":
if (isInSingleQuotes || isInDoubleQuotes || isInCurlyBraces || token.contents !== "")
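For illustration of the `*`/`?` handling above: an unquoted glob character leaves the tokenizer prefixed with the internal escape character, a user-escaped one becomes a plain literal, and a glob in a redirect target is rejected. A sketch in the style of the test suite below, with ESC standing in for `InputParser.EscapeChar` and a `tokenizer` set up as in those tests (not part of the commit):
// expect(tokenizer.tokenize("a b*")).to.have.deep.members(tokens("a", `b${ESC}*`));
// expect(tokenizer.tokenize("a b\\*")).to.have.deep.members(tokens("a", "b*"));
// expect(() => tokenizer.tokenize("a >a*")).to.throw();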
@ -319,12 +325,20 @@ export class Globber {
return tokens
.map(it => it.contents)
.map(token => {
if (token.startsWith("/"))
return this.glob2("/", token.slice(1), new Path("/"));
else
return this.glob2("", token, this.cwd);
}
)
if (!this.isGlob(token))
return [token];
let tokens: string[];
if (token.startsWith("/"))
tokens = this.glob2("/", token.slice(1), new Path("/"));
else
tokens = this.glob2("", token, this.cwd);
if (tokens.length === 0)
throw new IllegalArgumentError(`Token '${unescape(token)}' does not match any files.`);
return tokens;
})
.reduce((acc, tokens) => acc.concat(tokens), [])
.map(it => new InputParser.TextToken(it));
}
@ -338,9 +352,6 @@ export class Globber {
* @param path the current location in the file system
*/
private glob2(history: string, glob: string, path: Path): string[] {
if (!glob.includes(EscapeCharacters.Escape + "?") && !glob.includes(EscapeCharacters.Escape + "*"))
return [history + glob];
const dir = this.fileSystem.get(path);
if (!(dir instanceof Directory))
return [history + glob];
@ -353,13 +364,9 @@ export class Globber {
if (nextPart === "..")
return this.glob2(history + nextPart + "/", remainder, path.parent);
const pattern = nextPart
.replace(/[-\/\\^$*+?.()|[\]{}]/g, "\\$&") // Escape regex from user input
.replaceAll(new RegExp(`${EscapeCharacters.Escape}\\\\\\?`), ".")
.replaceAll(new RegExp(`${EscapeCharacters.Escape}\\\\\\*`), "[^/]*");
return Object.keys(dir.nodes)
.filter(fileName => fileName.match(new RegExp(`^${pattern}$`)))
.filter(it => it.match(this.glob2regex(nextPart)))
.map(it => escape(it))
.map(fileName => {
if (dir.nodes[fileName] instanceof File) {
// Only match files if there are no more /s to match
@ -380,6 +387,62 @@ export class Globber {
})
.reduce((acc, it) => acc.concat(it), []);
}
/**
* Returns `true` if and only if the given glob string uses any special glob characters.
*
* @param glob the string to check for globness
*/
private isGlob(glob: string): boolean {
for (let i = 0; i < glob.length; i++) {
const char = glob[i];
if (char !== InputParser.EscapeChar)
continue;
i++;
const nextChar = glob[i];
if (nextChar === "?" || nextChar === "*")
return true;
}
return false;
}
/**
* Converts a glob string to a regular expression.
*
* @param glob the glob string to convert
*/
private glob2regex(glob: string): RegExp {
let regex = "";
for (let i = 0; i < glob.length; i++) {
const char = glob[i];
if (char !== InputParser.EscapeChar) {
if ("-\/\\^$*+?.()|[\]{}".includes(char))
regex += "\\" + char;
else
regex += char;
continue;
}
i++;
const nextChar = glob[i];
if (nextChar === undefined)
throw new IllegalArgumentError("Unescaped escape character inside input parser.");
if (nextChar === "?")
regex += ".";
else if (nextChar === "*")
regex += "[^/]*";
else
regex += nextChar;
}
return new RegExp(`^${regex}$`);
}
}
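As a worked example of the two helpers above (illustrative, not part of the commit; ESC stands in for `InputParser.EscapeChar`, whose actual value is not shown in this diff): only an internally escaped `?` or `*` counts as a glob marker, and every regex-special character is neutralised before matching.
// isGlob("b.txt")                       === false  // no ESC-prefixed ? or *
// isGlob(ESC + ESC + "1")               === false  // a doubled ESC is a literal escape character, not a glob
// isGlob("b" + ESC + "*")               === true   // the single ESC inserted by the tokenizer marks a real glob
//
// glob2regex("b" + ESC + "*" + ".txt")  behaves like /^b[^/]*\.txt$/
// glob2regex(ESC + "?" + "x")           behaves like /^.x$/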
@ -420,3 +483,23 @@ export module InputParser {
readonly type: string = "redirect";
}
}
/**
* Escapes all occurrences of the input parser's escape character.
*
* @param string the string to escape in
*/
function escape(string: string): string {
return string.replace(new RegExp(InputParser.EscapeChar, "g"), InputParser.EscapeChar + InputParser.EscapeChar);
}
/**
* Unescapes all occurrences of the input parser's escape character.
*
* @param string the string to unescape in
*/
function unescape(string: string): string {
return string.replace(new RegExp(InputParser.EscapeChar + InputParser.EscapeChar, "g"), InputParser.EscapeChar);
}
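A brief sketch of why this pair keeps the internal escape character transparent to the user, assuming `InputParser.EscapeChar` is a single character with no special meaning in regular expressions (an assumption the `new RegExp` calls above already rely on):
// Illustration only; ESC stands in for InputParser.EscapeChar.
//
// escape("a" + ESC + "b")            === "a" + ESC + ESC + "b"  // a user-typed ESC can never be confused with
//                                                               // a single tokenizer-inserted glob marker
// unescape(escape("a" + ESC + "b"))  === "a" + ESC + "b"        // the round trip restores the original input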

View File

@ -4,11 +4,20 @@ import {expect} from "chai";
import {Environment} from "../main/js/Environment";
import {Globber, InputParser, Tokenizer} from "../main/js/InputParser";
import {Directory, File, FileSystem, Node, Path} from "../main/js/FileSystem";
import {EscapeCharacters} from "../main/js/Terminal";
import TextToken = InputParser.TextToken;
import RedirectToken = InputParser.RedirectToken;
/**
* Shorthand for the escape character used internally in the input parser.
*/
const escape = InputParser.EscapeChar;
/**
* Converts the given strings to text tokens.
*
* @param strings the strings to convert to text tokens
*/
function tokens(...strings: string[]): InputParser.TextToken[] {
return strings.map(it => new InputParser.TextToken(it));
}
@ -71,7 +80,7 @@ describe("input parser", () => {
});
it("does not assign a value to grouped short options", () => {
expect(() => parser.parse("command -opq=arg -r")).to.throw;
expect(() => parser.parse("command -opq=arg -r")).to.throw();
});
it("stops parsing options if a short option name contains a space", () => {
@ -252,7 +261,6 @@ describe("input parser", () => {
});
describe("tokenizer", () => {
const escape = EscapeCharacters.Escape;
let tokenizer: Tokenizer;
@ -276,7 +284,7 @@ describe("tokenizer", () => {
});
});
describe("escape characters", () => {
describe("input escape characters", () => {
it("includes escaped spaces into the token", () => {
expect(tokenizer.tokenize("com\\ mand")).to.have.deep.members(tokens("com mand"));
});
@ -296,7 +304,7 @@ describe("tokenizer", () => {
});
it("throws an error if an escape occurs but no character follows", () => {
expect(() => tokenizer.tokenize("\\")).to.throw;
expect(() => tokenizer.tokenize("\\")).to.throw();
});
});
@ -311,11 +319,11 @@ describe("tokenizer", () => {
});
it("throws an error if single quotes are not closed", () => {
expect(() => tokenizer.tokenize("a'ba")).to.throw;
expect(() => tokenizer.tokenize("a'ba")).to.throw();
});
it("throws an error if double quotes are not closed", () => {
expect(() => tokenizer.tokenize(`a"ba`)).to.throw;
expect(() => tokenizer.tokenize(`a"ba`)).to.throw();
});
it("does not group double quotes within single quotes", () => {
@ -337,15 +345,15 @@ describe("tokenizer", () => {
});
it("throws an error if curly braces are not closed", () => {
expect(() => tokenizer.tokenize("a{ba")).to.throw;
expect(() => tokenizer.tokenize("a{ba")).to.throw();
});
it("throws an error if curly braces are not opened", () => {
expect(() => tokenizer.tokenize("a}ba")).to.throw;
expect(() => tokenizer.tokenize("a}ba")).to.throw();
});
it("throws an error if nested curly braces are not closed", () => {
expect(() => tokenizer.tokenize("a{{b}a")).to.throw;
expect(() => tokenizer.tokenize("a{{b}a")).to.throw();
});
it("does not group curly braces within single quotes", () => {
@ -378,7 +386,7 @@ describe("tokenizer", () => {
});
it("throws an error for nameless environment variables", () => {
expect(() => tokenizer.tokenize("$")).to.throw;
expect(() => tokenizer.tokenize("$")).to.throw();
});
it("does not substitute environment variables in the middle of a single-quoted string", () => {
@ -425,35 +433,45 @@ describe("tokenizer", () => {
});
});
describe("escapes", () => {
it("escapes output target characters", () => {
describe("internal escape characters", () => {
it("puts redirect targets in redirect tokens", () => {
expect(tokenizer.tokenize("a >b")).to.have.deep.members([new TextToken("a"), new RedirectToken(">b")]);
expect(tokenizer.tokenize("a >>b")).to.have.deep.members([new TextToken("a"), new RedirectToken(">>b")]);
});
it("does not escape escaped target characters", () => {
it("does not put escaped redirect targets in redirect tokens", () => {
expect(tokenizer.tokenize("a \\>b"))
.to.have.deep.members([new TextToken("a"), new TextToken(">b")]);
expect(tokenizer.tokenize("a \\>>b"))
.to.have.deep.members([new TextToken("a"), new TextToken(">"), new RedirectToken(">b")]);
});
it("throws an error if a glob character is used in the redirect target", () => {
expect(() => tokenizer.tokenize("a >a?")).to.throw();
expect(() => tokenizer.tokenize("a >a*")).to.throw();
});
it("retains the escape character in a redirect target", () => {
expect(tokenizer.tokenize(`>${escape}`)[0]).to.deep.equal(new RedirectToken(`>${escape}`));
});
it("escapes glob characters", () => {
expect(tokenizer.tokenize("a b?")).to.have.deep.members(tokens("a", `b${escape}?`));
expect(tokenizer.tokenize("a b*")).to.have.deep.members(tokens("a", `b${escape}*`));
});
it("does not escape escaped glob characters", () => {
it("does not escape user-escaped glob characters", () => {
expect(tokenizer.tokenize("a b\\?")).to.have.deep.members(tokens("a", "b?"));
expect(tokenizer.tokenize("a b\\*")).to.have.deep.members(tokens("a", "b*"));
});
it("does not escape internally-escaped glob characters", () => {
expect(tokenizer.tokenize(`a ${escape}\\?`)).to.have.deep.members(tokens("a", `${escape}?`));
});
});
});
describe("globber", () => {
const escape = EscapeCharacters.Escape;
const createGlobber = function(nodes: { [path: string]: Node } = {}, cwd: string = "/"): Globber {
const fs = new FileSystem(new Directory());
for (const path of Object.getOwnPropertyNames(nodes))
@ -464,6 +482,13 @@ describe("globber", () => {
describe("?", () => {
it("does not remove internal escape characters from the output", () => {
const globber = createGlobber({[`/${escape}1`]: new File()});
expect(globber.glob(tokens(`${escape}${escape}${escape}?`)))
.to.have.deep.members(tokens(`${escape}${escape}1`));
});
it("does not expand unescaped ?s", () => {
const globber = createGlobber({"/ab": new File()});
@ -544,6 +569,13 @@ describe("globber", () => {
});
describe("*", () => {
it("does not remove internal escape characters from the output", () => {
const globber = createGlobber({[`/${escape}1`]: new File()});
expect(globber.glob(tokens(`${escape}${escape}${escape}*`)))
.to.have.deep.members(tokens(`${escape}${escape}1`));
});
it("does not process unescaped *s", () => {
const globber = createGlobber({"/ab": new File()});
@ -637,13 +669,18 @@ describe("globber", () => {
describe("shared edge cases", () => {
it("throws an error if no matches are found", () => {
expect(() => createGlobber().glob(tokens(`x${escape}?`))).to.throw;
expect(() => createGlobber().glob(tokens(`x${escape}?`))).to.throw();
});
it("returns an empty token without change", () => {
expect(createGlobber().glob(tokens(""))).to.have.deep.members(tokens(""));
});
it("does not remove escape characters from glob-less inputs", () => {
expect(createGlobber().glob(tokens(`${escape}${escape}`)))
.to.have.deep.members(tokens(`${escape}${escape}`));
});
it("returns a glob-less token without change", () => {
expect(createGlobber().glob(tokens("abc"))).to.have.deep.members(tokens("abc"));
});