508 lines
15 KiB
Rust
508 lines
15 KiB
Rust
|
// Copyright 2018 Tomasz Miąsko
|
|||
|
//
|
|||
|
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE>
|
|||
|
// or the MIT license <LICENSE-MIT>, at your option.
|
|||
|
//
|
|||
|
//! Process command line according to parsing rules of Unix shell as specified
|
|||
|
//! in [Shell Command Language in POSIX.1-2008][posix-shell].
|
|||
|
//!
|
|||
|
//! [posix-shell]: http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html
|
|||
|
|
|||
|
#![cfg_attr(not(feature = "std"), no_std)]
|
|||
|
#![forbid(unsafe_code)]
|
|||
|
|
|||
|
#[cfg(feature = "std")]
|
|||
|
extern crate core;
|
|||
|
|
|||
|
use core::fmt;
|
|||
|
use core::mem;
|
|||
|
|
|||
|
#[cfg(not(feature = "std"))]
|
|||
|
#[macro_use]
|
|||
|
extern crate alloc;
|
|||
|
|
|||
|
#[cfg(not(feature = "std"))]
|
|||
|
use alloc::string::String;
|
|||
|
#[cfg(not(feature = "std"))]
|
|||
|
use alloc::vec::Vec;
|
|||
|
|
|||
|
#[cfg(not(feature = "std"))]
|
|||
|
use alloc::borrow::Cow;
|
|||
|
#[cfg(feature = "std")]
|
|||
|
use std::borrow::Cow;
|
|||
|
|
|||
|
/// The error produced when a command line cannot be parsed.
///
/// It carries no payload; its `Display` text is always
/// `missing closing quote`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct ParseError;

impl fmt::Display for ParseError {
    /// Writes the fixed, human-readable description of the error.
    ///
    /// The text is written directly to the formatter, so width and alignment
    /// flags have no effect on the output.
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        const MESSAGE: &str = "missing closing quote";
        formatter.write_str(MESSAGE)
    }
}

// The standard `Error` trait is only implemented when the `std` feature is on.
#[cfg(feature = "std")]
impl std::error::Error for ParseError {}
|
|||
|
|
|||
|
/// States of the character-by-character scanner that splits a command line
/// into words.
enum State {
    /// Between words, skipping delimiters; no word is in progress.
    Delimiter,
    /// A backslash was seen between words, before any word character.
    Backslash,
    /// Inside a word, outside of any quotes.
    Unquoted,
    /// A backslash was seen inside an unquoted word.
    UnquotedBackslash,
    /// Inside a section delimited by single quotes.
    SingleQuoted,
    /// Inside a section delimited by double quotes.
    DoubleQuoted,
    /// A backslash was seen inside a double quoted section.
    DoubleQuotedBackslash,
    /// Inside a comment.
    Comment,
}
|
|||
|
|
|||
|
/// Splits command line into separate arguments, in much the same way Unix shell
|
|||
|
/// would, but without many of expansion the shell would perform.
|
|||
|
///
|
|||
|
/// The split functionality is compatible with behaviour of Unix shell, but with
|
|||
|
/// word expansions limited to quote removal, and without special token
|
|||
|
/// recognition rules for operators.
|
|||
|
///
|
|||
|
/// The result is exactly the same as one obtained from Unix shell as long as
|
|||
|
/// those unsupported features are not present in input: no operators, no
|
|||
|
/// variable assignments, no tilde expansion, no parameter expansion, no command
|
|||
|
/// substitution, no arithmetic expansion, no pathname expansion.
|
|||
|
///
|
|||
|
/// In case those unsupported shell features are present, the syntax that
|
|||
|
/// introduce them is interpreted literally.
|
|||
|
///
|
|||
|
/// # Errors
|
|||
|
///
|
|||
|
/// When input contains unmatched quote, an error is returned.
|
|||
|
///
|
|||
|
/// # Compatibility with other implementations
|
|||
|
///
|
|||
|
/// It should be fully compatible with g_shell_parse_argv from GLib, except that
|
|||
|
/// in GLib it is an error not to have any words after tokenization.
|
|||
|
///
|
|||
|
/// It is also very close to shlex.split available in Python standard library,
|
|||
|
/// when used in POSIX mode with support for comments. Though, shlex
|
|||
|
/// implementation diverges from POSIX, and from implementation contained herein
|
|||
|
/// in three aspects. First, it doesn't support line continuations.
|
|||
|
/// Second, inside double quotes, the backslash characters retains its special
|
|||
|
/// meaning as an escape character only when followed by \\ or \", whereas POSIX
|
|||
|
/// specifies that it should retain its special meaning when followed by: $, \`,
|
|||
|
/// \", \\, or a newline. Third, it treats carriage return as one of delimiters.
|
|||
|
///
|
|||
|
/// # Examples
|
|||
|
///
|
|||
|
/// Building an executable using compiler obtained from CC environment variable
|
|||
|
/// and compiler flags from both CFLAGS and CPPFLAGS. Similar to default build
|
|||
|
/// rule for C used in GNU Make:
|
|||
|
///
|
|||
|
/// ```rust,no_run
|
|||
|
/// use std::env::var;
|
|||
|
/// use std::process::Command;
|
|||
|
///
|
|||
|
/// let cc = var("CC").unwrap_or_else(|_| "cc".to_owned());
|
|||
|
///
|
|||
|
/// let cflags_str = var("CFLAGS").unwrap_or_else(|_| String::new());
|
|||
|
/// let cflags = shell_words::split(&cflags_str).expect("failed to parse CFLAGS");
|
|||
|
///
|
|||
|
/// let cppflags_str = var("CPPFLAGS").unwrap_or_else(|_| String::new());
|
|||
|
/// let cppflags = shell_words::split(&cppflags_str).expect("failed to parse CPPFLAGS");
|
|||
|
///
|
|||
|
/// Command::new(cc)
|
|||
|
/// .args(cflags)
|
|||
|
/// .args(cppflags)
|
|||
|
/// .args(&["-c", "a.c", "-o", "a.out"])
|
|||
|
/// .spawn()
|
|||
|
/// .expect("failed to start subprocess")
|
|||
|
/// .wait()
|
|||
|
/// .expect("failed to wait for subprocess");
|
|||
|
/// ```
|
|||
|
pub fn split(s: &str) -> Result<Vec<String>, ParseError> {
|
|||
|
use State::*;
|
|||
|
|
|||
|
let mut words = Vec::new();
|
|||
|
let mut word = String::new();
|
|||
|
let mut chars = s.chars();
|
|||
|
let mut state = Delimiter;
|
|||
|
|
|||
|
loop {
|
|||
|
let c = chars.next();
|
|||
|
state = match state {
|
|||
|
Delimiter => match c {
|
|||
|
None => break,
|
|||
|
Some('\'') => SingleQuoted,
|
|||
|
Some('\"') => DoubleQuoted,
|
|||
|
Some('\\') => Backslash,
|
|||
|
Some('\t') | Some(' ') | Some('\n') => Delimiter,
|
|||
|
Some('#') => Comment,
|
|||
|
Some(c) => {
|
|||
|
word.push(c);
|
|||
|
Unquoted
|
|||
|
}
|
|||
|
},
|
|||
|
Backslash => match c {
|
|||
|
None => {
|
|||
|
word.push('\\');
|
|||
|
words.push(mem::replace(&mut word, String::new()));
|
|||
|
break;
|
|||
|
}
|
|||
|
Some('\n') => Delimiter,
|
|||
|
Some(c) => {
|
|||
|
word.push(c);
|
|||
|
Unquoted
|
|||
|
}
|
|||
|
},
|
|||
|
Unquoted => match c {
|
|||
|
None => {
|
|||
|
words.push(mem::replace(&mut word, String::new()));
|
|||
|
break;
|
|||
|
}
|
|||
|
Some('\'') => SingleQuoted,
|
|||
|
Some('\"') => DoubleQuoted,
|
|||
|
Some('\\') => UnquotedBackslash,
|
|||
|
Some('\t') | Some(' ') | Some('\n') => {
|
|||
|
words.push(mem::replace(&mut word, String::new()));
|
|||
|
Delimiter
|
|||
|
}
|
|||
|
Some(c) => {
|
|||
|
word.push(c);
|
|||
|
Unquoted
|
|||
|
}
|
|||
|
},
|
|||
|
UnquotedBackslash => match c {
|
|||
|
None => {
|
|||
|
word.push('\\');
|
|||
|
words.push(mem::replace(&mut word, String::new()));
|
|||
|
break;
|
|||
|
}
|
|||
|
Some('\n') => Unquoted,
|
|||
|
Some(c) => {
|
|||
|
word.push(c);
|
|||
|
Unquoted
|
|||
|
}
|
|||
|
},
|
|||
|
SingleQuoted => match c {
|
|||
|
None => return Err(ParseError),
|
|||
|
Some('\'') => Unquoted,
|
|||
|
Some(c) => {
|
|||
|
word.push(c);
|
|||
|
SingleQuoted
|
|||
|
}
|
|||
|
},
|
|||
|
DoubleQuoted => match c {
|
|||
|
None => return Err(ParseError),
|
|||
|
Some('\"') => Unquoted,
|
|||
|
Some('\\') => DoubleQuotedBackslash,
|
|||
|
Some(c) => {
|
|||
|
word.push(c);
|
|||
|
DoubleQuoted
|
|||
|
}
|
|||
|
},
|
|||
|
DoubleQuotedBackslash => match c {
|
|||
|
None => return Err(ParseError),
|
|||
|
Some('\n') => DoubleQuoted,
|
|||
|
Some(c @ '$') | Some(c @ '`') | Some(c @ '"') | Some(c @ '\\') => {
|
|||
|
word.push(c);
|
|||
|
DoubleQuoted
|
|||
|
}
|
|||
|
Some(c) => {
|
|||
|
word.push('\\');
|
|||
|
word.push(c);
|
|||
|
DoubleQuoted
|
|||
|
}
|
|||
|
},
|
|||
|
Comment => match c {
|
|||
|
None => break,
|
|||
|
Some('\n') => Delimiter,
|
|||
|
Some(_) => Comment,
|
|||
|
},
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
Ok(words)
|
|||
|
}
|
|||
|
|
|||
|
/// Quoting strategy chosen by `escape_style` for a single word.
enum EscapeStyle {
    /// No escaping.
    None,
    /// Wrap in single quotes.
    SingleQuoted,
    /// Single quotes combined with backslash.
    Mixed,
}

/// Determines escaping style to use.
///
/// * An empty string must be written as `''`, so it is `SingleQuoted`.
/// * A string without any character from the special set needs no quoting.
/// * A string with a newline and no single quote is `SingleQuoted`.
/// * Every other string containing a special character is `Mixed`.
fn escape_style(s: &str) -> EscapeStyle {
    if s.is_empty() {
        return EscapeStyle::SingleQuoted;
    }

    let mut special = false;
    let mut newline = false;
    let mut single_quote = false;

    for c in s.chars() {
        match c {
            '\n' => {
                newline = true;
                special = true;
            }
            '\'' => {
                single_quote = true;
                special = true;
            }
            // Characters that must, or may need to, be quoted according to
            // POSIX. The tilde is the ASCII `~` (U+007E); an earlier revision
            // listed the look-alike U+02DC, which left `~` unquoted and thus
            // subject to tilde expansion by the shell.
            '|' | '&' | ';' | '<' | '>' | '(' | ')' | '$' | '`' | '\\' | '"' | ' ' | '\t' | '*'
            | '?' | '[' | '#' | '~' | '=' | '%' => {
                special = true;
            }
            _ => continue,
        }
    }

    if !special {
        EscapeStyle::None
    } else if newline && !single_quote {
        EscapeStyle::SingleQuoted
    } else {
        EscapeStyle::Mixed
    }
}

/// Escapes special characters in a string, so that it will retain its literal
/// meaning when used as a part of command in Unix shell.
///
/// It tries to avoid introducing any unnecessary quotes or escape characters,
/// but specifics regarding quoting style are left unspecified.
///
/// The input is returned borrowed when no quoting is required; otherwise a
/// newly allocated, quoted copy is returned.
pub fn quote(s: &str) -> Cow<'_, str> {
    // We are going somewhat out of the way to provide
    // minimal amount of quoting in typical cases.
    match escape_style(s) {
        EscapeStyle::None => s.into(),
        EscapeStyle::SingleQuoted => format!("'{}'", s).into(),
        EscapeStyle::Mixed => {
            // At least the two enclosing quotes are added to the input.
            let mut quoted = String::with_capacity(s.len() + 2);
            quoted.push('\'');
            for c in s.chars() {
                if c == '\'' {
                    // Close the quoted section, emit an escaped quote, reopen.
                    quoted.push_str("'\\''");
                } else {
                    quoted.push(c);
                }
            }
            quoted.push('\'');
            quoted.into()
        }
    }
}
|
|||
|
|
|||
|
/// Joins arguments into a single command line suitable for execution in Unix
|
|||
|
/// shell.
|
|||
|
///
|
|||
|
/// Each argument is quoted using [`quote`] to preserve its literal meaning when
|
|||
|
/// parsed by Unix shell.
|
|||
|
///
|
|||
|
/// Note: This function is essentially an inverse of [`split`].
|
|||
|
///
|
|||
|
/// # Examples
|
|||
|
///
|
|||
|
/// Logging executed commands in format that can be easily copied and pasted
|
|||
|
/// into an actual shell:
|
|||
|
///
|
|||
|
/// ```rust,no_run
|
|||
|
/// fn execute(args: &[&str]) {
|
|||
|
/// use std::process::Command;
|
|||
|
/// println!("Executing: {}", shell_words::join(args));
|
|||
|
/// Command::new(&args[0])
|
|||
|
/// .args(&args[1..])
|
|||
|
/// .spawn()
|
|||
|
/// .expect("failed to start subprocess")
|
|||
|
/// .wait()
|
|||
|
/// .expect("failed to wait for subprocess");
|
|||
|
/// }
|
|||
|
///
|
|||
|
/// execute(&["python", "-c", "print('Hello world!')"]);
|
|||
|
/// ```
|
|||
|
///
|
|||
|
/// [`quote`]: fn.quote.html
|
|||
|
/// [`split`]: fn.split.html
|
|||
|
pub fn join<I, S>(words: I) -> String
|
|||
|
where
|
|||
|
I: IntoIterator<Item = S>,
|
|||
|
S: AsRef<str>,
|
|||
|
{
|
|||
|
let mut line = words.into_iter().fold(String::new(), |mut line, word| {
|
|||
|
let quoted = quote(word.as_ref());
|
|||
|
line.push_str(quoted.as_ref());
|
|||
|
line.push(' ');
|
|||
|
line
|
|||
|
});
|
|||
|
line.pop();
|
|||
|
line
|
|||
|
}
|
|||
|
|
|||
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that every `(input, expected)` pair splits successfully into
    /// exactly the expected words, reporting the offending input otherwise.
    fn split_ok(cases: &[(&str, &[&str])]) {
        for &(input, expected) in cases {
            match split(input) {
                Err(actual) => {
                    panic!(
                        "After split({:?})\nexpected: Ok({:?})\n actual: Err({:?})\n",
                        input, expected, actual
                    );
                }
                Ok(actual) => {
                    assert!(
                        expected == actual.as_slice(),
                        "After split({:?}).unwrap()\nexpected: {:?}\n actual: {:?}\n",
                        input,
                        expected,
                        actual
                    );
                }
            }
        }
    }

    #[test]
    fn split_empty() {
        split_ok(&[("", &[])]);
    }

    #[test]
    fn split_initial_whitespace_is_removed() {
        split_ok(&[
            (" a", &["a"]),
            ("\t\t\t\tbar", &["bar"]),
            ("\t \nc", &["c"]),
        ]);
    }

    #[test]
    fn split_trailing_whitespace_is_removed() {
        split_ok(&[
            ("a ", &["a"]),
            ("b\t", &["b"]),
            ("c\t \n \n \n", &["c"]),
            ("d\n\n", &["d"]),
        ]);
    }

    #[test]
    fn split_carriage_return_is_not_special() {
        split_ok(&[("c\ra\r'\r'\r", &["c\ra\r\r\r"])]);
    }

    #[test]
    fn split_single_quotes() {
        split_ok(&[
            (r#"''"#, &[r#""#]),
            (r#"'a'"#, &[r#"a"#]),
            (r#"'\'"#, &[r#"\"#]),
            (r#"' \ '"#, &[r#" \ "#]),
            (r#"'#'"#, &[r#"#"#]),
        ]);
    }

    #[test]
    fn split_double_quotes() {
        split_ok(&[
            (r#""""#, &[""]),
            (r#""""""#, &[""]),
            (r#""a b c' d""#, &["a b c' d"]),
            (r#""\a""#, &["\\a"]),
            (r#""$""#, &["$"]),
            (r#""\$""#, &["$"]),
            (r#""`""#, &["`"]),
            (r#""\`""#, &["`"]),
            (r#""\"""#, &["\""]),
            (r#""\\""#, &["\\"]),
            ("\"\n\"", &["\n"]),
            ("\"\\\n\"", &[""]),
        ]);
    }

    #[test]
    fn split_unquoted() {
        split_ok(&[
            (r#"\|\&\;"#, &[r#"|&;"#]),
            (r#"\<\>"#, &[r#"<>"#]),
            (r#"\(\)"#, &[r#"()"#]),
            (r#"\$"#, &[r#"$"#]),
            (r#"\`"#, &[r#"`"#]),
            (r#"\""#, &[r#"""#]),
            (r#"\'"#, &[r#"'"#]),
            ("\\\n", &[]),
            (" \\\n \n", &[]),
            ("a\nb\nc", &["a", "b", "c"]),
            ("a\\\nb\\\nc", &["abc"]),
            ("foo bar baz", &["foo", "bar", "baz"]),
            (r#"\🦉"#, &[r"🦉"]),
        ]);
    }

    #[test]
    fn split_trailing_backslash() {
        split_ok(&[("\\", &["\\"]), (" \\", &["\\"]), ("a\\", &["a\\"])]);
    }

    #[test]
    fn split_errors() {
        // Unterminated single quoted section.
        assert_eq!(split("'abc"), Err(ParseError));
        // Unterminated double quoted section.
        assert_eq!(split("\""), Err(ParseError));
        // A backslash is literal inside single quotes, so the quote stays open.
        assert_eq!(split("'\\"), Err(ParseError));
        // Input ends right after a backslash inside double quotes. This case
        // used to be an accidental duplicate of the single quote one above.
        assert_eq!(split("\"\\"), Err(ParseError));
    }

    #[test]
    fn split_comments() {
        split_ok(&[
            (r#" x # comment "#, &["x"]),
            (r#" w1#w2 "#, &["w1#w2"]),
            (r#"'not really a # comment'"#, &["not really a # comment"]),
            (" a # very long comment \n b # another comment", &["a", "b"]),
        ]);
    }

    #[test]
    fn test_quote() {
        assert_eq!(quote(""), "''");
        assert_eq!(quote("'"), "''\\'''");
        assert_eq!(quote("abc"), "abc");
        assert_eq!(quote("a \n b"), "'a \n b'");
        assert_eq!(quote("X'\nY"), "'X'\\''\nY'");
    }

    #[test]
    fn test_join() {
        assert_eq!(join(&["a", "b", "c"]), "a b c");
        assert_eq!(join(&[" ", "$", "\n"]), "' ' '$' '\n'");
    }

    #[test]
    fn join_followed_by_split_is_identity() {
        let cases: Vec<&[&str]> = vec![
            &["a"],
            &["python", "-c", "print('Hello world!')"],
            &["echo", " arg with spaces ", "arg \' with \" quotes"],
            &["even newlines are quoted correctly\n", "\n", "\n\n\t "],
            &["$", "`test`"],
            &["cat", "~user/log*"],
            &["test", "'a \"b", "\"X'"],
            &["empty", "", "", ""],
        ];
        for argv in cases {
            let args = join(argv);
            assert_eq!(split(&args).unwrap(), argv);
        }
    }
}
|