508 lines
15 KiB
Rust
508 lines
15 KiB
Rust
// Copyright 2018 Tomasz Miąsko
|
||
//
|
||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE>
|
||
// or the MIT license <LICENSE-MIT>, at your option.
|
||
//
|
||
//! Process command line according to parsing rules of Unix shell as specified
|
||
//! in [Shell Command Language in POSIX.1-2008][posix-shell].
|
||
//!
|
||
//! [posix-shell]: http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html
|
||
|
||
#![cfg_attr(not(feature = "std"), no_std)]
|
||
#![forbid(unsafe_code)]
|
||
|
||
#[cfg(feature = "std")]
|
||
extern crate core;
|
||
|
||
use core::fmt;
|
||
use core::mem;
|
||
|
||
#[cfg(not(feature = "std"))]
|
||
#[macro_use]
|
||
extern crate alloc;
|
||
|
||
#[cfg(not(feature = "std"))]
|
||
use alloc::string::String;
|
||
#[cfg(not(feature = "std"))]
|
||
use alloc::vec::Vec;
|
||
|
||
#[cfg(not(feature = "std"))]
|
||
use alloc::borrow::Cow;
|
||
#[cfg(feature = "std")]
|
||
use std::borrow::Cow;
|
||
|
||
/// An error returned when shell parsing fails.
///
/// `split` reports it when the input ends while a single-quoted or
/// double-quoted section is still open.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ParseError;

impl fmt::Display for ParseError {
    /// Writes the fixed, human readable description of the error.
    fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
        write!(formatter, "missing closing quote")
    }
}

// The standard error trait only exists when building against `std`.
#[cfg(feature = "std")]
impl std::error::Error for ParseError {}
|
||
|
||
/// Position of the tokenizer used by `split` relative to words, quotes and
/// comments.
enum State {
    /// Between words; no word is in progress.
    Delimiter,
    /// A backslash was read while no word was in progress.
    Backslash,
    /// Inside a word, outside of any quotes.
    Unquoted,
    /// A backslash was read inside an unquoted word.
    UnquotedBackslash,
    /// Inside a section delimited by single quotes.
    SingleQuoted,
    /// Inside a section delimited by double quotes.
    DoubleQuoted,
    /// A backslash was read inside a double quoted section.
    DoubleQuotedBackslash,
    /// Inside a `#` comment, which lasts until the next newline.
    Comment,
}
|
||
|
||
/// Splits command line into separate arguments, in much the same way Unix shell
|
||
/// would, but without many of expansion the shell would perform.
|
||
///
|
||
/// The split functionality is compatible with behaviour of Unix shell, but with
|
||
/// word expansions limited to quote removal, and without special token
|
||
/// recognition rules for operators.
|
||
///
|
||
/// The result is exactly the same as one obtained from Unix shell as long as
|
||
/// those unsupported features are not present in input: no operators, no
|
||
/// variable assignments, no tilde expansion, no parameter expansion, no command
|
||
/// substitution, no arithmetic expansion, no pathname expansion.
|
||
///
|
||
/// In case those unsupported shell features are present, the syntax that
|
||
/// introduce them is interpreted literally.
|
||
///
|
||
/// # Errors
|
||
///
|
||
/// When input contains unmatched quote, an error is returned.
|
||
///
|
||
/// # Compatibility with other implementations
|
||
///
|
||
/// It should be fully compatible with g_shell_parse_argv from GLib, except that
|
||
/// in GLib it is an error not to have any words after tokenization.
|
||
///
|
||
/// It is also very close to shlex.split available in Python standard library,
|
||
/// when used in POSIX mode with support for comments. Though, shlex
|
||
/// implementation diverges from POSIX, and from implementation contained herein
|
||
/// in three aspects. First, it doesn't support line continuations.
|
||
/// Second, inside double quotes, the backslash characters retains its special
|
||
/// meaning as an escape character only when followed by \\ or \", whereas POSIX
|
||
/// specifies that it should retain its special meaning when followed by: $, \`,
|
||
/// \", \\, or a newline. Third, it treats carriage return as one of delimiters.
|
||
///
|
||
/// # Examples
|
||
///
|
||
/// Building an executable using compiler obtained from CC environment variable
|
||
/// and compiler flags from both CFLAGS and CPPFLAGS. Similar to default build
|
||
/// rule for C used in GNU Make:
|
||
///
|
||
/// ```rust,no_run
|
||
/// use std::env::var;
|
||
/// use std::process::Command;
|
||
///
|
||
/// let cc = var("CC").unwrap_or_else(|_| "cc".to_owned());
|
||
///
|
||
/// let cflags_str = var("CFLAGS").unwrap_or_else(|_| String::new());
|
||
/// let cflags = shell_words::split(&cflags_str).expect("failed to parse CFLAGS");
|
||
///
|
||
/// let cppflags_str = var("CPPFLAGS").unwrap_or_else(|_| String::new());
|
||
/// let cppflags = shell_words::split(&cppflags_str).expect("failed to parse CPPFLAGS");
|
||
///
|
||
/// Command::new(cc)
|
||
/// .args(cflags)
|
||
/// .args(cppflags)
|
||
/// .args(&["-c", "a.c", "-o", "a.out"])
|
||
/// .spawn()
|
||
/// .expect("failed to start subprocess")
|
||
/// .wait()
|
||
/// .expect("failed to wait for subprocess");
|
||
/// ```
|
||
pub fn split(s: &str) -> Result<Vec<String>, ParseError> {
|
||
use State::*;
|
||
|
||
let mut words = Vec::new();
|
||
let mut word = String::new();
|
||
let mut chars = s.chars();
|
||
let mut state = Delimiter;
|
||
|
||
loop {
|
||
let c = chars.next();
|
||
state = match state {
|
||
Delimiter => match c {
|
||
None => break,
|
||
Some('\'') => SingleQuoted,
|
||
Some('\"') => DoubleQuoted,
|
||
Some('\\') => Backslash,
|
||
Some('\t') | Some(' ') | Some('\n') => Delimiter,
|
||
Some('#') => Comment,
|
||
Some(c) => {
|
||
word.push(c);
|
||
Unquoted
|
||
}
|
||
},
|
||
Backslash => match c {
|
||
None => {
|
||
word.push('\\');
|
||
words.push(mem::replace(&mut word, String::new()));
|
||
break;
|
||
}
|
||
Some('\n') => Delimiter,
|
||
Some(c) => {
|
||
word.push(c);
|
||
Unquoted
|
||
}
|
||
},
|
||
Unquoted => match c {
|
||
None => {
|
||
words.push(mem::replace(&mut word, String::new()));
|
||
break;
|
||
}
|
||
Some('\'') => SingleQuoted,
|
||
Some('\"') => DoubleQuoted,
|
||
Some('\\') => UnquotedBackslash,
|
||
Some('\t') | Some(' ') | Some('\n') => {
|
||
words.push(mem::replace(&mut word, String::new()));
|
||
Delimiter
|
||
}
|
||
Some(c) => {
|
||
word.push(c);
|
||
Unquoted
|
||
}
|
||
},
|
||
UnquotedBackslash => match c {
|
||
None => {
|
||
word.push('\\');
|
||
words.push(mem::replace(&mut word, String::new()));
|
||
break;
|
||
}
|
||
Some('\n') => Unquoted,
|
||
Some(c) => {
|
||
word.push(c);
|
||
Unquoted
|
||
}
|
||
},
|
||
SingleQuoted => match c {
|
||
None => return Err(ParseError),
|
||
Some('\'') => Unquoted,
|
||
Some(c) => {
|
||
word.push(c);
|
||
SingleQuoted
|
||
}
|
||
},
|
||
DoubleQuoted => match c {
|
||
None => return Err(ParseError),
|
||
Some('\"') => Unquoted,
|
||
Some('\\') => DoubleQuotedBackslash,
|
||
Some(c) => {
|
||
word.push(c);
|
||
DoubleQuoted
|
||
}
|
||
},
|
||
DoubleQuotedBackslash => match c {
|
||
None => return Err(ParseError),
|
||
Some('\n') => DoubleQuoted,
|
||
Some(c @ '$') | Some(c @ '`') | Some(c @ '"') | Some(c @ '\\') => {
|
||
word.push(c);
|
||
DoubleQuoted
|
||
}
|
||
Some(c) => {
|
||
word.push('\\');
|
||
word.push(c);
|
||
DoubleQuoted
|
||
}
|
||
},
|
||
Comment => match c {
|
||
None => break,
|
||
Some('\n') => Delimiter,
|
||
Some(_) => Comment,
|
||
},
|
||
}
|
||
}
|
||
|
||
Ok(words)
|
||
}
|
||
|
||
/// Quoting strategy chosen by `escape_style` and applied by `quote`.
enum EscapeStyle {
    /// No escaping: the string is used verbatim.
    None,
    /// Wrap in single quotes.
    SingleQuoted,
    /// Single quotes combined with backslash: the string is wrapped in single
    /// quotes and every embedded single quote is written as `'\''`.
    Mixed,
}

/// Determines escaping style to use.
///
/// Returns `SingleQuoted` for the empty string, `Mixed` when the string
/// contains a single quote, `SingleQuoted` when it contains any other
/// character from the list below, and `None` otherwise.
fn escape_style(s: &str) -> EscapeStyle {
    // An empty argument must be spelled `''` to survive word splitting.
    if s.is_empty() {
        return EscapeStyle::SingleQuoted;
    }

    let mut special = false;
    for c in s.chars() {
        match c {
            // A single quote cannot be represented inside single quotes, so
            // its presence alone decides the style.
            '\'' => return EscapeStyle::Mixed,
            // Note the ASCII tilde: an unquoted leading `~` would be subject
            // to tilde expansion.
            '\n' | '|' | '&' | ';' | '<' | '>' | '(' | ')' | '$' | '`' | '\\' | '"' | ' '
            | '\t' | '*' | '?' | '[' | '#' | '~' | '=' | '%' => special = true,
            _ => {}
        }
    }

    if special {
        EscapeStyle::SingleQuoted
    } else {
        EscapeStyle::None
    }
}

/// Escapes special characters in a string, so that it will retain its literal
/// meaning when used as a part of command in Unix shell.
///
/// It tries to avoid introducing any unnecessary quotes or escape characters,
/// but specifics regarding quoting style are left unspecified.
///
/// The input is returned borrowed when no quoting is required; otherwise a
/// newly allocated, quoted copy is returned.
pub fn quote(s: &str) -> Cow<str> {
    // We are going somewhat out of the way to provide
    // minimal amount of quoting in typical cases.
    match escape_style(s) {
        EscapeStyle::None => Cow::Borrowed(s),
        EscapeStyle::SingleQuoted => Cow::Owned(format!("'{}'", s)),
        EscapeStyle::Mixed => {
            // At least the input plus the two enclosing quotes.
            let mut quoted = String::with_capacity(s.len() + 2);
            quoted.push('\'');
            for c in s.chars() {
                if c == '\'' {
                    // Close the quotes, emit an escaped quote, reopen them.
                    quoted.push_str("'\\''");
                } else {
                    quoted.push(c);
                }
            }
            quoted.push('\'');
            Cow::Owned(quoted)
        }
    }
}
|
||
|
||
/// Joins arguments into a single command line suitable for execution in Unix
|
||
/// shell.
|
||
///
|
||
/// Each argument is quoted using [`quote`] to preserve its literal meaning when
|
||
/// parsed by Unix shell.
|
||
///
|
||
/// Note: This function is essentially an inverse of [`split`].
|
||
///
|
||
/// # Examples
|
||
///
|
||
/// Logging executed commands in format that can be easily copied and pasted
|
||
/// into an actual shell:
|
||
///
|
||
/// ```rust,no_run
|
||
/// fn execute(args: &[&str]) {
|
||
/// use std::process::Command;
|
||
/// println!("Executing: {}", shell_words::join(args));
|
||
/// Command::new(&args[0])
|
||
/// .args(&args[1..])
|
||
/// .spawn()
|
||
/// .expect("failed to start subprocess")
|
||
/// .wait()
|
||
/// .expect("failed to wait for subprocess");
|
||
/// }
|
||
///
|
||
/// execute(&["python", "-c", "print('Hello world!')"]);
|
||
/// ```
|
||
///
|
||
/// [`quote`]: fn.quote.html
|
||
/// [`split`]: fn.split.html
|
||
pub fn join<I, S>(words: I) -> String
|
||
where
|
||
I: IntoIterator<Item = S>,
|
||
S: AsRef<str>,
|
||
{
|
||
let mut line = words.into_iter().fold(String::new(), |mut line, word| {
|
||
let quoted = quote(word.as_ref());
|
||
line.push_str(quoted.as_ref());
|
||
line.push(' ');
|
||
line
|
||
});
|
||
line.pop();
|
||
line
|
||
}
|
||
|
||
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `split` succeeds on every input and yields exactly the
    /// expected words, reporting the offending input otherwise.
    fn split_ok(cases: &[(&str, &[&str])]) {
        for &(input, expected) in cases {
            match split(input) {
                Err(actual) => {
                    panic!(
                        "After split({:?})\nexpected: Ok({:?})\n actual: Err({:?})\n",
                        input, expected, actual
                    );
                }
                Ok(actual) => {
                    assert!(
                        expected == actual.as_slice(),
                        "After split({:?}).unwrap()\nexpected: {:?}\n actual: {:?}\n",
                        input,
                        expected,
                        actual
                    );
                }
            }
        }
    }

    #[test]
    fn split_empty() {
        split_ok(&[("", &[])]);
    }

    #[test]
    fn split_initial_whitespace_is_removed() {
        split_ok(&[
            (" a", &["a"]),
            ("\t\t\t\tbar", &["bar"]),
            ("\t \nc", &["c"]),
        ]);
    }

    #[test]
    fn split_trailing_whitespace_is_removed() {
        split_ok(&[
            ("a ", &["a"]),
            ("b\t", &["b"]),
            ("c\t \n \n \n", &["c"]),
            ("d\n\n", &["d"]),
        ]);
    }

    #[test]
    fn split_carriage_return_is_not_special() {
        split_ok(&[("c\ra\r'\r'\r", &["c\ra\r\r\r"])]);
    }

    #[test]
    fn split_single_quotes() {
        split_ok(&[
            (r#"''"#, &[r#""#]),
            (r#"'a'"#, &[r#"a"#]),
            (r#"'\'"#, &[r#"\"#]),
            (r#"' \ '"#, &[r#" \ "#]),
            (r#"'#'"#, &[r#"#"#]),
        ]);
    }

    #[test]
    fn split_double_quotes() {
        split_ok(&[
            (r#""""#, &[""]),
            (r#""""""#, &[""]),
            (r#""a b c' d""#, &["a b c' d"]),
            (r#""\a""#, &["\\a"]),
            (r#""$""#, &["$"]),
            (r#""\$""#, &["$"]),
            (r#""`""#, &["`"]),
            (r#""\`""#, &["`"]),
            (r#""\"""#, &["\""]),
            (r#""\\""#, &["\\"]),
            ("\"\n\"", &["\n"]),
            ("\"\\\n\"", &[""]),
        ]);
    }

    #[test]
    fn split_unquoted() {
        split_ok(&[
            (r#"\|\&\;"#, &[r#"|&;"#]),
            (r#"\<\>"#, &[r#"<>"#]),
            (r#"\(\)"#, &[r#"()"#]),
            (r#"\$"#, &[r#"$"#]),
            (r#"\`"#, &[r#"`"#]),
            (r#"\""#, &[r#"""#]),
            (r#"\'"#, &[r#"'"#]),
            ("\\\n", &[]),
            (" \\\n \n", &[]),
            ("a\nb\nc", &["a", "b", "c"]),
            ("a\\\nb\\\nc", &["abc"]),
            ("foo bar baz", &["foo", "bar", "baz"]),
            (r#"\🦉"#, &[r"🦉"]),
        ]);
    }

    #[test]
    fn split_trailing_backslash() {
        split_ok(&[("\\", &["\\"]), (" \\", &["\\"]), ("a\\", &["a\\"])]);
    }

    /// Every way of reaching the end of input inside a quoted section fails.
    #[test]
    fn split_errors() {
        // Unterminated single quotes.
        assert_eq!(split("'abc"), Err(ParseError));
        // Unterminated double quotes.
        assert_eq!(split("\""), Err(ParseError));
        // A backslash is not special inside single quotes.
        assert_eq!(split("'\\"), Err(ParseError));
        // Input ending right after a backslash inside double quotes.
        assert_eq!(split("\"\\"), Err(ParseError));
    }

    #[test]
    fn split_comments() {
        split_ok(&[
            (r#" x # comment "#, &["x"]),
            (r#" w1#w2 "#, &["w1#w2"]),
            (r#"'not really a # comment'"#, &["not really a # comment"]),
            (" a # very long comment \n b # another comment", &["a", "b"]),
        ]);
    }

    #[test]
    fn test_quote() {
        assert_eq!(quote(""), "''");
        assert_eq!(quote("'"), "''\\'''");
        assert_eq!(quote("abc"), "abc");
        assert_eq!(quote("a \n b"), "'a \n b'");
        assert_eq!(quote("X'\nY"), "'X'\\''\nY'");
    }

    #[test]
    fn test_join() {
        assert_eq!(join(&["a", "b", "c"]), "a b c");
        assert_eq!(join(&[" ", "$", "\n"]), "' ' '$' '\n'");
    }

    #[test]
    fn join_followed_by_split_is_identity() {
        let cases: Vec<&[&str]> = vec![
            &["a"],
            &["python", "-c", "print('Hello world!')"],
            &["echo", " arg with spaces ", "arg \' with \" quotes"],
            &["even newlines are quoted correctly\n", "\n", "\n\n\t "],
            &["$", "`test`"],
            &["cat", "~user/log*"],
            &["test", "'a \"b", "\"X'"],
            &["empty", "", "", ""],
        ];
        for argv in cases {
            let args = join(argv);
            assert_eq!(split(&args).unwrap(), argv);
        }
    }
}
|