Files
fparkan/vendor/shell-words/src/lib.rs
2024-01-08 01:21:28 +04:00

508 lines
15 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// Copyright 2018 Tomasz Miąsko
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE>
// or the MIT license <LICENSE-MIT>, at your option.
//
//! Process command line according to parsing rules of Unix shell as specified
//! in [Shell Command Language in POSIX.1-2008][posix-shell].
//!
//! [posix-shell]: http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html
#![cfg_attr(not(feature = "std"), no_std)]
#![forbid(unsafe_code)]
#[cfg(feature = "std")]
extern crate core;
use core::fmt;
use core::mem;
#[cfg(not(feature = "std"))]
#[macro_use]
extern crate alloc;
#[cfg(not(feature = "std"))]
use alloc::string::String;
#[cfg(not(feature = "std"))]
use alloc::vec::Vec;
#[cfg(not(feature = "std"))]
use alloc::borrow::Cow;
#[cfg(feature = "std")]
use std::borrow::Cow;
/// Error reported when a command line cannot be parsed.
///
/// The type carries no data; its `Display` output is the fixed message
/// `missing closing quote`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ParseError;

impl fmt::Display for ParseError {
    /// Writes the fixed, human-readable description of the error.
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        formatter.write_str("missing closing quote")
    }
}

// `std::error::Error` only exists with the standard library, so the impl is
// gated on the `std` feature.
#[cfg(feature = "std")]
impl std::error::Error for ParseError {}
/// Lexical context of the parser in `split`: each variant names the situation
/// in which the next input character is interpreted.
enum State {
    /// Between words: whitespace is skipped and no word has been started.
    Delimiter,
    /// A backslash was read while between words, before any word character.
    Backslash,
    /// Inside a word, outside of any quotes.
    Unquoted,
    /// A backslash was read inside an unquoted word.
    UnquotedBackslash,
    /// Inside single quotes; every character up to the closing quote is literal.
    SingleQuoted,
    /// Inside double quotes.
    DoubleQuoted,
    /// A backslash was read inside double quotes.
    DoubleQuotedBackslash,
    /// Inside a `#` comment, which runs until the next newline.
    Comment,
}
/// Splits command line into separate arguments, in much the same way Unix shell
/// would, but without many of expansion the shell would perform.
///
/// The split functionality is compatible with behaviour of Unix shell, but with
/// word expansions limited to quote removal, and without special token
/// recognition rules for operators.
///
/// The result is exactly the same as one obtained from Unix shell as long as
/// those unsupported features are not present in input: no operators, no
/// variable assignments, no tilde expansion, no parameter expansion, no command
/// substitution, no arithmetic expansion, no pathname expansion.
///
/// In case those unsupported shell features are present, the syntax that
/// introduce them is interpreted literally.
///
/// # Errors
///
/// When input contains unmatched quote, an error is returned.
///
/// # Compatibility with other implementations
///
/// It should be fully compatible with g_shell_parse_argv from GLib, except that
/// in GLib it is an error not to have any words after tokenization.
///
/// It is also very close to shlex.split available in Python standard library,
/// when used in POSIX mode with support for comments. Though, shlex
/// implementation diverges from POSIX, and from implementation contained herein
/// in three aspects. First, it doesn't support line continuations.
/// Second, inside double quotes, the backslash characters retains its special
/// meaning as an escape character only when followed by \\ or \", whereas POSIX
/// specifies that it should retain its special meaning when followed by: $, \`,
/// \", \\, or a newline. Third, it treats carriage return as one of delimiters.
///
/// # Examples
///
/// Building an executable using compiler obtained from CC environment variable
/// and compiler flags from both CFLAGS and CPPFLAGS. Similar to default build
/// rule for C used in GNU Make:
///
/// ```rust,no_run
/// use std::env::var;
/// use std::process::Command;
///
/// let cc = var("CC").unwrap_or_else(|_| "cc".to_owned());
///
/// let cflags_str = var("CFLAGS").unwrap_or_else(|_| String::new());
/// let cflags = shell_words::split(&cflags_str).expect("failed to parse CFLAGS");
///
/// let cppflags_str = var("CPPFLAGS").unwrap_or_else(|_| String::new());
/// let cppflags = shell_words::split(&cppflags_str).expect("failed to parse CPPFLAGS");
///
/// Command::new(cc)
/// .args(cflags)
/// .args(cppflags)
/// .args(&["-c", "a.c", "-o", "a.out"])
/// .spawn()
/// .expect("failed to start subprocess")
/// .wait()
/// .expect("failed to wait for subprocess");
/// ```
pub fn split(s: &str) -> Result<Vec<String>, ParseError> {
use State::*;
let mut words = Vec::new();
let mut word = String::new();
let mut chars = s.chars();
let mut state = Delimiter;
loop {
let c = chars.next();
state = match state {
Delimiter => match c {
None => break,
Some('\'') => SingleQuoted,
Some('\"') => DoubleQuoted,
Some('\\') => Backslash,
Some('\t') | Some(' ') | Some('\n') => Delimiter,
Some('#') => Comment,
Some(c) => {
word.push(c);
Unquoted
}
},
Backslash => match c {
None => {
word.push('\\');
words.push(mem::replace(&mut word, String::new()));
break;
}
Some('\n') => Delimiter,
Some(c) => {
word.push(c);
Unquoted
}
},
Unquoted => match c {
None => {
words.push(mem::replace(&mut word, String::new()));
break;
}
Some('\'') => SingleQuoted,
Some('\"') => DoubleQuoted,
Some('\\') => UnquotedBackslash,
Some('\t') | Some(' ') | Some('\n') => {
words.push(mem::replace(&mut word, String::new()));
Delimiter
}
Some(c) => {
word.push(c);
Unquoted
}
},
UnquotedBackslash => match c {
None => {
word.push('\\');
words.push(mem::replace(&mut word, String::new()));
break;
}
Some('\n') => Unquoted,
Some(c) => {
word.push(c);
Unquoted
}
},
SingleQuoted => match c {
None => return Err(ParseError),
Some('\'') => Unquoted,
Some(c) => {
word.push(c);
SingleQuoted
}
},
DoubleQuoted => match c {
None => return Err(ParseError),
Some('\"') => Unquoted,
Some('\\') => DoubleQuotedBackslash,
Some(c) => {
word.push(c);
DoubleQuoted
}
},
DoubleQuotedBackslash => match c {
None => return Err(ParseError),
Some('\n') => DoubleQuoted,
Some(c @ '$') | Some(c @ '`') | Some(c @ '"') | Some(c @ '\\') => {
word.push(c);
DoubleQuoted
}
Some(c) => {
word.push('\\');
word.push(c);
DoubleQuoted
}
},
Comment => match c {
None => break,
Some('\n') => Delimiter,
Some(_) => Comment,
},
}
}
Ok(words)
}
/// Quoting strategy chosen by `escape_style` for a single word.
enum EscapeStyle {
    /// No escaping.
    None,
    /// Wrap in single quotes.
    SingleQuoted,
    /// Single quotes combined with backslash.
    Mixed,
}

/// Determines escaping style to use.
///
/// An empty string yields [`EscapeStyle::SingleQuoted`]. A string without any
/// of the characters listed below yields [`EscapeStyle::None`]. A string that
/// contains a newline but no single quote yields
/// [`EscapeStyle::SingleQuoted`]; every other string containing a special
/// character yields [`EscapeStyle::Mixed`].
///
/// The characters treated as special are the ones POSIX lists as needing
/// quoting: ``| & ; < > ( ) $ ` \ " '``, space, tab, newline, and the
/// conditionally special `* ? [ # ~ = %`.
fn escape_style(s: &str) -> EscapeStyle {
    if s.is_empty() {
        return EscapeStyle::SingleQuoted;
    }

    let mut special = false;
    let mut newline = false;
    let mut single_quote = false;

    for c in s.chars() {
        match c {
            '\n' => {
                newline = true;
                special = true;
            }
            '\'' => {
                single_quote = true;
                special = true;
            }
            // Note the ASCII tilde: a look-alike character here would leave
            // words such as `~user` unquoted and open to tilde expansion.
            '|' | '&' | ';' | '<' | '>' | '(' | ')' | '$' | '`' | '\\' | '"' | ' ' | '\t' | '*'
            | '?' | '[' | '#' | '~' | '=' | '%' => {
                special = true;
            }
            _ => continue,
        }
    }

    if !special {
        EscapeStyle::None
    } else if newline && !single_quote {
        EscapeStyle::SingleQuoted
    } else {
        EscapeStyle::Mixed
    }
}
/// Quotes a string so that it keeps its literal meaning when used as part of
/// a command in a Unix shell.
///
/// Quotes and escape characters are only added when they are needed: a string
/// that requires no escaping is returned borrowed and unchanged. Beyond that,
/// the specifics of the quoting style are left unspecified.
pub fn quote(s: &str) -> Cow<str> {
    match escape_style(s) {
        // Nothing to protect: hand the input back without allocating.
        EscapeStyle::None => Cow::Borrowed(s),
        EscapeStyle::SingleQuoted => Cow::Owned(format!("'{}'", s)),
        EscapeStyle::Mixed => {
            // A single quote cannot appear inside a single-quoted section, so
            // each one closes the section, emits `\'`, and reopens it.
            let mut escaped = String::from("'");
            for ch in s.chars() {
                match ch {
                    '\'' => escaped.push_str("'\\''"),
                    other => escaped.push(other),
                }
            }
            escaped.push('\'');
            Cow::Owned(escaped)
        }
    }
}
/// Joins arguments into a single command line suitable for execution in a
/// Unix shell.
///
/// Every argument is passed through [`quote`] so that it keeps its literal
/// meaning when parsed by a Unix shell; the quoted arguments are separated by
/// single spaces. An empty sequence produces an empty string.
///
/// Note: This function is essentially an inverse of [`split`].
///
/// # Examples
///
/// Logging executed commands in format that can be easily copied and pasted
/// into an actual shell:
///
/// ```rust,no_run
/// fn execute(args: &[&str]) {
///     use std::process::Command;
///     println!("Executing: {}", shell_words::join(args));
///     Command::new(&args[0])
///         .args(&args[1..])
///         .spawn()
///         .expect("failed to start subprocess")
///         .wait()
///         .expect("failed to wait for subprocess");
/// }
///
/// execute(&["python", "-c", "print('Hello world!')"]);
/// ```
///
/// [`quote`]: fn.quote.html
/// [`split`]: fn.split.html
pub fn join<I, S>(words: I) -> String
where
    I: IntoIterator<Item = S>,
    S: AsRef<str>,
{
    let mut command = String::new();
    for (position, word) in words.into_iter().enumerate() {
        // The separator goes before every word except the first.
        if position > 0 {
            command.push(' ');
        }
        command.push_str(quote(word.as_ref()).as_ref());
    }
    command
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that every `(input, expected)` pair splits successfully into
    /// exactly the expected words, reporting the offending input on failure.
    fn split_ok(cases: &[(&str, &[&str])]) {
        for &(input, expected) in cases {
            match split(input) {
                Err(actual) => {
                    panic!(
                        "After split({:?})\nexpected: Ok({:?})\n actual: Err({:?})\n",
                        input, expected, actual
                    );
                }
                Ok(actual) => {
                    assert!(
                        expected == actual.as_slice(),
                        "After split({:?}).unwrap()\nexpected: {:?}\n actual: {:?}\n",
                        input,
                        expected,
                        actual
                    );
                }
            }
        }
    }

    #[test]
    fn split_empty() {
        split_ok(&[("", &[])]);
    }

    #[test]
    fn split_initial_whitespace_is_removed() {
        split_ok(&[
            (" a", &["a"]),
            ("\t\t\t\tbar", &["bar"]),
            ("\t \nc", &["c"]),
        ]);
    }

    #[test]
    fn split_trailing_whitespace_is_removed() {
        split_ok(&[
            ("a ", &["a"]),
            ("b\t", &["b"]),
            ("c\t \n \n \n", &["c"]),
            ("d\n\n", &["d"]),
        ]);
    }

    #[test]
    fn split_carriage_return_is_not_special() {
        split_ok(&[("c\ra\r'\r'\r", &["c\ra\r\r\r"])]);
    }

    #[test]
    fn split_single_quotes() {
        split_ok(&[
            (r#"''"#, &[r#""#]),
            (r#"'a'"#, &[r#"a"#]),
            (r#"'\'"#, &[r#"\"#]),
            (r#"' \ '"#, &[r#" \ "#]),
            (r#"'#'"#, &[r#"#"#]),
        ]);
    }

    #[test]
    fn split_double_quotes() {
        split_ok(&[
            (r#""""#, &[""]),
            (r#""""""#, &[""]),
            (r#""a b c' d""#, &["a b c' d"]),
            (r#""\a""#, &["\\a"]),
            (r#""$""#, &["$"]),
            (r#""\$""#, &["$"]),
            (r#""`""#, &["`"]),
            (r#""\`""#, &["`"]),
            (r#""\"""#, &["\""]),
            (r#""\\""#, &["\\"]),
            ("\"\n\"", &["\n"]),
            ("\"\\\n\"", &[""]),
        ]);
    }

    #[test]
    fn split_unquoted() {
        split_ok(&[
            (r#"\|\&\;"#, &[r#"|&;"#]),
            (r#"\<\>"#, &[r#"<>"#]),
            (r#"\(\)"#, &[r#"()"#]),
            (r#"\$"#, &[r#"$"#]),
            (r#"\`"#, &[r#"`"#]),
            (r#"\""#, &[r#"""#]),
            (r#"\'"#, &[r#"'"#]),
            ("\\\n", &[]),
            (" \\\n \n", &[]),
            ("a\nb\nc", &["a", "b", "c"]),
            ("a\\\nb\\\nc", &["abc"]),
            ("foo bar baz", &["foo", "bar", "baz"]),
            (r#"\🦉"#, &[r"🦉"]),
        ]);
    }

    #[test]
    fn split_trailing_backslash() {
        split_ok(&[("\\", &["\\"]), (" \\", &["\\"]), ("a\\", &["a\\"])]);
    }

    #[test]
    fn split_errors() {
        // Unterminated single quote.
        assert_eq!(split("'abc"), Err(ParseError));
        // Unterminated double quote.
        assert_eq!(split("\""), Err(ParseError));
        // A backslash is literal inside single quotes, so the quote stays open.
        assert_eq!(split("'\\"), Err(ParseError));
        // Input ends right after a backslash inside double quotes.
        assert_eq!(split("\"\\"), Err(ParseError));
    }

    #[test]
    fn split_comments() {
        split_ok(&[
            (r#" x # comment "#, &["x"]),
            (r#" w1#w2 "#, &["w1#w2"]),
            (r#"'not really a # comment'"#, &["not really a # comment"]),
            (" a # very long comment \n b # another comment", &["a", "b"]),
        ]);
    }

    #[test]
    fn test_quote() {
        assert_eq!(quote(""), "''");
        assert_eq!(quote("'"), "''\\'''");
        assert_eq!(quote("abc"), "abc");
        assert_eq!(quote("a \n b"), "'a \n b'");
        assert_eq!(quote("X'\nY"), "'X'\\''\nY'");
    }

    #[test]
    fn test_join() {
        assert_eq!(join(&["a", "b", "c"]), "a b c");
        assert_eq!(join(&[" ", "$", "\n"]), "' ' '$' '\n'");
    }

    #[test]
    fn join_followed_by_split_is_identity() {
        let cases: Vec<&[&str]> = vec![
            &["a"],
            &["python", "-c", "print('Hello world!')"],
            &["echo", " arg with spaces ", "arg \' with \" quotes"],
            &["even newlines are quoted correctly\n", "\n", "\n\n\t "],
            &["$", "`test`"],
            &["cat", "~user/log*"],
            &["test", "'a \"b", "\"X'"],
            &["empty", "", "", ""],
        ];
        for argv in cases {
            let args = join(argv);
            assert_eq!(split(&args).unwrap(), argv);
        }
    }
}