Initial vendor packages
Signed-off-by: Valentin Popov <valentin@popov.link>
This commit is contained in:
		
							
								
								
									
										433
									
								
								vendor/textwrap/src/core.rs
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										433
									
								
								vendor/textwrap/src/core.rs
									
									
									
									
										vendored
									
									
										Normal file
									
								
							@@ -0,0 +1,433 @@
 | 
			
		||||
//! Building blocks for advanced wrapping functionality.
 | 
			
		||||
//!
 | 
			
		||||
//! The functions and structs in this module can be used to implement
 | 
			
		||||
//! advanced wrapping functionality when the [`wrap`](super::wrap) and
 | 
			
		||||
//! [`fill`](super::fill) functions don't do what you want.
 | 
			
		||||
//!
 | 
			
		||||
//! In general, you want to follow these steps when wrapping
 | 
			
		||||
//! something:
 | 
			
		||||
//!
 | 
			
		||||
//! 1. Split your input into [`Fragment`]s. These are abstract blocks
 | 
			
		||||
//!    of text or content which can be wrapped into lines. See
 | 
			
		||||
//!    [`WordSeparator`](crate::word_separators::WordSeparator) for
 | 
			
		||||
//!    how to do this for text.
 | 
			
		||||
//!
 | 
			
		||||
//! 2. Potentially split your fragments into smaller pieces. This
 | 
			
		||||
//!    allows you to implement things like hyphenation. If you use the
 | 
			
		||||
//!    `Word` type, you can use the [`WordSplitter`](crate::WordSplitter)
 | 
			
		||||
//!    enum for this.
 | 
			
		||||
//!
 | 
			
		||||
//! 3. Potentially break apart fragments that are still too large to
 | 
			
		||||
//!    fit on a single line. This is implemented in [`break_words`].
 | 
			
		||||
//!
 | 
			
		||||
//! 4. Finally take your fragments and put them into lines. There are
 | 
			
		||||
//!    two algorithms for this in the
 | 
			
		||||
//!    [`wrap_algorithms`](crate::wrap_algorithms) module:
 | 
			
		||||
//!    [`wrap_optimal_fit`](crate::wrap_algorithms::wrap_optimal_fit)
 | 
			
		||||
//!    and [`wrap_first_fit`](crate::wrap_algorithms::wrap_first_fit).
 | 
			
		||||
//!    The former produces better line breaks, the latter is faster.
 | 
			
		||||
//!
 | 
			
		||||
//! 5. Iterate through the slices returned by the wrapping functions
 | 
			
		||||
//!    and construct your lines of output.
 | 
			
		||||
//!
 | 
			
		||||
//! Please [open an issue](https://github.com/mgeisler/textwrap/) if
 | 
			
		||||
//! the functionality here is not sufficient or if you have ideas for
 | 
			
		||||
//! improving it. We would love to hear from you!
 | 
			
		||||
 | 
			
		||||
/// The two characters of the CSI, or “Control Sequence Introducer”,
/// which opens an ANSI escape sequence. Such sequences are typically
/// used for colored text and are ignored when computing the text
/// width.
const CSI: (char, char) = ('\x1b', '[');
/// An ANSI escape sequence ends with a "final byte", which must lie
/// in this range.
const ANSI_FINAL_BYTE: std::ops::RangeInclusive<char> = '\x40'..='\x7e';

/// Consume an ANSI escape sequence if one starts at `ch`.
///
/// `ch` is the current `char` and `chars` yields the characters that
/// follow it. Returns `true` when `ch` and the following characters
/// form a complete escape sequence; `chars` is then positioned right
/// after the final byte of that sequence.
///
/// `chars` is left untouched unless `ch` is the escape character. If
/// it is, `chars` is advanced even when `false` is returned: by one
/// `char` when that `char` is not `[`, and all the way to the end
/// when no final byte is found.
#[inline]
pub(crate) fn skip_ansi_escape_sequence<I: Iterator<Item = char>>(ch: char, chars: &mut I) -> bool {
    let (escape, bracket) = CSI;
    if ch != escape {
        return false;
    }
    if chars.next() != Some(bracket) {
        return false;
    }
    // We are inside an escape code, typically used for colored
    // terminal text. Advance until (and including) the first "final
    // byte" in the range 0x40–0x7E; `any` stops consuming there.
    chars.any(|c| ANSI_FINAL_BYTE.contains(&c))
}
 | 
			
		||||
 | 
			
		||||
/// Width of `ch` in columns, as reported by the `unicode-width`
/// crate. A `char` for which the crate reports no width counts as
/// zero columns.
#[cfg(feature = "unicode-width")]
#[inline]
fn ch_width(ch: char) -> usize {
    match unicode_width::UnicodeWidthChar::width(ch) {
        Some(columns) => columns,
        None => 0,
    }
}

/// First character which [`ch_width`] will classify as double-width.
/// Please see [`display_width`].
#[cfg(not(feature = "unicode-width"))]
const DOUBLE_WIDTH_CUTOFF: char = '\u{1100}';

/// Approximate width of `ch` in columns: one column for every `char`
/// below [`DOUBLE_WIDTH_CUTOFF`], two columns for everything else.
#[cfg(not(feature = "unicode-width"))]
#[inline]
fn ch_width(ch: char) -> usize {
    match ch {
        narrow if narrow < DOUBLE_WIDTH_CUTOFF => 1,
        _ => 2,
    }
}
 | 
			
		||||
 | 
			
		||||
/// Compute the display width of `text` while skipping over ANSI
 | 
			
		||||
/// escape sequences.
 | 
			
		||||
///
 | 
			
		||||
/// # Examples
 | 
			
		||||
///
 | 
			
		||||
/// ```
 | 
			
		||||
/// use textwrap::core::display_width;
 | 
			
		||||
///
 | 
			
		||||
/// assert_eq!(display_width("Café Plain"), 10);
 | 
			
		||||
/// assert_eq!(display_width("\u{1b}[31mCafé Rouge\u{1b}[0m"), 10);
 | 
			
		||||
/// ```
 | 
			
		||||
///
 | 
			
		||||
/// **Note:** When the `unicode-width` Cargo feature is disabled, the
 | 
			
		||||
/// width of a `char` is determined by a crude approximation which
 | 
			
		||||
/// simply counts chars below U+1100 as 1 column wide, and all other
 | 
			
		||||
/// characters as 2 columns wide. With the feature enabled, this function
 | 
			
		||||
/// will correctly deal with [combining characters] in their
 | 
			
		||||
/// decomposed form (see [Unicode equivalence]).
 | 
			
		||||
///
 | 
			
		||||
/// An example of a decomposed character is “é”, which can be
 | 
			
		||||
/// decomposed into: “e” followed by a combining acute accent: “◌́”.
 | 
			
		||||
/// Without the `unicode-width` Cargo feature, every `char` below
 | 
			
		||||
/// U+1100 has a width of 1. This includes the combining accent:
 | 
			
		||||
///
 | 
			
		||||
/// ```
 | 
			
		||||
/// use textwrap::core::display_width;
 | 
			
		||||
///
 | 
			
		||||
/// assert_eq!(display_width("Cafe Plain"), 10);
 | 
			
		||||
/// #[cfg(feature = "unicode-width")]
 | 
			
		||||
/// assert_eq!(display_width("Cafe\u{301} Plain"), 10);
 | 
			
		||||
/// #[cfg(not(feature = "unicode-width"))]
 | 
			
		||||
/// assert_eq!(display_width("Cafe\u{301} Plain"), 11);
 | 
			
		||||
/// ```
 | 
			
		||||
///
 | 
			
		||||
/// ## Emojis and CJK Characters
 | 
			
		||||
///
 | 
			
		||||
/// Characters such as emojis and [CJK characters] used in the
 | 
			
		||||
/// Chinese, Japanese, and Korean languages are seen as double-width,
 | 
			
		||||
/// even if the `unicode-width` feature is disabled:
 | 
			
		||||
///
 | 
			
		||||
/// ```
 | 
			
		||||
/// use textwrap::core::display_width;
 | 
			
		||||
///
 | 
			
		||||
/// assert_eq!(display_width("😂😭🥺🤣✨😍🙏🥰😊🔥"), 20);
 | 
			
		||||
/// assert_eq!(display_width("你好"), 4);  // “Nǐ hǎo” or “Hello” in Chinese
 | 
			
		||||
/// ```
 | 
			
		||||
///
 | 
			
		||||
/// # Limitations
 | 
			
		||||
///
 | 
			
		||||
/// The displayed width of a string cannot always be computed from the
 | 
			
		||||
/// string alone. This is because the width depends on the rendering
 | 
			
		||||
/// engine used. This is particularly visible with [emoji modifier
 | 
			
		||||
/// sequences] where a base emoji is modified with, e.g., skin tone or
 | 
			
		||||
/// hair color modifiers. It is up to the rendering engine to detect
 | 
			
		||||
/// this and to produce a suitable emoji.
 | 
			
		||||
///
 | 
			
		||||
/// A simple example is “❤️”, which consists of “❤” (U+2764: Black
 | 
			
		||||
/// Heart Symbol) followed by U+FE0F (Variation Selector-16). By
 | 
			
		||||
/// itself, “❤” is a black heart, but if you follow it with the
 | 
			
		||||
/// variant selector, you may get a wider red heart.
 | 
			
		||||
///
 | 
			
		||||
/// A more complex example would be “👨🦰” which should depict a man
 | 
			
		||||
/// with red hair. Here the computed width is too large — and the
 | 
			
		||||
/// width differs depending on the use of the `unicode-width` feature:
 | 
			
		||||
///
 | 
			
		||||
/// ```
 | 
			
		||||
/// use textwrap::core::display_width;
 | 
			
		||||
///
 | 
			
		||||
/// assert_eq!("👨🦰".chars().collect::<Vec<char>>(), ['\u{1f468}', '\u{200d}', '\u{1f9b0}']);
 | 
			
		||||
/// #[cfg(feature = "unicode-width")]
 | 
			
		||||
/// assert_eq!(display_width("👨🦰"), 4);
 | 
			
		||||
/// #[cfg(not(feature = "unicode-width"))]
 | 
			
		||||
/// assert_eq!(display_width("👨🦰"), 6);
 | 
			
		||||
/// ```
 | 
			
		||||
///
 | 
			
		||||
/// This happens because the grapheme consists of three code points:
 | 
			
		||||
/// “👨” (U+1F468: Man), Zero Width Joiner (U+200D), and “🦰”
 | 
			
		||||
/// (U+1F9B0: Red Hair). You can see them above in the test. With
 | 
			
		||||
/// `unicode-width` enabled, the ZWJ is correctly seen as having zero
 | 
			
		||||
/// width; without it, it is counted as a double-width character.
 | 
			
		||||
///
 | 
			
		||||
/// ## Terminal Support
 | 
			
		||||
///
 | 
			
		||||
/// Modern browsers typically do a great job at combining characters
 | 
			
		||||
/// as shown above, but terminals often struggle more. As an example,
 | 
			
		||||
/// Gnome Terminal version 3.38.1, shows “❤️” as a big red heart, but
 | 
			
		||||
/// shows "👨🦰" as “👨🦰”.
 | 
			
		||||
///
 | 
			
		||||
/// [combining characters]: https://en.wikipedia.org/wiki/Combining_character
 | 
			
		||||
/// [Unicode equivalence]: https://en.wikipedia.org/wiki/Unicode_equivalence
 | 
			
		||||
/// [CJK characters]: https://en.wikipedia.org/wiki/CJK_characters
 | 
			
		||||
/// [emoji modifier sequences]: https://unicode.org/emoji/charts/full-emoji-modifiers.html
 | 
			
		||||
pub fn display_width(text: &str) -> usize {
 | 
			
		||||
    let mut chars = text.chars();
 | 
			
		||||
    let mut width = 0;
 | 
			
		||||
    while let Some(ch) = chars.next() {
 | 
			
		||||
        if skip_ansi_escape_sequence(ch, &mut chars) {
 | 
			
		||||
            continue;
 | 
			
		||||
        }
 | 
			
		||||
        width += ch_width(ch);
 | 
			
		||||
    }
 | 
			
		||||
    width
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// The unit of (text) content which gets wrapped into lines.
///
/// A fragment models an abstract _word_ together with the
/// _whitespace_ that follows it. When the word ends up last on a
/// line, the whitespace is dropped and a so-called _penalty_ is
/// inserted in its place (typically `"-"` if the word was
/// hyphenated).
///
/// The wrapping code does not care about the actual content of the
/// word, the whitespace, or the penalty. Only the displayed width of
/// each of the three parts matters, and that is what this trait
/// exposes.
pub trait Fragment: std::fmt::Debug {
    /// Displayed width of the word this fragment represents.
    fn width(&self) -> f64;

    /// Displayed width of the whitespace which must follow the word
    /// whenever the word is not the last one on its line.
    fn whitespace_width(&self) -> f64;

    /// Displayed width of the penalty which must be inserted when
    /// the word is the last one on its line.
    fn penalty_width(&self) -> f64;
}
 | 
			
		||||
 | 
			
		||||
/// A stretch of wrappable text together with its trailing whitespace.
///
/// `Word` is one example of a [`Fragment`]: it carries a width, the
/// whitespace which follows it, and possibly a penalty item.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub struct Word<'a> {
    /// The content of the word.
    pub word: &'a str,
    /// Whitespace to insert when the word is not the last one on a line.
    pub whitespace: &'a str,
    /// Penalty string to insert when the word is the last one on a line.
    pub penalty: &'a str,
    // Width of `word` in columns, cached at construction time.
    pub(crate) width: usize,
}
 | 
			
		||||
 | 
			
		||||
impl std::ops::Deref for Word<'_> {
 | 
			
		||||
    type Target = str;
 | 
			
		||||
 | 
			
		||||
    fn deref(&self) -> &Self::Target {
 | 
			
		||||
        self.word
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
impl<'a> Word<'a> {
 | 
			
		||||
    /// Construct a `Word` from a string.
 | 
			
		||||
    ///
 | 
			
		||||
    /// A trailing stretch of `' '` is automatically taken to be the
 | 
			
		||||
    /// whitespace part of the word.
 | 
			
		||||
    pub fn from(word: &str) -> Word<'_> {
 | 
			
		||||
        let trimmed = word.trim_end_matches(' ');
 | 
			
		||||
        Word {
 | 
			
		||||
            word: trimmed,
 | 
			
		||||
            width: display_width(trimmed),
 | 
			
		||||
            whitespace: &word[trimmed.len()..],
 | 
			
		||||
            penalty: "",
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Break this word into smaller words with a width of at most
 | 
			
		||||
    /// `line_width`. The whitespace and penalty from this `Word` is
 | 
			
		||||
    /// added to the last piece.
 | 
			
		||||
    ///
 | 
			
		||||
    /// # Examples
 | 
			
		||||
    ///
 | 
			
		||||
    /// ```
 | 
			
		||||
    /// use textwrap::core::Word;
 | 
			
		||||
    /// assert_eq!(
 | 
			
		||||
    ///     Word::from("Hello!  ").break_apart(3).collect::<Vec<_>>(),
 | 
			
		||||
    ///     vec![Word::from("Hel"), Word::from("lo!  ")]
 | 
			
		||||
    /// );
 | 
			
		||||
    /// ```
 | 
			
		||||
    pub fn break_apart<'b>(&'b self, line_width: usize) -> impl Iterator<Item = Word<'a>> + 'b {
 | 
			
		||||
        let mut char_indices = self.word.char_indices();
 | 
			
		||||
        let mut offset = 0;
 | 
			
		||||
        let mut width = 0;
 | 
			
		||||
 | 
			
		||||
        std::iter::from_fn(move || {
 | 
			
		||||
            while let Some((idx, ch)) = char_indices.next() {
 | 
			
		||||
                if skip_ansi_escape_sequence(ch, &mut char_indices.by_ref().map(|(_, ch)| ch)) {
 | 
			
		||||
                    continue;
 | 
			
		||||
                }
 | 
			
		||||
 | 
			
		||||
                if width > 0 && width + ch_width(ch) > line_width {
 | 
			
		||||
                    let word = Word {
 | 
			
		||||
                        word: &self.word[offset..idx],
 | 
			
		||||
                        width: width,
 | 
			
		||||
                        whitespace: "",
 | 
			
		||||
                        penalty: "",
 | 
			
		||||
                    };
 | 
			
		||||
                    offset = idx;
 | 
			
		||||
                    width = ch_width(ch);
 | 
			
		||||
                    return Some(word);
 | 
			
		||||
                }
 | 
			
		||||
 | 
			
		||||
                width += ch_width(ch);
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            if offset < self.word.len() {
 | 
			
		||||
                let word = Word {
 | 
			
		||||
                    word: &self.word[offset..],
 | 
			
		||||
                    width: width,
 | 
			
		||||
                    whitespace: self.whitespace,
 | 
			
		||||
                    penalty: self.penalty,
 | 
			
		||||
                };
 | 
			
		||||
                offset = self.word.len();
 | 
			
		||||
                return Some(word);
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            None
 | 
			
		||||
        })
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Width of `s` under the assumption that each byte takes up exactly
/// one column, which makes this a constant-time computation.
#[inline]
fn byte_len_as_width(s: &str) -> f64 {
    s.len() as f64
}

impl Fragment for Word<'_> {
    /// The cached width of the word, in columns.
    #[inline]
    fn width(&self) -> f64 {
        self.width as f64
    }

    // The whitespace is assumed to be made up of ' ' only, so its
    // byte length is taken as its display width.
    #[inline]
    fn whitespace_width(&self) -> f64 {
        byte_len_as_width(self.whitespace)
    }

    // The penalty is assumed to be `""` or `"-"`, so its byte length
    // is taken as its display width.
    #[inline]
    fn penalty_width(&self) -> f64 {
        byte_len_as_width(self.penalty)
    }
}
 | 
			
		||||
 | 
			
		||||
/// Forcibly break words wider than `line_width` into smaller words.
 | 
			
		||||
///
 | 
			
		||||
/// This simply calls [`Word::break_apart`] on words that are too
 | 
			
		||||
/// wide. This means that no extra `'-'` is inserted, the word is
 | 
			
		||||
/// simply broken into smaller pieces.
 | 
			
		||||
pub fn break_words<'a, I>(words: I, line_width: usize) -> Vec<Word<'a>>
 | 
			
		||||
where
 | 
			
		||||
    I: IntoIterator<Item = Word<'a>>,
 | 
			
		||||
{
 | 
			
		||||
    let mut shortened_words = Vec::new();
 | 
			
		||||
    for word in words {
 | 
			
		||||
        if word.width() > line_width as f64 {
 | 
			
		||||
            shortened_words.extend(word.break_apart(line_width));
 | 
			
		||||
        } else {
 | 
			
		||||
            shortened_words.push(word);
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    shortened_words
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#[cfg(test)]
mod tests {
    use super::*;

    #[cfg(feature = "unicode-width")]
    use unicode_width::UnicodeWidthChar;

    /// Describe `ch` for use in assertion failure messages.
    fn describe(ch: char) -> String {
        format!("{:?} U+{:04X}", ch, ch as u32)
    }

    #[test]
    fn skip_ansi_escape_sequence_works() {
        let mut chars = "\u{1b}[34mHello\u{1b}[0m".chars();
        let first = chars.next().unwrap();
        assert!(skip_ansi_escape_sequence(first, &mut chars));
        // The whole "blue" sequence is gone; the text comes next.
        assert_eq!(chars.next(), Some('H'));
    }

    #[test]
    fn emojis_have_correct_width() {
        use unic_emoji_char::is_emoji;

        // Emojis in the Basic Latin (ASCII) and Latin-1 Supplement
        // blocks all have a width of 1 column. This includes
        // characters such as '#' and '©'.
        for ch in ('\u{1}'..'\u{FF}').filter(|&ch| is_emoji(ch)) {
            let desc = describe(ch);

            #[cfg(feature = "unicode-width")]
            assert_eq!(ch.width().unwrap(), 1, "char: {}", desc);

            #[cfg(not(feature = "unicode-width"))]
            assert_eq!(ch_width(ch), 1, "char: {}", desc);
        }

        // Emojis in the remaining blocks of the Basic Multilingual
        // Plane (BMP), in the Supplementary Multilingual Plane (SMP),
        // and in the Supplementary Ideographic Plane (SIP), are all 1
        // or 2 columns wide when unicode-width is used, and always 2
        // columns wide otherwise. This includes all of our favorite
        // emojis such as 😊.
        for ch in ('\u{FF}'..'\u{2FFFF}').filter(|&ch| is_emoji(ch)) {
            let desc = describe(ch);

            #[cfg(feature = "unicode-width")]
            assert!(ch.width().unwrap() <= 2, "char: {}", desc);

            #[cfg(not(feature = "unicode-width"))]
            assert_eq!(ch_width(ch), 2, "char: {}", desc);
        }

        // The remaining planes contain almost no assigned code points
        // and thus also no emojis.
    }

    #[test]
    fn display_width_works() {
        let plain = "Café Plain";
        assert_eq!(plain.len(), 11); // “é” is two bytes
        assert_eq!(display_width(plain), 10);
        assert_eq!(display_width("\u{1b}[31mCafé Rouge\u{1b}[0m"), 10);
    }

    #[test]
    fn display_width_narrow_emojis() {
        let narrow = "⁉";

        #[cfg(feature = "unicode-width")]
        assert_eq!(display_width(narrow), 1);

        // The ⁉ character is above DOUBLE_WIDTH_CUTOFF.
        #[cfg(not(feature = "unicode-width"))]
        assert_eq!(display_width(narrow), 2);
    }

    #[test]
    fn display_width_narrow_emojis_variant_selector() {
        let with_selector = "⁉\u{fe0f}";

        #[cfg(feature = "unicode-width")]
        assert_eq!(display_width(with_selector), 1);

        // The variant selector-16 is also counted.
        #[cfg(not(feature = "unicode-width"))]
        assert_eq!(display_width(with_selector), 4);
    }

    #[test]
    fn display_width_emojis() {
        let ten_emojis = "😂😭🥺🤣✨😍🙏🥰😊🔥";
        assert_eq!(display_width(ten_emojis), 20);
    }
}
 | 
			
		||||
							
								
								
									
										347
									
								
								vendor/textwrap/src/indentation.rs
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										347
									
								
								vendor/textwrap/src/indentation.rs
									
									
									
									
										vendored
									
									
										Normal file
									
								
							@@ -0,0 +1,347 @@
 | 
			
		||||
//! Functions related to adding and removing indentation from lines of
 | 
			
		||||
//! text.
 | 
			
		||||
//!
 | 
			
		||||
//! The functions here can be used to uniformly indent or dedent
 | 
			
		||||
//! (unindent) word wrapped lines of text.
 | 
			
		||||
 | 
			
		||||
/// Indent each line by the given prefix.
 | 
			
		||||
///
 | 
			
		||||
/// # Examples
 | 
			
		||||
///
 | 
			
		||||
/// ```
 | 
			
		||||
/// use textwrap::indent;
 | 
			
		||||
///
 | 
			
		||||
/// assert_eq!(indent("First line.\nSecond line.\n", "  "),
 | 
			
		||||
///            "  First line.\n  Second line.\n");
 | 
			
		||||
/// ```
 | 
			
		||||
///
 | 
			
		||||
/// When indenting, trailing whitespace is stripped from the prefix.
 | 
			
		||||
/// This means that empty lines remain empty afterwards:
 | 
			
		||||
///
 | 
			
		||||
/// ```
 | 
			
		||||
/// use textwrap::indent;
 | 
			
		||||
///
 | 
			
		||||
/// assert_eq!(indent("First line.\n\n\nSecond line.\n", "  "),
 | 
			
		||||
///            "  First line.\n\n\n  Second line.\n");
 | 
			
		||||
/// ```
 | 
			
		||||
///
 | 
			
		||||
/// Notice how `"\n\n\n"` remained as `"\n\n\n"`.
 | 
			
		||||
///
 | 
			
		||||
/// This feature is useful when you want to indent text and have a
 | 
			
		||||
/// space between your prefix and the text. In this case, you _don't_
 | 
			
		||||
/// want a trailing space on empty lines:
 | 
			
		||||
///
 | 
			
		||||
/// ```
 | 
			
		||||
/// use textwrap::indent;
 | 
			
		||||
///
 | 
			
		||||
/// assert_eq!(indent("foo = 123\n\nprint(foo)\n", "# "),
 | 
			
		||||
///            "# foo = 123\n#\n# print(foo)\n");
 | 
			
		||||
/// ```
 | 
			
		||||
///
 | 
			
		||||
/// Notice how `"\n\n"` became `"\n#\n"` instead of `"\n# \n"` which
 | 
			
		||||
/// would have trailing whitespace.
 | 
			
		||||
///
 | 
			
		||||
/// Leading and trailing whitespace coming from the text itself is
 | 
			
		||||
/// kept unchanged:
 | 
			
		||||
///
 | 
			
		||||
/// ```
 | 
			
		||||
/// use textwrap::indent;
 | 
			
		||||
///
 | 
			
		||||
/// assert_eq!(indent(" \t  Foo   ", "->"), "-> \t  Foo   ");
 | 
			
		||||
/// ```
 | 
			
		||||
pub fn indent(s: &str, prefix: &str) -> String {
    // The output needs more than s.len() bytes, but the exact amount
    // is unknown without counting '\n' characters (which is somewhat
    // slow). Doing the first doubling of the buffer up front is a
    // cheap compromise.
    let mut indented = String::with_capacity(2 * s.len());
    // Lines holding nothing but whitespace get the prefix without
    // its trailing whitespace.
    let blank_line_prefix = prefix.trim_end();
    // Nothing goes before the first line, a newline before all others.
    let mut separator = "";
    for line in s.split_terminator('\n') {
        indented.push_str(separator);
        separator = "\n";
        let is_blank = line.trim().is_empty();
        indented.push_str(if is_blank { blank_line_prefix } else { prefix });
        indented.push_str(line);
    }
    // split_terminator swallows a final '\n', so put it back.
    if s.ends_with('\n') {
        indented.push('\n');
    }
    indented
}
 | 
			
		||||
 | 
			
		||||
/// Removes common leading whitespace from each line.
///
/// This function will look at each non-empty line and determine the
/// maximum amount of whitespace that can be removed from all lines:
///
/// ```
/// use textwrap::dedent;
///
/// assert_eq!(dedent("
///     1st line
///       2nd line
///     3rd line
/// "), "
/// 1st line
///   2nd line
/// 3rd line
/// ");
/// ```
///
/// Lines that consist solely of whitespace never influence the
/// common prefix and are emitted as empty lines. A missing final
/// newline in `s` is preserved in the result.
pub fn dedent(s: &str) -> String {
    let mut prefix = "";
    let mut lines = s.lines();

    // We first search for a line with non-whitespace content; its
    // leading whitespace is the initial candidate prefix.
    for line in &mut lines {
        if let Some(content_idx) = line.find(|ch: char| !ch.is_whitespace()) {
            prefix = &line[..content_idx];
            break;
        }
    }

    // We then continue looking through the remaining lines to
    // possibly shorten the prefix.
    for line in &mut lines {
        // Whitespace-only lines must not affect the prefix: without
        // this guard a line such as " \t" would cut a prefix of
        // spaces short even though the line carries no content.
        if line.chars().all(char::is_whitespace) {
            continue;
        }

        // The prefix and the line agree byte-for-byte up to the first
        // mismatch, so the mismatch index is always a valid (and
        // strictly shorter) prefix length.
        let mismatch = line
            .char_indices()
            .zip(prefix.chars())
            .find(|&((_, a), b)| a != b);
        if let Some(((idx, _), _)) = mismatch {
            prefix = &line[..idx];
        }
    }

    // We now go over the lines a second time to build the result.
    // The output is never longer than the input.
    let mut result = String::with_capacity(s.len());
    for line in s.lines() {
        if line.chars().any(|ch| !ch.is_whitespace()) {
            if let Some(tail) = line.strip_prefix(prefix) {
                result.push_str(tail);
            }
        }
        result.push('\n');
    }

    // Do not invent a terminating newline that the input lacked.
    if result.ends_with('\n') && !s.ends_with('\n') {
        result.pop();
    }

    result
}
 | 
			
		||||
 | 
			
		||||
#[cfg(test)]
mod tests {
    use super::*;

    // The inputs and expected outputs below are spelled out with
    // `concat!`, one source line per literal, so that the leading
    // whitespace of every line is easy to compare by eye.

    #[test]
    fn indent_empty() {
        assert_eq!(indent("\n", "  "), "\n");
    }

    #[test]
    fn indent_nonempty() {
        let text = concat!(
            "  foo\n",
            "bar\n",
            "  baz\n",
        );
        let expected = concat!(
            "//   foo\n",
            "// bar\n",
            "//   baz\n",
        );
        assert_eq!(indent(text, "// "), expected);
    }

    #[test]
    fn indent_empty_line() {
        let text = concat!(
            "  foo\n",
            "bar\n",
            "\n",
            "  baz",
        );
        let expected = concat!(
            "//   foo\n",
            "// bar\n",
            "//\n",
            "//   baz",
        );
        assert_eq!(indent(text, "// "), expected);
    }

    #[test]
    fn dedent_empty() {
        assert_eq!(dedent(""), "");
    }

    #[test]
    fn dedent_multi_line() {
        let x = concat!(
            "    foo\n",
            "  bar\n",
            "    baz",
        );
        let y = concat!(
            "  foo\n",
            "bar\n",
            "  baz",
        );
        assert_eq!(dedent(x), y);
    }

    #[test]
    fn dedent_empty_line() {
        let x = concat!(
            "    foo\n",
            "  bar\n",
            "   \n",
            "    baz",
        );
        let y = concat!(
            "  foo\n",
            "bar\n",
            "\n",
            "  baz",
        );
        assert_eq!(dedent(x), y);
    }

    #[test]
    fn dedent_blank_line() {
        let x = concat!(
            "      foo\n",
            "\n",
            "        bar\n",
            "          foo\n",
            "          bar\n",
            "          baz",
        );
        let y = concat!(
            "foo\n",
            "\n",
            "  bar\n",
            "    foo\n",
            "    bar\n",
            "    baz",
        );
        assert_eq!(dedent(x), y);
    }

    #[test]
    fn dedent_whitespace_line() {
        let x = concat!(
            "      foo\n",
            " \n",
            "        bar\n",
            "          foo\n",
            "          bar\n",
            "          baz",
        );
        let y = concat!(
            "foo\n",
            "\n",
            "  bar\n",
            "    foo\n",
            "    bar\n",
            "    baz",
        );
        assert_eq!(dedent(x), y);
    }

    #[test]
    fn dedent_mixed_whitespace() {
        // A tab and two spaces share no common prefix, so nothing is
        // removed.
        let x = concat!(
            "\tfoo\n",
            "  bar",
        );
        let y = concat!(
            "\tfoo\n",
            "  bar",
        );
        assert_eq!(dedent(x), y);
    }

    #[test]
    fn dedent_tabbed_whitespace() {
        let x = concat!(
            "\t\tfoo\n",
            "\t\t\tbar",
        );
        let y = concat!(
            "foo\n",
            "\tbar",
        );
        assert_eq!(dedent(x), y);
    }

    #[test]
    fn dedent_mixed_tabbed_whitespace() {
        let x = concat!(
            "\t  \tfoo\n",
            "\t  \t\tbar",
        );
        let y = concat!(
            "foo\n",
            "\tbar",
        );
        assert_eq!(dedent(x), y);
    }

    #[test]
    fn dedent_mixed_tabbed_whitespace2() {
        let x = concat!(
            "\t  \tfoo\n",
            "\t    \tbar",
        );
        let y = concat!(
            "\tfoo\n",
            "  \tbar",
        );
        assert_eq!(dedent(x), y);
    }

    #[test]
    fn dedent_preserve_no_terminating_newline() {
        let x = concat!(
            "  foo\n",
            "    bar",
        );
        let y = concat!(
            "foo\n",
            "  bar",
        );
        assert_eq!(dedent(x), y);
    }
}
 | 
			
		||||
							
								
								
									
										1847
									
								
								vendor/textwrap/src/lib.rs
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										1847
									
								
								vendor/textwrap/src/lib.rs
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										428
									
								
								vendor/textwrap/src/word_separators.rs
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										428
									
								
								vendor/textwrap/src/word_separators.rs
									
									
									
									
										vendored
									
									
										Normal file
									
								
							@@ -0,0 +1,428 @@
 | 
			
		||||
//! Functionality for finding words.
 | 
			
		||||
//!
 | 
			
		||||
//! In order to wrap text, we need to know where the legal break
 | 
			
		||||
//! points are, i.e., where the words of the text are. This means that
 | 
			
		||||
//! we need to define what a "word" is.
 | 
			
		||||
//!
 | 
			
		||||
//! A simple approach is to simply split the text on whitespace, but
 | 
			
		||||
//! this does not work for East-Asian languages such as Chinese or
 | 
			
		||||
//! Japanese where there are no spaces between words. Breaking a long
 | 
			
		||||
//! sequence of emojis is another example where line breaks might be
 | 
			
		||||
//! wanted even if there is no whitespace to be found.
 | 
			
		||||
//!
 | 
			
		||||
//! The [`WordSeparator`] enum is responsible for determining where
//! the words are in a line of text. Please refer to the enum and
//! its variants for more information.
 | 
			
		||||
 | 
			
		||||
#[cfg(feature = "unicode-linebreak")]
 | 
			
		||||
use crate::core::skip_ansi_escape_sequence;
 | 
			
		||||
use crate::core::Word;
 | 
			
		||||
 | 
			
		||||
/// Describes where words occur in a line of text.
///
/// The simplest approach is to say that words are separated by one or
/// more ASCII spaces (`' '`). This works for Western languages
/// without emojis. A more complex approach is to use the Unicode line
/// breaking algorithm, which finds break points in non-ASCII text.
///
/// The line breaks occur between words, please see
/// [`WordSplitter`](crate::WordSplitter) for options of how to handle
/// hyphenation of individual words.
///
/// # Examples
///
/// ```
/// use textwrap::core::Word;
/// use textwrap::WordSeparator::AsciiSpace;
///
/// let words = AsciiSpace.find_words("Hello World!").collect::<Vec<_>>();
/// assert_eq!(words, vec![Word::from("Hello "), Word::from("World!")]);
/// ```
#[derive(Clone, Copy)]
pub enum WordSeparator {
    /// Find words by splitting on runs of `' '` characters.
    ///
    /// # Examples
    ///
    /// ```
    /// use textwrap::core::Word;
    /// use textwrap::WordSeparator::AsciiSpace;
    ///
    /// let words = AsciiSpace.find_words("Hello   World!").collect::<Vec<_>>();
    /// assert_eq!(words, vec![Word::from("Hello   "),
    ///                        Word::from("World!")]);
    /// ```
    AsciiSpace,

    /// Split `line` into words using Unicode break properties.
    ///
    /// This word separator uses the Unicode line breaking algorithm
    /// described in [Unicode Standard Annex
    /// #14](https://www.unicode.org/reports/tr14/) to find legal places
    /// to break lines. There is a small difference in that the U+002D
    /// (Hyphen-Minus) and U+00AD (Soft Hyphen) don’t create a line break:
    /// to allow a line break at a hyphen, use
    /// [`WordSplitter::HyphenSplitter`](crate::WordSplitter::HyphenSplitter).
    /// Soft hyphens are not currently supported.
    ///
    /// # Examples
    ///
    /// Unlike [`WordSeparator::AsciiSpace`], the Unicode line
    /// breaking algorithm will find line break opportunities between
    /// some characters with no intervening whitespace:
    ///
    /// ```
    /// #[cfg(feature = "unicode-linebreak")] {
    /// use textwrap::core::Word;
    /// use textwrap::WordSeparator::UnicodeBreakProperties;
    ///
    /// assert_eq!(UnicodeBreakProperties.find_words("Emojis: 😂😍").collect::<Vec<_>>(),
    ///            vec![Word::from("Emojis: "),
    ///                 Word::from("😂"),
    ///                 Word::from("😍")]);
    ///
    /// assert_eq!(UnicodeBreakProperties.find_words("CJK: 你好").collect::<Vec<_>>(),
    ///            vec![Word::from("CJK: "),
    ///                 Word::from("你"),
    ///                 Word::from("好")]);
    /// }
    /// ```
    ///
    /// A U+2060 (Word Joiner) character can be inserted if you want to
    /// manually override the defaults and keep the characters together:
    ///
    /// ```
    /// #[cfg(feature = "unicode-linebreak")] {
    /// use textwrap::core::Word;
    /// use textwrap::WordSeparator::UnicodeBreakProperties;
    ///
    /// assert_eq!(UnicodeBreakProperties.find_words("Emojis: 😂\u{2060}😍").collect::<Vec<_>>(),
    ///            vec![Word::from("Emojis: "),
    ///                 Word::from("😂\u{2060}😍")]);
    /// }
    /// ```
    ///
    /// The Unicode line breaking algorithm will also automatically
    /// suppress line breaks around certain punctuation characters:
    ///
    /// ```
    /// #[cfg(feature = "unicode-linebreak")] {
    /// use textwrap::core::Word;
    /// use textwrap::WordSeparator::UnicodeBreakProperties;
    ///
    /// assert_eq!(UnicodeBreakProperties.find_words("[ foo ] bar !").collect::<Vec<_>>(),
    ///            vec![Word::from("[ foo ] "),
    ///                 Word::from("bar !")]);
    /// }
    /// ```
    #[cfg(feature = "unicode-linebreak")]
    UnicodeBreakProperties,

    /// Find words using a custom word separator.
    Custom(fn(line: &str) -> Box<dyn Iterator<Item = Word<'_>> + '_>),
}
 | 
			
		||||
 | 
			
		||||
impl std::fmt::Debug for WordSeparator {
 | 
			
		||||
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
 | 
			
		||||
        match self {
 | 
			
		||||
            WordSeparator::AsciiSpace => f.write_str("AsciiSpace"),
 | 
			
		||||
            #[cfg(feature = "unicode-linebreak")]
 | 
			
		||||
            WordSeparator::UnicodeBreakProperties => f.write_str("UnicodeBreakProperties"),
 | 
			
		||||
            WordSeparator::Custom(_) => f.write_str("Custom(...)"),
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
impl WordSeparator {
 | 
			
		||||
    // This function should really return impl Iterator<Item = Word>, but
 | 
			
		||||
    // this isn't possible until Rust supports higher-kinded types:
 | 
			
		||||
    // https://github.com/rust-lang/rfcs/blob/master/text/1522-conservative-impl-trait.md
 | 
			
		||||
    /// Find all words in `line`.
 | 
			
		||||
    pub fn find_words<'a>(&self, line: &'a str) -> Box<dyn Iterator<Item = Word<'a>> + 'a> {
 | 
			
		||||
        match self {
 | 
			
		||||
            WordSeparator::AsciiSpace => find_words_ascii_space(line),
 | 
			
		||||
            #[cfg(feature = "unicode-linebreak")]
 | 
			
		||||
            WordSeparator::UnicodeBreakProperties => find_words_unicode_break_properties(line),
 | 
			
		||||
            WordSeparator::Custom(func) => func(line),
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
fn find_words_ascii_space<'a>(line: &'a str) -> Box<dyn Iterator<Item = Word<'a>> + 'a> {
 | 
			
		||||
    let mut start = 0;
 | 
			
		||||
    let mut in_whitespace = false;
 | 
			
		||||
    let mut char_indices = line.char_indices();
 | 
			
		||||
 | 
			
		||||
    Box::new(std::iter::from_fn(move || {
 | 
			
		||||
        // for (idx, ch) in char_indices does not work, gives this
 | 
			
		||||
        // error:
 | 
			
		||||
        //
 | 
			
		||||
        // > cannot move out of `char_indices`, a captured variable in
 | 
			
		||||
        // > an `FnMut` closure
 | 
			
		||||
        #[allow(clippy::while_let_on_iterator)]
 | 
			
		||||
        while let Some((idx, ch)) = char_indices.next() {
 | 
			
		||||
            if in_whitespace && ch != ' ' {
 | 
			
		||||
                let word = Word::from(&line[start..idx]);
 | 
			
		||||
                start = idx;
 | 
			
		||||
                in_whitespace = ch == ' ';
 | 
			
		||||
                return Some(word);
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            in_whitespace = ch == ' ';
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        if start < line.len() {
 | 
			
		||||
            let word = Word::from(&line[start..]);
 | 
			
		||||
            start = line.len();
 | 
			
		||||
            return Some(word);
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        None
 | 
			
		||||
    }))
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Return a copy of `text` with all ANSI escape sequences removed.
#[cfg(feature = "unicode-linebreak")]
fn strip_ansi_escape_sequences(text: &str) -> String {
    // The stripped text can never be longer than the input.
    let mut stripped = String::with_capacity(text.len());

    let mut chars = text.chars();
    while let Some(ch) = chars.next() {
        // `skip_ansi_escape_sequence` is handed the iterator so that
        // it can advance past the rest of an escape sequence; only
        // characters outside such sequences are kept.
        if !skip_ansi_escape_sequence(ch, &mut chars) {
            stripped.push(ch);
        }
    }

    stripped
}
 | 
			
		||||
 | 
			
		||||
/// Soft hyphen, also known as a “shy hyphen”. Should show up as ‘-’
/// if a line is broken at this point, and otherwise be invisible.
/// Textwrap does not currently support breaking words at soft
/// hyphens.
#[cfg(feature = "unicode-linebreak")]
const SHY: char = '\u{00ad}';
 | 
			
		||||
 | 
			
		||||
/// Find words in line. ANSI escape sequences are ignored in `line`.
#[cfg(feature = "unicode-linebreak")]
fn find_words_unicode_break_properties<'a>(
    line: &'a str,
) -> Box<dyn Iterator<Item = Word<'a>> + 'a> {
    // The Unicode line breaks are computed on a copy of `line` with
    // the ANSI escape sequences stripped, but the words must be
    // slices of the original string. This iterator yields
    // (original index, stripped index) pairs so that a break position
    // in the stripped text can be translated back.
    let mut stripped_pos = 0;
    let mut orig_chars = line.char_indices();
    let mut index_pairs = std::iter::from_fn(move || {
        let (orig_idx, ch) = orig_chars.next()?;
        let pair = (orig_idx, stripped_pos);
        // Characters belonging to an escape sequence do not advance
        // the position in the stripped text.
        if !skip_ansi_escape_sequence(ch, &mut orig_chars.by_ref().map(|(_, ch)| ch)) {
            stripped_pos += ch.len_utf8();
        }
        Some(pair)
    });

    let stripped = strip_ansi_escape_sequences(line);
    let mut break_points = unicode_linebreak::linebreaks(&stripped)
        .filter(|(idx, _)| {
            // Breaks right after ‘-’ are suppressed since they are
            // controlled via the WordSplitter. Breaks after a soft
            // hyphen are suppressed as well: soft hyphens are
            // currently not supported since all `Word` fragments are
            // required to be continuous in the input string. All
            // other breaks are kept.
            !matches!(
                stripped[..*idx].chars().next_back(),
                Some('-') | Some(SHY)
            )
        })
        .collect::<Vec<_>>()
        .into_iter();

    // Drop the final break opportunity. The last word is produced
    // below with &line[word_start..], which ensures that a trailing
    // ANSI escape sequence is correctly included.
    break_points.next_back();

    let mut word_start = 0;
    Box::new(std::iter::from_fn(move || {
        for (break_idx, _) in break_points.by_ref() {
            let found = index_pairs.find(|&(_, stripped_idx)| stripped_idx == break_idx);
            if let Some((orig_idx, _)) = found {
                let word = Word::from(&line[word_start..orig_idx]);
                word_start = orig_idx;
                return Some(word);
            }
        }

        // No break opportunities left; emit the remainder once.
        if word_start < line.len() {
            let word = Word::from(&line[word_start..]);
            word_start = line.len();
            return Some(word);
        }

        None
    }))
}
 | 
			
		||||
 | 
			
		||||
#[cfg(test)]
mod tests {
    use super::WordSeparator::*;
    use super::*;

    // Collects the iterator given as the first argument into a `Vec`
    // and compares it with the second argument.
    macro_rules! assert_iter_eq {
        ($iter:expr, $expected:expr) => {
            assert_eq!($iter.collect::<Vec<_>>(), $expected);
        };
    }

    fn to_words<'a>(words: Vec<&'a str>) -> Vec<Word<'a>> {
        words.into_iter().map(|w| Word::from(w)).collect()
    }

    // Runs `separator` over `line` and checks that exactly the words
    // in `expected` are found.
    fn assert_words<'a>(separator: WordSeparator, line: &'a str, expected: Vec<&'a str>) {
        let expected_words = to_words(expected);
        let actual_words = separator.find_words(line).collect::<Vec<_>>();
        assert_eq!(actual_words, expected_words, "Line: {:?}", line);
    }

    // Generates one test for `AsciiSpace` and one for
    // `UnicodeBreakProperties`. Each case is written as
    // `[line, words expected from AsciiSpace, words expected from
    // UnicodeBreakProperties]`.
    macro_rules! test_find_words {
        ($ascii_name:ident,
         $unicode_name:ident,
         $([ $line:expr, $ascii_words:expr, $unicode_words:expr ]),+) => {
            #[test]
            fn $ascii_name() {
                $(
                    assert_words(WordSeparator::AsciiSpace, $line, $ascii_words.to_vec());
                )+
            }

            #[test]
            #[cfg(feature = "unicode-linebreak")]
            fn $unicode_name() {
                $(
                    assert_words(
                        WordSeparator::UnicodeBreakProperties,
                        $line,
                        $unicode_words.to_vec(),
                    );
                )+
            }
        };
    }

    test_find_words!(ascii_space_empty, unicode_empty, ["", [], []]);

    test_find_words!(
        ascii_single_word,
        unicode_single_word,
        ["foo", ["foo"], ["foo"]]
    );

    test_find_words!(
        ascii_two_words,
        unicode_two_words,
        ["foo bar", ["foo ", "bar"], ["foo ", "bar"]]
    );

    test_find_words!(
        ascii_multiple_words,
        unicode_multiple_words,
        ["foo bar", ["foo ", "bar"], ["foo ", "bar"]],
        ["x y z", ["x ", "y ", "z"], ["x ", "y ", "z"]]
    );

    test_find_words!(
        ascii_only_whitespace,
        unicode_only_whitespace,
        [" ", [" "], [" "]],
        ["    ", ["    "], ["    "]]
    );

    test_find_words!(
        ascii_inter_word_whitespace,
        unicode_inter_word_whitespace,
        ["foo   bar", ["foo   ", "bar"], ["foo   ", "bar"]]
    );

    test_find_words!(
        ascii_trailing_whitespace,
        unicode_trailing_whitespace,
        ["foo   ", ["foo   "], ["foo   "]]
    );

    test_find_words!(
        ascii_leading_whitespace,
        unicode_leading_whitespace,
        ["   foo", ["   ", "foo"], ["   ", "foo"]]
    );

    test_find_words!(
        ascii_multi_column_char,
        unicode_multi_column_char,
        ["\u{1f920}", ["\u{1f920}"], ["\u{1f920}"]] // cowboy emoji 🤠
    );

    test_find_words!(
        ascii_hyphens,
        unicode_hyphens,
        ["foo-bar", ["foo-bar"], ["foo-bar"]],
        ["foo- bar", ["foo- ", "bar"], ["foo- ", "bar"]],
        ["foo - bar", ["foo ", "- ", "bar"], ["foo ", "- ", "bar"]],
        ["foo -bar", ["foo ", "-bar"], ["foo ", "-bar"]]
    );

    test_find_words!(
        ascii_newline,
        unicode_newline,
        ["foo\nbar", ["foo\nbar"], ["foo\n", "bar"]]
    );

    test_find_words!(
        ascii_tab,
        unicode_tab,
        ["foo\tbar", ["foo\tbar"], ["foo\t", "bar"]]
    );

    test_find_words!(
        ascii_non_breaking_space,
        unicode_non_breaking_space,
        ["foo\u{00A0}bar", ["foo\u{00A0}bar"], ["foo\u{00A0}bar"]]
    );

    #[test]
    #[cfg(unix)]
    fn find_words_colored_text() {
        use termion::color::{Blue, Fg, Green, Reset};

        let green_hello = format!("{}Hello{} ", Fg(Green), Fg(Reset));
        let blue_world = format!("{}World!{}", Fg(Blue), Fg(Reset));
        let colored_text = format!("{}{}", green_hello, blue_world);

        assert_iter_eq!(
            AsciiSpace.find_words(&colored_text),
            vec![Word::from(&green_hello), Word::from(&blue_world)]
        );

        #[cfg(feature = "unicode-linebreak")]
        assert_iter_eq!(
            UnicodeBreakProperties.find_words(&colored_text),
            vec![Word::from(&green_hello), Word::from(&blue_world)]
        );
    }

    #[test]
    fn find_words_color_inside_word() {
        // Escape sequences in the middle of a word must not split it.
        let text = "foo\u{1b}[0m\u{1b}[32mbar\u{1b}[0mbaz";
        assert_iter_eq!(AsciiSpace.find_words(text), vec![Word::from(text)]);

        #[cfg(feature = "unicode-linebreak")]
        assert_iter_eq!(
            UnicodeBreakProperties.find_words(text),
            vec![Word::from(text)]
        );
    }
}
 | 
			
		||||
							
								
								
									
										314
									
								
								vendor/textwrap/src/word_splitters.rs
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										314
									
								
								vendor/textwrap/src/word_splitters.rs
									
									
									
									
										vendored
									
									
										Normal file
									
								
							@@ -0,0 +1,314 @@
 | 
			
		||||
//! Word splitting functionality.
 | 
			
		||||
//!
 | 
			
		||||
//! To wrap text into lines, long words sometimes need to be split
 | 
			
		||||
//! across lines. The [`WordSplitter`] enum defines this
 | 
			
		||||
//! functionality.
 | 
			
		||||
 | 
			
		||||
use crate::core::{display_width, Word};
 | 
			
		||||
 | 
			
		||||
/// The `WordSplitter` enum describes where words can be split.
///
/// If the textwrap crate has been compiled with the `hyphenation`
/// Cargo feature enabled, you will find a
/// [`WordSplitter::Hyphenation`] variant. Use this variant for
/// language-aware hyphenation:
///
/// ```
/// #[cfg(feature = "hyphenation")] {
///     use hyphenation::{Language, Load, Standard};
///     use textwrap::{wrap, Options, WordSplitter};
///
///     let text = "Oxidation is the loss of electrons.";
///     let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap();
///     let options = Options::new(8).word_splitter(WordSplitter::Hyphenation(dictionary));
///     assert_eq!(wrap(text, &options), vec!["Oxida-",
///                                           "tion is",
///                                           "the loss",
///                                           "of elec-",
///                                           "trons."]);
/// }
/// ```
///
/// Please see the documentation for the [hyphenation] crate for more
/// details.
///
/// [hyphenation]: https://docs.rs/hyphenation/
#[derive(Clone)]
pub enum WordSplitter {
    /// Use this as a [`Options.word_splitter`] to avoid any kind of
    /// hyphenation:
    ///
    /// ```
    /// use textwrap::{wrap, Options, WordSplitter};
    ///
    /// let options = Options::new(8).word_splitter(WordSplitter::NoHyphenation);
    /// assert_eq!(wrap("foo bar-baz", &options),
    ///            vec!["foo", "bar-baz"]);
    /// ```
    ///
    /// [`Options.word_splitter`]: super::Options::word_splitter
    NoHyphenation,

    /// `HyphenSplitter` is the default `WordSplitter` used by
    /// [`Options::new`](super::Options::new). It will split words on
    /// existing hyphens in the word.
    ///
    /// It will only use hyphens that are surrounded by alphanumeric
    /// characters, which prevents a word like `"--foo-bar"` from
    /// being split into `"--"` and `"foo-bar"`.
    ///
    /// # Examples
    ///
    /// ```
    /// use textwrap::WordSplitter;
    ///
    /// assert_eq!(WordSplitter::HyphenSplitter.split_points("--foo-bar"),
    ///            vec![6]);
    /// ```
    HyphenSplitter,

    /// Use a custom function as the word splitter.
    ///
    /// This variant lets you implement a custom word splitter using
    /// your own function.
    ///
    /// # Examples
    ///
    /// ```
    /// use textwrap::WordSplitter;
    ///
    /// fn split_at_underscore(word: &str) -> Vec<usize> {
    ///     word.match_indices('_').map(|(idx, _)| idx + 1).collect()
    /// }
    ///
    /// let word_splitter = WordSplitter::Custom(split_at_underscore);
    /// assert_eq!(word_splitter.split_points("a_long_identifier"),
    ///            vec![2, 7]);
    /// ```
    Custom(fn(word: &str) -> Vec<usize>),

    /// A hyphenation dictionary can be used to do language-specific
    /// hyphenation using patterns from the [hyphenation] crate.
    ///
    /// **Note:** Only available when the `hyphenation` Cargo feature is
    /// enabled.
    ///
    /// [hyphenation]: https://docs.rs/hyphenation/
    #[cfg(feature = "hyphenation")]
    Hyphenation(hyphenation::Standard),
}
 | 
			
		||||
 | 
			
		||||
impl std::fmt::Debug for WordSplitter {
 | 
			
		||||
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
 | 
			
		||||
        match self {
 | 
			
		||||
            WordSplitter::NoHyphenation => f.write_str("NoHyphenation"),
 | 
			
		||||
            WordSplitter::HyphenSplitter => f.write_str("HyphenSplitter"),
 | 
			
		||||
            WordSplitter::Custom(_) => f.write_str("Custom(...)"),
 | 
			
		||||
            #[cfg(feature = "hyphenation")]
 | 
			
		||||
            WordSplitter::Hyphenation(dict) => write!(f, "Hyphenation({})", dict.language()),
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Equality between word splitters.
///
/// The two unit variants compare equal to themselves and hyphenation
/// dictionaries compare by language. Every other pairing, including
/// two `Custom` splitters, is treated as unequal.
impl PartialEq<WordSplitter> for WordSplitter {
    fn eq(&self, other: &WordSplitter) -> bool {
        match (self, other) {
            (Self::NoHyphenation, Self::NoHyphenation)
            | (Self::HyphenSplitter, Self::HyphenSplitter) => true,
            #[cfg(feature = "hyphenation")]
            (Self::Hyphenation(lhs), Self::Hyphenation(rhs)) => lhs.language() == rhs.language(),
            _ => false,
        }
    }
}
 | 
			
		||||
 | 
			
		||||
impl WordSplitter {
 | 
			
		||||
    /// Return all possible indices where `word` can be split.
 | 
			
		||||
    ///
 | 
			
		||||
    /// The indices are in the range `0..word.len()`. They point to
 | 
			
		||||
    /// the index _after_ the split point, i.e., after `-` if
 | 
			
		||||
    /// splitting on hyphens. This way, `word.split_at(idx)` will
 | 
			
		||||
    /// break the word into two well-formed pieces.
 | 
			
		||||
    ///
 | 
			
		||||
    /// # Examples
 | 
			
		||||
    ///
 | 
			
		||||
    /// ```
 | 
			
		||||
    /// use textwrap::WordSplitter;
 | 
			
		||||
    /// assert_eq!(WordSplitter::NoHyphenation.split_points("cannot-be-split"), vec![]);
 | 
			
		||||
    /// assert_eq!(WordSplitter::HyphenSplitter.split_points("can-be-split"), vec![4, 7]);
 | 
			
		||||
    /// assert_eq!(WordSplitter::Custom(|word| vec![word.len()/2]).split_points("middle"), vec![3]);
 | 
			
		||||
    /// ```
 | 
			
		||||
    pub fn split_points(&self, word: &str) -> Vec<usize> {
 | 
			
		||||
        match self {
 | 
			
		||||
            WordSplitter::NoHyphenation => Vec::new(),
 | 
			
		||||
            WordSplitter::HyphenSplitter => {
 | 
			
		||||
                let mut splits = Vec::new();
 | 
			
		||||
 | 
			
		||||
                for (idx, _) in word.match_indices('-') {
 | 
			
		||||
                    // We only use hyphens that are surrounded by alphanumeric
 | 
			
		||||
                    // characters. This is to avoid splitting on repeated hyphens,
 | 
			
		||||
                    // such as those found in --foo-bar.
 | 
			
		||||
                    let prev = word[..idx].chars().next_back();
 | 
			
		||||
                    let next = word[idx + 1..].chars().next();
 | 
			
		||||
 | 
			
		||||
                    if prev.filter(|ch| ch.is_alphanumeric()).is_some()
 | 
			
		||||
                        && next.filter(|ch| ch.is_alphanumeric()).is_some()
 | 
			
		||||
                    {
 | 
			
		||||
                        splits.push(idx + 1); // +1 due to width of '-'.
 | 
			
		||||
                    }
 | 
			
		||||
                }
 | 
			
		||||
 | 
			
		||||
                splits
 | 
			
		||||
            }
 | 
			
		||||
            WordSplitter::Custom(splitter_func) => splitter_func(word),
 | 
			
		||||
            #[cfg(feature = "hyphenation")]
 | 
			
		||||
            WordSplitter::Hyphenation(dictionary) => {
 | 
			
		||||
                use hyphenation::Hyphenator;
 | 
			
		||||
                dictionary.hyphenate(word).breaks
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Split words into smaller words according to the split points given
 | 
			
		||||
/// by `word_splitter`.
 | 
			
		||||
///
 | 
			
		||||
/// Note that we split all words, regardless of their length. This is
 | 
			
		||||
/// to more cleanly separate the business of splitting (including
 | 
			
		||||
/// automatic hyphenation) from the business of word wrapping.
 | 
			
		||||
pub fn split_words<'a, I>(
 | 
			
		||||
    words: I,
 | 
			
		||||
    word_splitter: &'a WordSplitter,
 | 
			
		||||
) -> impl Iterator<Item = Word<'a>>
 | 
			
		||||
where
 | 
			
		||||
    I: IntoIterator<Item = Word<'a>>,
 | 
			
		||||
{
 | 
			
		||||
    words.into_iter().flat_map(move |word| {
 | 
			
		||||
        let mut prev = 0;
 | 
			
		||||
        let mut split_points = word_splitter.split_points(&word).into_iter();
 | 
			
		||||
        std::iter::from_fn(move || {
 | 
			
		||||
            if let Some(idx) = split_points.next() {
 | 
			
		||||
                let need_hyphen = !word[..idx].ends_with('-');
 | 
			
		||||
                let w = Word {
 | 
			
		||||
                    word: &word.word[prev..idx],
 | 
			
		||||
                    width: display_width(&word[prev..idx]),
 | 
			
		||||
                    whitespace: "",
 | 
			
		||||
                    penalty: if need_hyphen { "-" } else { "" },
 | 
			
		||||
                };
 | 
			
		||||
                prev = idx;
 | 
			
		||||
                return Some(w);
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            if prev < word.word.len() || prev == 0 {
 | 
			
		||||
                let w = Word {
 | 
			
		||||
                    word: &word.word[prev..],
 | 
			
		||||
                    width: display_width(&word[prev..]),
 | 
			
		||||
                    whitespace: word.whitespace,
 | 
			
		||||
                    penalty: word.penalty,
 | 
			
		||||
                };
 | 
			
		||||
                prev = word.word.len() + 1;
 | 
			
		||||
                return Some(w);
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            None
 | 
			
		||||
        })
 | 
			
		||||
    })
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#[cfg(test)]
mod tests {
    use super::*;

    // Like assert_eq!, but the left expression is an iterator.
    macro_rules! assert_iter_eq {
        ($left:expr, $right:expr) => {
            assert_eq!($left.collect::<Vec<_>>(), $right);
        };
    }

    // No input words means no output words.
    #[test]
    fn split_words_no_words() {
        assert_iter_eq!(split_words(vec![], &WordSplitter::HyphenSplitter), vec![]);
    }

    // A whitespace-only word has no split points and is passed through.
    #[test]
    fn split_words_empty_word() {
        assert_iter_eq!(
            split_words(vec![Word::from("   ")], &WordSplitter::HyphenSplitter),
            vec![Word::from("   ")]
        );
    }

    // A word without hyphens is left untouched.
    #[test]
    fn split_words_single_word() {
        assert_iter_eq!(
            split_words(vec![Word::from("foobar")], &WordSplitter::HyphenSplitter),
            vec![Word::from("foobar")]
        );
    }

    // The hyphen stays attached to the first piece.
    #[test]
    fn split_words_hyphen_splitter() {
        assert_iter_eq!(
            split_words(vec![Word::from("foo-bar")], &WordSplitter::HyphenSplitter),
            vec![Word::from("foo-"), Word::from("bar")]
        );
    }

    // `NoHyphenation` never splits, not even on existing hyphens.
    #[test]
    fn split_words_no_hyphenation() {
        assert_iter_eq!(
            split_words(vec![Word::from("foo-bar")], &WordSplitter::NoHyphenation),
            vec![Word::from("foo-bar")]
        );
    }

    // A "-" penalty is added at a split point unless the piece already
    // ends with a hyphen.
    #[test]
    fn split_words_adds_penalty() {
        let fixed_split_point = |_: &str| vec![3];

        // The `Vec` is passed directly, as in the tests above:
        // `split_words` accepts any `IntoIterator`.
        assert_iter_eq!(
            split_words(vec![Word::from("foobar")], &WordSplitter::Custom(fixed_split_point)),
            vec![
                Word {
                    word: "foo",
                    width: 3,
                    whitespace: "",
                    penalty: "-"
                },
                Word {
                    word: "bar",
                    width: 3,
                    whitespace: "",
                    penalty: ""
                }
            ]
        );

        assert_iter_eq!(
            split_words(vec![Word::from("fo-bar")], &WordSplitter::Custom(fixed_split_point)),
            vec![
                Word {
                    word: "fo-",
                    width: 3,
                    whitespace: "",
                    penalty: ""
                },
                Word {
                    word: "bar",
                    width: 3,
                    whitespace: "",
                    penalty: ""
                }
            ]
        );
    }
}
 | 
			
		||||
							
								
								
									
										381
									
								
								vendor/textwrap/src/wrap_algorithms.rs
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										381
									
								
								vendor/textwrap/src/wrap_algorithms.rs
									
									
									
									
										vendored
									
									
										Normal file
									
								
							@@ -0,0 +1,381 @@
 | 
			
		||||
//! Word wrapping algorithms.
 | 
			
		||||
//!
 | 
			
		||||
//! After a text has been broken into words (or [`Fragment`]s), one
 | 
			
		||||
//! now has to decide how to break the fragments into lines. The
 | 
			
		||||
//! simplest algorithm for this is implemented by [`wrap_first_fit`]:
 | 
			
		||||
//! it uses no look-ahead and simply adds fragments to the line as
 | 
			
		||||
//! long as they fit. However, this can lead to poor line breaks if a
 | 
			
		||||
//! large fragment almost-but-not-quite fits on a line. When that
 | 
			
		||||
//! happens, the fragment is moved to the next line and it will leave
 | 
			
		||||
//! behind a large gap. A more advanced algorithm, implemented by
 | 
			
		||||
//! [`wrap_optimal_fit`], will take this into account. The optimal-fit
 | 
			
		||||
//! algorithm considers all possible line breaks and will attempt to
 | 
			
		||||
//! minimize the gaps left behind by overly short lines.
 | 
			
		||||
//!
 | 
			
		||||
//! While both algorithms run in linear time, the first-fit algorithm
 | 
			
		||||
//! is about 4 times faster than the optimal-fit algorithm.
 | 
			
		||||
 | 
			
		||||
#[cfg(feature = "smawk")]
 | 
			
		||||
mod optimal_fit;
 | 
			
		||||
#[cfg(feature = "smawk")]
 | 
			
		||||
pub use optimal_fit::{wrap_optimal_fit, OverflowError, Penalties};
 | 
			
		||||
 | 
			
		||||
use crate::core::{Fragment, Word};
 | 
			
		||||
 | 
			
		||||
/// Describes how to wrap words into lines.
///
/// The simplest approach is to wrap words one word at a time and
/// accept the first way of wrapping that fits
/// ([`WrapAlgorithm::FirstFit`]). If the `smawk` Cargo feature is
/// enabled, a more complex algorithm is available which will look at
/// an entire paragraph at a time in order to find optimal line breaks
/// ([`WrapAlgorithm::OptimalFit`]).
#[derive(Clone, Copy)]
pub enum WrapAlgorithm {
    /// Wrap words using a fast and simple algorithm.
    ///
    /// This algorithm uses no look-ahead when finding line breaks.
    /// Implemented by [`wrap_first_fit`], please see that function for
    /// details and examples.
    FirstFit,

    /// Wrap words using an advanced algorithm with look-ahead.
    ///
    /// This wrapping algorithm considers the entire paragraph to find
    /// optimal line breaks. When wrapping text, "penalties" are
    /// assigned to line breaks based on the gaps left at the end of
    /// lines. See [`Penalties`] for details.
    ///
    /// The underlying wrapping algorithm is implemented by
    /// [`wrap_optimal_fit`], please see that function for examples.
    ///
    /// **Note:** Only available when the `smawk` Cargo feature is
    /// enabled.
    #[cfg(feature = "smawk")]
    OptimalFit(Penalties),

    /// Custom wrapping function.
    ///
    /// Use this if you want to implement your own wrapping algorithm.
    /// The function can freely decide how to turn a slice of
    /// [`Word`]s into lines.
    ///
    /// # Example
    ///
    /// ```
    /// use textwrap::core::Word;
    /// use textwrap::{wrap, Options, WrapAlgorithm};
    ///
    /// fn stair<'a, 'b>(words: &'b [Word<'a>], _: &'b [usize]) -> Vec<&'b [Word<'a>]> {
    ///     let mut lines = Vec::new();
    ///     let mut step = 1;
    ///     let mut start_idx = 0;
    ///     while start_idx + step <= words.len() {
    ///       lines.push(&words[start_idx .. start_idx+step]);
    ///       start_idx += step;
    ///       step += 1;
    ///     }
    ///     lines
    /// }
    ///
    /// let options = Options::new(10).wrap_algorithm(WrapAlgorithm::Custom(stair));
    /// assert_eq!(wrap("First, second, third, fourth, fifth, sixth", options),
    ///            vec!["First,",
    ///                 "second, third,",
    ///                 "fourth, fifth, sixth"]);
    /// ```
    Custom(for<'a, 'b> fn(words: &'b [Word<'a>], line_widths: &'b [usize]) -> Vec<&'b [Word<'a>]>),
}
 | 
			
		||||
 | 
			
		||||
impl std::fmt::Debug for WrapAlgorithm {
 | 
			
		||||
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
 | 
			
		||||
        match self {
 | 
			
		||||
            WrapAlgorithm::FirstFit => f.write_str("FirstFit"),
 | 
			
		||||
            #[cfg(feature = "smawk")]
 | 
			
		||||
            WrapAlgorithm::OptimalFit(penalties) => write!(f, "OptimalFit({:?})", penalties),
 | 
			
		||||
            WrapAlgorithm::Custom(_) => f.write_str("Custom(...)"),
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
impl WrapAlgorithm {
 | 
			
		||||
    /// Create new wrap algorithm.
 | 
			
		||||
    ///
 | 
			
		||||
    /// The best wrapping algorithm is used by default, i.e.,
 | 
			
		||||
    /// [`WrapAlgorithm::OptimalFit`] if available, otherwise
 | 
			
		||||
    /// [`WrapAlgorithm::FirstFit`].
 | 
			
		||||
    pub const fn new() -> Self {
 | 
			
		||||
        #[cfg(not(feature = "smawk"))]
 | 
			
		||||
        {
 | 
			
		||||
            WrapAlgorithm::FirstFit
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        #[cfg(feature = "smawk")]
 | 
			
		||||
        {
 | 
			
		||||
            WrapAlgorithm::new_optimal_fit()
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// New [`WrapAlgorithm::OptimalFit`] with default penalties. This
 | 
			
		||||
    /// works well for monospace text.
 | 
			
		||||
    ///
 | 
			
		||||
    /// **Note:** Only available when the `smawk` Cargo feature is
 | 
			
		||||
    /// enabled.
 | 
			
		||||
    #[cfg(feature = "smawk")]
 | 
			
		||||
    pub const fn new_optimal_fit() -> Self {
 | 
			
		||||
        WrapAlgorithm::OptimalFit(Penalties::new())
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Wrap words according to line widths.
 | 
			
		||||
    ///
 | 
			
		||||
    /// The `line_widths` slice gives the target line width for each
 | 
			
		||||
    /// line (the last slice element is repeated as necessary). This
 | 
			
		||||
    /// can be used to implement hanging indentation.
 | 
			
		||||
    #[inline]
 | 
			
		||||
    pub fn wrap<'a, 'b>(
 | 
			
		||||
        &self,
 | 
			
		||||
        words: &'b [Word<'a>],
 | 
			
		||||
        line_widths: &'b [usize],
 | 
			
		||||
    ) -> Vec<&'b [Word<'a>]> {
 | 
			
		||||
        // Every integer up to 2u64.pow(f64::MANTISSA_DIGITS) = 2**53
 | 
			
		||||
        // = 9_007_199_254_740_992 can be represented without loss by
 | 
			
		||||
        // a f64. Larger line widths will be rounded to the nearest
 | 
			
		||||
        // representable number.
 | 
			
		||||
        let f64_line_widths = line_widths.iter().map(|w| *w as f64).collect::<Vec<_>>();
 | 
			
		||||
 | 
			
		||||
        match self {
 | 
			
		||||
            WrapAlgorithm::FirstFit => wrap_first_fit(words, &f64_line_widths),
 | 
			
		||||
 | 
			
		||||
            #[cfg(feature = "smawk")]
 | 
			
		||||
            WrapAlgorithm::OptimalFit(penalties) => {
 | 
			
		||||
                // The computation cannnot overflow when the line
 | 
			
		||||
                // widths are restricted to usize.
 | 
			
		||||
                wrap_optimal_fit(words, &f64_line_widths, penalties).unwrap()
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            WrapAlgorithm::Custom(func) => func(words, line_widths),
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
impl Default for WrapAlgorithm {
 | 
			
		||||
    fn default() -> Self {
 | 
			
		||||
        WrapAlgorithm::new()
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Wrap abstract fragments into lines with a first-fit algorithm.
 | 
			
		||||
///
 | 
			
		||||
/// The `line_widths` slice gives the target line width for each line
 | 
			
		||||
/// (the last slice element is repeated as necessary). This can be
 | 
			
		||||
/// used to implement hanging indentation.
 | 
			
		||||
///
 | 
			
		||||
/// The fragments must already have been split into the desired
 | 
			
		||||
/// widths, this function will not (and cannot) attempt to split them
 | 
			
		||||
/// further when arranging them into lines.
 | 
			
		||||
///
 | 
			
		||||
/// # First-Fit Algorithm
 | 
			
		||||
///
 | 
			
		||||
/// This implements a simple “greedy” algorithm: accumulate fragments
 | 
			
		||||
/// one by one and when a fragment no longer fits, start a new line.
 | 
			
		||||
/// There is no look-ahead, we simply take first fit of the fragments
 | 
			
		||||
/// we find.
 | 
			
		||||
///
 | 
			
		||||
/// While fast and predictable, this algorithm can produce poor line
 | 
			
		||||
/// breaks when a long fragment is moved to a new line, leaving behind
 | 
			
		||||
/// a large gap:
 | 
			
		||||
///
 | 
			
		||||
/// ```
 | 
			
		||||
/// use textwrap::core::Word;
 | 
			
		||||
/// use textwrap::wrap_algorithms::wrap_first_fit;
 | 
			
		||||
/// use textwrap::WordSeparator;
 | 
			
		||||
///
 | 
			
		||||
/// // Helper to convert wrapped lines to a Vec<String>.
 | 
			
		||||
/// fn lines_to_strings(lines: Vec<&[Word<'_>]>) -> Vec<String> {
 | 
			
		||||
///     lines.iter().map(|line| {
 | 
			
		||||
///         line.iter().map(|word| &**word).collect::<Vec<_>>().join(" ")
 | 
			
		||||
///     }).collect::<Vec<_>>()
 | 
			
		||||
/// }
 | 
			
		||||
///
 | 
			
		||||
/// let text = "These few words will unfortunately not wrap nicely.";
 | 
			
		||||
/// let words = WordSeparator::AsciiSpace.find_words(text).collect::<Vec<_>>();
 | 
			
		||||
/// assert_eq!(lines_to_strings(wrap_first_fit(&words, &[15.0])),
 | 
			
		||||
///            vec!["These few words",
 | 
			
		||||
///                 "will",  // <-- short line
 | 
			
		||||
///                 "unfortunately",
 | 
			
		||||
///                 "not wrap",
 | 
			
		||||
///                 "nicely."]);
 | 
			
		||||
///
 | 
			
		||||
/// // We can avoid the short line if we look ahead:
 | 
			
		||||
/// #[cfg(feature = "smawk")]
 | 
			
		||||
/// use textwrap::wrap_algorithms::{wrap_optimal_fit, Penalties};
 | 
			
		||||
/// #[cfg(feature = "smawk")]
 | 
			
		||||
/// assert_eq!(lines_to_strings(wrap_optimal_fit(&words, &[15.0], &Penalties::new()).unwrap()),
 | 
			
		||||
///            vec!["These few",
 | 
			
		||||
///                 "words will",
 | 
			
		||||
///                 "unfortunately",
 | 
			
		||||
///                 "not wrap",
 | 
			
		||||
///                 "nicely."]);
 | 
			
		||||
/// ```
 | 
			
		||||
///
 | 
			
		||||
/// The [`wrap_optimal_fit`] function was used above to get better
 | 
			
		||||
/// line breaks. It uses an advanced algorithm which tries to avoid
 | 
			
		||||
/// short lines. This function is about 4 times faster than
 | 
			
		||||
/// [`wrap_optimal_fit`].
 | 
			
		||||
///
 | 
			
		||||
/// # Examples
 | 
			
		||||
///
 | 
			
		||||
/// Imagine you're building a house site and you have a number of
 | 
			
		||||
/// tasks you need to execute. Things like pour foundation, complete
 | 
			
		||||
/// framing, install plumbing, electric cabling, install insulation.
 | 
			
		||||
///
 | 
			
		||||
/// The construction workers can only work during daytime, so they
 | 
			
		||||
/// need to pack up everything at night. Because they need to secure
 | 
			
		||||
/// their tools and move machines back to the garage, this process
 | 
			
		||||
/// takes much more time than the time it would take them to simply
 | 
			
		||||
/// switch to another task.
 | 
			
		||||
///
 | 
			
		||||
/// You would like to make a list of tasks to execute every day based
 | 
			
		||||
/// on your estimates. You can model this with a program like this:
 | 
			
		||||
///
 | 
			
		||||
/// ```
 | 
			
		||||
/// use textwrap::core::{Fragment, Word};
 | 
			
		||||
/// use textwrap::wrap_algorithms::wrap_first_fit;
 | 
			
		||||
///
 | 
			
		||||
/// #[derive(Debug)]
 | 
			
		||||
/// struct Task<'a> {
 | 
			
		||||
///     name: &'a str,
 | 
			
		||||
///     hours: f64,   // Time needed to complete task.
 | 
			
		||||
///     sweep: f64,   // Time needed for a quick sweep after task during the day.
 | 
			
		||||
///     cleanup: f64, // Time needed for full cleanup if day ends with this task.
 | 
			
		||||
/// }
 | 
			
		||||
///
 | 
			
		||||
/// impl Fragment for Task<'_> {
 | 
			
		||||
///     fn width(&self) -> f64 { self.hours }
 | 
			
		||||
///     fn whitespace_width(&self) -> f64 { self.sweep }
 | 
			
		||||
///     fn penalty_width(&self) -> f64 { self.cleanup }
 | 
			
		||||
/// }
 | 
			
		||||
///
 | 
			
		||||
/// // The morning tasks
 | 
			
		||||
/// let tasks = vec![
 | 
			
		||||
///     Task { name: "Foundation",  hours: 4.0, sweep: 2.0, cleanup: 3.0 },
 | 
			
		||||
///     Task { name: "Framing",     hours: 3.0, sweep: 1.0, cleanup: 2.0 },
 | 
			
		||||
///     Task { name: "Plumbing",    hours: 2.0, sweep: 2.0, cleanup: 2.0 },
 | 
			
		||||
///     Task { name: "Electrical",  hours: 2.0, sweep: 1.0, cleanup: 2.0 },
 | 
			
		||||
///     Task { name: "Insulation",  hours: 2.0, sweep: 1.0, cleanup: 2.0 },
 | 
			
		||||
///     Task { name: "Drywall",     hours: 3.0, sweep: 1.0, cleanup: 2.0 },
 | 
			
		||||
///     Task { name: "Floors",      hours: 3.0, sweep: 1.0, cleanup: 2.0 },
 | 
			
		||||
///     Task { name: "Countertops", hours: 1.0, sweep: 1.0, cleanup: 2.0 },
 | 
			
		||||
///     Task { name: "Bathrooms",   hours: 2.0, sweep: 1.0, cleanup: 2.0 },
 | 
			
		||||
/// ];
 | 
			
		||||
///
 | 
			
		||||
/// // Fill tasks into days, taking `day_length` into account. The
 | 
			
		||||
/// // output shows the hours worked per day along with the names of
 | 
			
		||||
/// // the tasks for that day.
 | 
			
		||||
/// fn assign_days<'a>(tasks: &[Task<'a>], day_length: f64) -> Vec<(f64, Vec<&'a str>)> {
 | 
			
		||||
///     let mut days = Vec::new();
 | 
			
		||||
///     // Assign tasks to days. The assignment is a vector of slices,
 | 
			
		||||
///     // with a slice per day.
 | 
			
		||||
///     let assigned_days: Vec<&[Task<'a>]> = wrap_first_fit(&tasks, &[day_length]);
 | 
			
		||||
///     for day in assigned_days.iter() {
 | 
			
		||||
///         let last = day.last().unwrap();
 | 
			
		||||
///         let work_hours: f64 = day.iter().map(|t| t.hours + t.sweep).sum();
 | 
			
		||||
///         let names = day.iter().map(|t| t.name).collect::<Vec<_>>();
 | 
			
		||||
///         days.push((work_hours - last.sweep + last.cleanup, names));
 | 
			
		||||
///     }
 | 
			
		||||
///     days
 | 
			
		||||
/// }
 | 
			
		||||
///
 | 
			
		||||
/// // With a single crew working 8 hours a day:
 | 
			
		||||
/// assert_eq!(
 | 
			
		||||
///     assign_days(&tasks, 8.0),
 | 
			
		||||
///     [
 | 
			
		||||
///         (7.0, vec!["Foundation"]),
 | 
			
		||||
///         (8.0, vec!["Framing", "Plumbing"]),
 | 
			
		||||
///         (7.0, vec!["Electrical", "Insulation"]),
 | 
			
		||||
///         (5.0, vec!["Drywall"]),
 | 
			
		||||
///         (7.0, vec!["Floors", "Countertops"]),
 | 
			
		||||
///         (4.0, vec!["Bathrooms"]),
 | 
			
		||||
///     ]
 | 
			
		||||
/// );
 | 
			
		||||
///
 | 
			
		||||
/// // With two crews working in shifts, 16 hours a day:
 | 
			
		||||
/// assert_eq!(
 | 
			
		||||
///     assign_days(&tasks, 16.0),
 | 
			
		||||
///     [
 | 
			
		||||
///         (14.0, vec!["Foundation", "Framing", "Plumbing"]),
 | 
			
		||||
///         (15.0, vec!["Electrical", "Insulation", "Drywall", "Floors"]),
 | 
			
		||||
///         (6.0, vec!["Countertops", "Bathrooms"]),
 | 
			
		||||
///     ]
 | 
			
		||||
/// );
 | 
			
		||||
/// ```
 | 
			
		||||
///
 | 
			
		||||
/// Apologies to anyone who actually knows how to build a house and
 | 
			
		||||
/// knows how long each step takes :-)
 | 
			
		||||
pub fn wrap_first_fit<'a, 'b, T: Fragment>(
 | 
			
		||||
    fragments: &'a [T],
 | 
			
		||||
    line_widths: &'b [f64],
 | 
			
		||||
) -> Vec<&'a [T]> {
 | 
			
		||||
    // The final line width is used for all remaining lines.
 | 
			
		||||
    let default_line_width = line_widths.last().copied().unwrap_or(0.0);
 | 
			
		||||
    let mut lines = Vec::new();
 | 
			
		||||
    let mut start = 0;
 | 
			
		||||
    let mut width = 0.0;
 | 
			
		||||
 | 
			
		||||
    for (idx, fragment) in fragments.iter().enumerate() {
 | 
			
		||||
        let line_width = line_widths
 | 
			
		||||
            .get(lines.len())
 | 
			
		||||
            .copied()
 | 
			
		||||
            .unwrap_or(default_line_width);
 | 
			
		||||
        if width + fragment.width() + fragment.penalty_width() > line_width && idx > start {
 | 
			
		||||
            lines.push(&fragments[start..idx]);
 | 
			
		||||
            start = idx;
 | 
			
		||||
            width = 0.0;
 | 
			
		||||
        }
 | 
			
		||||
        width += fragment.width() + fragment.whitespace_width();
 | 
			
		||||
    }
 | 
			
		||||
    lines.push(&fragments[start..]);
 | 
			
		||||
    lines
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Minimal fragment whose width is the wrapped `f64`.
    #[derive(Debug, PartialEq)]
    struct Word(f64);

    #[rustfmt::skip]
    impl Fragment for Word {
        fn width(&self) -> f64 { self.0 }
        fn whitespace_width(&self) -> f64 { 1.0 }
        fn penalty_width(&self) -> f64 { 0.0 }
    }

    #[test]
    fn wrap_string_longer_than_f64() {
        let words = vec![
            Word(1e307),
            Word(2e307),
            Word(3e307),
            Word(4e307),
            Word(5e307),
            Word(6e307),
        ];
        // Wrap at 15e307, just under f64::MAX (~18e307). The tiny
        // whitespace_widths disappear because of loss of precision.
        assert_eq!(
            wrap_first_fit(&words, &[15e307]),
            &[
                vec![
                    Word(1e307),
                    Word(2e307),
                    Word(3e307),
                    Word(4e307),
                    Word(5e307)
                ],
                vec![Word(6e307)]
            ]
        );
    }
}
 | 
			
		||||
							
								
								
									
										433
									
								
								vendor/textwrap/src/wrap_algorithms/optimal_fit.rs
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										433
									
								
								vendor/textwrap/src/wrap_algorithms/optimal_fit.rs
									
									
									
									
										vendored
									
									
										Normal file
									
								
							@@ -0,0 +1,433 @@
 | 
			
		||||
use std::cell::RefCell;
 | 
			
		||||
 | 
			
		||||
use crate::core::Fragment;
 | 
			
		||||
 | 
			
		||||
/// Penalties for
/// [`WrapAlgorithm::OptimalFit`](crate::WrapAlgorithm::OptimalFit)
/// and [`wrap_optimal_fit`].
///
/// The wrapping algorithm in [`wrap_optimal_fit`] considers the
/// entire paragraph to find optimal line breaks. When wrapping text,
/// "penalties" are assigned to line breaks based on the gaps left at
/// the end of lines. The penalties are given by this struct, with
/// [`Penalties::default`] assigning penalties that work well for
/// monospace text.
///
/// If you are wrapping proportional text, you are advised to assign
/// your own penalties according to your font size. See the individual
/// penalties below for details.
///
/// **Note:** Only available when the `smawk` Cargo feature is
/// enabled.
#[derive(Clone, Copy, Debug)]
pub struct Penalties {
    /// Per-line penalty. This is added for every line, which makes it
    /// expensive to output more lines than the minimum required.
    pub nline_penalty: usize,

    /// Per-character cost for lines that overflow the target line width.
    ///
    /// With a default value of 50², every single character costs as
    /// much as leaving a gap of 50 characters behind. This is because
    /// we assign a cost of `gap * gap` to a short line. When
    /// wrapping monospace text, we can overflow the line by 1
    /// character in extreme cases:
    ///
    /// ```
    /// use textwrap::core::Word;
    /// use textwrap::wrap_algorithms::{wrap_optimal_fit, Penalties};
    ///
    /// let short = "foo ";
    /// let long = "x".repeat(50);
    /// let length = (short.len() + long.len()) as f64;
    /// let fragments = vec![Word::from(short), Word::from(&long)];
    /// let penalties = Penalties::new();
    ///
    /// // Perfect fit, both words are on a single line with no overflow.
    /// let wrapped = wrap_optimal_fit(&fragments, &[length], &penalties).unwrap();
    /// assert_eq!(wrapped, vec![&[Word::from(short), Word::from(&long)]]);
    ///
    /// // The words no longer fit, yet we get a single line back. While
    /// // the cost of overflow (`1 * 2500`) is the same as the cost of the
    /// // gap (`50 * 50 = 2500`), the tie is broken by `nline_penalty`
    /// // which makes it cheaper to overflow than to use two lines.
    /// let wrapped = wrap_optimal_fit(&fragments, &[length - 1.0], &penalties).unwrap();
    /// assert_eq!(wrapped, vec![&[Word::from(short), Word::from(&long)]]);
    ///
    /// // The cost of overflow would be 2 * 2500, whereas the cost of
    /// // the gap is only `49 * 49 + nline_penalty = 2401 + 1000 =
    /// // 3401`. We therefore get two lines.
    /// let wrapped = wrap_optimal_fit(&fragments, &[length - 2.0], &penalties).unwrap();
    /// assert_eq!(wrapped, vec![&[Word::from(short)],
    ///                          &[Word::from(&long)]]);
    /// ```
    ///
    /// This only happens if the overflowing word is 50 characters
    /// long _and_ if the word overflows the line by exactly one
    /// character. If it overflows by more than one character, the
    /// overflow penalty will quickly outgrow the cost of the gap, as
    /// seen above.
    pub overflow_penalty: usize,

    /// When should a single word on the last line be considered
    /// "too short"?
    ///
    /// If the last line of the text consists of a single word and if
    /// this word is shorter than `1 / short_last_line_fraction` of
    /// the line width, then the final line will be considered "short"
    /// and `short_last_line_penalty` is added as an extra penalty.
    ///
    /// The effect of this is to avoid a final line consisting of a
    /// single small word. For example, with a
    /// `short_last_line_penalty` of 25 (the default), a gap of up to
    /// 5 columns will be seen as more desirable than having a final
    /// short line.
    ///
    /// ## Examples
    ///
    /// ```
    /// use textwrap::{wrap, wrap_algorithms, Options, WrapAlgorithm};
    ///
    /// let text = "This is a demo of the short last line penalty.";
    ///
    /// // The first-fit algorithm leaves a single short word on the last line:
    /// assert_eq!(wrap(text, Options::new(37).wrap_algorithm(WrapAlgorithm::FirstFit)),
    ///            vec!["This is a demo of the short last line",
    ///                 "penalty."]);
    ///
    /// #[cfg(feature = "smawk")] {
    /// let mut penalties = wrap_algorithms::Penalties::new();
    ///
    /// // Since "penalty." is shorter than 25% of the line width, the
    /// // optimal-fit algorithm adds a penalty of 25. This is enough
    /// // to move "line " down:
    /// assert_eq!(wrap(text, Options::new(37).wrap_algorithm(WrapAlgorithm::OptimalFit(penalties))),
    ///            vec!["This is a demo of the short last",
    ///                 "line penalty."]);
    ///
    /// // We can change the meaning of "short" lines. Here, only words
    /// // shorter than 1/10th of the line width will be considered short:
    /// penalties.short_last_line_fraction = 10;
    /// assert_eq!(wrap(text, Options::new(37).wrap_algorithm(WrapAlgorithm::OptimalFit(penalties))),
    ///            vec!["This is a demo of the short last line",
    ///                 "penalty."]);
    ///
    /// // If desired, the penalty can also be disabled:
    /// penalties.short_last_line_fraction = 4;
    /// penalties.short_last_line_penalty = 0;
    /// assert_eq!(wrap(text, Options::new(37).wrap_algorithm(WrapAlgorithm::OptimalFit(penalties))),
    ///            vec!["This is a demo of the short last line",
    ///                 "penalty."]);
    /// }
    /// ```
    pub short_last_line_fraction: usize,

    /// Penalty for a last line with a single short word.
    ///
    /// Set this to zero if you do not want to penalize short last lines.
    pub short_last_line_penalty: usize,

    /// Penalty for lines ending with a hyphen.
    pub hyphen_penalty: usize,
}
 | 
			
		||||
 | 
			
		||||
impl Penalties {
 | 
			
		||||
    /// Default penalties for monospace text.
 | 
			
		||||
    ///
 | 
			
		||||
    /// The penalties here work well for monospace text. This is
 | 
			
		||||
    /// because they expect the gaps at the end of lines to be roughly
 | 
			
		||||
    /// in the range `0..100`. If the gaps are larger, the
 | 
			
		||||
    /// `overflow_penalty` and `hyphen_penalty` become insignificant.
 | 
			
		||||
    pub const fn new() -> Self {
 | 
			
		||||
        Penalties {
 | 
			
		||||
            nline_penalty: 1000,
 | 
			
		||||
            overflow_penalty: 50 * 50,
 | 
			
		||||
            short_last_line_fraction: 4,
 | 
			
		||||
            short_last_line_penalty: 25,
 | 
			
		||||
            hyphen_penalty: 25,
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
impl Default for Penalties {
 | 
			
		||||
    fn default() -> Self {
 | 
			
		||||
        Self::new()
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Cache for line numbers. This is necessary to avoid an O(n**2)
/// behavior when computing line numbers in [`wrap_optimal_fit`].
///
/// Entry `k` holds the number of lines that precede a break placed
/// before fragment `k`. Entry 0 is always 0.
struct LineNumbers {
    // Interior mutability lets `get` fill the cache through `&self`.
    line_numbers: RefCell<Vec<usize>>,
}

impl LineNumbers {
    /// Create a cache with room for `size` entries, seeded with the
    /// line number 0 for position 0.
    fn new(size: usize) -> Self {
        let mut line_numbers = Vec::with_capacity(size);
        line_numbers.push(0);
        LineNumbers {
            line_numbers: RefCell::new(line_numbers),
        }
    }

    /// Return the line number for position `i`, extending the cache as
    /// needed.
    ///
    /// `minima[k].0` is read as the position of the break preceding
    /// position `k`; the line number of `k` is one more than the line
    /// number of that predecessor. Indexing panics if `minima` has no
    /// entry for a position that must be computed.
    fn get<T>(&self, i: usize, minima: &[(usize, T)]) -> usize {
        loop {
            // Each borrow is released at the end of its statement, so
            // the recursive call below never sees an outstanding borrow.
            let pos = self.line_numbers.borrow().len();
            if pos > i {
                break;
            }
            let line_number = 1 + self.get(minima[pos].0, minima);
            self.line_numbers.borrow_mut().push(line_number);
        }

        self.line_numbers.borrow()[i]
    }
}
 | 
			
		||||
 | 
			
		||||
/// Overflow error during the [`wrap_optimal_fit`] computation.
#[derive(Debug, PartialEq, Eq)]
pub struct OverflowError;

impl std::fmt::Display for OverflowError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // The message is a fixed string, so no formatting machinery is needed.
        f.write_str("wrap_optimal_fit cost computation overflowed")
    }
}

impl std::error::Error for OverflowError {}
 | 
			
		||||
 | 
			
		||||
/// Wrap abstract fragments into lines with an optimal-fit algorithm.
 | 
			
		||||
///
 | 
			
		||||
/// The `line_widths` slice gives the target line width for each line
 | 
			
		||||
/// (the last slice element is repeated as necessary). This can be
 | 
			
		||||
/// used to implement hanging indentation.
 | 
			
		||||
///
 | 
			
		||||
/// The fragments must already have been split into the desired
 | 
			
		||||
/// widths, this function will not (and cannot) attempt to split them
 | 
			
		||||
/// further when arranging them into lines.
 | 
			
		||||
///
 | 
			
		||||
/// # Optimal-Fit Algorithm
 | 
			
		||||
///
 | 
			
		||||
/// The algorithm considers all possible break points and picks the
 | 
			
		||||
/// breaks which minimizes the gaps at the end of each line. More
 | 
			
		||||
/// precisely, the algorithm assigns a cost or penalty to each break
 | 
			
		||||
/// point, determined by `cost = gap * gap` where `gap = target_width -
 | 
			
		||||
/// line_width`. Shorter lines are thus penalized more heavily since
 | 
			
		||||
/// they leave behind a larger gap.
 | 
			
		||||
///
 | 
			
		||||
/// We can illustrate this with the text “To be, or not to be: that is
 | 
			
		||||
/// the question”. We will be wrapping it in a narrow column with room
 | 
			
		||||
/// for only 10 characters. The [greedy
 | 
			
		||||
/// algorithm](super::wrap_first_fit) will produce these lines, each
 | 
			
		||||
/// annotated with the corresponding penalty:
 | 
			
		||||
///
 | 
			
		||||
/// ```text
 | 
			
		||||
/// "To be, or"   1² =  1
 | 
			
		||||
/// "not to be:"  0² =  0
 | 
			
		||||
/// "that is"     3² =  9
 | 
			
		||||
/// "the"         7² = 49
 | 
			
		||||
/// "question"    2² =  4
 | 
			
		||||
/// ```
 | 
			
		||||
///
 | 
			
		||||
/// We see that line four with “the” leaves a gap of 7 columns, which
 | 
			
		||||
/// gives it a penalty of 49. The sum of the penalties is 63.
 | 
			
		||||
///
 | 
			
		||||
/// There are 10 words, which means that there are `2_u32.pow(9)` or
 | 
			
		||||
/// 512 different ways to typeset it. We can compute
 | 
			
		||||
/// the sum of the penalties for each possible line break and search
 | 
			
		||||
/// for the one with the lowest sum:
 | 
			
		||||
///
 | 
			
		||||
/// ```text
 | 
			
		||||
/// "To be,"     4² = 16
 | 
			
		||||
/// "or not to"  1² =  1
 | 
			
		||||
/// "be: that"   2² =  4
 | 
			
		||||
/// "is the"     4² = 16
 | 
			
		||||
/// "question"   2² =  4
 | 
			
		||||
/// ```
 | 
			
		||||
///
 | 
			
		||||
/// The sum of the penalties is 41, which is better than what the
 | 
			
		||||
/// greedy algorithm produced.
 | 
			
		||||
///
 | 
			
		||||
/// Searching through all possible combinations would normally be
 | 
			
		||||
/// prohibitively slow. However, it turns out that the problem can be
 | 
			
		||||
/// formulated as the task of finding column minima in a cost matrix.
 | 
			
		||||
/// This matrix has a special form (totally monotone) which lets us
 | 
			
		||||
/// use a [linear-time algorithm called
 | 
			
		||||
/// SMAWK](https://lib.rs/crates/smawk) to find the optimal break
 | 
			
		||||
/// points.
 | 
			
		||||
///
 | 
			
		||||
/// This means that the time complexity remains O(_n_) where _n_ is
 | 
			
		||||
/// the number of words. Compared to
 | 
			
		||||
/// [`wrap_first_fit`](super::wrap_first_fit), this function is about
 | 
			
		||||
/// 4 times slower.
 | 
			
		||||
///
 | 
			
		||||
/// The optimization of per-line costs over the entire paragraph is
 | 
			
		||||
/// inspired by the line breaking algorithm used in TeX, as described
 | 
			
		||||
/// in the 1981 article [_Breaking Paragraphs into
 | 
			
		||||
/// Lines_](http://www.eprg.org/G53DOC/pdfs/knuth-plass-breaking.pdf)
 | 
			
		||||
/// by Knuth and Plass. The implementation here is based on [Python
 | 
			
		||||
/// code by David
 | 
			
		||||
/// Eppstein](https://github.com/jfinkels/PADS/blob/master/pads/wrap.py).
 | 
			
		||||
///
 | 
			
		||||
/// # Errors
 | 
			
		||||
///
 | 
			
		||||
/// In case of an overflow during the cost computation, an `Err` is
 | 
			
		||||
/// returned. Overflows happens when fragments or lines have infinite
 | 
			
		||||
/// widths (`f64::INFINITY`) or if the widths are so large that the
 | 
			
		||||
/// gaps at the end of lines have sizes larger than `f64::MAX.sqrt()`
 | 
			
		||||
/// (approximately 1e154):
 | 
			
		||||
///
 | 
			
		||||
/// ```
 | 
			
		||||
/// use textwrap::core::Fragment;
 | 
			
		||||
/// use textwrap::wrap_algorithms::{wrap_optimal_fit, OverflowError, Penalties};
 | 
			
		||||
///
 | 
			
		||||
/// #[derive(Debug, PartialEq)]
 | 
			
		||||
/// struct Word(f64);
 | 
			
		||||
///
 | 
			
		||||
/// impl Fragment for Word {
 | 
			
		||||
///     fn width(&self) -> f64 { self.0 }
 | 
			
		||||
///     fn whitespace_width(&self) -> f64 { 1.0 }
 | 
			
		||||
///     fn penalty_width(&self) -> f64 { 0.0 }
 | 
			
		||||
/// }
 | 
			
		||||
///
 | 
			
		||||
/// // Wrapping overflows because 1e155 * 1e155 = 1e310, which is
 | 
			
		||||
/// // larger than f64::MAX:
 | 
			
		||||
/// assert_eq!(wrap_optimal_fit(&[Word(0.0), Word(0.0)], &[1e155], &Penalties::default()),
 | 
			
		||||
///            Err(OverflowError));
 | 
			
		||||
/// ```
 | 
			
		||||
///
 | 
			
		||||
/// When using fragment widths and line widths which fit inside an
 | 
			
		||||
/// `u64`, overflows cannot happen. This means that fragments derived
 | 
			
		||||
/// from a `&str` cannot cause overflows.
 | 
			
		||||
///
 | 
			
		||||
/// **Note:** Only available when the `smawk` Cargo feature is
 | 
			
		||||
/// enabled.
 | 
			
		||||
pub fn wrap_optimal_fit<'a, 'b, T: Fragment>(
 | 
			
		||||
    fragments: &'a [T],
 | 
			
		||||
    line_widths: &'b [f64],
 | 
			
		||||
    penalties: &'b Penalties,
 | 
			
		||||
) -> Result<Vec<&'a [T]>, OverflowError> {
 | 
			
		||||
    // The final line width is used for all remaining lines.
 | 
			
		||||
    let default_line_width = line_widths.last().copied().unwrap_or(0.0);
 | 
			
		||||
    let mut widths = Vec::with_capacity(fragments.len() + 1);
 | 
			
		||||
    let mut width = 0.0;
 | 
			
		||||
    widths.push(width);
 | 
			
		||||
    for fragment in fragments {
 | 
			
		||||
        width += fragment.width() + fragment.whitespace_width();
 | 
			
		||||
        widths.push(width);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    let line_numbers = LineNumbers::new(fragments.len());
 | 
			
		||||
 | 
			
		||||
    let minima = smawk::online_column_minima(0.0, widths.len(), |minima, i, j| {
 | 
			
		||||
        // Line number for fragment `i`.
 | 
			
		||||
        let line_number = line_numbers.get(i, minima);
 | 
			
		||||
        let line_width = line_widths
 | 
			
		||||
            .get(line_number)
 | 
			
		||||
            .copied()
 | 
			
		||||
            .unwrap_or(default_line_width);
 | 
			
		||||
        let target_width = line_width.max(1.0);
 | 
			
		||||
 | 
			
		||||
        // Compute the width of a line spanning fragments[i..j] in
 | 
			
		||||
        // constant time. We need to adjust widths[j] by subtracting
 | 
			
		||||
        // the whitespace of fragment[j-1] and then add the penalty.
 | 
			
		||||
        let line_width = widths[j] - widths[i] - fragments[j - 1].whitespace_width()
 | 
			
		||||
            + fragments[j - 1].penalty_width();
 | 
			
		||||
 | 
			
		||||
        // We compute cost of the line containing fragments[i..j]. We
 | 
			
		||||
        // start with values[i].1, which is the optimal cost for
 | 
			
		||||
        // breaking before fragments[i].
 | 
			
		||||
        //
 | 
			
		||||
        // First, every extra line cost NLINE_PENALTY.
 | 
			
		||||
        let mut cost = minima[i].1 + penalties.nline_penalty as f64;
 | 
			
		||||
 | 
			
		||||
        // Next, we add a penalty depending on the line length.
 | 
			
		||||
        if line_width > target_width {
 | 
			
		||||
            // Lines that overflow get a hefty penalty.
 | 
			
		||||
            let overflow = line_width - target_width;
 | 
			
		||||
            cost += overflow * penalties.overflow_penalty as f64;
 | 
			
		||||
        } else if j < fragments.len() {
 | 
			
		||||
            // Other lines (except for the last line) get a milder
 | 
			
		||||
            // penalty which depend on the size of the gap.
 | 
			
		||||
            let gap = target_width - line_width;
 | 
			
		||||
            cost += gap * gap;
 | 
			
		||||
        } else if i + 1 == j
 | 
			
		||||
            && line_width < target_width / penalties.short_last_line_fraction as f64
 | 
			
		||||
        {
 | 
			
		||||
            // The last line can have any size gap, but we do add a
 | 
			
		||||
            // penalty if the line is very short (typically because it
 | 
			
		||||
            // contains just a single word).
 | 
			
		||||
            cost += penalties.short_last_line_penalty as f64;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        // Finally, we discourage hyphens.
 | 
			
		||||
        if fragments[j - 1].penalty_width() > 0.0 {
 | 
			
		||||
            // TODO: this should use a penalty value from the fragment
 | 
			
		||||
            // instead.
 | 
			
		||||
            cost += penalties.hyphen_penalty as f64;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        cost
 | 
			
		||||
    });
 | 
			
		||||
 | 
			
		||||
    for (_, cost) in &minima {
 | 
			
		||||
        if cost.is_infinite() {
 | 
			
		||||
            return Err(OverflowError);
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    let mut lines = Vec::with_capacity(line_numbers.get(fragments.len(), &minima));
 | 
			
		||||
    let mut pos = fragments.len();
 | 
			
		||||
    loop {
 | 
			
		||||
        let prev = minima[pos].0;
 | 
			
		||||
        lines.push(&fragments[prev..pos]);
 | 
			
		||||
        pos = prev;
 | 
			
		||||
        if pos == 0 {
 | 
			
		||||
            break;
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    lines.reverse();
 | 
			
		||||
    Ok(lines)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Minimal fragment whose width is the wrapped `f64`.
    #[derive(Debug, PartialEq)]
    struct Word(f64);

    #[rustfmt::skip]
    impl Fragment for Word {
        fn width(&self) -> f64 { self.0 }
        fn whitespace_width(&self) -> f64 { 1.0 }
        fn penalty_width(&self) -> f64 { 0.0 }
    }

    #[test]
    fn wrap_fragments_with_infinite_widths() {
        let words = vec![Word(f64::INFINITY)];
        assert_eq!(
            wrap_optimal_fit(&words, &[0.0], &Penalties::default()),
            Err(OverflowError)
        );
    }

    #[test]
    fn wrap_fragments_with_huge_widths() {
        let words = vec![Word(1e200), Word(1e250), Word(1e300)];
        assert_eq!(
            wrap_optimal_fit(&words, &[1e300], &Penalties::default()),
            Err(OverflowError)
        );
    }

    #[test]
    fn wrap_fragments_with_large_widths() {
        // The gaps will have sizes between 1e25 and 1e75. This
        // makes the `gap * gap` cost fit comfortably in a f64.
        let words = vec![Word(1e25), Word(1e50), Word(1e75)];
        assert_eq!(
            wrap_optimal_fit(&words, &[1e100], &Penalties::default()),
            Ok(vec![&vec![Word(1e25), Word(1e50), Word(1e75)][..]])
        );
    }
}
 | 
			
		||||
		Reference in New Issue
	
	Block a user