Initial vendor packages

Signed-off-by: Valentin Popov <valentin@popov.link>
This commit is contained in:
2024-01-08 01:21:28 +04:00
parent 5ecd8cf2cb
commit 1b6a04ca55
7309 changed files with 2160054 additions and 0 deletions

433
vendor/textwrap/src/core.rs vendored Normal file
View File

@ -0,0 +1,433 @@
//! Building blocks for advanced wrapping functionality.
//!
//! The functions and structs in this module can be used to implement
//! advanced wrapping functionality when the [`wrap`](super::wrap) and
//! [`fill`](super::fill) functions don't do what you want.
//!
//! In general, you want to follow these steps when wrapping
//! something:
//!
//! 1. Split your input into [`Fragment`]s. These are abstract blocks
//! of text or content which can be wrapped into lines. See
//! [`WordSeparator`](crate::word_separators::WordSeparator) for
//! how to do this for text.
//!
//! 2. Potentially split your fragments into smaller pieces. This
//! allows you to implement things like hyphenation. If you use the
//! `Word` type, you can use the [`WordSplitter`](crate::WordSplitter)
//! enum for this.
//!
//! 3. Potentially break apart fragments that are still too large to
//! fit on a single line. This is implemented in [`break_words`].
//!
//! 4. Finally take your fragments and put them into lines. There are
//! two algorithms for this in the
//! [`wrap_algorithms`](crate::wrap_algorithms) module:
//! [`wrap_optimal_fit`](crate::wrap_algorithms::wrap_optimal_fit)
//! and [`wrap_first_fit`](crate::wrap_algorithms::wrap_first_fit).
//! The former produces better line breaks, the latter is faster.
//!
//! 5. Iterate through the slices returned by the wrapping functions
//! and construct your lines of output.
//!
//! Please [open an issue](https://github.com/mgeisler/textwrap/) if
//! the functionality here is not sufficient or if you have ideas for
//! improving it. We would love to hear from you!
/// The CSI or “Control Sequence Introducer” introduces an ANSI escape
/// sequence. This is typically used for colored text and will be
/// ignored when computing the text width.
const CSI: (char, char) = ('\x1b', '[');

/// The final bytes of an ANSI escape sequence must be in this range.
const ANSI_FINAL_BYTE: std::ops::RangeInclusive<char> = '\x40'..='\x7e';

/// Skip an ANSI escape sequence, if one starts at `ch`.
///
/// `ch` is the current `char` and `chars` yields the characters that
/// follow it. Returns `true` once a complete sequence has been
/// consumed from `chars`: the two `CSI` characters, any parameter
/// characters, and a final byte in the range 0x40–0x7E.
///
/// Note that `chars` is advanced whenever `ch` is the escape
/// character, even when `false` is returned: a character other than
/// `[` following the escape has still been consumed, and an
/// unterminated sequence drains `chars` completely.
#[inline]
pub(crate) fn skip_ansi_escape_sequence<I: Iterator<Item = char>>(ch: char, chars: &mut I) -> bool {
    let (escape, bracket) = CSI;
    if ch != escape {
        return false;
    }
    if chars.next() != Some(bracket) {
        return false;
    }
    // Consume parameter characters up to and including the final byte.
    // `any` stops right after the first match and yields `false` when
    // the input ends before a final byte shows up.
    chars.any(|following| ANSI_FINAL_BYTE.contains(&following))
}
/// Width of `ch` in columns as reported by the `unicode-width` crate.
/// Characters for which the crate reports no width count as 0.
#[cfg(feature = "unicode-width")]
#[inline]
fn ch_width(ch: char) -> usize {
    match unicode_width::UnicodeWidthChar::width(ch) {
        Some(columns) => columns,
        None => 0,
    }
}

/// First character which [`ch_width`] will classify as double-width.
/// Please see [`display_width`].
#[cfg(not(feature = "unicode-width"))]
const DOUBLE_WIDTH_CUTOFF: char = '\u{1100}';

/// Crude width approximation used without the `unicode-width`
/// feature: 1 column below [`DOUBLE_WIDTH_CUTOFF`], 2 columns for
/// everything else.
#[cfg(not(feature = "unicode-width"))]
#[inline]
fn ch_width(ch: char) -> usize {
    if ch >= DOUBLE_WIDTH_CUTOFF {
        2
    } else {
        1
    }
}
/// Compute the display width of `text` while skipping over ANSI
/// escape sequences.
///
/// # Examples
///
/// ```
/// use textwrap::core::display_width;
///
/// assert_eq!(display_width("Café Plain"), 10);
/// assert_eq!(display_width("\u{1b}[31mCafé Rouge\u{1b}[0m"), 10);
/// ```
///
/// **Note:** When the `unicode-width` Cargo feature is disabled, the
/// width of a `char` is determined by a crude approximation which
/// simply counts chars below U+1100 as 1 column wide, and all other
/// characters as 2 columns wide. With the feature enabled, this
/// function will correctly deal with [combining characters] in their
/// decomposed form (see [Unicode equivalence]).
///
/// An example of a decomposed character is “é”, which can be
/// decomposed into: “e” followed by a combining acute accent: “◌́”.
/// Without the `unicode-width` Cargo feature, every `char` below
/// U+1100 has a width of 1. This includes the combining accent:
///
/// ```
/// use textwrap::core::display_width;
///
/// assert_eq!(display_width("Cafe Plain"), 10);
/// #[cfg(feature = "unicode-width")]
/// assert_eq!(display_width("Cafe\u{301} Plain"), 10);
/// #[cfg(not(feature = "unicode-width"))]
/// assert_eq!(display_width("Cafe\u{301} Plain"), 11);
/// ```
///
/// ## Emojis and CJK Characters
///
/// Characters such as emojis and [CJK characters] used in the
/// Chinese, Japanese, and Korean languages are seen as double-width,
/// even if the `unicode-width` feature is disabled:
///
/// ```
/// use textwrap::core::display_width;
///
/// assert_eq!(display_width("😂😭🥺🤣✨😍🙏🥰😊🔥"), 20);
/// assert_eq!(display_width("你好"), 4); // “Nǐ hǎo” or “Hello” in Chinese
/// ```
///
/// # Limitations
///
/// The displayed width of a string cannot always be computed from the
/// string alone. This is because the width depends on the rendering
/// engine used. This is particularly visible with [emoji modifier
/// sequences] where a base emoji is modified with, e.g., skin tone or
/// hair color modifiers. It is up to the rendering engine to detect
/// this and to produce a suitable emoji.
///
/// A simple example is “❤️”, which consists of “❤” (U+2764: Black
/// Heart Symbol) followed by U+FE0F (Variation Selector-16). By
/// itself, “❤” is a black heart, but if you follow it with the
/// variant selector, you may get a wider red heart.
///
/// A more complex example would be “👨‍🦰” which should depict a man
/// with red hair. Here the computed width is too large — and the
/// width differs depending on the use of the `unicode-width` feature:
///
/// ```
/// use textwrap::core::display_width;
///
/// assert_eq!("👨‍🦰".chars().collect::<Vec<char>>(), ['\u{1f468}', '\u{200d}', '\u{1f9b0}']);
/// #[cfg(feature = "unicode-width")]
/// assert_eq!(display_width("👨‍🦰"), 4);
/// #[cfg(not(feature = "unicode-width"))]
/// assert_eq!(display_width("👨‍🦰"), 6);
/// ```
///
/// This happens because the grapheme consists of three code points:
/// “👨” (U+1F468: Man), Zero Width Joiner (U+200D), and “🦰”
/// (U+1F9B0: Red Hair). You can see them above in the test. With
/// `unicode-width` enabled, the ZWJ is correctly seen as having zero
/// width; without it, the ZWJ is counted as a double-width character.
///
/// ## Terminal Support
///
/// Modern browsers typically do a great job at combining characters
/// as shown above, but terminals often struggle more. As an example,
/// Gnome Terminal version 3.38.1, shows “❤️” as a big red heart, but
/// shows "👨‍🦰" as “👨🦰”.
///
/// [combining characters]: https://en.wikipedia.org/wiki/Combining_character
/// [Unicode equivalence]: https://en.wikipedia.org/wiki/Unicode_equivalence
/// [CJK characters]: https://en.wikipedia.org/wiki/CJK_characters
/// [emoji modifier sequences]: https://unicode.org/emoji/charts/full-emoji-modifiers.html
pub fn display_width(text: &str) -> usize {
    let mut columns = 0;
    let mut rest = text.chars();
    // A plain `for` loop cannot be used here: the escape-sequence
    // helper needs to pull additional characters from the same
    // iterator while we are in the middle of walking it.
    while let Some(ch) = rest.next() {
        if !skip_ansi_escape_sequence(ch, &mut rest) {
            columns += ch_width(ch);
        }
    }
    columns
}
/// A (text) fragment denotes the unit which we wrap into lines.
///
/// Fragments represent an abstract _word_ plus the _whitespace_
/// following the word. In case the word falls at the end of the line,
/// the whitespace is dropped and a so-called _penalty_ is inserted
/// instead (typically `"-"` if the word was hyphenated).
///
/// For wrapping purposes, the precise content of the word, the
/// whitespace, and the penalty are irrelevant. All we need to know is
/// the displayed width of each part, which this trait provides.
///
/// All three widths are reported as `f64`. Implementors must also
/// implement [`std::fmt::Debug`].
pub trait Fragment: std::fmt::Debug {
/// Displayed width of word represented by this fragment.
fn width(&self) -> f64;
/// Displayed width of the whitespace that must follow the word
/// when the word is not at the end of a line.
fn whitespace_width(&self) -> f64;
/// Displayed width of the penalty that must be inserted if the
/// word falls at the end of a line.
fn penalty_width(&self) -> f64;
}
/// A piece of wrappable text, including any trailing whitespace.
///
/// A `Word` is an example of a [`Fragment`], so it has a width,
/// trailing whitespace, and potentially a penalty item.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub struct Word<'a> {
    /// Word content.
    pub word: &'a str,
    /// Whitespace to insert if the word does not fall at the end of a line.
    pub whitespace: &'a str,
    /// Penalty string to insert if the word falls at the end of a line.
    pub penalty: &'a str,
    // Cached width in columns.
    pub(crate) width: usize,
}

/// A `Word` dereferences to its `word` content, so `str` methods can
/// be called on it directly. Whitespace and penalty are not included.
impl std::ops::Deref for Word<'_> {
    type Target = str;

    fn deref(&self) -> &str {
        self.word
    }
}
impl<'a> Word<'a> {
    /// Construct a `Word` from a string.
    ///
    /// A trailing stretch of `' '` is automatically taken to be the
    /// whitespace part of the word. The penalty is left empty and the
    /// width is computed with [`display_width`].
    pub fn from(word: &str) -> Word<'_> {
        let text = word.trim_end_matches(' ');
        let trailing = &word[text.len()..];
        Word {
            word: text,
            whitespace: trailing,
            penalty: "",
            width: display_width(text),
        }
    }

    /// Break this word into smaller words with a width of at most
    /// `line_width`. The whitespace and penalty from this `Word` is
    /// added to the last piece.
    ///
    /// Every piece holds at least one character, so a single
    /// character wider than `line_width` still ends up in a piece of
    /// its own.
    ///
    /// # Examples
    ///
    /// ```
    /// use textwrap::core::Word;
    /// assert_eq!(
    ///     Word::from("Hello! ").break_apart(3).collect::<Vec<_>>(),
    ///     vec![Word::from("Hel"), Word::from("lo! ")]
    /// );
    /// ```
    pub fn break_apart<'b>(&'b self, line_width: usize) -> impl Iterator<Item = Word<'a>> + 'b {
        let mut chars = self.word.char_indices();
        // Byte offset where the piece under construction starts, and
        // the columns accumulated for it so far.
        let mut piece_start = 0;
        let mut piece_width = 0;

        std::iter::from_fn(move || {
            while let Some((idx, ch)) = chars.next() {
                // Escape sequences stay inside the current piece but
                // contribute nothing to its width.
                if skip_ansi_escape_sequence(ch, &mut chars.by_ref().map(|(_, ch)| ch)) {
                    continue;
                }

                let ch_columns = ch_width(ch);
                if piece_width > 0 && piece_width + ch_columns > line_width {
                    let piece = Word {
                        word: &self.word[piece_start..idx],
                        width: piece_width,
                        whitespace: "",
                        penalty: "",
                    };
                    // `ch` opens the next piece.
                    piece_start = idx;
                    piece_width = ch_columns;
                    return Some(piece);
                }
                piece_width += ch_columns;
            }

            if piece_start >= self.word.len() {
                return None;
            }

            // Whatever is left forms the last piece, which inherits
            // the whitespace and penalty of the original word.
            let last = Word {
                word: &self.word[piece_start..],
                width: piece_width,
                whitespace: self.whitespace,
                penalty: self.penalty,
            };
            piece_start = self.word.len();
            Some(last)
        })
    }
}
impl Fragment for Word<'_> {
    /// The cached column width of the word, as an `f64`.
    #[inline]
    fn width(&self) -> f64 {
        let columns = self.width;
        columns as f64
    }

    // We assume the whitespace consist of ' ' only. This allows us to
    // compute the display width in constant time.
    #[inline]
    fn whitespace_width(&self) -> f64 {
        let bytes = self.whitespace.len();
        bytes as f64
    }

    // We assume the penalty is `""` or `"-"`. This allows us to
    // compute the display width in constant time.
    #[inline]
    fn penalty_width(&self) -> f64 {
        let bytes = self.penalty.len();
        bytes as f64
    }
}
/// Forcibly break words wider than `line_width` into smaller words.
///
/// Words that already fit are passed through untouched. Wider words
/// are handed to [`Word::break_apart`], which means that no extra
/// `'-'` is inserted: the word is simply cut into smaller pieces.
pub fn break_words<'a, I>(words: I, line_width: usize) -> Vec<Word<'a>>
where
    I: IntoIterator<Item = Word<'a>>,
{
    let limit = line_width as f64;
    let mut pieces = Vec::new();
    for candidate in words {
        if candidate.width() <= limit {
            pieces.push(candidate);
            continue;
        }
        pieces.extend(candidate.break_apart(line_width));
    }
    pieces
}
#[cfg(test)]
mod tests {
    use super::*;

    #[cfg(feature = "unicode-width")]
    use unicode_width::UnicodeWidthChar;

    #[test]
    fn skip_ansi_escape_sequence_works() {
        let blue_text = "\u{1b}[34mHello\u{1b}[0m";
        let mut chars = blue_text.chars();
        let ch = chars.next().unwrap();
        assert!(skip_ansi_escape_sequence(ch, &mut chars));
        assert_eq!(chars.next(), Some('H'));
    }

    #[test]
    fn emojis_have_correct_width() {
        use unic_emoji_char::is_emoji;

        // Emojis in the Basic Latin (ASCII) and Latin-1 Supplement
        // blocks all have a width of 1 column. This includes
        // characters such as '#' and '©'.
        for ch in '\u{1}'..'\u{FF}' {
            if is_emoji(ch) {
                let desc = format!("{:?} U+{:04X}", ch, ch as u32);

                #[cfg(feature = "unicode-width")]
                assert_eq!(ch.width().unwrap(), 1, "char: {}", desc);

                #[cfg(not(feature = "unicode-width"))]
                assert_eq!(ch_width(ch), 1, "char: {}", desc);
            }
        }

        // Emojis in the remaining blocks of the Basic Multilingual
        // Plane (BMP), in the Supplementary Multilingual Plane (SMP),
        // and in the Supplementary Ideographic Plane (SIP), are all 1
        // or 2 columns wide when unicode-width is used, and always 2
        // columns wide otherwise. This includes all of our favorite
        // emojis such as 😊.
        for ch in '\u{FF}'..'\u{2FFFF}' {
            if is_emoji(ch) {
                let desc = format!("{:?} U+{:04X}", ch, ch as u32);

                #[cfg(feature = "unicode-width")]
                assert!(ch.width().unwrap() <= 2, "char: {}", desc);

                #[cfg(not(feature = "unicode-width"))]
                assert_eq!(ch_width(ch), 2, "char: {}", desc);
            }
        }

        // The remaining planes contain almost no assigned code points
        // and thus also no emojis.
    }

    #[test]
    fn display_width_works() {
        assert_eq!("Café Plain".len(), 11); // “é” is two bytes
        assert_eq!(display_width("Café Plain"), 10);
        assert_eq!(display_width("\u{1b}[31mCafé Rouge\u{1b}[0m"), 10);
    }

    #[test]
    fn display_width_narrow_emojis() {
        // The string under test is “⁉” (U+2049). It is written as an
        // escape so the test cannot silently degrade into measuring
        // an empty string if the literal character gets lost.
        #[cfg(feature = "unicode-width")]
        assert_eq!(display_width("\u{2049}"), 1);

        // The ⁉ character is above DOUBLE_WIDTH_CUTOFF.
        #[cfg(not(feature = "unicode-width"))]
        assert_eq!(display_width("\u{2049}"), 2);
    }

    #[test]
    fn display_width_narrow_emojis_variant_selector() {
        // “⁉” (U+2049) followed by Variation Selector-16.
        #[cfg(feature = "unicode-width")]
        assert_eq!(display_width("\u{2049}\u{fe0f}"), 1);

        // The variant selector-16 is also counted.
        #[cfg(not(feature = "unicode-width"))]
        assert_eq!(display_width("\u{2049}\u{fe0f}"), 4);
    }

    #[test]
    fn display_width_emojis() {
        assert_eq!(display_width("😂😭🥺🤣✨😍🙏🥰😊🔥"), 20);
    }
}

347
vendor/textwrap/src/indentation.rs vendored Normal file
View File

@ -0,0 +1,347 @@
//! Functions related to adding and removing indentation from lines of
//! text.
//!
//! The functions here can be used to uniformly indent or dedent
//! (unindent) word wrapped lines of text.
/// Indent each line by the given prefix.
///
/// # Examples
///
/// ```
/// use textwrap::indent;
///
/// assert_eq!(indent("First line.\nSecond line.\n", " "),
/// " First line.\n Second line.\n");
/// ```
///
/// When indenting, trailing whitespace is stripped from the prefix.
/// This means that empty lines remain empty afterwards:
///
/// ```
/// use textwrap::indent;
///
/// assert_eq!(indent("First line.\n\n\nSecond line.\n", " "),
/// " First line.\n\n\n Second line.\n");
/// ```
///
/// Notice how `"\n\n\n"` remained as `"\n\n\n"`.
///
/// This feature is useful when you want to indent text and have a
/// space between your prefix and the text. In this case, you _don't_
/// want a trailing space on empty lines:
///
/// ```
/// use textwrap::indent;
///
/// assert_eq!(indent("foo = 123\n\nprint(foo)\n", "# "),
/// "# foo = 123\n#\n# print(foo)\n");
/// ```
///
/// Notice how `"\n\n"` became `"\n#\n"` instead of `"\n# \n"` which
/// would have trailing whitespace.
///
/// Leading and trailing whitespace coming from the text itself is
/// kept unchanged:
///
/// ```
/// use textwrap::indent;
///
/// assert_eq!(indent(" \t Foo ", "->"), "-> \t Foo ");
/// ```
pub fn indent(s: &str, prefix: &str) -> String {
    // The output is certainly longer than `s`, but the exact size
    // depends on the number of lines, which we do not want to count up
    // front. Reserving twice the input saves the first reallocation.
    let mut indented = String::with_capacity(2 * s.len());
    // Blank lines get the prefix without its trailing whitespace so
    // that they do not end in whitespace.
    let bare_prefix = prefix.trim_end();

    let mut first_line = true;
    for line in s.split_terminator('\n') {
        if !first_line {
            indented.push('\n');
        }
        first_line = false;

        let lead = if line.trim().is_empty() {
            bare_prefix
        } else {
            prefix
        };
        indented.push_str(lead);
        indented.push_str(line);
    }

    // `split_terminator` swallows a final '\n', so put it back.
    if s.ends_with('\n') {
        indented.push('\n');
    }
    indented
}
/// Removes common leading whitespace from each line.
///
/// This function will look at each line that contains something
/// other than whitespace and determine the maximum amount of
/// whitespace that can be removed from all of them:
///
/// ```
/// use textwrap::dedent;
///
/// assert_eq!(dedent("
///     1st line
///       2nd line
///     3rd line
/// "), "
/// 1st line
///   2nd line
/// 3rd line
/// ");
/// ```
///
/// Lines that consist solely of whitespace never influence the
/// common prefix and come out as empty lines. Lines are split with
/// [`str::lines`] and re-joined with `'\n'`; a missing final newline
/// in the input is preserved.
pub fn dedent(s: &str) -> String {
    let mut prefix = "";
    let mut lines = s.lines();

    // The leading whitespace of the first line with visible content
    // is our initial candidate for the common prefix.
    for line in &mut lines {
        if let Some(content_idx) = line.find(|ch: char| !ch.is_whitespace()) {
            prefix = &line[..content_idx];
            break;
        }
    }

    // Every following line with visible content may shorten the
    // prefix to the part it shares with it.
    for line in &mut lines {
        // Whitespace-only lines are emitted as empty lines below, so
        // their (possibly different) whitespace must not shorten the
        // prefix shared by the lines that carry content.
        if line.chars().all(char::is_whitespace) {
            continue;
        }

        let mismatch = line
            .char_indices()
            .zip(prefix.chars())
            .find(|&((_, a), b)| a != b)
            .map(|((idx, _), _)| idx);

        // All characters before `idx` are identical in `line` and
        // `prefix`, so `idx` is a valid byte offset in both and the
        // new prefix is strictly shorter than the old one.
        if let Some(idx) = mismatch {
            prefix = &line[..idx];
        }
    }

    // Second pass over the input to build the result.
    let mut result = String::with_capacity(s.len());
    for line in s.lines() {
        if line.starts_with(prefix) && line.chars().any(|c| !c.is_whitespace()) {
            result.push_str(&line[prefix.len()..]);
        }
        result.push('\n');
    }

    // We pushed a '\n' after every line; drop the last one again if
    // the input did not end with a newline.
    if result.ends_with('\n') && !s.ends_with('\n') {
        result.pop();
    }

    result
}
// NOTE(review): the string literals below depend on exact amounts of
// leading whitespace. The amounts were re-established so that every
// expectation agrees with what `indent` and `dedent` compute; please
// compare against the upstream test-suite when in doubt.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn indent_empty() {
        assert_eq!(indent("\n", "  "), "\n");
    }

    #[test]
    #[rustfmt::skip]
    fn indent_nonempty() {
        let text = [
            "  foo\n",
            "bar\n",
            "  baz\n",
        ].join("");
        let expected = [
            "//   foo\n",
            "// bar\n",
            "//   baz\n",
        ].join("");
        assert_eq!(indent(&text, "// "), expected);
    }

    #[test]
    #[rustfmt::skip]
    fn indent_empty_line() {
        let text = [
            "  foo",
            "bar",
            "",
            "  baz",
        ].join("\n");
        let expected = [
            "//   foo",
            "// bar",
            "//",
            "//   baz",
        ].join("\n");
        assert_eq!(indent(&text, "// "), expected);
    }

    #[test]
    fn dedent_empty() {
        assert_eq!(dedent(""), "");
    }

    #[test]
    #[rustfmt::skip]
    fn dedent_multi_line() {
        let x = [
            "    foo",
            "  bar",
            "    baz",
        ].join("\n");
        let y = [
            "  foo",
            "bar",
            "  baz"
        ].join("\n");
        assert_eq!(dedent(&x), y);
    }

    #[test]
    #[rustfmt::skip]
    fn dedent_empty_line() {
        let x = [
            "    foo",
            "  bar",
            "   ",
            "    baz"
        ].join("\n");
        let y = [
            "  foo",
            "bar",
            "",
            "  baz"
        ].join("\n");
        assert_eq!(dedent(&x), y);
    }

    #[test]
    #[rustfmt::skip]
    fn dedent_blank_line() {
        let x = [
            "      foo",
            "",
            "        bar",
            "          foo",
            "        bar",
            "        baz",
        ].join("\n");
        let y = [
            "foo",
            "",
            "  bar",
            "    foo",
            "  bar",
            "  baz",
        ].join("\n");
        assert_eq!(dedent(&x), y);
    }

    #[test]
    #[rustfmt::skip]
    fn dedent_whitespace_line() {
        let x = [
            "      foo",
            " ",
            "        bar",
            "          foo",
            "        bar",
            "        baz",
        ].join("\n");
        let y = [
            "foo",
            "",
            "  bar",
            "    foo",
            "  bar",
            "  baz",
        ].join("\n");
        assert_eq!(dedent(&x), y);
    }

    #[test]
    #[rustfmt::skip]
    fn dedent_mixed_whitespace() {
        let x = [
            "\tfoo",
            "  bar",
        ].join("\n");
        let y = [
            "\tfoo",
            "  bar",
        ].join("\n");
        assert_eq!(dedent(&x), y);
    }

    #[test]
    #[rustfmt::skip]
    fn dedent_tabbed_whitespace() {
        let x = [
            "\t\tfoo",
            "\t\t\tbar",
        ].join("\n");
        let y = [
            "foo",
            "\tbar",
        ].join("\n");
        assert_eq!(dedent(&x), y);
    }

    #[test]
    #[rustfmt::skip]
    fn dedent_mixed_tabbed_whitespace() {
        let x = [
            "\t  \tfoo",
            "\t  \t\tbar",
        ].join("\n");
        let y = [
            "foo",
            "\tbar",
        ].join("\n");
        assert_eq!(dedent(&x), y);
    }

    #[test]
    #[rustfmt::skip]
    fn dedent_mixed_tabbed_whitespace2() {
        let x = [
            "\t  \tfoo",
            "\t    \tbar",
        ].join("\n");
        let y = [
            "\tfoo",
            "  \tbar",
        ].join("\n");
        assert_eq!(dedent(&x), y);
    }

    #[test]
    #[rustfmt::skip]
    fn dedent_preserve_no_terminating_newline() {
        let x = [
            "  foo",
            "    bar",
        ].join("\n");
        let y = [
            "foo",
            "  bar",
        ].join("\n");
        assert_eq!(dedent(&x), y);
    }
}

1847
vendor/textwrap/src/lib.rs vendored Normal file

File diff suppressed because it is too large Load Diff

428
vendor/textwrap/src/word_separators.rs vendored Normal file
View File

@ -0,0 +1,428 @@
//! Functionality for finding words.
//!
//! In order to wrap text, we need to know where the legal break
//! points are, i.e., where the words of the text are. This means that
//! we need to define what a "word" is.
//!
//! A simple approach is to simply split the text on whitespace, but
//! this does not work for East-Asian languages such as Chinese or
//! Japanese where there are no spaces between words. Breaking a long
//! sequence of emojis is another example where line breaks might be
//! wanted even if there is no whitespace to be found.
//!
//! The [`WordSeparator`] enum is responsible for determining where
//! the words are in a line of text. Please refer to the enum and
//! its variants for more information.
#[cfg(feature = "unicode-linebreak")]
use crate::core::skip_ansi_escape_sequence;
use crate::core::Word;
/// Describes where words occur in a line of text.
///
/// The simplest approach is to say that words are separated by one or
/// more ASCII spaces (`' '`). This works for Western languages
/// without emojis. A more complex approach is to use the Unicode line
/// breaking algorithm, which finds break points in non-ASCII text.
///
/// The line breaks occur between words, please see
/// [`WordSplitter`](crate::WordSplitter) for options of how to handle
/// hyphenation of individual words.
///
/// # Examples
///
/// ```
/// use textwrap::core::Word;
/// use textwrap::WordSeparator::AsciiSpace;
///
/// let words = AsciiSpace.find_words("Hello World!").collect::<Vec<_>>();
/// assert_eq!(words, vec![Word::from("Hello "), Word::from("World!")]);
/// ```
#[derive(Clone, Copy)]
pub enum WordSeparator {
/// Find words by splitting on runs of `' '` characters.
///
/// # Examples
///
/// ```
/// use textwrap::core::Word;
/// use textwrap::WordSeparator::AsciiSpace;
///
/// let words = AsciiSpace.find_words("Hello World!").collect::<Vec<_>>();
/// assert_eq!(words, vec![Word::from("Hello "),
/// Word::from("World!")]);
/// ```
AsciiSpace,
/// Split `line` into words using Unicode break properties.
///
/// This word separator uses the Unicode line breaking algorithm
/// described in [Unicode Standard Annex
/// #14](https://www.unicode.org/reports/tr14/) to find legal places
/// to break lines. There is a small difference in that the U+002D
/// (Hyphen-Minus) and U+00AD (Soft Hyphen) don't create a line break:
/// to allow a line break at a hyphen, use
/// [`WordSplitter::HyphenSplitter`](crate::WordSplitter::HyphenSplitter).
/// Soft hyphens are not currently supported.
///
/// # Examples
///
/// Unlike [`WordSeparator::AsciiSpace`], the Unicode line
/// breaking algorithm will find line break opportunities between
/// some characters with no intervening whitespace:
///
/// ```
/// #[cfg(feature = "unicode-linebreak")] {
/// use textwrap::core::Word;
/// use textwrap::WordSeparator::UnicodeBreakProperties;
///
/// assert_eq!(UnicodeBreakProperties.find_words("Emojis: 😂😍").collect::<Vec<_>>(),
/// vec![Word::from("Emojis: "),
/// Word::from("😂"),
/// Word::from("😍")]);
///
/// assert_eq!(UnicodeBreakProperties.find_words("CJK: 你好").collect::<Vec<_>>(),
/// vec![Word::from("CJK: "),
/// Word::from("你"),
/// Word::from("好")]);
/// }
/// ```
///
/// A U+2060 (Word Joiner) character can be inserted if you want to
/// manually override the defaults and keep the characters together:
///
/// ```
/// #[cfg(feature = "unicode-linebreak")] {
/// use textwrap::core::Word;
/// use textwrap::WordSeparator::UnicodeBreakProperties;
///
/// assert_eq!(UnicodeBreakProperties.find_words("Emojis: 😂\u{2060}😍").collect::<Vec<_>>(),
/// vec![Word::from("Emojis: "),
/// Word::from("😂\u{2060}😍")]);
/// }
/// ```
///
/// The Unicode line breaking algorithm will also automatically
/// suppress line breaks around certain punctuation characters:
///
/// ```
/// #[cfg(feature = "unicode-linebreak")] {
/// use textwrap::core::Word;
/// use textwrap::WordSeparator::UnicodeBreakProperties;
///
/// assert_eq!(UnicodeBreakProperties.find_words("[ foo ] bar !").collect::<Vec<_>>(),
/// vec![Word::from("[ foo ] "),
/// Word::from("bar !")]);
/// }
/// ```
#[cfg(feature = "unicode-linebreak")]
UnicodeBreakProperties,
/// Find words using a custom word separator.
///
/// The function receives the line and returns a boxed iterator
/// over the [`Word`]s it found; the words borrow from the line.
Custom(fn(line: &str) -> Box<dyn Iterator<Item = Word<'_>> + '_>),
}
/// Manual `Debug` implementation which prints the variant name. The
/// function pointer held by `Custom` is shown as `...`.
impl std::fmt::Debug for WordSeparator {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match self {
            WordSeparator::AsciiSpace => "AsciiSpace",
            #[cfg(feature = "unicode-linebreak")]
            WordSeparator::UnicodeBreakProperties => "UnicodeBreakProperties",
            WordSeparator::Custom(_) => "Custom(...)",
        };
        f.write_str(label)
    }
}
impl WordSeparator {
    // This function should really return impl Iterator<Item = Word>, but
    // this isn't possible until Rust supports higher-kinded types:
    // https://github.com/rust-lang/rfcs/blob/master/text/1522-conservative-impl-trait.md
    /// Find all words in `line`.
    ///
    /// Dispatches to the strategy selected by this variant. The
    /// returned words borrow from `line`.
    pub fn find_words<'a>(&self, line: &'a str) -> Box<dyn Iterator<Item = Word<'a>> + 'a> {
        match *self {
            Self::AsciiSpace => find_words_ascii_space(line),
            #[cfg(feature = "unicode-linebreak")]
            Self::UnicodeBreakProperties => find_words_unicode_break_properties(line),
            Self::Custom(separator) => separator(line),
        }
    }
}
/// Split `line` into words at runs of `' '`. Each word keeps the
/// spaces that follow it; leading spaces form a word of their own.
fn find_words_ascii_space<'a>(line: &'a str) -> Box<dyn Iterator<Item = Word<'a>> + 'a> {
    let mut word_start = 0;
    let mut prev_was_space = false;
    let mut chars = line.char_indices();

    Box::new(std::iter::from_fn(move || {
        // `by_ref` lets us resume the same iterator on the next call
        // instead of moving it out of the closure.
        for (idx, ch) in chars.by_ref() {
            let is_space = ch == ' ';
            // A non-space right after a space starts a new word, so
            // everything up to here is handed out as one word.
            if prev_was_space && !is_space {
                let word = Word::from(&line[word_start..idx]);
                word_start = idx;
                prev_was_space = false;
                return Some(word);
            }
            prev_was_space = is_space;
        }

        // Input exhausted: emit the remaining text once, if any.
        if word_start >= line.len() {
            return None;
        }
        let tail = Word::from(&line[word_start..]);
        word_start = line.len();
        Some(tail)
    }))
}
// Return a copy of `text` with all ANSI escape sequences recognized
// by `skip_ansi_escape_sequence` removed.
#[cfg(feature = "unicode-linebreak")]
fn strip_ansi_escape_sequences(text: &str) -> String {
    let mut stripped = String::with_capacity(text.len());
    let mut rest = text.chars();
    // The helper consumes the remainder of a sequence from `rest`, so
    // the iterator has to be driven by hand.
    while let Some(ch) = rest.next() {
        if !skip_ansi_escape_sequence(ch, &mut rest) {
            stripped.push(ch);
        }
    }
    stripped
}
/// Soft hyphen, also known as a “shy hyphen”. Should show up as ‘-’
/// if a line is broken at this point, and otherwise be invisible.
/// Textwrap does not currently support breaking words at soft
/// hyphens.
#[cfg(feature = "unicode-linebreak")]
const SHY: char = '\u{00ad}';
/// Find words in line. ANSI escape sequences are ignored in `line`.
///
/// Break opportunities are computed by `unicode_linebreak::linebreaks`
/// on a copy of `line` with the escape sequences stripped, and are
/// then translated back to byte offsets in `line`, so the returned
/// words are slices of the original text, escape sequences included.
/// Opportunities directly after `'-'` or a soft hyphen are dropped.
#[cfg(feature = "unicode-linebreak")]
fn find_words_unicode_break_properties<'a>(
line: &'a str,
) -> Box<dyn Iterator<Item = Word<'a>> + 'a> {
// Construct an iterator over (original index, stripped index)
// tuples. We find the Unicode linebreaks on a stripped string,
// but we need the original indices so we can form words based on
// the original string.
let mut last_stripped_idx = 0;
let mut char_indices = line.char_indices();
let mut idx_map = std::iter::from_fn(move || match char_indices.next() {
Some((orig_idx, ch)) => {
let stripped_idx = last_stripped_idx;
// The stripped index only advances for characters that are kept.
// The start of an escape sequence therefore gets the same
// stripped index as the first kept character after it.
if !skip_ansi_escape_sequence(ch, &mut char_indices.by_ref().map(|(_, ch)| ch)) {
last_stripped_idx += ch.len_utf8();
}
Some((orig_idx, stripped_idx))
}
None => None,
});
let stripped = strip_ansi_escape_sequences(line);
let mut opportunities = unicode_linebreak::linebreaks(&stripped)
.filter(|(idx, _)| {
// Look at the character right before the break opportunity.
#[allow(clippy::match_like_matches_macro)]
match &stripped[..*idx].chars().next_back() {
// We suppress breaks at ‘-’ since we want to control
// this via the WordSplitter.
Some('-') => false,
// Soft hyphens are currently not supported since we
// require all `Word` fragments to be continuous in
// the input string.
Some(SHY) => false,
// Other breaks should be fine!
_ => true,
}
})
// Materialize the filtered opportunities; the resulting
// `vec::IntoIter` supports the `next_back` call below.
.collect::<Vec<_>>()
.into_iter();
// Remove final break opportunity, we will add it below using
// &line[start..]; This ensures that we correctly include a
// trailing ANSI escape sequence.
opportunities.next_back();
let mut start = 0;
Box::new(std::iter::from_fn(move || {
#[allow(clippy::while_let_on_iterator)]
while let Some((idx, _)) = opportunities.next() {
// `idx_map` is only ever advanced, never restarted: `find` takes
// the first remaining entry whose stripped index equals the break
// position and yields the matching offset in `line`.
if let Some((orig_idx, _)) = idx_map.find(|&(_, stripped_idx)| stripped_idx == idx) {
let word = Word::from(&line[start..orig_idx]);
start = orig_idx;
return Some(word);
}
}
// No opportunities left: the rest of the line is the last word.
if start < line.len() {
let word = Word::from(&line[start..]);
start = line.len();
return Some(word);
}
None
}))
}
// Tests for the word separators. Most cases are generated by the
// `test_find_words!` macro below, which checks the same input line
// against both `AsciiSpace` and `UnicodeBreakProperties`.
#[cfg(test)]
mod tests {
use super::WordSeparator::*;
use super::*;
// Like assert_eq!, but the left expression is an iterator.
macro_rules! assert_iter_eq {
($left:expr, $right:expr) => {
assert_eq!($left.collect::<Vec<_>>(), $right);
};
}
// Turn the expected strings into `Word`s via `Word::from`.
fn to_words<'a>(words: Vec<&'a str>) -> Vec<Word<'a>> {
words.into_iter().map(|w: &str| Word::from(&w)).collect()
}
// Generate two tests, `$ascii_name` and `$unicode_name`, from a list
// of `[line, expected ASCII words, expected Unicode words]` triples.
// The Unicode test is only compiled with the `unicode-linebreak`
// feature.
macro_rules! test_find_words {
($ascii_name:ident,
$unicode_name:ident,
$([ $line:expr, $ascii_words:expr, $unicode_words:expr ]),+) => {
#[test]
fn $ascii_name() {
$(
let expected_words = to_words($ascii_words.to_vec());
let actual_words = WordSeparator::AsciiSpace
.find_words($line)
.collect::<Vec<_>>();
assert_eq!(actual_words, expected_words, "Line: {:?}", $line);
)+
}
#[test]
#[cfg(feature = "unicode-linebreak")]
fn $unicode_name() {
$(
let expected_words = to_words($unicode_words.to_vec());
let actual_words = WordSeparator::UnicodeBreakProperties
.find_words($line)
.collect::<Vec<_>>();
assert_eq!(actual_words, expected_words, "Line: {:?}", $line);
)+
}
};
}
test_find_words!(ascii_space_empty, unicode_empty, ["", [], []]);
test_find_words!(
ascii_single_word,
unicode_single_word,
["foo", ["foo"], ["foo"]]
);
test_find_words!(
ascii_two_words,
unicode_two_words,
["foo bar", ["foo ", "bar"], ["foo ", "bar"]]
);
test_find_words!(
ascii_multiple_words,
unicode_multiple_words,
["foo bar", ["foo ", "bar"], ["foo ", "bar"]],
["x y z", ["x ", "y ", "z"], ["x ", "y ", "z"]]
);
// NOTE(review): both rows below are identical, and the following
// whitespace tests only use single spaces. Presumably runs of spaces
// in these literals were collapsed at some point; verify against
// upstream.
test_find_words!(
ascii_only_whitespace,
unicode_only_whitespace,
[" ", [" "], [" "]],
[" ", [" "], [" "]]
);
test_find_words!(
ascii_inter_word_whitespace,
unicode_inter_word_whitespace,
["foo bar", ["foo ", "bar"], ["foo ", "bar"]]
);
test_find_words!(
ascii_trailing_whitespace,
unicode_trailing_whitespace,
["foo ", ["foo "], ["foo "]]
);
test_find_words!(
ascii_leading_whitespace,
unicode_leading_whitespace,
[" foo", [" ", "foo"], [" ", "foo"]]
);
test_find_words!(
ascii_multi_column_char,
unicode_multi_column_char,
["\u{1f920}", ["\u{1f920}"], ["\u{1f920}"]] // cowboy emoji 🤠
);
test_find_words!(
ascii_hyphens,
unicode_hyphens,
["foo-bar", ["foo-bar"], ["foo-bar"]],
["foo- bar", ["foo- ", "bar"], ["foo- ", "bar"]],
["foo - bar", ["foo ", "- ", "bar"], ["foo ", "- ", "bar"]],
["foo -bar", ["foo ", "-bar"], ["foo ", "-bar"]]
);
// Newlines and tabs are not ASCII spaces, so only the Unicode
// separator is expected to break after them.
test_find_words!(
ascii_newline,
unicode_newline,
["foo\nbar", ["foo\nbar"], ["foo\n", "bar"]]
);
test_find_words!(
ascii_tab,
unicode_tab,
["foo\tbar", ["foo\tbar"], ["foo\t", "bar"]]
);
test_find_words!(
ascii_non_breaking_space,
unicode_non_breaking_space,
["foo\u{00A0}bar", ["foo\u{00A0}bar"], ["foo\u{00A0}bar"]]
);
// Escape sequences produced by `termion` must stay attached to the
// word they surround.
#[test]
#[cfg(unix)]
fn find_words_colored_text() {
use termion::color::{Blue, Fg, Green, Reset};
let green_hello = format!("{}Hello{} ", Fg(Green), Fg(Reset));
let blue_world = format!("{}World!{}", Fg(Blue), Fg(Reset));
assert_iter_eq!(
AsciiSpace.find_words(&format!("{}{}", green_hello, blue_world)),
vec![Word::from(&green_hello), Word::from(&blue_world)]
);
#[cfg(feature = "unicode-linebreak")]
assert_iter_eq!(
UnicodeBreakProperties.find_words(&format!("{}{}", green_hello, blue_world)),
vec![Word::from(&green_hello), Word::from(&blue_world)]
);
}
// Escape sequences in the middle of a word must not split it.
#[test]
fn find_words_color_inside_word() {
let text = "foo\u{1b}[0m\u{1b}[32mbar\u{1b}[0mbaz";
assert_iter_eq!(AsciiSpace.find_words(&text), vec![Word::from(text)]);
#[cfg(feature = "unicode-linebreak")]
assert_iter_eq!(
UnicodeBreakProperties.find_words(&text),
vec![Word::from(text)]
);
}
}

314
vendor/textwrap/src/word_splitters.rs vendored Normal file
View File

@ -0,0 +1,314 @@
//! Word splitting functionality.
//!
//! To wrap text into lines, long words sometimes need to be split
//! across lines. The [`WordSplitter`] enum defines this
//! functionality.
use crate::core::{display_width, Word};
/// The `WordSplitter` enum describes where words can be split.
///
/// If the textwrap crate has been compiled with the `hyphenation`
/// Cargo feature enabled, you will find a
/// [`WordSplitter::Hyphenation`] variant. Use this variant for
/// language-aware hyphenation:
///
/// ```
/// #[cfg(feature = "hyphenation")] {
/// use hyphenation::{Language, Load, Standard};
/// use textwrap::{wrap, Options, WordSplitter};
///
/// let text = "Oxidation is the loss of electrons.";
/// let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap();
/// let options = Options::new(8).word_splitter(WordSplitter::Hyphenation(dictionary));
/// assert_eq!(wrap(text, &options), vec!["Oxida-",
/// "tion is",
/// "the loss",
/// "of elec-",
/// "trons."]);
/// }
/// ```
///
/// Please see the documentation for the [hyphenation] crate for more
/// details.
///
/// [hyphenation]: https://docs.rs/hyphenation/
#[derive(Clone)]
pub enum WordSplitter {
/// Use this as a [`Options.word_splitter`] to avoid any kind of
/// hyphenation:
///
/// ```
/// use textwrap::{wrap, Options, WordSplitter};
///
/// let options = Options::new(8).word_splitter(WordSplitter::NoHyphenation);
/// assert_eq!(wrap("foo bar-baz", &options),
/// vec!["foo", "bar-baz"]);
/// ```
///
/// [`Options.word_splitter`]: super::Options::word_splitter
NoHyphenation,
/// `HyphenSplitter` is the default `WordSplitter` used by
/// [`Options::new`](super::Options::new). It will split words on
/// existing hyphens in the word.
///
/// It will only use hyphens that are surrounded by alphanumeric
/// characters, which prevents a word like `"--foo-bar"` from
/// being split into `"--"` and `"foo-bar"`.
///
/// # Examples
///
/// ```
/// use textwrap::WordSplitter;
///
/// assert_eq!(WordSplitter::HyphenSplitter.split_points("--foo-bar"),
/// vec![6]);
/// ```
HyphenSplitter,
/// Use a custom function as the word splitter.
///
/// This variant lets you implement a custom word splitter using
/// your own function. The function receives the word and returns
/// the byte indices at which the word may be split.
///
/// Note that two `Custom` splitters never compare equal (see the
/// `PartialEq` implementation), and that the `Debug` output is
/// always `Custom(...)`.
///
/// # Examples
///
/// ```
/// use textwrap::WordSplitter;
///
/// fn split_at_underscore(word: &str) -> Vec<usize> {
/// word.match_indices('_').map(|(idx, _)| idx + 1).collect()
/// }
///
/// let word_splitter = WordSplitter::Custom(split_at_underscore);
/// assert_eq!(word_splitter.split_points("a_long_identifier"),
/// vec![2, 7]);
/// ```
Custom(fn(word: &str) -> Vec<usize>),
/// A hyphenation dictionary can be used to do language-specific
/// hyphenation using patterns from the [hyphenation] crate.
///
/// **Note:** Only available when the `hyphenation` Cargo feature is
/// enabled.
///
/// [hyphenation]: https://docs.rs/hyphenation/
#[cfg(feature = "hyphenation")]
Hyphenation(hyphenation::Standard),
}
impl std::fmt::Debug for WordSplitter {
    /// Write the variant name. The function pointer of a `Custom`
    /// splitter is elided as `Custom(...)`, and a hyphenation
    /// dictionary is identified by its language only.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match self {
            WordSplitter::NoHyphenation => "NoHyphenation",
            WordSplitter::HyphenSplitter => "HyphenSplitter",
            WordSplitter::Custom(_) => "Custom(...)",
            #[cfg(feature = "hyphenation")]
            WordSplitter::Hyphenation(dict) => {
                // The only variant which needs real formatting.
                return write!(f, "Hyphenation({})", dict.language());
            }
        };
        f.write_str(label)
    }
}
impl PartialEq<WordSplitter> for WordSplitter {
    /// Compare two word splitters.
    ///
    /// The unit variants are equal to themselves, hyphenation
    /// dictionaries are compared by language, and a `Custom` splitter
    /// is never equal to anything (not even to itself).
    fn eq(&self, other: &WordSplitter) -> bool {
        match self {
            WordSplitter::NoHyphenation => matches!(other, WordSplitter::NoHyphenation),
            WordSplitter::HyphenSplitter => matches!(other, WordSplitter::HyphenSplitter),
            // Function pointers are deliberately not compared.
            WordSplitter::Custom(_) => false,
            #[cfg(feature = "hyphenation")]
            WordSplitter::Hyphenation(this_dict) => match other {
                WordSplitter::Hyphenation(other_dict) => {
                    this_dict.language() == other_dict.language()
                }
                _ => false,
            },
        }
    }
}
impl WordSplitter {
    /// Compute the byte indices at which `word` may be split.
    ///
    /// The built-in splitters return indices in the range
    /// `0..word.len()`. Each index points just _past_ the split
    /// point (e.g., past the `-` when splitting on hyphens), which
    /// means that `word.split_at(idx)` yields two well-formed pieces.
    /// For [`WordSplitter::Custom`], the result of the custom function
    /// is returned as-is.
    ///
    /// # Examples
    ///
    /// ```
    /// use textwrap::WordSplitter;
    /// assert_eq!(WordSplitter::NoHyphenation.split_points("cannot-be-split"), vec![]);
    /// assert_eq!(WordSplitter::HyphenSplitter.split_points("can-be-split"), vec![4, 7]);
    /// assert_eq!(WordSplitter::Custom(|word| vec![word.len()/2]).split_points("middle"), vec![3]);
    /// ```
    pub fn split_points(&self, word: &str) -> Vec<usize> {
        match self {
            WordSplitter::NoHyphenation => Vec::new(),
            WordSplitter::HyphenSplitter => word
                .match_indices('-')
                .filter(|&(idx, _)| {
                    // Only hyphens with an alphanumeric character on
                    // both sides qualify. This rules out leading,
                    // trailing and repeated hyphens such as the ones
                    // in `--foo-bar`.
                    let before = word[..idx].chars().next_back();
                    let after = word[idx + 1..].chars().next();
                    matches!(before, Some(ch) if ch.is_alphanumeric())
                        && matches!(after, Some(ch) if ch.is_alphanumeric())
                })
                // Step over the hyphen itself ('-' is one byte wide).
                .map(|(idx, _)| idx + 1)
                .collect(),
            WordSplitter::Custom(splitter_func) => splitter_func(word),
            #[cfg(feature = "hyphenation")]
            WordSplitter::Hyphenation(dictionary) => {
                use hyphenation::Hyphenator;
                dictionary.hyphenate(word).breaks
            }
        }
    }
}
/// Break every word into smaller words at the split points reported
/// by `word_splitter`.
///
/// All words are split, no matter how long or short they are. This
/// keeps the business of splitting (including automatic hyphenation)
/// cleanly separated from the business of word wrapping.
///
/// Every piece except the last one gets empty whitespace and a `"-"`
/// penalty (or an empty penalty if the piece already ends with a
/// hyphen). The last piece inherits the whitespace and penalty of the
/// original word.
pub fn split_words<'a, I>(
    words: I,
    word_splitter: &'a WordSplitter,
) -> impl Iterator<Item = Word<'a>>
where
    I: IntoIterator<Item = Word<'a>>,
{
    words.into_iter().flat_map(move |word| {
        // Byte offset where the next piece begins.
        let mut start = 0;
        let mut remaining_splits = word_splitter.split_points(&word).into_iter();

        std::iter::from_fn(move || match remaining_splits.next() {
            Some(end) => {
                // Only add a hyphen if the text up to the split point
                // does not already end with one.
                let penalty = if word[..end].ends_with('-') { "" } else { "-" };
                let piece = Word {
                    word: &word.word[start..end],
                    width: display_width(&word[start..end]),
                    whitespace: "",
                    penalty,
                };
                start = end;
                Some(piece)
            }
            // Emit the remainder of the word. The `start == 0` case
            // makes sure an empty word is still passed through once.
            None if start < word.word.len() || start == 0 => {
                let tail = Word {
                    word: &word.word[start..],
                    width: display_width(&word[start..]),
                    whitespace: word.whitespace,
                    penalty: word.penalty,
                };
                // Move past the end so that nothing more is emitted.
                start = word.word.len() + 1;
                Some(tail)
            }
            None => None,
        })
    })
}
// Unit tests for `split_words`.
#[cfg(test)]
mod tests {
use super::*;
// Like assert_eq!, but the left expression is an iterator.
macro_rules! assert_iter_eq {
($left:expr, $right:expr) => {
assert_eq!($left.collect::<Vec<_>>(), $right);
};
}
// No input words, no output words.
#[test]
fn split_words_no_words() {
assert_iter_eq!(split_words(vec![], &WordSplitter::HyphenSplitter), vec![]);
}
// A word made from a single space has no split points and must be
// passed through unchanged.
#[test]
fn split_words_empty_word() {
assert_iter_eq!(
split_words(vec![Word::from(" ")], &WordSplitter::HyphenSplitter),
vec![Word::from(" ")]
);
}
// Without any hyphen, the word is returned as one piece.
#[test]
fn split_words_single_word() {
assert_iter_eq!(
split_words(vec![Word::from("foobar")], &WordSplitter::HyphenSplitter),
vec![Word::from("foobar")]
);
}
// The hyphen stays with the first piece.
#[test]
fn split_words_hyphen_splitter() {
assert_iter_eq!(
split_words(vec![Word::from("foo-bar")], &WordSplitter::HyphenSplitter),
vec![Word::from("foo-"), Word::from("bar")]
);
}
// `NoHyphenation` never splits, even at an existing hyphen.
#[test]
fn split_words_no_hyphenation() {
assert_iter_eq!(
split_words(vec![Word::from("foo-bar")], &WordSplitter::NoHyphenation),
vec![Word::from("foo-bar")]
);
}
// A piece created by a split gets a "-" penalty, unless the piece
// already ends with a hyphen.
#[test]
fn split_words_adds_penalty() {
// Custom splitter which always splits after the third byte.
let fixed_split_point = |_: &str| vec![3];
assert_iter_eq!(
split_words(
vec![Word::from("foobar")].into_iter(),
&WordSplitter::Custom(fixed_split_point)
),
vec![
Word {
word: "foo",
width: 3,
whitespace: "",
penalty: "-"
},
Word {
word: "bar",
width: 3,
whitespace: "",
penalty: ""
}
]
);
// "fo-" already ends with a hyphen, so no penalty is added.
assert_iter_eq!(
split_words(
vec![Word::from("fo-bar")].into_iter(),
&WordSplitter::Custom(fixed_split_point)
),
vec![
Word {
word: "fo-",
width: 3,
whitespace: "",
penalty: ""
},
Word {
word: "bar",
width: 3,
whitespace: "",
penalty: ""
}
]
);
}
}

381
vendor/textwrap/src/wrap_algorithms.rs vendored Normal file
View File

@ -0,0 +1,381 @@
//! Word wrapping algorithms.
//!
//! After a text has been broken into words (or [`Fragment`]s), one
//! now has to decide how to break the fragments into lines. The
//! simplest algorithm for this is implemented by [`wrap_first_fit`]:
//! it uses no look-ahead and simply adds fragments to the line as
//! long as they fit. However, this can lead to poor line breaks if a
//! large fragment almost-but-not-quite fits on a line. When that
//! happens, the fragment is moved to the next line and it will leave
//! behind a large gap. A more advanced algorithm, implemented by
//! [`wrap_optimal_fit`], will take this into account. The optimal-fit
//! algorithm considers all possible line breaks and will attempt to
//! minimize the gaps left behind by overly short lines.
//!
//! While both algorithms run in linear time, the first-fit algorithm
//! is about 4 times faster than the optimal-fit algorithm.
#[cfg(feature = "smawk")]
mod optimal_fit;
#[cfg(feature = "smawk")]
pub use optimal_fit::{wrap_optimal_fit, OverflowError, Penalties};
use crate::core::{Fragment, Word};
/// Describes how to wrap words into lines.
///
/// The simplest approach is to wrap words one word at a time and
/// accept the first way of wrapping that fits
/// ([`WrapAlgorithm::FirstFit`]). If the `smawk` Cargo feature is
/// enabled, a more complex algorithm is available which will look at
/// an entire paragraph at a time in order to find optimal line breaks
/// ([`WrapAlgorithm::OptimalFit`]).
#[derive(Clone, Copy)]
pub enum WrapAlgorithm {
/// Wrap words using a fast and simple algorithm.
///
/// This algorithm uses no look-ahead when finding line breaks.
/// Implemented by [`wrap_first_fit`], please see that function for
/// details and examples.
FirstFit,
/// Wrap words using an advanced algorithm with look-ahead.
///
/// This wrapping algorithm considers the entire paragraph to find
/// optimal line breaks. When wrapping text, "penalties" are
/// assigned to line breaks based on the gaps left at the end of
/// lines. See [`Penalties`] for details.
///
/// The underlying wrapping algorithm is implemented by
/// [`wrap_optimal_fit`], please see that function for examples.
///
/// **Note:** Only available when the `smawk` Cargo feature is
/// enabled.
#[cfg(feature = "smawk")]
OptimalFit(Penalties),
/// Custom wrapping function.
///
/// Use this if you want to implement your own wrapping algorithm.
/// The function can freely decide how to turn a slice of
/// [`Word`]s into lines. It receives the words together with the
/// target line widths.
///
/// # Example
///
/// ```
/// use textwrap::core::Word;
/// use textwrap::{wrap, Options, WrapAlgorithm};
///
/// fn stair<'a, 'b>(words: &'b [Word<'a>], _: &'b [usize]) -> Vec<&'b [Word<'a>]> {
/// let mut lines = Vec::new();
/// let mut step = 1;
/// let mut start_idx = 0;
/// while start_idx + step <= words.len() {
/// lines.push(&words[start_idx .. start_idx+step]);
/// start_idx += step;
/// step += 1;
/// }
/// lines
/// }
///
/// let options = Options::new(10).wrap_algorithm(WrapAlgorithm::Custom(stair));
/// assert_eq!(wrap("First, second, third, fourth, fifth, sixth", options),
/// vec!["First,",
/// "second, third,",
/// "fourth, fifth, sixth"]);
/// ```
Custom(for<'a, 'b> fn(words: &'b [Word<'a>], line_widths: &'b [usize]) -> Vec<&'b [Word<'a>]>),
}
impl std::fmt::Debug for WrapAlgorithm {
    /// Write the variant name. The penalties of `OptimalFit` are shown
    /// in full, whereas the function pointer of `Custom` is elided as
    /// `Custom(...)`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match self {
            WrapAlgorithm::FirstFit => "FirstFit",
            #[cfg(feature = "smawk")]
            WrapAlgorithm::OptimalFit(penalties) => {
                // The only variant which needs real formatting.
                return write!(f, "OptimalFit({:?})", penalties);
            }
            WrapAlgorithm::Custom(_) => "Custom(...)",
        };
        f.write_str(label)
    }
}
impl WrapAlgorithm {
    /// Create new wrap algorithm.
    ///
    /// The best wrapping algorithm is used by default, i.e.,
    /// [`WrapAlgorithm::OptimalFit`] if available, otherwise
    /// [`WrapAlgorithm::FirstFit`].
    pub const fn new() -> Self {
        #[cfg(not(feature = "smawk"))]
        {
            WrapAlgorithm::FirstFit
        }

        #[cfg(feature = "smawk")]
        {
            WrapAlgorithm::new_optimal_fit()
        }
    }

    /// New [`WrapAlgorithm::OptimalFit`] with default penalties. This
    /// works well for monospace text.
    ///
    /// **Note:** Only available when the `smawk` Cargo feature is
    /// enabled.
    #[cfg(feature = "smawk")]
    pub const fn new_optimal_fit() -> Self {
        WrapAlgorithm::OptimalFit(Penalties::new())
    }

    /// Wrap words according to line widths.
    ///
    /// The `line_widths` slice gives the target line width for each
    /// line (the last slice element is repeated as necessary). This
    /// can be used to implement hanging indentation.
    ///
    /// The built-in algorithms work on `f64` widths, so the line
    /// widths are converted for them. A [`WrapAlgorithm::Custom`]
    /// function receives `line_widths` untouched.
    ///
    /// # Panics
    ///
    /// The `OptimalFit` arm unwraps the result of `wrap_optimal_fit`.
    /// This is not expected to fail since line widths are restricted
    /// to `usize` here.
    #[inline]
    pub fn wrap<'a, 'b>(
        &self,
        words: &'b [Word<'a>],
        line_widths: &'b [usize],
    ) -> Vec<&'b [Word<'a>]> {
        // Every integer up to 2u64.pow(f64::MANTISSA_DIGITS) = 2**53
        // = 9_007_199_254_740_992 can be represented without loss by
        // a f64. Larger line widths will be rounded to the nearest
        // representable number.
        //
        // The conversion allocates, so it is only done in the arms
        // which actually need `f64` widths (the custom function takes
        // the `usize` widths directly).
        let as_f64 = |widths: &[usize]| -> Vec<f64> { widths.iter().map(|&w| w as f64).collect() };

        match self {
            WrapAlgorithm::FirstFit => {
                let f64_line_widths = as_f64(line_widths);
                wrap_first_fit(words, &f64_line_widths)
            }
            #[cfg(feature = "smawk")]
            WrapAlgorithm::OptimalFit(penalties) => {
                let f64_line_widths = as_f64(line_widths);
                // The computation cannot overflow when the line
                // widths are restricted to usize.
                wrap_optimal_fit(words, &f64_line_widths, penalties).unwrap()
            }
            WrapAlgorithm::Custom(func) => func(words, line_widths),
        }
    }
}
impl Default for WrapAlgorithm {
fn default() -> Self {
WrapAlgorithm::new()
}
}
/// Wrap abstract fragments into lines with a first-fit algorithm.
///
/// The `line_widths` slice gives the target line width for each line
/// (the last slice element is repeated as necessary). This can be
/// used to implement hanging indentation.
///
/// The fragments must already have been split into the desired
/// widths, this function will not (and cannot) attempt to split them
/// further when arranging them into lines.
///
/// # First-Fit Algorithm
///
/// This implements a simple “greedy” algorithm: accumulate fragments
/// one by one and when a fragment no longer fits, start a new line.
/// There is no look-ahead, we simply take first fit of the fragments
/// we find.
///
/// While fast and predictable, this algorithm can produce poor line
/// breaks when a long fragment is moved to a new line, leaving behind
/// a large gap:
///
/// ```
/// use textwrap::core::Word;
/// use textwrap::wrap_algorithms::wrap_first_fit;
/// use textwrap::WordSeparator;
///
/// // Helper to convert wrapped lines to a Vec<String>.
/// fn lines_to_strings(lines: Vec<&[Word<'_>]>) -> Vec<String> {
/// lines.iter().map(|line| {
/// line.iter().map(|word| &**word).collect::<Vec<_>>().join(" ")
/// }).collect::<Vec<_>>()
/// }
///
/// let text = "These few words will unfortunately not wrap nicely.";
/// let words = WordSeparator::AsciiSpace.find_words(text).collect::<Vec<_>>();
/// assert_eq!(lines_to_strings(wrap_first_fit(&words, &[15.0])),
/// vec!["These few words",
/// "will", // <-- short line
/// "unfortunately",
/// "not wrap",
/// "nicely."]);
///
/// // We can avoid the short line if we look ahead:
/// #[cfg(feature = "smawk")]
/// use textwrap::wrap_algorithms::{wrap_optimal_fit, Penalties};
/// #[cfg(feature = "smawk")]
/// assert_eq!(lines_to_strings(wrap_optimal_fit(&words, &[15.0], &Penalties::new()).unwrap()),
/// vec!["These few",
/// "words will",
/// "unfortunately",
/// "not wrap",
/// "nicely."]);
/// ```
///
/// The [`wrap_optimal_fit`] function was used above to get better
/// line breaks. It uses an advanced algorithm which tries to avoid
/// short lines. This function is about 4 times faster than
/// [`wrap_optimal_fit`].
///
/// # Examples
///
/// Imagine you're building a house site and you have a number of
/// tasks you need to execute. Things like pour foundation, complete
/// framing, install plumbing, electric cabling, install insulation.
///
/// The construction workers can only work during daytime, so they
/// need to pack up everything at night. Because they need to secure
/// their tools and move machines back to the garage, this process
/// takes much more time than the time it would take them to simply
/// switch to another task.
///
/// You would like to make a list of tasks to execute every day based
/// on your estimates. You can model this with a program like this:
///
/// ```
/// use textwrap::core::{Fragment, Word};
/// use textwrap::wrap_algorithms::wrap_first_fit;
///
/// #[derive(Debug)]
/// struct Task<'a> {
/// name: &'a str,
/// hours: f64, // Time needed to complete task.
/// sweep: f64, // Time needed for a quick sweep after task during the day.
/// cleanup: f64, // Time needed for full cleanup if day ends with this task.
/// }
///
/// impl Fragment for Task<'_> {
/// fn width(&self) -> f64 { self.hours }
/// fn whitespace_width(&self) -> f64 { self.sweep }
/// fn penalty_width(&self) -> f64 { self.cleanup }
/// }
///
/// // The morning tasks
/// let tasks = vec![
/// Task { name: "Foundation", hours: 4.0, sweep: 2.0, cleanup: 3.0 },
/// Task { name: "Framing", hours: 3.0, sweep: 1.0, cleanup: 2.0 },
/// Task { name: "Plumbing", hours: 2.0, sweep: 2.0, cleanup: 2.0 },
/// Task { name: "Electrical", hours: 2.0, sweep: 1.0, cleanup: 2.0 },
/// Task { name: "Insulation", hours: 2.0, sweep: 1.0, cleanup: 2.0 },
/// Task { name: "Drywall", hours: 3.0, sweep: 1.0, cleanup: 2.0 },
/// Task { name: "Floors", hours: 3.0, sweep: 1.0, cleanup: 2.0 },
/// Task { name: "Countertops", hours: 1.0, sweep: 1.0, cleanup: 2.0 },
/// Task { name: "Bathrooms", hours: 2.0, sweep: 1.0, cleanup: 2.0 },
/// ];
///
/// // Fill tasks into days, taking `day_length` into account. The
/// // output shows the hours worked per day along with the names of
/// // the tasks for that day.
/// fn assign_days<'a>(tasks: &[Task<'a>], day_length: f64) -> Vec<(f64, Vec<&'a str>)> {
/// let mut days = Vec::new();
/// // Assign tasks to days. The assignment is a vector of slices,
/// // with a slice per day.
/// let assigned_days: Vec<&[Task<'a>]> = wrap_first_fit(&tasks, &[day_length]);
/// for day in assigned_days.iter() {
/// let last = day.last().unwrap();
/// let work_hours: f64 = day.iter().map(|t| t.hours + t.sweep).sum();
/// let names = day.iter().map(|t| t.name).collect::<Vec<_>>();
/// days.push((work_hours - last.sweep + last.cleanup, names));
/// }
/// days
/// }
///
/// // With a single crew working 8 hours a day:
/// assert_eq!(
/// assign_days(&tasks, 8.0),
/// [
/// (7.0, vec!["Foundation"]),
/// (8.0, vec!["Framing", "Plumbing"]),
/// (7.0, vec!["Electrical", "Insulation"]),
/// (5.0, vec!["Drywall"]),
/// (7.0, vec!["Floors", "Countertops"]),
/// (4.0, vec!["Bathrooms"]),
/// ]
/// );
///
/// // With two crews working in shifts, 16 hours a day:
/// assert_eq!(
/// assign_days(&tasks, 16.0),
/// [
/// (14.0, vec!["Foundation", "Framing", "Plumbing"]),
/// (15.0, vec!["Electrical", "Insulation", "Drywall", "Floors"]),
/// (6.0, vec!["Countertops", "Bathrooms"]),
/// ]
/// );
/// ```
///
/// Apologies to anyone who actually knows how to build a house and
/// knows how long each step takes :-)
pub fn wrap_first_fit<'a, 'b, T: Fragment>(
    fragments: &'a [T],
    line_widths: &'b [f64],
) -> Vec<&'a [T]> {
    // Lines beyond the end of `line_widths` reuse its final entry. An
    // empty `line_widths` slice means a target width of zero.
    let fallback_width = match line_widths.last() {
        Some(&last) => last,
        None => 0.0,
    };

    let mut lines = Vec::new();
    // Index of the first fragment on the line being filled.
    let mut line_start = 0;
    // Width used on the current line, including inter-fragment whitespace.
    let mut used = 0.0;

    for (idx, fragment) in fragments.iter().enumerate() {
        // The number of finished lines selects the target width.
        let target = match line_widths.get(lines.len()) {
            Some(&line_width) => line_width,
            None => fallback_width,
        };

        // The penalty only counts if the line would end with this fragment.
        let overflows = used + fragment.width() + fragment.penalty_width() > target;

        // A line always holds at least one fragment, even if that
        // fragment is too wide on its own.
        if overflows && idx > line_start {
            lines.push(&fragments[line_start..idx]);
            line_start = idx;
            used = 0.0;
        }

        used += fragment.width() + fragment.whitespace_width();
    }

    // Whatever is left forms the last line. This also yields a single
    // empty line for empty input.
    lines.push(&fragments[line_start..]);
    lines
}
// Unit tests for `wrap_first_fit`.
#[cfg(test)]
mod tests {
use super::*;
// Minimal fragment: the width is given by the wrapped number, the
// whitespace is one unit wide and there is no penalty.
#[derive(Debug, PartialEq)]
struct Word(f64);
#[rustfmt::skip]
impl Fragment for Word {
fn width(&self) -> f64 { self.0 }
fn whitespace_width(&self) -> f64 { 1.0 }
fn penalty_width(&self) -> f64 { 0.0 }
}
// Fragment widths whose sum exceeds f64::MAX must still be wrapped
// into lines.
#[test]
fn wrap_string_longer_than_f64() {
let words = vec![
Word(1e307),
Word(2e307),
Word(3e307),
Word(4e307),
Word(5e307),
Word(6e307),
];
// Wrap at just under f64::MAX (~18e307). The tiny
// whitespace_widths disappear because of loss of precision.
assert_eq!(
wrap_first_fit(&words, &[15e307]),
&[
vec![
Word(1e307),
Word(2e307),
Word(3e307),
Word(4e307),
Word(5e307)
],
vec![Word(6e307)]
]
);
}
}

View File

@ -0,0 +1,433 @@
use std::cell::RefCell;
use crate::core::Fragment;
/// Penalties for
/// [`WrapAlgorithm::OptimalFit`](crate::WrapAlgorithm::OptimalFit)
/// and [`wrap_optimal_fit`].
///
/// The wrapping algorithm in [`wrap_optimal_fit`] considers the
/// entire paragraph to find optimal line breaks. When wrapping text,
/// "penalties" are assigned to line breaks based on the gaps left at
/// the end of lines. The penalties are given by this struct, with
/// [`Penalties::default`] assigning penalties that work well for
/// monospace text.
///
/// If you are wrapping proportional text, you are advised to assign
/// your own penalties according to your font size. See the individual
/// penalties below for details.
///
/// **Note:** Only available when the `smawk` Cargo feature is
/// enabled.
#[derive(Clone, Copy, Debug)]
pub struct Penalties {
/// Per-line penalty. This is added for every line, which makes it
/// expensive to output more lines than the minimum required.
pub nline_penalty: usize,
/// Per-character cost for lines that overflow the target line width.
///
/// With a default value of 50², every single character costs as
/// much as leaving a gap of 50 characters behind. This is because
/// we assign a cost of `gap * gap` to a short line. When
/// wrapping monospace text, we can overflow the line by 1
/// character in extreme cases:
///
/// ```
/// use textwrap::core::Word;
/// use textwrap::wrap_algorithms::{wrap_optimal_fit, Penalties};
///
/// let short = "foo ";
/// let long = "x".repeat(50);
/// let length = (short.len() + long.len()) as f64;
/// let fragments = vec![Word::from(short), Word::from(&long)];
/// let penalties = Penalties::new();
///
/// // Perfect fit, both words are on a single line with no overflow.
/// let wrapped = wrap_optimal_fit(&fragments, &[length], &penalties).unwrap();
/// assert_eq!(wrapped, vec![&[Word::from(short), Word::from(&long)]]);
///
/// // The words no longer fit, yet we get a single line back. While
/// // the cost of overflow (`1 * 2500`) is the same as the cost of the
/// // gap (`50 * 50 = 2500`), the tie is broken by `nline_penalty`
/// // which makes it cheaper to overflow than to use two lines.
/// let wrapped = wrap_optimal_fit(&fragments, &[length - 1.0], &penalties).unwrap();
/// assert_eq!(wrapped, vec![&[Word::from(short), Word::from(&long)]]);
///
/// // The cost of overflow would be 2 * 2500, whereas the cost of
/// // the gap is only `49 * 49 + nline_penalty = 2401 + 1000 =
/// // 3401`. We therefore get two lines.
/// let wrapped = wrap_optimal_fit(&fragments, &[length - 2.0], &penalties).unwrap();
/// assert_eq!(wrapped, vec![&[Word::from(short)],
/// &[Word::from(&long)]]);
/// ```
///
/// This only happens if the overflowing word is 50 characters
/// long _and_ if the word overflows the line by exactly one
/// character. If it overflows by more than one character, the
/// overflow penalty will quickly outgrow the cost of the gap, as
/// seen above.
pub overflow_penalty: usize,
/// When should a single word on the last line be considered
/// "too short"?
///
/// If the last line of the text consists of a single word and if
/// this word is shorter than `1 / short_last_line_fraction` of
/// the line width, then the final line will be considered "short"
/// and `short_last_line_penalty` is added as an extra penalty.
///
/// The effect of this is to avoid a final line consisting of a
/// single small word. For example, with a
/// `short_last_line_penalty` of 25 (the default), a gap of up to
/// 5 columns will be seen as more desirable than having a final
/// short line.
///
/// ## Examples
///
/// ```
/// use textwrap::{wrap, wrap_algorithms, Options, WrapAlgorithm};
///
/// let text = "This is a demo of the short last line penalty.";
///
/// // The first-fit algorithm leaves a single short word on the last line:
/// assert_eq!(wrap(text, Options::new(37).wrap_algorithm(WrapAlgorithm::FirstFit)),
/// vec!["This is a demo of the short last line",
/// "penalty."]);
///
/// #[cfg(feature = "smawk")] {
/// let mut penalties = wrap_algorithms::Penalties::new();
///
/// // Since "penalty." is shorter than 25% of the line width, the
/// // optimal-fit algorithm adds a penalty of 25. This is enough
/// // to move "line " down:
/// assert_eq!(wrap(text, Options::new(37).wrap_algorithm(WrapAlgorithm::OptimalFit(penalties))),
/// vec!["This is a demo of the short last",
/// "line penalty."]);
///
/// // We can change the meaning of "short" lines. Here, only words
/// // shorter than 1/10th of the line width will be considered short:
/// penalties.short_last_line_fraction = 10;
/// assert_eq!(wrap(text, Options::new(37).wrap_algorithm(WrapAlgorithm::OptimalFit(penalties))),
/// vec!["This is a demo of the short last line",
/// "penalty."]);
///
/// // If desired, the penalty can also be disabled:
/// penalties.short_last_line_fraction = 4;
/// penalties.short_last_line_penalty = 0;
/// assert_eq!(wrap(text, Options::new(37).wrap_algorithm(WrapAlgorithm::OptimalFit(penalties))),
/// vec!["This is a demo of the short last line",
/// "penalty."]);
/// }
/// ```
pub short_last_line_fraction: usize,
/// Penalty for a last line with a single short word.
///
/// Set this to zero if you do not want to penalize short last lines.
pub short_last_line_penalty: usize,
/// Penalty for lines ending with a hyphen.
pub hyphen_penalty: usize,
}
impl Penalties {
/// Default penalties for monospace text.
///
/// The penalties here work well for monospace text. This is
/// because they expect the gaps at the end of lines to be roughly
/// in the range `0..100`. If the gaps are larger, the
/// `overflow_penalty` and `hyphen_penalty` become insignificant.
pub const fn new() -> Self {
Penalties {
nline_penalty: 1000,
overflow_penalty: 50 * 50,
short_last_line_fraction: 4,
short_last_line_penalty: 25,
hyphen_penalty: 25,
}
}
}
impl Default for Penalties {
fn default() -> Self {
Self::new()
}
}
/// Cache for line numbers. Looking up previously computed line
/// numbers here is what avoids a O(n**2) behavior when computing
/// line numbers in [`wrap_optimal_fit`].
struct LineNumbers {
    /// `line_numbers[i]` is the cached line number for index `i`. The
    /// `RefCell` allows the cache to grow behind a shared reference.
    line_numbers: RefCell<Vec<usize>>,
}

impl LineNumbers {
    /// Create a cache with room for `size` entries. Index 0 is
    /// pre-seeded with line number 0.
    fn new(size: usize) -> Self {
        let mut cache = Vec::with_capacity(size);
        cache.push(0);
        LineNumbers {
            line_numbers: RefCell::new(cache),
        }
    }

    /// Return the line number for index `i`, extending the cache as
    /// needed.
    ///
    /// The line number of an index is one more than the line number
    /// of the index stored in the first element of its `minima` entry.
    fn get<T>(&self, i: usize, minima: &[(usize, T)]) -> usize {
        loop {
            // The borrow ends with this statement, so the recursive
            // call and the push below never overlap with it.
            let known = self.line_numbers.borrow().len();
            if known >= i + 1 {
                break;
            }
            // Entries are appended in order: `known` is the next
            // index without a cached line number.
            let line_number = 1 + self.get(minima[known].0, minima);
            self.line_numbers.borrow_mut().push(line_number);
        }
        self.line_numbers.borrow()[i]
    }
}
/// Error returned by [`wrap_optimal_fit`] when the cost computation
/// overflows.
#[derive(Debug, PartialEq, Eq)]
pub struct OverflowError;

impl std::fmt::Display for OverflowError {
    /// Write a fixed, human-readable description of the error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("wrap_optimal_fit cost computation overflowed")
    }
}

impl std::error::Error for OverflowError {}
/// Wrap abstract fragments into lines with an optimal-fit algorithm.
///
/// The `line_widths` slice gives the target line width for each line
/// (the last slice element is repeated as necessary). This can be
/// used to implement hanging indentation.
///
/// The fragments must already have been split into the desired
/// widths, this function will not (and cannot) attempt to split them
/// further when arranging them into lines.
///
/// # Optimal-Fit Algorithm
///
/// The algorithm considers all possible break points and picks the
/// breaks which minimize the gaps at the end of each line. More
/// precisely, the algorithm assigns a cost or penalty to each break
/// point, determined by `cost = gap * gap` where `gap = target_width -
/// line_width`. Shorter lines are thus penalized more heavily since
/// they leave behind a larger gap.
///
/// We can illustrate this with the text “To be, or not to be: that is
/// the question”. We will be wrapping it in a narrow column with room
/// for only 10 characters. The [greedy
/// algorithm](super::wrap_first_fit) will produce these lines, each
/// annotated with the corresponding penalty:
///
/// ```text
/// "To be, or" 1² = 1
/// "not to be:" 0² = 0
/// "that is" 3² = 9
/// "the" 7² = 49
/// "question" 2² = 4
/// ```
///
/// We see that line four with “the” leaves a gap of 7 columns, which
/// gives it a penalty of 49. The sum of the penalties is 63.
///
/// There are 10 words, which means that there are `2_u32.pow(9)` or
/// 512 different ways to typeset it. We can compute
/// the sum of the penalties for each possible line break and search
/// for the one with the lowest sum:
///
/// ```text
/// "To be," 4² = 16
/// "or not to" 1² = 1
/// "be: that" 2² = 4
/// "is the" 4² = 16
/// "question" 2² = 4
/// ```
///
/// The sum of the penalties is 41, which is better than what the
/// greedy algorithm produced.
///
/// Searching through all possible combinations would normally be
/// prohibitively slow. However, it turns out that the problem can be
/// formulated as the task of finding column minima in a cost matrix.
/// This matrix has a special form (totally monotone) which lets us
/// use a [linear-time algorithm called
/// SMAWK](https://lib.rs/crates/smawk) to find the optimal break
/// points.
///
/// This means that the time complexity remains O(_n_) where _n_ is
/// the number of words. Compared to
/// [`wrap_first_fit`](super::wrap_first_fit), this function is about
/// 4 times slower.
///
/// The optimization of per-line costs over the entire paragraph is
/// inspired by the line breaking algorithm used in TeX, as described
/// in the 1981 article [_Breaking Paragraphs into
/// Lines_](http://www.eprg.org/G53DOC/pdfs/knuth-plass-breaking.pdf)
/// by Knuth and Plass. The implementation here is based on [Python
/// code by David
/// Eppstein](https://github.com/jfinkels/PADS/blob/master/pads/wrap.py).
///
/// # Errors
///
/// In case of an overflow during the cost computation, an `Err` is
/// returned. Overflows happen when fragments or lines have infinite
/// widths (`f64::INFINITY`) or if the widths are so large that the
/// gaps at the end of lines have sizes larger than `f64::MAX.sqrt()`
/// (approximately 1e154):
///
/// ```
/// use textwrap::core::Fragment;
/// use textwrap::wrap_algorithms::{wrap_optimal_fit, OverflowError, Penalties};
///
/// #[derive(Debug, PartialEq)]
/// struct Word(f64);
///
/// impl Fragment for Word {
/// fn width(&self) -> f64 { self.0 }
/// fn whitespace_width(&self) -> f64 { 1.0 }
/// fn penalty_width(&self) -> f64 { 0.0 }
/// }
///
/// // Wrapping overflows because 1e155 * 1e155 = 1e310, which is
/// // larger than f64::MAX:
/// assert_eq!(wrap_optimal_fit(&[Word(0.0), Word(0.0)], &[1e155], &Penalties::default()),
/// Err(OverflowError));
/// ```
///
/// When using fragment widths and line widths which fit inside a
/// `u64`, overflows cannot happen. This means that fragments derived
/// from a `&str` cannot cause overflows.
///
/// **Note:** Only available when the `smawk` Cargo feature is
/// enabled.
pub fn wrap_optimal_fit<'a, 'b, T: Fragment>(
    fragments: &'a [T],
    line_widths: &'b [f64],
    penalties: &'b Penalties,
) -> Result<Vec<&'a [T]>, OverflowError> {
    // Lines past the end of `line_widths` reuse its last entry, or
    // 0.0 when no widths were supplied at all.
    let fallback_width = line_widths.last().copied().unwrap_or(0.0);

    // Prefix sums: prefix[k] is the accumulated width plus trailing
    // whitespace of fragments[..k], so prefix[0] is 0.0.
    let mut prefix = Vec::with_capacity(fragments.len() + 1);
    prefix.push(0.0_f64);
    prefix.extend(fragments.iter().scan(0.0_f64, |total, fragment| {
        *total += fragment.width() + fragment.whitespace_width();
        Some(*total)
    }));

    let line_numbers = LineNumbers::new(fragments.len());

    let minima = smawk::online_column_minima(0.0, prefix.len(), |minima, i, j| {
        // Width available on the line which fragments[i] starts,
        // clamped so that it is never below 1.0.
        let available = line_widths
            .get(line_numbers.get(i, minima))
            .copied()
            .unwrap_or(fallback_width)
            .max(1.0);

        // The fragment that would end a line made of fragments[i..j].
        let last = &fragments[j - 1];

        // Width of fragments[i..j] in constant time: take the
        // difference of the prefix sums, drop the whitespace that
        // trails the final fragment, and add its penalty width.
        let used = prefix[j] - prefix[i] - last.whitespace_width() + last.penalty_width();

        // The cost builds on minima[i].1, the best cost found for
        // breaking before fragments[i]; every additional line then
        // costs `nline_penalty`.
        let mut cost = minima[i].1 + penalties.nline_penalty as f64;

        if used > available {
            // Too wide: charge for each unit of overflow.
            cost += (used - available) * penalties.overflow_penalty as f64;
        } else if j < fragments.len() {
            // Not the last line: charge the square of the unused
            // space at the end of the line.
            let slack = available - used;
            cost += slack * slack;
        } else if i + 1 == j && used < available / penalties.short_last_line_fraction as f64 {
            // The last line may leave any amount of space unused,
            // but a last line holding a single, very short fragment
            // is penalized.
            cost += penalties.short_last_line_penalty as f64;
        }

        // Ending a line on a fragment with a penalty (typically a
        // hyphen) is discouraged.
        if last.penalty_width() > 0.0 {
            // TODO: this should use a penalty value from the fragment
            // instead.
            cost += penalties.hyphen_penalty as f64;
        }

        cost
    });

    // An infinite cost anywhere means the computation overflowed.
    if minima.iter().any(|(_, cost)| cost.is_infinite()) {
        return Err(OverflowError);
    }

    // Walk the chosen break points backwards from the end; each
    // minima[end].0 is the index where the line ending at `end` begins.
    let mut lines = Vec::with_capacity(line_numbers.get(fragments.len(), &minima));
    let mut end = fragments.len();
    loop {
        let start = minima[end].0;
        lines.push(&fragments[start..end]);
        if start == 0 {
            break;
        }
        end = start;
    }

    // The lines were collected last-to-first.
    lines.reverse();
    Ok(lines)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Minimal fragment for the tests: the wrapped value is the width,
    /// the trailing whitespace is always 1.0 wide and there is no
    /// penalty width.
    #[derive(Debug, PartialEq)]
    struct Word(f64);

    impl Fragment for Word {
        fn width(&self) -> f64 {
            self.0
        }

        fn whitespace_width(&self) -> f64 {
            1.0
        }

        fn penalty_width(&self) -> f64 {
            0.0
        }
    }

    #[test]
    fn wrap_fragments_with_infinite_widths() {
        // A fragment of infinite width must be reported as an overflow.
        let words = [Word(f64::INFINITY)];
        let wrapped = wrap_optimal_fit(&words, &[0.0], &Penalties::default());
        assert_eq!(wrapped, Err(OverflowError));
    }

    #[test]
    fn wrap_fragments_with_huge_widths() {
        // Finite but huge widths must also be reported as an overflow.
        let words = [Word(1e200), Word(1e250), Word(1e300)];
        let wrapped = wrap_optimal_fit(&words, &[1e300], &Penalties::default());
        assert_eq!(wrapped, Err(OverflowError));
    }

    #[test]
    fn wrap_fragments_with_large_widths() {
        // The gaps will be of the sizes between 1e25 and 1e75. This
        // makes the `gap * gap` cost fit comfortably in a f64.
        let words = [Word(1e25), Word(1e50), Word(1e75)];
        let wrapped = wrap_optimal_fit(&words, &[1e100], &Penalties::default());
        // Everything is expected to end up on a single line.
        assert_eq!(wrapped, Ok(vec![&words[..]]));
    }
}