Initial vendor packages
Signed-off-by: Valentin Popov <valentin@popov.link>
This commit is contained in:
433
vendor/textwrap/src/core.rs
vendored
Normal file
433
vendor/textwrap/src/core.rs
vendored
Normal file
@ -0,0 +1,433 @@
|
||||
//! Building blocks for advanced wrapping functionality.
|
||||
//!
|
||||
//! The functions and structs in this module can be used to implement
|
||||
//! advanced wrapping functionality when the [`wrap`](super::wrap) and
|
||||
//! [`fill`](super::fill) functions don't do what you want.
|
||||
//!
|
||||
//! In general, you want to follow these steps when wrapping
|
||||
//! something:
|
||||
//!
|
||||
//! 1. Split your input into [`Fragment`]s. These are abstract blocks
|
||||
//! of text or content which can be wrapped into lines. See
|
||||
//! [`WordSeparator`](crate::word_separators::WordSeparator) for
|
||||
//! how to do this for text.
|
||||
//!
|
||||
//! 2. Potentially split your fragments into smaller pieces. This
|
||||
//! allows you to implement things like hyphenation. If you use the
|
||||
//! `Word` type, you can use [`WordSplitter`](crate::WordSplitter)
|
||||
//! enum for this.
|
||||
//!
|
||||
//! 3. Potentially break apart fragments that are still too large to
|
||||
//! fit on a single line. This is implemented in [`break_words`].
|
||||
//!
|
||||
//! 4. Finally take your fragments and put them into lines. There are
|
||||
//! two algorithms for this in the
|
||||
//! [`wrap_algorithms`](crate::wrap_algorithms) module:
|
||||
//! [`wrap_optimal_fit`](crate::wrap_algorithms::wrap_optimal_fit)
|
||||
//! and [`wrap_first_fit`](crate::wrap_algorithms::wrap_first_fit).
|
||||
//! The former produces better line breaks, the latter is faster.
|
||||
//!
|
||||
//! 5. Iterate through the slices returned by the wrapping functions
|
||||
//! and construct your lines of output.
|
||||
//!
|
||||
//! Please [open an issue](https://github.com/mgeisler/textwrap/) if
|
||||
//! the functionality here is not sufficient or if you have ideas for
|
||||
//! improving it. We would love to hear from you!
|
||||
|
||||
/// The CSI or “Control Sequence Introducer” introduces an ANSI escape
/// sequence. This is typically used for colored text and will be
/// ignored when computing the text width.
const CSI: (char, char) = ('\x1b', '[');
/// The final bytes of an ANSI escape sequence must be in this range.
const ANSI_FINAL_BYTE: std::ops::RangeInclusive<char> = '\x40'..='\x7e';

/// Skip ANSI escape sequences. The `ch` is the current `char`, the
/// `chars` provide the following characters. The `chars` will be
/// modified if `ch` is the start of an ANSI escape sequence.
///
/// Returns `true` if a complete escape sequence was skipped.
#[inline]
pub(crate) fn skip_ansi_escape_sequence<I: Iterator<Item = char>>(ch: char, chars: &mut I) -> bool {
    // Look for the two-character CSI introducer "ESC[". Note that
    // when `ch` is ESC, one character is read from `chars` even if it
    // turns out not to be '[' (same consumption as the original
    // short-circuiting `&&`).
    if ch != CSI.0 || chars.next() != Some(CSI.1) {
        return false;
    }
    // We found the start of an ANSI escape code, typically used for
    // colored terminal text. Consume until a "final byte" in the
    // range 0x40–0x7E; an unterminated sequence yields `false`.
    chars.any(|c| ANSI_FINAL_BYTE.contains(&c))
}
|
||||
|
||||
#[cfg(feature = "unicode-width")]
#[inline]
fn ch_width(ch: char) -> usize {
    // Characters without a defined width are counted as 0 columns.
    match unicode_width::UnicodeWidthChar::width(ch) {
        Some(w) => w,
        None => 0,
    }
}
|
||||
|
||||
/// First character which [`ch_width`] will classify as double-width.
/// Please see [`display_width`].
#[cfg(not(feature = "unicode-width"))]
const DOUBLE_WIDTH_CUTOFF: char = '\u{1100}';

#[cfg(not(feature = "unicode-width"))]
#[inline]
fn ch_width(ch: char) -> usize {
    // Crude approximation used when the `unicode-width` feature is
    // disabled: everything below U+1100 is narrow, the rest is wide.
    if ch >= DOUBLE_WIDTH_CUTOFF {
        2
    } else {
        1
    }
}
|
||||
|
||||
/// Compute the display width of `text` while skipping over ANSI
|
||||
/// escape sequences.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```
|
||||
/// use textwrap::core::display_width;
|
||||
///
|
||||
/// assert_eq!(display_width("Café Plain"), 10);
|
||||
/// assert_eq!(display_width("\u{1b}[31mCafé Rouge\u{1b}[0m"), 10);
|
||||
/// ```
|
||||
///
|
||||
/// **Note:** When the `unicode-width` Cargo feature is disabled, the
|
||||
/// width of a `char` is determined by a crude approximation which
|
||||
/// simply counts chars below U+1100 as 1 column wide, and all other
|
||||
/// characters as 2 columns wide. With the feature enabled, function
|
||||
/// will correctly deal with [combining characters] in their
|
||||
/// decomposed form (see [Unicode equivalence]).
|
||||
///
|
||||
/// An example of a decomposed character is “é”, which can be
|
||||
/// decomposed into: “e” followed by a combining acute accent: “◌́”.
|
||||
/// Without the `unicode-width` Cargo feature, every `char` below
|
||||
/// U+1100 has a width of 1. This includes the combining accent:
|
||||
///
|
||||
/// ```
|
||||
/// use textwrap::core::display_width;
|
||||
///
|
||||
/// assert_eq!(display_width("Cafe Plain"), 10);
|
||||
/// #[cfg(feature = "unicode-width")]
|
||||
/// assert_eq!(display_width("Cafe\u{301} Plain"), 10);
|
||||
/// #[cfg(not(feature = "unicode-width"))]
|
||||
/// assert_eq!(display_width("Cafe\u{301} Plain"), 11);
|
||||
/// ```
|
||||
///
|
||||
/// ## Emojis and CJK Characters
|
||||
///
|
||||
/// Characters such as emojis and [CJK characters] used in the
|
||||
/// Chinese, Japanese, and Korean langauges are seen as double-width,
|
||||
/// even if the `unicode-width` feature is disabled:
|
||||
///
|
||||
/// ```
|
||||
/// use textwrap::core::display_width;
|
||||
///
|
||||
/// assert_eq!(display_width("😂😭🥺🤣✨😍🙏🥰😊🔥"), 20);
|
||||
/// assert_eq!(display_width("你好"), 4); // “Nǐ hǎo” or “Hello” in Chinese
|
||||
/// ```
|
||||
///
|
||||
/// # Limitations
|
||||
///
|
||||
/// The displayed width of a string cannot always be computed from the
|
||||
/// string alone. This is because the width depends on the rendering
|
||||
/// engine used. This is particularly visible with [emoji modifier
|
||||
/// sequences] where a base emoji is modified with, e.g., skin tone or
|
||||
/// hair color modifiers. It is up to the rendering engine to detect
|
||||
/// this and to produce a suitable emoji.
|
||||
///
|
||||
/// A simple example is “❤️”, which consists of “❤” (U+2764: Black
|
||||
/// Heart Symbol) followed by U+FE0F (Variation Selector-16). By
|
||||
/// itself, “❤” is a black heart, but if you follow it with the
|
||||
/// variant selector, you may get a wider red heart.
|
||||
///
|
||||
/// A more complex example would be “👨🦰” which should depict a man
|
||||
/// with red hair. Here the computed width is too large — and the
|
||||
/// width differs depending on the use of the `unicode-width` feature:
|
||||
///
|
||||
/// ```
|
||||
/// use textwrap::core::display_width;
|
||||
///
|
||||
/// assert_eq!("👨🦰".chars().collect::<Vec<char>>(), ['\u{1f468}', '\u{200d}', '\u{1f9b0}']);
|
||||
/// #[cfg(feature = "unicode-width")]
|
||||
/// assert_eq!(display_width("👨🦰"), 4);
|
||||
/// #[cfg(not(feature = "unicode-width"))]
|
||||
/// assert_eq!(display_width("👨🦰"), 6);
|
||||
/// ```
|
||||
///
|
||||
/// This happens because the grapheme consists of three code points:
|
||||
/// “👨” (U+1F468: Man), Zero Width Joiner (U+200D), and “🦰”
|
||||
/// (U+1F9B0: Red Hair). You can see them above in the test. With
|
||||
/// `unicode-width` enabled, the ZWJ is correctly seen as having zero
|
||||
/// width, without it is counted as a double-width character.
|
||||
///
|
||||
/// ## Terminal Support
|
||||
///
|
||||
/// Modern browsers typically do a great job at combining characters
|
||||
/// as shown above, but terminals often struggle more. As an example,
|
||||
/// Gnome Terminal version 3.38.1, shows “❤️” as a big red heart, but
|
||||
/// shows "👨🦰" as “👨🦰”.
|
||||
///
|
||||
/// [combining characters]: https://en.wikipedia.org/wiki/Combining_character
|
||||
/// [Unicode equivalence]: https://en.wikipedia.org/wiki/Unicode_equivalence
|
||||
/// [CJK characters]: https://en.wikipedia.org/wiki/CJK_characters
|
||||
/// [emoji modifier sequences]: https://unicode.org/emoji/charts/full-emoji-modifiers.html
|
||||
pub fn display_width(text: &str) -> usize {
|
||||
let mut chars = text.chars();
|
||||
let mut width = 0;
|
||||
while let Some(ch) = chars.next() {
|
||||
if skip_ansi_escape_sequence(ch, &mut chars) {
|
||||
continue;
|
||||
}
|
||||
width += ch_width(ch);
|
||||
}
|
||||
width
|
||||
}
|
||||
|
||||
/// A (text) fragment denotes the unit which we wrap into lines.
///
/// Fragments represent an abstract _word_ plus the _whitespace_
/// following the word. In case the word falls at the end of the line,
/// the whitespace is dropped and a so-called _penalty_ is inserted
/// instead (typically `"-"` if the word was hyphenated).
///
/// For wrapping purposes, the precise content of the word, the
/// whitespace, and the penalty is irrelevant. All we need to know is
/// the displayed width of each part, which this trait provides.
pub trait Fragment: std::fmt::Debug {
    /// Displayed width of word represented by this fragment.
    fn width(&self) -> f64;

    /// Displayed width of the whitespace that must follow the word
    /// when the word is not at the end of a line.
    fn whitespace_width(&self) -> f64;

    /// Displayed width of the penalty that must be inserted if the
    /// word falls at the end of a line.
    fn penalty_width(&self) -> f64;
}
|
||||
|
||||
/// A piece of wrappable text, including any trailing whitespace.
///
/// A `Word` is an example of a [`Fragment`], so it has a width,
/// trailing whitespace, and potentially a penalty item.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub struct Word<'a> {
    /// Word content.
    pub word: &'a str,
    /// Whitespace to insert if the word does not fall at the end of a line.
    pub whitespace: &'a str,
    /// Penalty string to insert if the word falls at the end of a line.
    pub penalty: &'a str,
    // Cached display width of `word` in columns.
    pub(crate) width: usize,
}
|
||||
|
||||
impl std::ops::Deref for Word<'_> {
|
||||
type Target = str;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
self.word
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Word<'a> {
|
||||
/// Construct a `Word` from a string.
|
||||
///
|
||||
/// A trailing stretch of `' '` is automatically taken to be the
|
||||
/// whitespace part of the word.
|
||||
pub fn from(word: &str) -> Word<'_> {
|
||||
let trimmed = word.trim_end_matches(' ');
|
||||
Word {
|
||||
word: trimmed,
|
||||
width: display_width(trimmed),
|
||||
whitespace: &word[trimmed.len()..],
|
||||
penalty: "",
|
||||
}
|
||||
}
|
||||
|
||||
/// Break this word into smaller words with a width of at most
|
||||
/// `line_width`. The whitespace and penalty from this `Word` is
|
||||
/// added to the last piece.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```
|
||||
/// use textwrap::core::Word;
|
||||
/// assert_eq!(
|
||||
/// Word::from("Hello! ").break_apart(3).collect::<Vec<_>>(),
|
||||
/// vec![Word::from("Hel"), Word::from("lo! ")]
|
||||
/// );
|
||||
/// ```
|
||||
pub fn break_apart<'b>(&'b self, line_width: usize) -> impl Iterator<Item = Word<'a>> + 'b {
|
||||
let mut char_indices = self.word.char_indices();
|
||||
let mut offset = 0;
|
||||
let mut width = 0;
|
||||
|
||||
std::iter::from_fn(move || {
|
||||
while let Some((idx, ch)) = char_indices.next() {
|
||||
if skip_ansi_escape_sequence(ch, &mut char_indices.by_ref().map(|(_, ch)| ch)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if width > 0 && width + ch_width(ch) > line_width {
|
||||
let word = Word {
|
||||
word: &self.word[offset..idx],
|
||||
width: width,
|
||||
whitespace: "",
|
||||
penalty: "",
|
||||
};
|
||||
offset = idx;
|
||||
width = ch_width(ch);
|
||||
return Some(word);
|
||||
}
|
||||
|
||||
width += ch_width(ch);
|
||||
}
|
||||
|
||||
if offset < self.word.len() {
|
||||
let word = Word {
|
||||
word: &self.word[offset..],
|
||||
width: width,
|
||||
whitespace: self.whitespace,
|
||||
penalty: self.penalty,
|
||||
};
|
||||
offset = self.word.len();
|
||||
return Some(word);
|
||||
}
|
||||
|
||||
None
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl Fragment for Word<'_> {
    #[inline]
    fn width(&self) -> f64 {
        // Use the width cached when the `Word` was created.
        self.width as f64
    }

    // The whitespace is assumed to consist of ' ' only, so the byte
    // length equals the display width in constant time.
    #[inline]
    fn whitespace_width(&self) -> f64 {
        self.whitespace.len() as f64
    }

    // The penalty is assumed to be `""` or `"-"`, so the byte length
    // equals the display width in constant time.
    #[inline]
    fn penalty_width(&self) -> f64 {
        self.penalty.len() as f64
    }
}
|
||||
|
||||
/// Forcibly break words wider than `line_width` into smaller words.
|
||||
///
|
||||
/// This simply calls [`Word::break_apart`] on words that are too
|
||||
/// wide. This means that no extra `'-'` is inserted, the word is
|
||||
/// simply broken into smaller pieces.
|
||||
pub fn break_words<'a, I>(words: I, line_width: usize) -> Vec<Word<'a>>
|
||||
where
|
||||
I: IntoIterator<Item = Word<'a>>,
|
||||
{
|
||||
let mut shortened_words = Vec::new();
|
||||
for word in words {
|
||||
if word.width() > line_width as f64 {
|
||||
shortened_words.extend(word.break_apart(line_width));
|
||||
} else {
|
||||
shortened_words.push(word);
|
||||
}
|
||||
}
|
||||
shortened_words
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[cfg(feature = "unicode-width")]
    use unicode_width::UnicodeWidthChar;

    #[test]
    fn skip_ansi_escape_sequence_works() {
        let blue_text = "\u{1b}[34mHello\u{1b}[0m";
        let mut chars = blue_text.chars();
        let first = chars.next().unwrap();
        assert!(skip_ansi_escape_sequence(first, &mut chars));
        assert_eq!(chars.next(), Some('H'));
    }

    #[test]
    fn emojis_have_correct_width() {
        use unic_emoji_char::is_emoji;

        // Emojis in the Basic Latin (ASCII) and Latin-1 Supplement
        // blocks (such as '#' and '©') are one column wide.
        for ch in '\u{1}'..'\u{FF}' {
            if is_emoji(ch) {
                let desc = format!("{:?} U+{:04X}", ch, ch as u32);

                #[cfg(feature = "unicode-width")]
                assert_eq!(ch.width().unwrap(), 1, "char: {}", desc);

                #[cfg(not(feature = "unicode-width"))]
                assert_eq!(ch_width(ch), 1, "char: {}", desc);
            }
        }

        // Emojis in the remaining blocks of the BMP, the SMP, and the
        // SIP are 1 or 2 columns wide with unicode-width, and always
        // 2 columns wide otherwise. This includes favorites like 😊.
        for ch in '\u{FF}'..'\u{2FFFF}' {
            if is_emoji(ch) {
                let desc = format!("{:?} U+{:04X}", ch, ch as u32);

                #[cfg(feature = "unicode-width")]
                assert!(ch.width().unwrap() <= 2, "char: {}", desc);

                #[cfg(not(feature = "unicode-width"))]
                assert_eq!(ch_width(ch), 2, "char: {}", desc);
            }
        }

        // The remaining planes contain almost no assigned code points
        // and thus also no emojis.
    }

    #[test]
    fn display_width_works() {
        assert_eq!("Café Plain".len(), 11); // “é” is two bytes
        assert_eq!(display_width("Café Plain"), 10);
        assert_eq!(display_width("\u{1b}[31mCafé Rouge\u{1b}[0m"), 10);
    }

    #[test]
    fn display_width_narrow_emojis() {
        #[cfg(feature = "unicode-width")]
        assert_eq!(display_width("⁉"), 1);

        // The ⁉ character is above DOUBLE_WIDTH_CUTOFF.
        #[cfg(not(feature = "unicode-width"))]
        assert_eq!(display_width("⁉"), 2);
    }

    #[test]
    fn display_width_narrow_emojis_variant_selector() {
        #[cfg(feature = "unicode-width")]
        assert_eq!(display_width("⁉\u{fe0f}"), 1);

        // The variant selector-16 is also counted.
        #[cfg(not(feature = "unicode-width"))]
        assert_eq!(display_width("⁉\u{fe0f}"), 4);
    }

    #[test]
    fn display_width_emojis() {
        assert_eq!(display_width("😂😭🥺🤣✨😍🙏🥰😊🔥"), 20);
    }
}
|
347
vendor/textwrap/src/indentation.rs
vendored
Normal file
347
vendor/textwrap/src/indentation.rs
vendored
Normal file
@ -0,0 +1,347 @@
|
||||
//! Functions related to adding and removing indentation from lines of
|
||||
//! text.
|
||||
//!
|
||||
//! The functions here can be used to uniformly indent or dedent
|
||||
//! (unindent) word wrapped lines of text.
|
||||
|
||||
/// Indent each line by the given prefix.
///
/// # Examples
///
/// ```
/// use textwrap::indent;
///
/// assert_eq!(indent("First line.\nSecond line.\n", "  "),
///            "  First line.\n  Second line.\n");
/// ```
///
/// When indenting, trailing whitespace is stripped from the prefix.
/// This means that empty lines remain empty afterwards:
///
/// ```
/// use textwrap::indent;
///
/// assert_eq!(indent("First line.\n\n\nSecond line.\n", "  "),
///            "  First line.\n\n\n  Second line.\n");
/// ```
///
/// Notice how `"\n\n\n"` remained as `"\n\n\n"`.
///
/// This feature is useful when you want to indent text and have a
/// space between your prefix and the text. In this case, you _don't_
/// want a trailing space on empty lines:
///
/// ```
/// use textwrap::indent;
///
/// assert_eq!(indent("foo = 123\n\nprint(foo)\n", "# "),
///            "# foo = 123\n#\n# print(foo)\n");
/// ```
///
/// Notice how `"\n\n"` became `"\n#\n"` instead of `"\n# \n"` which
/// would have trailing whitespace.
///
/// Leading and trailing whitespace coming from the text itself is
/// kept unchanged:
///
/// ```
/// use textwrap::indent;
///
/// assert_eq!(indent(" \t  Foo   ", "->"), "-> \t  Foo   ");
/// ```
pub fn indent(s: &str, prefix: &str) -> String {
    // We know we'll need more than s.len() bytes for the output, but
    // without counting '\n' characters (which is somewhat slow), we
    // don't know exactly how much. However, we can preemptively do
    // the first doubling of the output size.
    let mut result = String::with_capacity(2 * s.len());
    // Whitespace-only lines get the trimmed prefix so they carry no
    // trailing whitespace.
    let empty_line_prefix = prefix.trim_end();
    let mut first = true;
    for line in s.split_terminator('\n') {
        if !first {
            result.push('\n');
        }
        first = false;
        result.push_str(if line.trim().is_empty() {
            empty_line_prefix
        } else {
            prefix
        });
        result.push_str(line);
    }
    if s.ends_with('\n') {
        // split_terminator will have eaten the final '\n'.
        result.push('\n');
    }
    result
}
|
||||
|
||||
/// Removes common leading whitespace from each line.
///
/// This function will look at each non-empty line and determine the
/// maximum amount of whitespace that can be removed from all lines:
///
/// ```
/// use textwrap::dedent;
///
/// assert_eq!(dedent("
///     1st line
///       2nd line
///     3rd line
/// "), "
/// 1st line
///   2nd line
/// 3rd line
/// ");
/// ```
pub fn dedent(s: &str) -> String {
    let mut prefix = "";
    let mut lines = s.lines();

    // We first search for a non-empty line to find a prefix.
    for line in &mut lines {
        let mut whitespace_idx = line.len();
        for (idx, ch) in line.char_indices() {
            if !ch.is_whitespace() {
                whitespace_idx = idx;
                break;
            }
        }

        // Check if the line had anything but whitespace
        if whitespace_idx < line.len() {
            prefix = &line[..whitespace_idx];
            break;
        }
    }

    // We then continue looking through the remaining lines to
    // possibly shorten the prefix.
    for line in &mut lines {
        let mut whitespace_idx = line.len();
        for ((idx, a), b) in line.char_indices().zip(prefix.chars()) {
            if a != b {
                whitespace_idx = idx;
                break;
            }
        }

        // Check if the line had anything but whitespace and if we
        // have found a shorter prefix
        if whitespace_idx < line.len() && whitespace_idx < prefix.len() {
            prefix = &line[..whitespace_idx];
        }
    }

    // We now go over the lines a second time to build the result.
    let mut result = String::new();
    for line in s.lines() {
        // `starts_with(prefix)` instead of `starts_with(&prefix)`:
        // the extra borrow was redundant (clippy::needless_borrow).
        if line.starts_with(prefix) && line.chars().any(|c| !c.is_whitespace()) {
            let (_, tail) = line.split_at(prefix.len());
            result.push_str(tail);
        }
        result.push('\n');
    }

    if result.ends_with('\n') && !s.ends_with('\n') {
        // `s.lines()` ignores a missing final newline; drop the one
        // we unconditionally appended.
        let new_len = result.len() - 1;
        result.truncate(new_len);
    }

    result
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn indent_empty() {
        assert_eq!(indent("\n", "  "), "\n");
    }

    #[test]
    fn indent_nonempty() {
        let text = "  foo\nbar\n  baz\n";
        let expected = "//   foo\n// bar\n//   baz\n";
        assert_eq!(indent(text, "// "), expected);
    }

    #[test]
    fn indent_empty_line() {
        // The empty line gets the trimmed prefix "//", not "// ".
        let text = "  foo\nbar\n\n  baz";
        let expected = "//   foo\n// bar\n//\n//   baz";
        assert_eq!(indent(text, "// "), expected);
    }

    #[test]
    fn dedent_empty() {
        assert_eq!(dedent(""), "");
    }

    #[test]
    fn dedent_multi_line() {
        let x = "    foo\n  bar\n   baz";
        let y = "  foo\nbar\n baz";
        assert_eq!(dedent(x), y);
    }

    #[test]
    fn dedent_empty_line() {
        // A completely empty line does not shorten the prefix.
        let x = "    foo\n  bar\n\n  baz";
        let y = "  foo\nbar\n\nbaz";
        assert_eq!(dedent(x), y);
    }

    #[test]
    fn dedent_blank_line() {
        let x = "  foo\n\n  bar";
        let y = "foo\n\nbar";
        assert_eq!(dedent(x), y);
    }

    #[test]
    fn dedent_whitespace_line() {
        // A whitespace-only line becomes empty in the output.
        let x = "  foo\n \n  bar";
        let y = "foo\n\nbar";
        assert_eq!(dedent(x), y);
    }

    #[test]
    fn dedent_mixed_whitespace() {
        // Tab vs. space: no common prefix, nothing is removed.
        let x = "\tfoo\n  bar";
        assert_eq!(dedent(x), x);
    }

    #[test]
    fn dedent_tabbed_whitespace() {
        assert_eq!(dedent("\t\tfoo\n\t\t\tbar"), "foo\n\tbar");
    }

    #[test]
    fn dedent_mixed_tabbed_whitespace() {
        assert_eq!(dedent("\t \tfoo\n\t \t\tbar"), "foo\n\tbar");
    }

    #[test]
    fn dedent_mixed_tabbed_whitespace2() {
        // The common prefix stops at the first differing character.
        assert_eq!(dedent("\t  \tfoo\n\t \tbar"), " \tfoo\n\tbar");
    }

    #[test]
    fn dedent_preserve_no_terminating_newline() {
        assert_eq!(dedent("  foo\n    bar"), "foo\n  bar");
    }
}
|
1847
vendor/textwrap/src/lib.rs
vendored
Normal file
1847
vendor/textwrap/src/lib.rs
vendored
Normal file
File diff suppressed because it is too large
Load Diff
428
vendor/textwrap/src/word_separators.rs
vendored
Normal file
428
vendor/textwrap/src/word_separators.rs
vendored
Normal file
@ -0,0 +1,428 @@
|
||||
//! Functionality for finding words.
|
||||
//!
|
||||
//! In order to wrap text, we need to know where the legal break
|
||||
//! points are, i.e., where the words of the text are. This means that
|
||||
//! we need to define what a "word" is.
|
||||
//!
|
||||
//! A simple approach is to simply split the text on whitespace, but
|
||||
//! this does not work for East-Asian languages such as Chinese or
|
||||
//! Japanese where there are no spaces between words. Breaking a long
|
||||
//! sequence of emojis is another example where line breaks might be
|
||||
//! wanted even if there are no whitespace to be found.
|
||||
//!
|
||||
//! The [`WordSeparator`] trait is responsible for determining where
|
||||
//! the words are in a line of text. Please refer to the trait and
|
||||
//! the structs which implement it for more information.
|
||||
|
||||
#[cfg(feature = "unicode-linebreak")]
|
||||
use crate::core::skip_ansi_escape_sequence;
|
||||
use crate::core::Word;
|
||||
|
||||
/// Describes where words occur in a line of text.
|
||||
///
|
||||
/// The simplest approach is say that words are separated by one or
|
||||
/// more ASCII spaces (`' '`). This works for Western languages
|
||||
/// without emojis. A more complex approach is to use the Unicode line
|
||||
/// breaking algorithm, which finds break points in non-ASCII text.
|
||||
///
|
||||
/// The line breaks occur between words, please see
|
||||
/// [`WordSplitter`](crate::WordSplitter) for options of how to handle
|
||||
/// hyphenation of individual words.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```
|
||||
/// use textwrap::core::Word;
|
||||
/// use textwrap::WordSeparator::AsciiSpace;
|
||||
///
|
||||
/// let words = AsciiSpace.find_words("Hello World!").collect::<Vec<_>>();
|
||||
/// assert_eq!(words, vec![Word::from("Hello "), Word::from("World!")]);
|
||||
/// ```
|
||||
#[derive(Clone, Copy)]
|
||||
pub enum WordSeparator {
|
||||
/// Find words by splitting on runs of `' '` characters.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```
|
||||
/// use textwrap::core::Word;
|
||||
/// use textwrap::WordSeparator::AsciiSpace;
|
||||
///
|
||||
/// let words = AsciiSpace.find_words("Hello World!").collect::<Vec<_>>();
|
||||
/// assert_eq!(words, vec![Word::from("Hello "),
|
||||
/// Word::from("World!")]);
|
||||
/// ```
|
||||
AsciiSpace,
|
||||
|
||||
/// Split `line` into words using Unicode break properties.
|
||||
///
|
||||
/// This word separator uses the Unicode line breaking algorithm
|
||||
/// described in [Unicode Standard Annex
|
||||
/// #14](https://www.unicode.org/reports/tr14/) to find legal places
|
||||
/// to break lines. There is a small difference in that the U+002D
|
||||
/// (Hyphen-Minus) and U+00AD (Soft Hyphen) don’t create a line break:
|
||||
/// to allow a line break at a hyphen, use
|
||||
/// [`WordSplitter::HyphenSplitter`](crate::WordSplitter::HyphenSplitter).
|
||||
/// Soft hyphens are not currently supported.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// Unlike [`WordSeparator::AsciiSpace`], the Unicode line
|
||||
/// breaking algorithm will find line break opportunities between
|
||||
/// some characters with no intervening whitespace:
|
||||
///
|
||||
/// ```
|
||||
/// #[cfg(feature = "unicode-linebreak")] {
|
||||
/// use textwrap::core::Word;
|
||||
/// use textwrap::WordSeparator::UnicodeBreakProperties;
|
||||
///
|
||||
/// assert_eq!(UnicodeBreakProperties.find_words("Emojis: 😂😍").collect::<Vec<_>>(),
|
||||
/// vec![Word::from("Emojis: "),
|
||||
/// Word::from("😂"),
|
||||
/// Word::from("😍")]);
|
||||
///
|
||||
/// assert_eq!(UnicodeBreakProperties.find_words("CJK: 你好").collect::<Vec<_>>(),
|
||||
/// vec![Word::from("CJK: "),
|
||||
/// Word::from("你"),
|
||||
/// Word::from("好")]);
|
||||
/// }
|
||||
/// ```
|
||||
///
|
||||
/// A U+2060 (Word Joiner) character can be inserted if you want to
|
||||
/// manually override the defaults and keep the characters together:
|
||||
///
|
||||
/// ```
|
||||
/// #[cfg(feature = "unicode-linebreak")] {
|
||||
/// use textwrap::core::Word;
|
||||
/// use textwrap::WordSeparator::UnicodeBreakProperties;
|
||||
///
|
||||
/// assert_eq!(UnicodeBreakProperties.find_words("Emojis: 😂\u{2060}😍").collect::<Vec<_>>(),
|
||||
/// vec![Word::from("Emojis: "),
|
||||
/// Word::from("😂\u{2060}😍")]);
|
||||
/// }
|
||||
/// ```
|
||||
///
|
||||
/// The Unicode line breaking algorithm will also automatically
|
||||
/// suppress break breaks around certain punctuation characters::
|
||||
///
|
||||
/// ```
|
||||
/// #[cfg(feature = "unicode-linebreak")] {
|
||||
/// use textwrap::core::Word;
|
||||
/// use textwrap::WordSeparator::UnicodeBreakProperties;
|
||||
///
|
||||
/// assert_eq!(UnicodeBreakProperties.find_words("[ foo ] bar !").collect::<Vec<_>>(),
|
||||
/// vec![Word::from("[ foo ] "),
|
||||
/// Word::from("bar !")]);
|
||||
/// }
|
||||
/// ```
|
||||
#[cfg(feature = "unicode-linebreak")]
|
||||
UnicodeBreakProperties,
|
||||
|
||||
/// Find words using a custom word separator
|
||||
Custom(fn(line: &str) -> Box<dyn Iterator<Item = Word<'_>> + '_>),
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for WordSeparator {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
WordSeparator::AsciiSpace => f.write_str("AsciiSpace"),
|
||||
#[cfg(feature = "unicode-linebreak")]
|
||||
WordSeparator::UnicodeBreakProperties => f.write_str("UnicodeBreakProperties"),
|
||||
WordSeparator::Custom(_) => f.write_str("Custom(...)"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl WordSeparator {
|
||||
// This function should really return impl Iterator<Item = Word>, but
|
||||
// this isn't possible until Rust supports higher-kinded types:
|
||||
// https://github.com/rust-lang/rfcs/blob/master/text/1522-conservative-impl-trait.md
|
||||
/// Find all words in `line`.
|
||||
pub fn find_words<'a>(&self, line: &'a str) -> Box<dyn Iterator<Item = Word<'a>> + 'a> {
|
||||
match self {
|
||||
WordSeparator::AsciiSpace => find_words_ascii_space(line),
|
||||
#[cfg(feature = "unicode-linebreak")]
|
||||
WordSeparator::UnicodeBreakProperties => find_words_unicode_break_properties(line),
|
||||
WordSeparator::Custom(func) => func(line),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn find_words_ascii_space<'a>(line: &'a str) -> Box<dyn Iterator<Item = Word<'a>> + 'a> {
|
||||
let mut start = 0;
|
||||
let mut in_whitespace = false;
|
||||
let mut char_indices = line.char_indices();
|
||||
|
||||
Box::new(std::iter::from_fn(move || {
|
||||
// for (idx, ch) in char_indices does not work, gives this
|
||||
// error:
|
||||
//
|
||||
// > cannot move out of `char_indices`, a captured variable in
|
||||
// > an `FnMut` closure
|
||||
#[allow(clippy::while_let_on_iterator)]
|
||||
while let Some((idx, ch)) = char_indices.next() {
|
||||
if in_whitespace && ch != ' ' {
|
||||
let word = Word::from(&line[start..idx]);
|
||||
start = idx;
|
||||
in_whitespace = ch == ' ';
|
||||
return Some(word);
|
||||
}
|
||||
|
||||
in_whitespace = ch == ' ';
|
||||
}
|
||||
|
||||
if start < line.len() {
|
||||
let word = Word::from(&line[start..]);
|
||||
start = line.len();
|
||||
return Some(word);
|
||||
}
|
||||
|
||||
None
|
||||
}))
|
||||
}
|
||||
|
||||
// Strip all ANSI escape sequences from `text`.
#[cfg(feature = "unicode-linebreak")]
fn strip_ansi_escape_sequences(text: &str) -> String {
    // Stripping can only shrink the string, so the input length is a
    // safe capacity upper bound.
    let mut stripped = String::with_capacity(text.len());

    let mut chars = text.chars();
    while let Some(ch) = chars.next() {
        // `skip_ansi_escape_sequence` consumes the rest of a sequence
        // from `chars` and reports whether `ch` started one; keep
        // only characters that did not.
        if !skip_ansi_escape_sequence(ch, &mut chars) {
            stripped.push(ch);
        }
    }

    stripped
}
|
||||
|
||||
/// Soft hyphen, also known as a “shy hyphen”. Should show up as ‘-’
/// if a line is broken at this point, and otherwise be invisible.
/// Textwrap does not currently support breaking words at soft
/// hyphens.
#[cfg(feature = "unicode-linebreak")]
const SHY: char = '\u{00ad}';
|
||||
|
||||
/// Find words in line. ANSI escape sequences are ignored in `line`.
///
/// Line-break opportunities are computed on a copy of `line` with the
/// escape sequences stripped out, then mapped back to byte offsets in
/// the original string so the returned `Word`s borrow from `line`.
#[cfg(feature = "unicode-linebreak")]
fn find_words_unicode_break_properties<'a>(
    line: &'a str,
) -> Box<dyn Iterator<Item = Word<'a>> + 'a> {
    // Construct an iterator over (original index, stripped index)
    // tuples. We find the Unicode linebreaks on a stripped string,
    // but we need the original indices so we can form words based on
    // the original string.
    let mut last_stripped_idx = 0;
    let mut char_indices = line.char_indices();
    let mut idx_map = std::iter::from_fn(move || match char_indices.next() {
        Some((orig_idx, ch)) => {
            let stripped_idx = last_stripped_idx;
            // Characters that belong to an escape sequence do not
            // advance the stripped index: they all map to the same
            // stripped position as the next visible character.
            if !skip_ansi_escape_sequence(ch, &mut char_indices.by_ref().map(|(_, ch)| ch)) {
                last_stripped_idx += ch.len_utf8();
            }
            Some((orig_idx, stripped_idx))
        }
        None => None,
    });

    let stripped = strip_ansi_escape_sequences(line);
    let mut opportunities = unicode_linebreak::linebreaks(&stripped)
        .filter(|(idx, _)| {
            #[allow(clippy::match_like_matches_macro)]
            match &stripped[..*idx].chars().next_back() {
                // We suppress breaks at ‘-’ since we want to control
                // this via the WordSplitter.
                Some('-') => false,
                // Soft hyphens are currently not supported since we
                // require all `Word` fragments to be continuous in
                // the input string.
                Some(SHY) => false,
                // Other breaks should be fine!
                _ => true,
            }
        })
        .collect::<Vec<_>>()
        .into_iter();

    // Remove final break opportunity, we will add it below using
    // &line[start..]; This ensures that we correctly include a
    // trailing ANSI escape sequence.
    opportunities.next_back();

    let mut start = 0;
    Box::new(std::iter::from_fn(move || {
        #[allow(clippy::while_let_on_iterator)]
        while let Some((idx, _)) = opportunities.next() {
            // Translate the stripped-string offset back to an offset
            // in the original line; `idx_map` is consumed in lockstep
            // since break opportunities arrive in increasing order.
            if let Some((orig_idx, _)) = idx_map.find(|&(_, stripped_idx)| stripped_idx == idx) {
                let word = Word::from(&line[start..orig_idx]);
                start = orig_idx;
                return Some(word);
            }
        }

        // Emit the final word, including any trailing escape
        // sequence bytes past the last break opportunity.
        if start < line.len() {
            let word = Word::from(&line[start..]);
            start = line.len();
            return Some(word);
        }

        None
    }))
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::WordSeparator::*;
    use super::*;

    // Like assert_eq!, but the left expression is an iterator.
    macro_rules! assert_iter_eq {
        ($left:expr, $right:expr) => {
            assert_eq!($left.collect::<Vec<_>>(), $right);
        };
    }

    // Convert a list of string slices into `Word` fragments.
    fn to_words<'a>(words: Vec<&'a str>) -> Vec<Word<'a>> {
        words.into_iter().map(|w: &str| Word::from(&w)).collect()
    }

    // Generates a pair of tests (ASCII separator and, when the
    // feature is enabled, Unicode separator) from a list of
    // [input, expected-ascii-words, expected-unicode-words] cases.
    macro_rules! test_find_words {
        ($ascii_name:ident,
         $unicode_name:ident,
         $([ $line:expr, $ascii_words:expr, $unicode_words:expr ]),+) => {
            #[test]
            fn $ascii_name() {
                $(
                    let expected_words = to_words($ascii_words.to_vec());
                    let actual_words = WordSeparator::AsciiSpace
                        .find_words($line)
                        .collect::<Vec<_>>();
                    assert_eq!(actual_words, expected_words, "Line: {:?}", $line);
                )+
            }

            #[test]
            #[cfg(feature = "unicode-linebreak")]
            fn $unicode_name() {
                $(
                    let expected_words = to_words($unicode_words.to_vec());
                    let actual_words = WordSeparator::UnicodeBreakProperties
                        .find_words($line)
                        .collect::<Vec<_>>();
                    assert_eq!(actual_words, expected_words, "Line: {:?}", $line);
                )+
            }
        };
    }

    test_find_words!(ascii_space_empty, unicode_empty, ["", [], []]);

    test_find_words!(
        ascii_single_word,
        unicode_single_word,
        ["foo", ["foo"], ["foo"]]
    );

    test_find_words!(
        ascii_two_words,
        unicode_two_words,
        ["foo bar", ["foo ", "bar"], ["foo ", "bar"]]
    );

    test_find_words!(
        ascii_multiple_words,
        unicode_multiple_words,
        ["foo bar", ["foo ", "bar"], ["foo ", "bar"]],
        ["x y z", ["x ", "y ", "z"], ["x ", "y ", "z"]]
    );

    test_find_words!(
        ascii_only_whitespace,
        unicode_only_whitespace,
        [" ", [" "], [" "]],
        ["    ", ["    "], ["    "]]
    );

    test_find_words!(
        ascii_inter_word_whitespace,
        unicode_inter_word_whitespace,
        ["foo   bar", ["foo   ", "bar"], ["foo   ", "bar"]]
    );

    test_find_words!(
        ascii_trailing_whitespace,
        unicode_trailing_whitespace,
        ["foo   ", ["foo   "], ["foo   "]]
    );

    test_find_words!(
        ascii_leading_whitespace,
        unicode_leading_whitespace,
        ["   foo", ["   ", "foo"], ["   ", "foo"]]
    );

    test_find_words!(
        ascii_multi_column_char,
        unicode_multi_column_char,
        ["\u{1f920}", ["\u{1f920}"], ["\u{1f920}"]] // cowboy emoji 🤠
    );

    test_find_words!(
        ascii_hyphens,
        unicode_hyphens,
        ["foo-bar", ["foo-bar"], ["foo-bar"]],
        ["foo- bar", ["foo- ", "bar"], ["foo- ", "bar"]],
        ["foo - bar", ["foo ", "- ", "bar"], ["foo ", "- ", "bar"]],
        ["foo -bar", ["foo ", "-bar"], ["foo ", "-bar"]]
    );

    test_find_words!(
        ascii_newline,
        unicode_newline,
        ["foo\nbar", ["foo\nbar"], ["foo\n", "bar"]]
    );

    test_find_words!(
        ascii_tab,
        unicode_tab,
        ["foo\tbar", ["foo\tbar"], ["foo\t", "bar"]]
    );

    test_find_words!(
        ascii_non_breaking_space,
        unicode_non_breaking_space,
        ["foo\u{00A0}bar", ["foo\u{00A0}bar"], ["foo\u{00A0}bar"]]
    );

    // ANSI color codes must stay attached to the words they wrap.
    // Uses termion, so only runs on Unix.
    #[test]
    #[cfg(unix)]
    fn find_words_colored_text() {
        use termion::color::{Blue, Fg, Green, Reset};

        let green_hello = format!("{}Hello{} ", Fg(Green), Fg(Reset));
        let blue_world = format!("{}World!{}", Fg(Blue), Fg(Reset));
        assert_iter_eq!(
            AsciiSpace.find_words(&format!("{}{}", green_hello, blue_world)),
            vec![Word::from(&green_hello), Word::from(&blue_world)]
        );

        #[cfg(feature = "unicode-linebreak")]
        assert_iter_eq!(
            UnicodeBreakProperties.find_words(&format!("{}{}", green_hello, blue_world)),
            vec![Word::from(&green_hello), Word::from(&blue_world)]
        );
    }

    // An escape sequence in the middle of a word must not split it.
    #[test]
    fn find_words_color_inside_word() {
        let text = "foo\u{1b}[0m\u{1b}[32mbar\u{1b}[0mbaz";
        assert_iter_eq!(AsciiSpace.find_words(&text), vec![Word::from(text)]);

        #[cfg(feature = "unicode-linebreak")]
        assert_iter_eq!(
            UnicodeBreakProperties.find_words(&text),
            vec![Word::from(text)]
        );
    }
}
|
314
vendor/textwrap/src/word_splitters.rs
vendored
Normal file
314
vendor/textwrap/src/word_splitters.rs
vendored
Normal file
@ -0,0 +1,314 @@
|
||||
//! Word splitting functionality.
|
||||
//!
|
||||
//! To wrap text into lines, long words sometimes need to be split
|
||||
//! across lines. The [`WordSplitter`] enum defines this
|
||||
//! functionality.
|
||||
|
||||
use crate::core::{display_width, Word};
|
||||
|
||||
/// The `WordSplitter` enum describes where words can be split.
|
||||
///
|
||||
/// If the textwrap crate has been compiled with the `hyphenation`
|
||||
/// Cargo feature enabled, you will find a
|
||||
/// [`WordSplitter::Hyphenation`] variant. Use this struct for
|
||||
/// language-aware hyphenation:
|
||||
///
|
||||
/// ```
|
||||
/// #[cfg(feature = "hyphenation")] {
|
||||
/// use hyphenation::{Language, Load, Standard};
|
||||
/// use textwrap::{wrap, Options, WordSplitter};
|
||||
///
|
||||
/// let text = "Oxidation is the loss of electrons.";
|
||||
/// let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap();
|
||||
/// let options = Options::new(8).word_splitter(WordSplitter::Hyphenation(dictionary));
|
||||
/// assert_eq!(wrap(text, &options), vec!["Oxida-",
|
||||
/// "tion is",
|
||||
/// "the loss",
|
||||
/// "of elec-",
|
||||
/// "trons."]);
|
||||
/// }
|
||||
/// ```
|
||||
///
|
||||
/// Please see the documentation for the [hyphenation] crate for more
|
||||
/// details.
|
||||
///
|
||||
/// [hyphenation]: https://docs.rs/hyphenation/
|
||||
#[derive(Clone)]
pub enum WordSplitter {
    /// Use this as a [`Options.word_splitter`] to avoid any kind of
    /// hyphenation:
    ///
    /// ```
    /// use textwrap::{wrap, Options, WordSplitter};
    ///
    /// let options = Options::new(8).word_splitter(WordSplitter::NoHyphenation);
    /// assert_eq!(wrap("foo bar-baz", &options),
    ///            vec!["foo", "bar-baz"]);
    /// ```
    ///
    /// [`Options.word_splitter`]: super::Options::word_splitter
    NoHyphenation,

    /// `HyphenSplitter` is the default `WordSplitter` used by
    /// [`Options::new`](super::Options::new). It will split words on
    /// existing hyphens in the word.
    ///
    /// It will only use hyphens that are surrounded by alphanumeric
    /// characters, which prevents a word like `"--foo-bar"` from
    /// being split into `"--"` and `"foo-bar"`.
    ///
    /// # Examples
    ///
    /// ```
    /// use textwrap::WordSplitter;
    ///
    /// assert_eq!(WordSplitter::HyphenSplitter.split_points("--foo-bar"),
    ///            vec![6]);
    /// ```
    HyphenSplitter,

    /// Use a custom function as the word splitter.
    ///
    /// This variant lets you implement a custom word splitter using
    /// your own function.
    ///
    /// # Examples
    ///
    /// ```
    /// use textwrap::WordSplitter;
    ///
    /// fn split_at_underscore(word: &str) -> Vec<usize> {
    ///     word.match_indices('_').map(|(idx, _)| idx + 1).collect()
    /// }
    ///
    /// let word_splitter = WordSplitter::Custom(split_at_underscore);
    /// assert_eq!(word_splitter.split_points("a_long_identifier"),
    ///            vec![2, 7]);
    /// ```
    Custom(fn(word: &str) -> Vec<usize>),

    /// A hyphenation dictionary can be used to do language-specific
    /// hyphenation using patterns from the [hyphenation] crate.
    ///
    /// **Note:** Only available when the `hyphenation` Cargo feature is
    /// enabled.
    ///
    /// [hyphenation]: https://docs.rs/hyphenation/
    #[cfg(feature = "hyphenation")]
    Hyphenation(hyphenation::Standard),
}
|
||||
|
||||
impl std::fmt::Debug for WordSplitter {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
WordSplitter::NoHyphenation => f.write_str("NoHyphenation"),
|
||||
WordSplitter::HyphenSplitter => f.write_str("HyphenSplitter"),
|
||||
WordSplitter::Custom(_) => f.write_str("Custom(...)"),
|
||||
#[cfg(feature = "hyphenation")]
|
||||
WordSplitter::Hyphenation(dict) => write!(f, "Hyphenation({})", dict.language()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl PartialEq<WordSplitter> for WordSplitter {
|
||||
fn eq(&self, other: &WordSplitter) -> bool {
|
||||
match (self, other) {
|
||||
(WordSplitter::NoHyphenation, WordSplitter::NoHyphenation) => true,
|
||||
(WordSplitter::HyphenSplitter, WordSplitter::HyphenSplitter) => true,
|
||||
#[cfg(feature = "hyphenation")]
|
||||
(WordSplitter::Hyphenation(this_dict), WordSplitter::Hyphenation(other_dict)) => {
|
||||
this_dict.language() == other_dict.language()
|
||||
}
|
||||
(_, _) => false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl WordSplitter {
|
||||
/// Return all possible indices where `word` can be split.
|
||||
///
|
||||
/// The indices are in the range `0..word.len()`. They point to
|
||||
/// the index _after_ the split point, i.e., after `-` if
|
||||
/// splitting on hyphens. This way, `word.split_at(idx)` will
|
||||
/// break the word into two well-formed pieces.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```
|
||||
/// use textwrap::WordSplitter;
|
||||
/// assert_eq!(WordSplitter::NoHyphenation.split_points("cannot-be-split"), vec![]);
|
||||
/// assert_eq!(WordSplitter::HyphenSplitter.split_points("can-be-split"), vec![4, 7]);
|
||||
/// assert_eq!(WordSplitter::Custom(|word| vec![word.len()/2]).split_points("middle"), vec![3]);
|
||||
/// ```
|
||||
pub fn split_points(&self, word: &str) -> Vec<usize> {
|
||||
match self {
|
||||
WordSplitter::NoHyphenation => Vec::new(),
|
||||
WordSplitter::HyphenSplitter => {
|
||||
let mut splits = Vec::new();
|
||||
|
||||
for (idx, _) in word.match_indices('-') {
|
||||
// We only use hyphens that are surrounded by alphanumeric
|
||||
// characters. This is to avoid splitting on repeated hyphens,
|
||||
// such as those found in --foo-bar.
|
||||
let prev = word[..idx].chars().next_back();
|
||||
let next = word[idx + 1..].chars().next();
|
||||
|
||||
if prev.filter(|ch| ch.is_alphanumeric()).is_some()
|
||||
&& next.filter(|ch| ch.is_alphanumeric()).is_some()
|
||||
{
|
||||
splits.push(idx + 1); // +1 due to width of '-'.
|
||||
}
|
||||
}
|
||||
|
||||
splits
|
||||
}
|
||||
WordSplitter::Custom(splitter_func) => splitter_func(word),
|
||||
#[cfg(feature = "hyphenation")]
|
||||
WordSplitter::Hyphenation(dictionary) => {
|
||||
use hyphenation::Hyphenator;
|
||||
dictionary.hyphenate(word).breaks
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Split words into smaller words according to the split points given
/// by `word_splitter`.
///
/// Note that we split all words, regardless of their length. This is
/// to more cleanly separate the business of splitting (including
/// automatic hyphenation) from the business of word wrapping.
pub fn split_words<'a, I>(
    words: I,
    word_splitter: &'a WordSplitter,
) -> impl Iterator<Item = Word<'a>>
where
    I: IntoIterator<Item = Word<'a>>,
{
    words.into_iter().flat_map(move |word| {
        // `prev` is the start of the next piece; it doubles as state
        // for the iterator below (see the sentinel assignment at the
        // end).
        let mut prev = 0;
        let mut split_points = word_splitter.split_points(&word).into_iter();
        std::iter::from_fn(move || {
            if let Some(idx) = split_points.next() {
                // Pieces that don't already end in '-' get a "-"
                // penalty string, shown only if the line breaks here.
                let need_hyphen = !word[..idx].ends_with('-');
                let w = Word {
                    word: &word.word[prev..idx],
                    width: display_width(&word[prev..idx]),
                    whitespace: "",
                    penalty: if need_hyphen { "-" } else { "" },
                };
                prev = idx;
                return Some(w);
            }

            // Final piece: carries the original word's whitespace and
            // penalty. The `prev == 0` test ensures an empty word
            // (e.g. pure whitespace) is still emitted once.
            if prev < word.word.len() || prev == 0 {
                let w = Word {
                    word: &word.word[prev..],
                    width: display_width(&word[prev..]),
                    whitespace: word.whitespace,
                    penalty: word.penalty,
                };
                // Sentinel past the end so neither branch fires again.
                prev = word.word.len() + 1;
                return Some(w);
            }

            None
        })
    })
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // Like assert_eq!, but the left expression is an iterator.
    macro_rules! assert_iter_eq {
        ($left:expr, $right:expr) => {
            assert_eq!($left.collect::<Vec<_>>(), $right);
        };
    }

    #[test]
    fn split_words_no_words() {
        assert_iter_eq!(split_words(vec![], &WordSplitter::HyphenSplitter), vec![]);
    }

    // A whitespace-only word must survive splitting unchanged.
    #[test]
    fn split_words_empty_word() {
        assert_iter_eq!(
            split_words(vec![Word::from("   ")], &WordSplitter::HyphenSplitter),
            vec![Word::from("   ")]
        );
    }

    #[test]
    fn split_words_single_word() {
        assert_iter_eq!(
            split_words(vec![Word::from("foobar")], &WordSplitter::HyphenSplitter),
            vec![Word::from("foobar")]
        );
    }

    #[test]
    fn split_words_hyphen_splitter() {
        assert_iter_eq!(
            split_words(vec![Word::from("foo-bar")], &WordSplitter::HyphenSplitter),
            vec![Word::from("foo-"), Word::from("bar")]
        );
    }

    #[test]
    fn split_words_no_hyphenation() {
        assert_iter_eq!(
            split_words(vec![Word::from("foo-bar")], &WordSplitter::NoHyphenation),
            vec![Word::from("foo-bar")]
        );
    }

    // A split inside plain text gets a "-" penalty; a split right
    // after an existing '-' must not add a second hyphen.
    #[test]
    fn split_words_adds_penalty() {
        let fixed_split_point = |_: &str| vec![3];

        assert_iter_eq!(
            split_words(
                vec![Word::from("foobar")].into_iter(),
                &WordSplitter::Custom(fixed_split_point)
            ),
            vec![
                Word {
                    word: "foo",
                    width: 3,
                    whitespace: "",
                    penalty: "-"
                },
                Word {
                    word: "bar",
                    width: 3,
                    whitespace: "",
                    penalty: ""
                }
            ]
        );

        assert_iter_eq!(
            split_words(
                vec![Word::from("fo-bar")].into_iter(),
                &WordSplitter::Custom(fixed_split_point)
            ),
            vec![
                Word {
                    word: "fo-",
                    width: 3,
                    whitespace: "",
                    penalty: ""
                },
                Word {
                    word: "bar",
                    width: 3,
                    whitespace: "",
                    penalty: ""
                }
            ]
        );
    }
}
|
381
vendor/textwrap/src/wrap_algorithms.rs
vendored
Normal file
381
vendor/textwrap/src/wrap_algorithms.rs
vendored
Normal file
@ -0,0 +1,381 @@
|
||||
//! Word wrapping algorithms.
|
||||
//!
|
||||
//! After a text has been broken into words (or [`Fragment`]s), one
|
||||
//! now has to decide how to break the fragments into lines. The
|
||||
//! simplest algorithm for this is implemented by [`wrap_first_fit`]:
|
||||
//! it uses no look-ahead and simply adds fragments to the line as
|
||||
//! long as they fit. However, this can lead to poor line breaks if a
|
||||
//! large fragment almost-but-not-quite fits on a line. When that
|
||||
//! happens, the fragment is moved to the next line and it will leave
|
||||
//! behind a large gap. A more advanced algorithm, implemented by
|
||||
//! [`wrap_optimal_fit`], will take this into account. The optimal-fit
|
||||
//! algorithm considers all possible line breaks and will attempt to
|
||||
//! minimize the gaps left behind by overly short lines.
|
||||
//!
|
||||
//! While both algorithms run in linear time, the first-fit algorithm
|
||||
//! is about 4 times faster than the optimal-fit algorithm.
|
||||
|
||||
#[cfg(feature = "smawk")]
|
||||
mod optimal_fit;
|
||||
#[cfg(feature = "smawk")]
|
||||
pub use optimal_fit::{wrap_optimal_fit, OverflowError, Penalties};
|
||||
|
||||
use crate::core::{Fragment, Word};
|
||||
|
||||
/// Describes how to wrap words into lines.
|
||||
///
|
||||
/// The simplest approach is to wrap words one word at a time and
|
||||
/// accept the first way of wrapping which fits
|
||||
/// ([`WrapAlgorithm::FirstFit`]). If the `smawk` Cargo feature is
|
||||
/// enabled, a more complex algorithm is available which will look at
|
||||
/// an entire paragraph at a time in order to find optimal line breaks
|
||||
/// ([`WrapAlgorithm::OptimalFit`]).
|
||||
// `Copy` is possible because every payload is `Copy`: `Penalties`
// and the bare function pointer in `Custom`.
#[derive(Clone, Copy)]
pub enum WrapAlgorithm {
    /// Wrap words using a fast and simple algorithm.
    ///
    /// This algorithm uses no look-ahead when finding line breaks.
    /// Implemented by [`wrap_first_fit`], please see that function for
    /// details and examples.
    FirstFit,

    /// Wrap words using an advanced algorithm with look-ahead.
    ///
    /// This wrapping algorithm considers the entire paragraph to find
    /// optimal line breaks. When wrapping text, "penalties" are
    /// assigned to line breaks based on the gaps left at the end of
    /// lines. See [`Penalties`] for details.
    ///
    /// The underlying wrapping algorithm is implemented by
    /// [`wrap_optimal_fit`], please see that function for examples.
    ///
    /// **Note:** Only available when the `smawk` Cargo feature is
    /// enabled.
    #[cfg(feature = "smawk")]
    OptimalFit(Penalties),

    /// Custom wrapping function.
    ///
    /// Use this if you want to implement your own wrapping algorithm.
    /// The function can freely decide how to turn a slice of
    /// [`Word`]s into lines.
    ///
    /// # Example
    ///
    /// ```
    /// use textwrap::core::Word;
    /// use textwrap::{wrap, Options, WrapAlgorithm};
    ///
    /// fn stair<'a, 'b>(words: &'b [Word<'a>], _: &'b [usize]) -> Vec<&'b [Word<'a>]> {
    ///     let mut lines = Vec::new();
    ///     let mut step = 1;
    ///     let mut start_idx = 0;
    ///     while start_idx + step <= words.len() {
    ///         lines.push(&words[start_idx .. start_idx+step]);
    ///         start_idx += step;
    ///         step += 1;
    ///     }
    ///     lines
    /// }
    ///
    /// let options = Options::new(10).wrap_algorithm(WrapAlgorithm::Custom(stair));
    /// assert_eq!(wrap("First, second, third, fourth, fifth, sixth", options),
    ///            vec!["First,",
    ///                 "second, third,",
    ///                 "fourth, fifth, sixth"]);
    /// ```
    Custom(for<'a, 'b> fn(words: &'b [Word<'a>], line_widths: &'b [usize]) -> Vec<&'b [Word<'a>]>),
}
|
||||
|
||||
impl std::fmt::Debug for WrapAlgorithm {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
WrapAlgorithm::FirstFit => f.write_str("FirstFit"),
|
||||
#[cfg(feature = "smawk")]
|
||||
WrapAlgorithm::OptimalFit(penalties) => write!(f, "OptimalFit({:?})", penalties),
|
||||
WrapAlgorithm::Custom(_) => f.write_str("Custom(...)"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl WrapAlgorithm {
    /// Create new wrap algorithm.
    ///
    /// The best wrapping algorithm is used by default, i.e.,
    /// [`WrapAlgorithm::OptimalFit`] if available, otherwise
    /// [`WrapAlgorithm::FirstFit`].
    pub const fn new() -> Self {
        // Exactly one of the two cfg blocks is compiled in, so this
        // is a single expression in either configuration.
        #[cfg(not(feature = "smawk"))]
        {
            WrapAlgorithm::FirstFit
        }

        #[cfg(feature = "smawk")]
        {
            WrapAlgorithm::new_optimal_fit()
        }
    }

    /// New [`WrapAlgorithm::OptimalFit`] with default penalties. This
    /// works well for monospace text.
    ///
    /// **Note:** Only available when the `smawk` Cargo feature is
    /// enabled.
    #[cfg(feature = "smawk")]
    pub const fn new_optimal_fit() -> Self {
        WrapAlgorithm::OptimalFit(Penalties::new())
    }

    /// Wrap words according to line widths.
    ///
    /// The `line_widths` slice gives the target line width for each
    /// line (the last slice element is repeated as necessary). This
    /// can be used to implement hanging indentation.
    #[inline]
    pub fn wrap<'a, 'b>(
        &self,
        words: &'b [Word<'a>],
        line_widths: &'b [usize],
    ) -> Vec<&'b [Word<'a>]> {
        // Every integer up to 2u64.pow(f64::MANTISSA_DIGITS) = 2**53
        // = 9_007_199_254_740_992 can be represented without loss by
        // a f64. Larger line widths will be rounded to the nearest
        // representable number.
        let f64_line_widths = line_widths.iter().map(|w| *w as f64).collect::<Vec<_>>();

        match self {
            WrapAlgorithm::FirstFit => wrap_first_fit(words, &f64_line_widths),

            #[cfg(feature = "smawk")]
            WrapAlgorithm::OptimalFit(penalties) => {
                // The computation cannot overflow when the line
                // widths are restricted to usize, so the unwrap is
                // safe here.
                wrap_optimal_fit(words, &f64_line_widths, penalties).unwrap()
            }

            // Custom functions receive the original usize widths.
            WrapAlgorithm::Custom(func) => func(words, line_widths),
        }
    }
}
|
||||
|
||||
impl Default for WrapAlgorithm {
|
||||
fn default() -> Self {
|
||||
WrapAlgorithm::new()
|
||||
}
|
||||
}
|
||||
|
||||
/// Wrap abstract fragments into lines with a first-fit algorithm.
|
||||
///
|
||||
/// The `line_widths` slice gives the target line width for each line
|
||||
/// (the last slice element is repeated as necessary). This can be
|
||||
/// used to implement hanging indentation.
|
||||
///
|
||||
/// The fragments must already have been split into the desired
|
||||
/// widths, this function will not (and cannot) attempt to split them
|
||||
/// further when arranging them into lines.
|
||||
///
|
||||
/// # First-Fit Algorithm
|
||||
///
|
||||
/// This implements a simple “greedy” algorithm: accumulate fragments
|
||||
/// one by one and when a fragment no longer fits, start a new line.
|
||||
/// There is no look-ahead, we simply take first fit of the fragments
|
||||
/// we find.
|
||||
///
|
||||
/// While fast and predictable, this algorithm can produce poor line
|
||||
/// breaks when a long fragment is moved to a new line, leaving behind
|
||||
/// a large gap:
|
||||
///
|
||||
/// ```
|
||||
/// use textwrap::core::Word;
|
||||
/// use textwrap::wrap_algorithms::wrap_first_fit;
|
||||
/// use textwrap::WordSeparator;
|
||||
///
|
||||
/// // Helper to convert wrapped lines to a Vec<String>.
|
||||
/// fn lines_to_strings(lines: Vec<&[Word<'_>]>) -> Vec<String> {
|
||||
/// lines.iter().map(|line| {
|
||||
/// line.iter().map(|word| &**word).collect::<Vec<_>>().join(" ")
|
||||
/// }).collect::<Vec<_>>()
|
||||
/// }
|
||||
///
|
||||
/// let text = "These few words will unfortunately not wrap nicely.";
|
||||
/// let words = WordSeparator::AsciiSpace.find_words(text).collect::<Vec<_>>();
|
||||
/// assert_eq!(lines_to_strings(wrap_first_fit(&words, &[15.0])),
|
||||
/// vec!["These few words",
|
||||
/// "will", // <-- short line
|
||||
/// "unfortunately",
|
||||
/// "not wrap",
|
||||
/// "nicely."]);
|
||||
///
|
||||
/// // We can avoid the short line if we look ahead:
|
||||
/// #[cfg(feature = "smawk")]
|
||||
/// use textwrap::wrap_algorithms::{wrap_optimal_fit, Penalties};
|
||||
/// #[cfg(feature = "smawk")]
|
||||
/// assert_eq!(lines_to_strings(wrap_optimal_fit(&words, &[15.0], &Penalties::new()).unwrap()),
|
||||
/// vec!["These few",
|
||||
/// "words will",
|
||||
/// "unfortunately",
|
||||
/// "not wrap",
|
||||
/// "nicely."]);
|
||||
/// ```
|
||||
///
|
||||
/// The [`wrap_optimal_fit`] function was used above to get better
|
||||
/// line breaks. It uses an advanced algorithm which tries to avoid
|
||||
/// short lines. This function is about 4 times faster than
|
||||
/// [`wrap_optimal_fit`].
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// Imagine you're building a house site and you have a number of
|
||||
/// tasks you need to execute. Things like pour foundation, complete
|
||||
/// framing, install plumbing, electric cabling, install insulation.
|
||||
///
|
||||
/// The construction workers can only work during daytime, so they
|
||||
/// need to pack up everything at night. Because they need to secure
|
||||
/// their tools and move machines back to the garage, this process
|
||||
/// takes much more time than the time it would take them to simply
|
||||
/// switch to another task.
|
||||
///
|
||||
/// You would like to make a list of tasks to execute every day based
|
||||
/// on your estimates. You can model this with a program like this:
|
||||
///
|
||||
/// ```
|
||||
/// use textwrap::core::{Fragment, Word};
|
||||
/// use textwrap::wrap_algorithms::wrap_first_fit;
|
||||
///
|
||||
/// #[derive(Debug)]
|
||||
/// struct Task<'a> {
|
||||
/// name: &'a str,
|
||||
/// hours: f64, // Time needed to complete task.
|
||||
/// sweep: f64, // Time needed for a quick sweep after task during the day.
|
||||
/// cleanup: f64, // Time needed for full cleanup if day ends with this task.
|
||||
/// }
|
||||
///
|
||||
/// impl Fragment for Task<'_> {
|
||||
/// fn width(&self) -> f64 { self.hours }
|
||||
/// fn whitespace_width(&self) -> f64 { self.sweep }
|
||||
/// fn penalty_width(&self) -> f64 { self.cleanup }
|
||||
/// }
|
||||
///
|
||||
/// // The morning tasks
|
||||
/// let tasks = vec![
|
||||
/// Task { name: "Foundation", hours: 4.0, sweep: 2.0, cleanup: 3.0 },
|
||||
/// Task { name: "Framing", hours: 3.0, sweep: 1.0, cleanup: 2.0 },
|
||||
/// Task { name: "Plumbing", hours: 2.0, sweep: 2.0, cleanup: 2.0 },
|
||||
/// Task { name: "Electrical", hours: 2.0, sweep: 1.0, cleanup: 2.0 },
|
||||
/// Task { name: "Insulation", hours: 2.0, sweep: 1.0, cleanup: 2.0 },
|
||||
/// Task { name: "Drywall", hours: 3.0, sweep: 1.0, cleanup: 2.0 },
|
||||
/// Task { name: "Floors", hours: 3.0, sweep: 1.0, cleanup: 2.0 },
|
||||
/// Task { name: "Countertops", hours: 1.0, sweep: 1.0, cleanup: 2.0 },
|
||||
/// Task { name: "Bathrooms", hours: 2.0, sweep: 1.0, cleanup: 2.0 },
|
||||
/// ];
|
||||
///
|
||||
/// // Fill tasks into days, taking `day_length` into account. The
|
||||
/// // output shows the hours worked per day along with the names of
|
||||
/// // the tasks for that day.
|
||||
/// fn assign_days<'a>(tasks: &[Task<'a>], day_length: f64) -> Vec<(f64, Vec<&'a str>)> {
|
||||
/// let mut days = Vec::new();
|
||||
/// // Assign tasks to days. The assignment is a vector of slices,
|
||||
/// // with a slice per day.
|
||||
/// let assigned_days: Vec<&[Task<'a>]> = wrap_first_fit(&tasks, &[day_length]);
|
||||
/// for day in assigned_days.iter() {
|
||||
/// let last = day.last().unwrap();
|
||||
/// let work_hours: f64 = day.iter().map(|t| t.hours + t.sweep).sum();
|
||||
/// let names = day.iter().map(|t| t.name).collect::<Vec<_>>();
|
||||
/// days.push((work_hours - last.sweep + last.cleanup, names));
|
||||
/// }
|
||||
/// days
|
||||
/// }
|
||||
///
|
||||
/// // With a single crew working 8 hours a day:
|
||||
/// assert_eq!(
|
||||
/// assign_days(&tasks, 8.0),
|
||||
/// [
|
||||
/// (7.0, vec!["Foundation"]),
|
||||
/// (8.0, vec!["Framing", "Plumbing"]),
|
||||
/// (7.0, vec!["Electrical", "Insulation"]),
|
||||
/// (5.0, vec!["Drywall"]),
|
||||
/// (7.0, vec!["Floors", "Countertops"]),
|
||||
/// (4.0, vec!["Bathrooms"]),
|
||||
/// ]
|
||||
/// );
|
||||
///
|
||||
/// // With two crews working in shifts, 16 hours a day:
|
||||
/// assert_eq!(
|
||||
/// assign_days(&tasks, 16.0),
|
||||
/// [
|
||||
/// (14.0, vec!["Foundation", "Framing", "Plumbing"]),
|
||||
/// (15.0, vec!["Electrical", "Insulation", "Drywall", "Floors"]),
|
||||
/// (6.0, vec!["Countertops", "Bathrooms"]),
|
||||
/// ]
|
||||
/// );
|
||||
/// ```
|
||||
///
|
||||
/// Apologies to anyone who actually knows how to build a house and
|
||||
/// knows how long each step takes :-)
|
||||
pub fn wrap_first_fit<'a, 'b, T: Fragment>(
|
||||
fragments: &'a [T],
|
||||
line_widths: &'b [f64],
|
||||
) -> Vec<&'a [T]> {
|
||||
// The final line width is used for all remaining lines.
|
||||
let default_line_width = line_widths.last().copied().unwrap_or(0.0);
|
||||
let mut lines = Vec::new();
|
||||
let mut start = 0;
|
||||
let mut width = 0.0;
|
||||
|
||||
for (idx, fragment) in fragments.iter().enumerate() {
|
||||
let line_width = line_widths
|
||||
.get(lines.len())
|
||||
.copied()
|
||||
.unwrap_or(default_line_width);
|
||||
if width + fragment.width() + fragment.penalty_width() > line_width && idx > start {
|
||||
lines.push(&fragments[start..idx]);
|
||||
start = idx;
|
||||
width = 0.0;
|
||||
}
|
||||
width += fragment.width() + fragment.whitespace_width();
|
||||
}
|
||||
lines.push(&fragments[start..]);
|
||||
lines
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Fragment with a fixed width, unit whitespace and no penalty.
    #[derive(Debug, PartialEq)]
    struct Word(f64);

    impl Fragment for Word {
        fn width(&self) -> f64 {
            self.0
        }
        fn whitespace_width(&self) -> f64 {
            1.0
        }
        fn penalty_width(&self) -> f64 {
            0.0
        }
    }

    #[test]
    fn wrap_string_longer_than_f64() {
        let words: Vec<Word> = [1e307, 2e307, 3e307, 4e307, 5e307, 6e307]
            .iter()
            .map(|&w| Word(w))
            .collect();
        // Wrap at just under f64::MAX (~19e307). The tiny
        // whitespace_widths disappear because of loss of precision.
        let expected: &[&[Word]] = &[&words[..5], &words[5..]];
        assert_eq!(wrap_first_fit(&words, &[15e307]), expected);
    }
}
|
433
vendor/textwrap/src/wrap_algorithms/optimal_fit.rs
vendored
Normal file
433
vendor/textwrap/src/wrap_algorithms/optimal_fit.rs
vendored
Normal file
@ -0,0 +1,433 @@
|
||||
use std::cell::RefCell;
|
||||
|
||||
use crate::core::Fragment;
|
||||
|
||||
/// Penalties for
|
||||
/// [`WrapAlgorithm::OptimalFit`](crate::WrapAlgorithm::OptimalFit)
|
||||
/// and [`wrap_optimal_fit`].
|
||||
///
|
||||
/// This wrapping algorithm in [`wrap_optimal_fit`] considers the
|
||||
/// entire paragraph to find optimal line breaks. When wrapping text,
|
||||
/// "penalties" are assigned to line breaks based on the gaps left at
|
||||
/// the end of lines. The penalties are given by this struct, with
|
||||
/// [`Penalties::default`] assigning penalties that work well for
|
||||
/// monospace text.
|
||||
///
|
||||
/// If you are wrapping proportional text, you are advised to assign
|
||||
/// your own penalties according to your font size. See the individual
|
||||
/// penalties below for details.
|
||||
///
|
||||
/// **Note:** Only available when the `smawk` Cargo feature is
|
||||
/// enabled.
|
||||
#[derive(Clone, Copy, Debug)]
pub struct Penalties {
    /// Per-line penalty. This is added for every line, which makes it
    /// expensive to output more lines than the minimum required.
    pub nline_penalty: usize,

    /// Per-character cost for lines that overflow the target line width.
    ///
    /// With a default value of 50², every single character costs as
    /// much as leaving a gap of 50 characters behind. This is because
    /// we assign a cost of `gap * gap` to a short line. When
    /// wrapping monospace text, we can overflow the line by 1
    /// character in extreme cases:
    ///
    /// ```
    /// use textwrap::core::Word;
    /// use textwrap::wrap_algorithms::{wrap_optimal_fit, Penalties};
    ///
    /// let short = "foo ";
    /// let long = "x".repeat(50);
    /// let length = (short.len() + long.len()) as f64;
    /// let fragments = vec![Word::from(short), Word::from(&long)];
    /// let penalties = Penalties::new();
    ///
    /// // Perfect fit, both words are on a single line with no overflow.
    /// let wrapped = wrap_optimal_fit(&fragments, &[length], &penalties).unwrap();
    /// assert_eq!(wrapped, vec![&[Word::from(short), Word::from(&long)]]);
    ///
    /// // The words no longer fit, yet we get a single line back. While
    /// // the cost of overflow (`1 * 2500`) is the same as the cost of the
    /// // gap (`50 * 50 = 2500`), the tie is broken by `nline_penalty`
    /// // which makes it cheaper to overflow than to use two lines.
    /// let wrapped = wrap_optimal_fit(&fragments, &[length - 1.0], &penalties).unwrap();
    /// assert_eq!(wrapped, vec![&[Word::from(short), Word::from(&long)]]);
    ///
    /// // The cost of overflow would be 2 * 2500, whereas the cost of
    /// // the gap is only `49 * 49 + nline_penalty = 2401 + 1000 =
    /// // 3401`. We therefore get two lines.
    /// let wrapped = wrap_optimal_fit(&fragments, &[length - 2.0], &penalties).unwrap();
    /// assert_eq!(wrapped, vec![&[Word::from(short)],
    ///                          &[Word::from(&long)]]);
    /// ```
    ///
    /// This only happens if the overflowing word is 50 characters
    /// long _and_ if the word overflows the line by exactly one
    /// character. If it overflows by more than one character, the
    /// overflow penalty will quickly outgrow the cost of the gap, as
    /// seen above.
    pub overflow_penalty: usize,

    /// When should a single word on the last line be considered
    /// "too short"?
    ///
    /// If the last line of the text consists of a single word and if
    /// this word is shorter than `1 / short_last_line_fraction` of
    /// the line width, then the final line will be considered "short"
    /// and `short_last_line_penalty` is added as an extra penalty.
    ///
    /// The effect of this is to avoid a final line consisting of a
    /// single small word. For example, with a
    /// `short_last_line_penalty` of 25 (the default), a gap of up to
    /// 5 columns will be seen as more desirable than having a final
    /// short line.
    ///
    /// ## Examples
    ///
    /// ```
    /// use textwrap::{wrap, wrap_algorithms, Options, WrapAlgorithm};
    ///
    /// let text = "This is a demo of the short last line penalty.";
    ///
    /// // The first-fit algorithm leaves a single short word on the last line:
    /// assert_eq!(wrap(text, Options::new(37).wrap_algorithm(WrapAlgorithm::FirstFit)),
    ///            vec!["This is a demo of the short last line",
    ///                 "penalty."]);
    ///
    /// #[cfg(feature = "smawk")] {
    /// let mut penalties = wrap_algorithms::Penalties::new();
    ///
    /// // Since "penalty." is shorter than 25% of the line width, the
    /// // optimal-fit algorithm adds a penalty of 25. This is enough
    /// // to move "line " down:
    /// assert_eq!(wrap(text, Options::new(37).wrap_algorithm(WrapAlgorithm::OptimalFit(penalties))),
    ///            vec!["This is a demo of the short last",
    ///                 "line penalty."]);
    ///
    /// // We can change the meaning of "short" lines. Here, only words
    /// // shorter than 1/10th of the line width will be considered short:
    /// penalties.short_last_line_fraction = 10;
    /// assert_eq!(wrap(text, Options::new(37).wrap_algorithm(WrapAlgorithm::OptimalFit(penalties))),
    ///            vec!["This is a demo of the short last line",
    ///                 "penalty."]);
    ///
    /// // If desired, the penalty can also be disabled:
    /// penalties.short_last_line_fraction = 4;
    /// penalties.short_last_line_penalty = 0;
    /// assert_eq!(wrap(text, Options::new(37).wrap_algorithm(WrapAlgorithm::OptimalFit(penalties))),
    ///            vec!["This is a demo of the short last line",
    ///                 "penalty."]);
    /// }
    /// ```
    pub short_last_line_fraction: usize,

    /// Penalty for a last line with a single short word.
    ///
    /// Set this to zero if you do not want to penalize short last lines.
    pub short_last_line_penalty: usize,

    /// Penalty for lines ending with a hyphen.
    pub hyphen_penalty: usize,
}

impl Penalties {
    /// Default penalties for monospace text.
    ///
    /// The penalties here work well for monospace text. This is
    /// because they expect the gaps at the end of lines to be roughly
    /// in the range `0..100`. If the gaps are larger, the
    /// `overflow_penalty` and `hyphen_penalty` become insignificant.
    pub const fn new() -> Self {
        Penalties {
            nline_penalty: 1000,
            overflow_penalty: 50 * 50,
            short_last_line_fraction: 4,
            short_last_line_penalty: 25,
            hyphen_penalty: 25,
        }
    }
}

impl Default for Penalties {
    fn default() -> Self {
        Self::new()
    }
}
|
||||
|
||||
/// Cache for line numbers. This is necessary to avoid a O(n**2)
/// behavior when computing line numbers in [`wrap_optimal_fit`].
struct LineNumbers {
    // Memoized line numbers: entry `i` holds the line number of the
    // line that starts with fragment `i`. Filled lazily, in order.
    line_numbers: RefCell<Vec<usize>>,
}

impl LineNumbers {
    /// Create a cache with room for `size` entries. Fragment 0
    /// always starts line 0, so that entry is seeded eagerly.
    fn new(size: usize) -> Self {
        let mut cache = Vec::with_capacity(size);
        cache.push(0);
        LineNumbers {
            line_numbers: RefCell::new(cache),
        }
    }

    /// Line number of fragment `i`, where `minima[j].0` is the index
    /// of the optimal break before fragment `j`. Missing entries up
    /// to `i` are computed on demand and memoized.
    fn get<T>(&self, i: usize, minima: &[(usize, T)]) -> usize {
        loop {
            let filled = self.line_numbers.borrow().len();
            if filled > i {
                break;
            }
            // `minima[filled].0 < filled`, so the recursive lookup
            // hits an already-cached entry and returns immediately.
            let number = 1 + self.get(minima[filled].0, minima);
            self.line_numbers.borrow_mut().push(number);
        }

        self.line_numbers.borrow()[i]
    }
}
|
||||
|
||||
/// Overflow error during the [`wrap_optimal_fit`] computation.
#[derive(Debug, PartialEq, Eq)]
pub struct OverflowError;

impl std::fmt::Display for OverflowError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("wrap_optimal_fit cost computation overflowed")
    }
}

impl std::error::Error for OverflowError {}
|
||||
|
||||
/// Wrap abstract fragments into lines with an optimal-fit algorithm.
|
||||
///
|
||||
/// The `line_widths` slice gives the target line width for each line
|
||||
/// (the last slice element is repeated as necessary). This can be
|
||||
/// used to implement hanging indentation.
|
||||
///
|
||||
/// The fragments must already have been split into the desired
|
||||
/// widths, this function will not (and cannot) attempt to split them
|
||||
/// further when arranging them into lines.
|
||||
///
|
||||
/// # Optimal-Fit Algorithm
|
||||
///
|
||||
/// The algorithm considers all possible break points and picks the
|
||||
/// breaks which minimizes the gaps at the end of each line. More
|
||||
/// precisely, the algorithm assigns a cost or penalty to each break
|
||||
/// point, determined by `cost = gap * gap` where `gap = target_width -
|
||||
/// line_width`. Shorter lines are thus penalized more heavily since
|
||||
/// they leave behind a larger gap.
|
||||
///
|
||||
/// We can illustrate this with the text “To be, or not to be: that is
|
||||
/// the question”. We will be wrapping it in a narrow column with room
|
||||
/// for only 10 characters. The [greedy
|
||||
/// algorithm](super::wrap_first_fit) will produce these lines, each
|
||||
/// annotated with the corresponding penalty:
|
||||
///
|
||||
/// ```text
|
||||
/// "To be, or" 1² = 1
|
||||
/// "not to be:" 0² = 0
|
||||
/// "that is" 3² = 9
|
||||
/// "the" 7² = 49
|
||||
/// "question" 2² = 4
|
||||
/// ```
|
||||
///
|
||||
/// We see that line four with “the” leaves a gap of 7 columns, which
|
||||
/// gives it a penalty of 49. The sum of the penalties is 63.
|
||||
///
|
||||
/// There are 10 words, which means that there are `2_u32.pow(9)` or
|
||||
/// 512 different ways to typeset it. We can compute
|
||||
/// the sum of the penalties for each possible line break and search
|
||||
/// for the one with the lowest sum:
|
||||
///
|
||||
/// ```text
|
||||
/// "To be," 4² = 16
|
||||
/// "or not to" 1² = 1
|
||||
/// "be: that" 2² = 4
|
||||
/// "is the" 4² = 16
|
||||
/// "question" 2² = 4
|
||||
/// ```
|
||||
///
|
||||
/// The sum of the penalties is 41, which is better than what the
|
||||
/// greedy algorithm produced.
|
||||
///
|
||||
/// Searching through all possible combinations would normally be
|
||||
/// prohibitively slow. However, it turns out that the problem can be
|
||||
/// formulated as the task of finding column minima in a cost matrix.
|
||||
/// This matrix has a special form (totally monotone) which lets us
|
||||
/// use a [linear-time algorithm called
|
||||
/// SMAWK](https://lib.rs/crates/smawk) to find the optimal break
|
||||
/// points.
|
||||
///
|
||||
/// This means that the time complexity remains O(_n_) where _n_ is
|
||||
/// the number of words. Compared to
|
||||
/// [`wrap_first_fit`](super::wrap_first_fit), this function is about
|
||||
/// 4 times slower.
|
||||
///
|
||||
/// The optimization of per-line costs over the entire paragraph is
|
||||
/// inspired by the line breaking algorithm used in TeX, as described
|
||||
/// in the 1981 article [_Breaking Paragraphs into
|
||||
/// Lines_](http://www.eprg.org/G53DOC/pdfs/knuth-plass-breaking.pdf)
|
||||
/// by Knuth and Plass. The implementation here is based on [Python
|
||||
/// code by David
|
||||
/// Eppstein](https://github.com/jfinkels/PADS/blob/master/pads/wrap.py).
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// In case of an overflow during the cost computation, an `Err` is
|
||||
/// returned. Overflows happen when fragments or lines have infinite
|
||||
/// widths (`f64::INFINITY`) or if the widths are so large that the
|
||||
/// gaps at the end of lines have sizes larger than `f64::MAX.sqrt()`
|
||||
/// (approximately 1e154):
|
||||
///
|
||||
/// ```
|
||||
/// use textwrap::core::Fragment;
|
||||
/// use textwrap::wrap_algorithms::{wrap_optimal_fit, OverflowError, Penalties};
|
||||
///
|
||||
/// #[derive(Debug, PartialEq)]
|
||||
/// struct Word(f64);
|
||||
///
|
||||
/// impl Fragment for Word {
|
||||
/// fn width(&self) -> f64 { self.0 }
|
||||
/// fn whitespace_width(&self) -> f64 { 1.0 }
|
||||
/// fn penalty_width(&self) -> f64 { 0.0 }
|
||||
/// }
|
||||
///
|
||||
/// // Wrapping overflows because 1e155 * 1e155 = 1e310, which is
|
||||
/// // larger than f64::MAX:
|
||||
/// assert_eq!(wrap_optimal_fit(&[Word(0.0), Word(0.0)], &[1e155], &Penalties::default()),
|
||||
/// Err(OverflowError));
|
||||
/// ```
|
||||
///
|
||||
/// When using fragment widths and line widths which fit inside an
|
||||
/// `u64`, overflows cannot happen. This means that fragments derived
|
||||
/// from a `&str` cannot cause overflows.
|
||||
///
|
||||
/// **Note:** Only available when the `smawk` Cargo feature is
|
||||
/// enabled.
|
||||
pub fn wrap_optimal_fit<'a, 'b, T: Fragment>(
    fragments: &'a [T],
    line_widths: &'b [f64],
    penalties: &'b Penalties,
) -> Result<Vec<&'a [T]>, OverflowError> {
    // The final line width is used for all remaining lines.
    let default_line_width = line_widths.last().copied().unwrap_or(0.0);
    // Prefix sums: widths[i] is the total width of fragments[..i],
    // each fragment counted together with its trailing whitespace.
    // This lets the cost closure below measure any fragments[i..j]
    // span in constant time.
    let mut widths = Vec::with_capacity(fragments.len() + 1);
    let mut width = 0.0;
    widths.push(width);
    for fragment in fragments {
        width += fragment.width() + fragment.whitespace_width();
        widths.push(width);
    }

    let line_numbers = LineNumbers::new(fragments.len());

    // SMAWK finds, for every j, the break point i that minimizes the
    // total cost of wrapping fragments[..j] with a final line
    // fragments[i..j]. The closure returns that cost.
    let minima = smawk::online_column_minima(0.0, widths.len(), |minima, i, j| {
        // Line number for fragment `i`.
        let line_number = line_numbers.get(i, minima);
        let line_width = line_widths
            .get(line_number)
            .copied()
            .unwrap_or(default_line_width);
        // Guard against zero or negative target widths.
        let target_width = line_width.max(1.0);

        // Compute the width of a line spanning fragments[i..j] in
        // constant time. We need to adjust widths[j] by subtracting
        // the whitespace of fragment[j-1] and then add the penalty.
        let line_width = widths[j] - widths[i] - fragments[j - 1].whitespace_width()
            + fragments[j - 1].penalty_width();

        // We compute the cost of the line containing fragments[i..j].
        // We start with minima[i].1, which is the optimal cost for
        // breaking before fragments[i].
        //
        // First, every extra line costs `penalties.nline_penalty`.
        let mut cost = minima[i].1 + penalties.nline_penalty as f64;

        // Next, we add a penalty depending on the line length.
        if line_width > target_width {
            // Lines that overflow get a hefty penalty.
            let overflow = line_width - target_width;
            cost += overflow * penalties.overflow_penalty as f64;
        } else if j < fragments.len() {
            // Other lines (except for the last line) get a milder
            // penalty which depends on the size of the gap.
            let gap = target_width - line_width;
            cost += gap * gap;
        } else if i + 1 == j
            && line_width < target_width / penalties.short_last_line_fraction as f64
        {
            // The last line can have any size gap, but we do add a
            // penalty if the line is very short (typically because it
            // contains just a single word).
            cost += penalties.short_last_line_penalty as f64;
        }

        // Finally, we discourage hyphens.
        if fragments[j - 1].penalty_width() > 0.0 {
            // TODO: this should use a penalty value from the fragment
            // instead.
            cost += penalties.hyphen_penalty as f64;
        }

        cost
    });

    // Overflowed cost computations show up as infinite costs; report
    // them as an error instead of returning nonsense break points.
    for (_, cost) in &minima {
        if cost.is_infinite() {
            return Err(OverflowError);
        }
    }

    // Walk the optimal break points backwards from the end, emitting
    // one slice per line, then reverse to restore document order.
    let mut lines = Vec::with_capacity(line_numbers.get(fragments.len(), &minima));
    let mut pos = fragments.len();
    loop {
        let prev = minima[pos].0;
        lines.push(&fragments[prev..pos]);
        pos = prev;
        if pos == 0 {
            break;
        }
    }

    lines.reverse();
    Ok(lines)
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Fragment with a fixed width, unit whitespace and no penalty.
    #[derive(Debug, PartialEq)]
    struct Word(f64);

    impl Fragment for Word {
        fn width(&self) -> f64 {
            self.0
        }
        fn whitespace_width(&self) -> f64 {
            1.0
        }
        fn penalty_width(&self) -> f64 {
            0.0
        }
    }

    #[test]
    fn wrap_fragments_with_infinite_widths() {
        let words = [Word(f64::INFINITY)];
        assert_eq!(
            wrap_optimal_fit(&words, &[0.0], &Penalties::default()),
            Err(OverflowError)
        );
    }

    #[test]
    fn wrap_fragments_with_huge_widths() {
        let words = [Word(1e200), Word(1e250), Word(1e300)];
        assert_eq!(
            wrap_optimal_fit(&words, &[1e300], &Penalties::default()),
            Err(OverflowError)
        );
    }

    #[test]
    fn wrap_fragments_with_large_widths() {
        // The gaps will be of the sizes between 1e25 and 1e75. This
        // makes the `gap * gap` cost fit comfortably in a f64.
        let words = [Word(1e25), Word(1e50), Word(1e75)];
        assert_eq!(
            wrap_optimal_fit(&words, &[1e100], &Penalties::default()),
            Ok(vec![&words[..]])
        );
    }
}
|
Reference in New Issue
Block a user