Fix bug in TextBufferWriter with multibyte chars

Fixed a UTF-8 decoding error that would occur if the `buf` argument to
`TextBufferWriter::write()` ended in the middle of a multibyte character.
This commit is contained in:
Matthew Gordon 2025-11-10 19:52:31 -04:00
parent 36ab1b1769
commit 304bf4da6c
1 changed files with 31 additions and 6 deletions

View File

@ -1,25 +1,50 @@
use super::{Point, TextBuffer};
/// Adapter that decodes UTF-8 bytes and inserts the resulting text into a
/// borrowed `TextBuffer`.
pub struct TextBufferWriter<'a> {
/// Destination buffer; decoded text is inserted into it at `Point::End`.
text_buffer: &'a mut TextBuffer,
/// Stores any partial multi-byte characters that are left over from the
/// last call to `write()`.
overflow: Vec<u8>,
}
impl<'a> TextBufferWriter<'a> {
    /// Creates a writer that inserts decoded text into `text_buffer`.
    ///
    /// The writer starts with no pending partial character.
    pub fn new(text_buffer: &'a mut TextBuffer) -> Self {
        Self {
            text_buffer,
            overflow: Vec::new(),
        }
    }
}
impl<'a> std::io::Write for TextBufferWriter<'a> {
    /// Decodes `buf` as UTF-8 and inserts the resulting text at `Point::End`.
    ///
    /// If `buf` ends in the middle of a multibyte character, the incomplete
    /// bytes are kept in `overflow` and completed by the next call. On success
    /// the whole of `buf` is reported as consumed.
    ///
    /// # Errors
    ///
    /// Returns an error wrapping the `Utf8Error` when the data contains bytes
    /// that can never form valid UTF-8. In that case nothing is inserted and
    /// the pending partial character is left exactly as it was before the call.
    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
        if self.overflow.is_empty() {
            // Common case: nothing pending, so decode straight from `buf`
            // without copying it.
            let (text, partial) = split_incomplete_tail(buf)?;
            self.text_buffer.insert_text(text, Point::End);
            self.overflow.extend_from_slice(partial);
        } else {
            // Join the pending bytes with the new data so the split character
            // can be decoded as a whole.
            let pending_len = self.overflow.len();
            self.overflow.extend_from_slice(buf);
            match split_incomplete_tail(&self.overflow) {
                Ok((text, _)) => {
                    let consumed = text.len();
                    self.text_buffer.insert_text(text, Point::End);
                    // Keep only the (possibly empty) incomplete tail.
                    self.overflow.drain(..consumed);
                }
                Err(err) => {
                    // Roll back so a failed write does not leave `buf` stuck
                    // in the pending state.
                    self.overflow.truncate(pending_len);
                    return Err(err);
                }
            }
        }
        Ok(buf.len())
    }

    /// Inserts any pending bytes if they decode as UTF-8 and clears them.
    ///
    /// # Errors
    ///
    /// `write()` only leaves an incomplete multibyte character in `overflow`,
    /// so a non-empty `overflow` is reported as a decoding error. The bytes
    /// are kept so that a later `write()` can still complete the character.
    fn flush(&mut self) -> std::io::Result<()> {
        if self.overflow.is_empty() {
            return Ok(());
        }
        let text = std::str::from_utf8(&self.overflow).map_err(std::io::Error::other)?;
        self.text_buffer.insert_text(text, Point::End);
        self.overflow.clear();
        Ok(())
    }
}

/// Splits `bytes` into its valid UTF-8 text and a trailing incomplete
/// multibyte character (empty when `bytes` ends on a character boundary).
///
/// Fails if `bytes` contains a sequence that is invalid no matter what bytes
/// might follow it.
fn split_incomplete_tail(bytes: &[u8]) -> std::io::Result<(&str, &[u8])> {
    match std::str::from_utf8(bytes) {
        Ok(text) => Ok((text, &bytes[bytes.len()..])),
        // `error_len() == None` means the input ended unexpectedly in the
        // middle of a character; everything before `valid_up_to()` is valid.
        Err(err) if err.error_len().is_none() => {
            let (complete, partial) = bytes.split_at(err.valid_up_to());
            std::str::from_utf8(complete)
                .map(|text| (text, partial))
                .map_err(std::io::Error::other)
        }
        Err(err) => Err(std::io::Error::other(err)),
    }
}