Fix bug in TextBufferWriter with multibyte chars

Fixed a UTF-8 decoding error that would occur if the `buf` argument to
`TextBufferWriter::write()` ended in the middle of a multibyte character.
This commit is contained in:
Matthew Gordon 2025-11-10 19:52:31 -04:00
parent 36ab1b1769
commit 304bf4da6c
1 changed files with 31 additions and 6 deletions

View File

@ -1,25 +1,50 @@
use super::{Point, TextBuffer}; use super::{Point, TextBuffer};
pub struct TextBufferWriter<'a> { pub struct TextBufferWriter<'a> {
text_buffer: &'a mut TextBuffer, text_buffer: &'a mut TextBuffer,
/// Stores any partial multi-byte characters that are left over from the
/// last call to `write()`.
overflow: Vec<u8>,
} }
impl<'a> TextBufferWriter<'a> { impl<'a> TextBufferWriter<'a> {
pub fn new(text_buffer: &'a mut TextBuffer) -> Self { pub fn new(text_buffer: &'a mut TextBuffer) -> Self {
Self { text_buffer } Self {
text_buffer,
overflow: vec![],
}
} }
} }
impl<'a> std::io::Write for TextBufferWriter<'a> { impl<'a> std::io::Write for TextBufferWriter<'a> {
fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> { fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
self.text_buffer.insert_text( // If we get a UTF-8 decoding error, try backing off up to three bytes.
str::from_utf8(buf).map_err(std::io::Error::other)?, // We might be in the middle of a multipart character and in that case
Point::End, // we should store the partial character in `overflow`. Not the most
); // efficient way to do this, ideally I should write some way to decode
// text one character at a time.
let bytes = if !self.overflow.is_empty() {
self.overflow.extend_from_slice(buf);
&self.overflow
} else {
buf
};
let text = str::from_utf8(bytes)
.or_else(|_| str::from_utf8(&bytes[0..bytes.len() - 1]))
.or_else(|_| str::from_utf8(&bytes[0..bytes.len() - 2]))
.or_else(|_| str::from_utf8(&bytes[0..bytes.len() - 3]))
.map_err(std::io::Error::other)?;
self.text_buffer.insert_text(text, Point::End);
self.overflow = bytes[text.len()..bytes.len()].to_vec();
Ok(buf.len()) Ok(buf.len())
} }
fn flush(&mut self) -> std::io::Result<()> { fn flush(&mut self) -> std::io::Result<()> {
if !self.overflow.is_empty() {
self.text_buffer.insert_text(
str::from_utf8(&self.overflow).map_err(std::io::Error::other)?,
Point::End,
);
}
Ok(()) Ok(())
} }
} }