diff --git a/core/src/text_buffer/writer.rs b/core/src/text_buffer/writer.rs index 1f1dcf4..6457401 100644 --- a/core/src/text_buffer/writer.rs +++ b/core/src/text_buffer/writer.rs @@ -1,25 +1,50 @@ use super::{Point, TextBuffer}; - pub struct TextBufferWriter<'a> { text_buffer: &'a mut TextBuffer, + /// Stores any partial multi-byte characters that are left over from the + /// last call to `write()`. + overflow: Vec, } impl<'a> TextBufferWriter<'a> { pub fn new(text_buffer: &'a mut TextBuffer) -> Self { - Self { text_buffer } + Self { + text_buffer, + overflow: vec![], + } } } impl<'a> std::io::Write for TextBufferWriter<'a> { fn write(&mut self, buf: &[u8]) -> std::io::Result { - self.text_buffer.insert_text( - str::from_utf8(buf).map_err(std::io::Error::other)?, - Point::End, - ); + // If we get a UTF-8 decoding error, try backing off up to three bytes. + // We might be in the middle of a multipart character and in that case + // we should store the partial character in `overflow`. Not the most + // efficient way to do this, ideally I should write some way to decode + // text one character at a time. + let bytes = if !self.overflow.is_empty() { + self.overflow.extend_from_slice(buf); + &self.overflow + } else { + buf + }; + let text = str::from_utf8(bytes) + .or_else(|_| str::from_utf8(&bytes[0..bytes.len() - 1])) + .or_else(|_| str::from_utf8(&bytes[0..bytes.len() - 2])) + .or_else(|_| str::from_utf8(&bytes[0..bytes.len() - 3])) + .map_err(std::io::Error::other)?; + self.text_buffer.insert_text(text, Point::End); + self.overflow = bytes[text.len()..bytes.len()].to_vec(); Ok(buf.len()) } fn flush(&mut self) -> std::io::Result<()> { + if !self.overflow.is_empty() { + self.text_buffer.insert_text( + str::from_utf8(&self.overflow).map_err(std::io::Error::other)?, + Point::End, + ); + } Ok(()) } }