Index: pyyaml/trunk/lib3/yaml/reader.py
===================================================================
--- pyyaml/trunk/lib/yaml/reader.py	(revision 323)
+++ pyyaml/trunk/lib3/yaml/reader.py	(revision 328)
@@ -18,42 +18,7 @@
 __all__ = ['Reader', 'ReaderError']
 
-from error import YAMLError, Mark
+from .error import YAMLError, Mark
 
 import codecs, re
-
-# Unfortunately, codec functions in Python 2.3 does not support the `finish`
-# arguments, so we have to write our own wrappers.
-
-try:
-    codecs.utf_8_decode('', 'strict', False)
-    from codecs import utf_8_decode, utf_16_le_decode, utf_16_be_decode
-
-except TypeError:
-
-    def utf_16_le_decode(data, errors, finish=False):
-        if not finish and len(data) % 2 == 1:
-            data = data[:-1]
-        return codecs.utf_16_le_decode(data, errors)
-
-    def utf_16_be_decode(data, errors, finish=False):
-        if not finish and len(data) % 2 == 1:
-            data = data[:-1]
-        return codecs.utf_16_be_decode(data, errors)
-
-    def utf_8_decode(data, errors, finish=False):
-        if not finish:
-            # We are trying to remove a possible incomplete multibyte character
-            # from the suffix of the data.
-            # The first byte of a multi-byte sequence is in the range 0xc0 to 0xfd.
-            # All further bytes are in the range 0x80 to 0xbf.
-            # UTF-8 encoded UCS characters may be up to six bytes long.
-            count = 0
-            while count < 5 and count < len(data)   \
-                    and '\x80' <= data[-count-1] <= '\xBF':
-                count -= 1
-            if count < 5 and count < len(data)  \
-                    and '\xC0' <= data[-count-1] <= '\xFD':
-                data = data[:-count-1]
-        return codecs.utf_8_decode(data, errors)
 
 class ReaderError(YAMLError):
@@ -67,5 +32,5 @@
 
     def __str__(self):
-        if isinstance(self.character, str):
+        if isinstance(self.character, bytes):
             return "'%s' codec can't decode byte #x%02x: %s\n"  \
                     "  in \"%s\", position %d"    \
@@ -80,11 +45,11 @@
 class Reader(object):
     # Reader:
-    # - determines the data encoding and converts it to unicode,
+    # - determines the data encoding and converts it to a unicode string,
     # - checks if characters are in allowed range,
     # - adds '\0' to the end.
 
     # Reader accepts
+    #  - a `bytes` object,
     #  - a `str` object,
-    #  - a `unicode` object,
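-    #  - a file-like object with its `read` method returning `str`,
-    #  - a file-like object with its `read` method returning `unicode`.
+    #  - a file-like object with its `read` method returning `bytes`,
+    #  - a file-like object with its `read` method returning `str`.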
@@ -97,5 +62,5 @@
         self.stream_pointer = 0
         self.eof = True
-        self.buffer = u''
+        self.buffer = ''
         self.pointer = 0
         self.raw_buffer = None
@@ -105,10 +70,10 @@
         self.line = 0
         self.column = 0
-        if isinstance(stream, unicode):
+        if isinstance(stream, str):
             self.name = "<unicode string>"
             self.check_printable(stream)
-            self.buffer = stream+u'\0'
-        elif isinstance(stream, str):
-            self.name = "<string>"
+            self.buffer = stream+'\0'
+        elif isinstance(stream, bytes):
+            self.name = "<byte string>"
             self.raw_buffer = stream
             self.determine_encoding()
@@ -117,5 +82,5 @@
             self.name = getattr(stream, 'name', "<file>")
             self.eof = False
-            self.raw_buffer = ''
+            self.raw_buffer = None
             self.determine_encoding()
 
@@ -139,9 +104,9 @@
             self.pointer += 1
             self.index += 1
-            if ch in u'\n\x85\u2028\u2029'  \
-                    or (ch == u'\r' and self.buffer[self.pointer] != u'\n'):
+            if ch in '\n\x85\u2028\u2029'  \
+                    or (ch == '\r' and self.buffer[self.pointer] != '\n'):
                 self.line += 1
                 self.column = 0
-            elif ch != u'\uFEFF':
+            elif ch != '\uFEFF':
                 self.column += 1
             length -= 1
@@ -156,19 +121,19 @@
 
     def determine_encoding(self):
-        while not self.eof and len(self.raw_buffer) < 2:
+        while not self.eof and (self.raw_buffer is None or len(self.raw_buffer) < 2):
             self.update_raw()
-        if not isinstance(self.raw_buffer, unicode):
+        if isinstance(self.raw_buffer, bytes):
             if self.raw_buffer.startswith(codecs.BOM_UTF16_LE):
-                self.raw_decode = utf_16_le_decode
+                self.raw_decode = codecs.utf_16_le_decode
                 self.encoding = 'utf-16-le'
             elif self.raw_buffer.startswith(codecs.BOM_UTF16_BE):
-                self.raw_decode = utf_16_be_decode
+                self.raw_decode = codecs.utf_16_be_decode
                 self.encoding = 'utf-16-be'
             else:
-                self.raw_decode = utf_8_decode
+                self.raw_decode = codecs.utf_8_decode
                 self.encoding = 'utf-8'
         self.update(1)
 
-    NON_PRINTABLE = re.compile(u'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD]')
+    NON_PRINTABLE = re.compile('[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD]')
     def check_printable(self, data):
         match = self.NON_PRINTABLE.search(data)
@@ -191,5 +156,5 @@
                     data, converted = self.raw_decode(self.raw_buffer,
                             'strict', self.eof)
-                except UnicodeDecodeError, exc:
+                except UnicodeDecodeError as exc:
                     character = exc.object[exc.start]
                     if self.stream is not None:
@@ -206,14 +171,16 @@
             self.raw_buffer = self.raw_buffer[converted:]
             if self.eof:
-                self.buffer += u'\0'
+                self.buffer += '\0'
                 self.raw_buffer = None
                 break
 
-    def update_raw(self, size=1024):
+    def update_raw(self, size=4096):
         data = self.stream.read(size)
-        if data:
+        if self.raw_buffer is None:
+            self.raw_buffer = data
+        else:
             self.raw_buffer += data
-            self.stream_pointer += len(data)
-        else:
+        self.stream_pointer += len(data)
+        if not data:
             self.eof = True
 
