Changeset 380 for pyyaml/trunk/lib/yaml/reader.py
- Timestamp: 05/30/11 00:19:04 (2 years ago)
- File: 1 edited
pyyaml/trunk/lib/yaml/reader.py (modified) (2 diffs)
Legend:
- Unmodified
- Added
- Removed
-
pyyaml/trunk/lib/yaml/reader.py
r323 r380
@@ -21,39 +21,4 @@
 
 import codecs, re
-
-# Unfortunately, codec functions in Python 2.3 does not support the `finish`
-# arguments, so we have to write our own wrappers.
-
-try:
-    codecs.utf_8_decode('', 'strict', False)
-    from codecs import utf_8_decode, utf_16_le_decode, utf_16_be_decode
-
-except TypeError:
-
-    def utf_16_le_decode(data, errors, finish=False):
-        if not finish and len(data) % 2 == 1:
-            data = data[:-1]
-        return codecs.utf_16_le_decode(data, errors)
-
-    def utf_16_be_decode(data, errors, finish=False):
-        if not finish and len(data) % 2 == 1:
-            data = data[:-1]
-        return codecs.utf_16_be_decode(data, errors)
-
-    def utf_8_decode(data, errors, finish=False):
-        if not finish:
-            # We are trying to remove a possible incomplete multibyte character
-            # from the suffix of the data.
-            # The first byte of a multi-byte sequence is in the range 0xc0 to 0xfd.
-            # All further bytes are in the range 0x80 to 0xbf.
-            # UTF-8 encoded UCS characters may be up to six bytes long.
-            count = 0
-            while count < 5 and count < len(data) \
-                    and '\x80' <= data[-count-1] <= '\xBF':
-                count -= 1
-            if count < 5 and count < len(data) \
-                    and '\xC0' <= data[-count-1] <= '\xFD':
-                data = data[:-count-1]
-        return codecs.utf_8_decode(data, errors)
 
 class ReaderError(YAMLError):
@@ -160,11 +125,11 @@
         if not isinstance(self.raw_buffer, unicode):
             if self.raw_buffer.startswith(codecs.BOM_UTF16_LE):
-                self.raw_decode = utf_16_le_decode
+                self.raw_decode = codecs.utf_16_le_decode
                 self.encoding = 'utf-16-le'
             elif self.raw_buffer.startswith(codecs.BOM_UTF16_BE):
-                self.raw_decode = utf_16_be_decode
+                self.raw_decode = codecs.utf_16_be_decode
                 self.encoding = 'utf-16-be'
             else:
-                self.raw_decode = utf_8_decode
+                self.raw_decode = codecs.utf_8_decode
                 self.encoding = 'utf-8'
         self.update(1)
Note: See TracChangeset for help on using the changeset viewer.
