Changeset 46 for branches/pyyaml3000/lib/yaml/reader.py
- Timestamp: 02/16/06 17:22:59 (7 years ago)
- File: 1 moved
branches/pyyaml3000/lib/yaml/reader.py (moved) (moved from branches/pyyaml3000/lib/yaml/stream.py) (9 diffs)
Legend:
- Unmodified
- Added
- Removed
branches/pyyaml3000/lib/yaml/reader.py
r45 r46 8 8 # Parser does not use it for any other purposes. 9 9 # 10 # Stream(source, data) 11 # Stream determines the encoding of `data` and converts it to unicode. 12 # Stream provides the following methods and attributes: 13 # stream.peek(length=1) - return the next `length` characters 14 # stream.forward(length=1) - move the current position to `length` characters. 15 # stream.index - the number of the current character. 16 # stream.line, stream.column - the line and the column of the current character. 17 10 # Reader(source, data) 11 # Reader determines the encoding of `data` and converts it to unicode. 12 # Reader provides the following methods and attributes: 13 # reader.peek(length=1) - return the next `length` characters 14 # reader.forward(length=1) - move the current position to `length` characters. 15 # reader.index - the number of the current character. 16 # reader.line, stream.column - the line and the column of the current character. 17 18 __all__ = ['Marker', 'Reader', 'ReaderError'] 18 19 19 20 from error import YAMLError … … 58 59 class Marker: 59 60 60 def __init__(self, source, line, column, buffer, pointer):61 self. source = source61 def __init__(self, name, line, column, buffer, pointer): 62 self.name = name 62 63 self.line = line 63 64 self.column = column … … 88 89 + ' '*(self.pointer-start+len(head)) + '^' + '\n' 89 90 90 class StreamError(YAMLError): 91 92 def __init__(self, source, encoding, character, position, reason): 93 self.source = source 94 self.encoding = encoding 91 class ReaderError(YAMLError): 92 93 def __init__(self, name, position, character, encoding, reason): 94 self.name = name 95 95 self.character = character 96 96 self.position = position 97 self.encoding = encoding 97 98 self.reason = reason 98 99 … … 100 101 if isinstance(self.character, str): 101 102 return "'%s' codec can't decode byte #x%02x: %s\n" \ 102 "\tin file'%s', position %d." \103 "\tin '%s', position %d." 
\ 103 104 % (self.encoding, ord(self.character), self.reason, 104 self. source, self.position)105 self.name, self.position) 105 106 else: 106 107 return "unacceptable character #x%04x: %s\n" \ 107 "\tin file'%s', position %d." \108 "\tin '%s', position %d." \ 108 109 % (ord(self.character), self.reason, 109 self. source, self.position)110 111 class Stream:112 # Stream:110 self.name, self.position) 111 112 class Reader: 113 # Reader: 113 114 # - determines the data encoding and converts it to unicode, 114 115 # - checks if characters are in allowed range, 115 116 # - adds '\0' to the end. 116 117 118 # Reader accepts 119 # - a `str` object, 120 # - a `unicode` object, 121 # - a file-like object with its `read` method returning `str`, 122 # - a file-like object with its `read` method returning `unicode`. 123 117 124 # Yeah, it's ugly and slow. 118 125 119 def __init__(self, source,data):120 self. source = source126 def __init__(self, data): 127 self.name = None 121 128 self.stream = None 122 129 self.stream_pointer = 0 … … 125 132 self.pointer = 0 126 133 self.raw_buffer = None 127 self.raw_decode r= None134 self.raw_decode = None 128 135 self.index = 0 129 136 self.line = 0 130 137 self.column = 0 131 138 if isinstance(data, unicode): 139 self.name = "<unicode string>" 132 140 self.check_printable(data) 133 141 self.buffer = data+u'\0' 134 142 elif isinstance(data, str): 143 self.name = "<string>" 135 144 self.raw_buffer = data 136 145 self.determine_encoding() 137 146 else: 138 147 self.stream = data 148 self.name = getattr(data, 'name', "<file>") 139 149 self.eof = False 140 150 self.raw_buffer = '' … … 162 172 def get_marker(self): 163 173 if self.stream is None: 164 return Marker(self. source, self.line, self.column,174 return Marker(self.name, self.line, self.column, 165 175 self.buffer, self.pointer) 166 176 else: 167 return Marker(self. 
source, self.line, self.column, None, None)177 return Marker(self.name, self.line, self.column, None, None) 168 178 169 179 def determine_encoding(self): 170 180 while not self.eof and len(self.raw_buffer) < 2: 171 181 self.update_raw() 172 if self.raw_buffer.startswith(codecs.BOM_UTF16_LE): 173 self.raw_decode = utf_16_le_decode 174 elif self.raw_buffer.startswith(codecs.BOM_UTF16_BE): 175 self.raw_decode = utf_16_be_decode 176 else: 177 self.raw_decode = utf_8_decode 182 if not isinstance(self.raw_buffer, unicode): 183 if self.raw_buffer.startswith(codecs.BOM_UTF16_LE): 184 self.raw_decode = utf_16_le_decode 185 elif self.raw_buffer.startswith(codecs.BOM_UTF16_BE): 186 self.raw_decode = utf_16_be_decode 187 else: 188 self.raw_decode = utf_8_decode 178 189 self.update(1) 179 190 … … 184 195 character = match.group() 185 196 position = self.index+(len(self.buffer)-self.pointer)+match.start() 186 raise StreamError(self.source, 'unicode', character, position,187 "control characters are not allowed")197 raise ReaderError(self.name, position, character, 198 'unicode', "special characters are not allowed") 188 199 189 200 def update(self, length): … … 195 206 if not self.eof: 196 207 self.update_raw() 197 try: 198 data, converted = self.raw_decode(self.raw_buffer, 199 'strict', self.eof) 200 except UnicodeDecodeError, exc: 201 character = exc.object[exc.start] 202 if self.stream is not None: 203 position = self.stream_pointer-len(self.raw_buffer)+exc.start 204 else: 205 position = exc.start 206 raise StreamError(self.source, exc.encoding, 207 character, position, exc.reason) 208 if self.raw_decode is not None: 209 try: 210 data, converted = self.raw_decode(self.raw_buffer, 211 'strict', self.eof) 212 except UnicodeDecodeError, exc: 213 character = exc.object[exc.start] 214 if self.stream is not None: 215 position = self.stream_pointer-len(self.raw_buffer)+exc.start 216 else: 217 position = exc.start 218 raise ReaderError(self.name, position, character, 219 exc.encoding, 
exc.reason) 220 else: 221 data = self.raw_buffer 222 converted = len(data) 208 223 self.check_printable(data) 209 224 self.buffer += data … … 224 239 #try: 225 240 # import psyco 226 # psyco.bind( Stream)241 # psyco.bind(Reader) 227 242 #except ImportError: 228 243 # pass
Note: See TracChangeset for help on using the changeset viewer.
