Changeset 39 for branches/pyyaml3000/lib/yaml/scanner.py
- Timestamp:
- 02/12/06 18:19:54 (7 years ago)
- Location:
- branches/pyyaml3000/lib
- Files:
-
- 2 added
- 1 copied
-
. (added)
-
yaml (added)
-
yaml/scanner.py (copied) (copied from trunk/sandbox/my-parser/parser2.py) (2 diffs)
Legend:
- Unmodified
- Added
- Removed
-
branches/pyyaml3000/lib/yaml/scanner.py
# Changeset r37 -> r39, branches/pyyaml3000/lib/yaml/scanner.py.
#
# The module header recorded in the changeset brings these names into scope;
# `Scanner.__init__` needs `Stream` at run time:
#     from marker import Marker
#     from error import ParserError
#     from stream import Stream


class Scanner:
    """Produce YAML tokens on demand from an input stream.

    Tokens are buffered in `self.tokens` and handed out through the public
    `peek_token` / `get_token` methods.  The private `fetch_*` methods append
    tokens to the buffer; the `check_*` methods decide which `fetch_*` method
    applies at the current stream position.

    Several helpers used here (`find_next_token`, `unwind_indent`,
    `unwind_indents`, `next_possible_simple_key`, `check_entry`, `fetch_entry`,
    ..., `invalid_token`) and the token classes (`EndToken`,
    `DocumentStartToken`, `DocumentEndToken`) are not defined in this part of
    the changeset and must be provided elsewhere.
    """

    # Characters that may follow a '---' or '...' document indicator:
    # space, tab, or any line break.
    _SPACE_OR_BREAK = u' \t\r\n\x85\u2028\u2029'

    def __init__(self, source, data):
        """Initialize the scanner."""
        # The input stream. The Stream class does the dirty work of checking
        # for BOM and converting the input data to Unicode. It also adds LF to
        # the end if the data does not end with an EOL character.
        #
        # Stream supports the following methods
        #   self.stream.peek(k=1)   # peek the next k characters
        #   self.stream.read(k=1)   # read the next k characters and move the
        #                           # pointer
        self.stream = Stream(source, data)

        # Had we reached the end of the stream?
        self.done = False

        # The number of unclosed '{' and '['. `flow_level == 0` means block
        # context.
        self.flow_level = 0

        # List of processed tokens that are not yet emitted.
        self.tokens = []

        # Number of tokens that were emitted through the `get_token` method.
        self.tokens_taken = 0

        # The current indentation level.
        self.indent = -1

        # Past indentation levels.
        self.indents = []

        # Variables related to simple key treatment.

        # A simple key is a key that is not denoted by the '?' indicator.
        # Example of simple keys:
        #   ---
        #   block simple key: value
        #   ? not a simple key:
        #   : { flow simple key: value }
        # We emit the KEY token before all keys, so when we find a potential
        # simple key, we try to locate the corresponding ':' indicator.
        # Simple keys should be limited to a single line and 1024 characters.

        # Can a block collection start at the current position? A block
        # collection may start:
        #   - at the beginning of the line (not counting spaces),
        #   - after the block sequence indicator '-'.
        self.allow_block_collection = True

        # Can a simple key in flow context start at the current position? A
        # simple key may start after the '{', '[', and ',' indicators.
        self.allow_flow_simple_keys = False

        # Keep track of possible simple keys. This is a dictionary. The key
        # is `flow_level`; there can be no more than one possible simple key
        # for each level. The value is a record of
        #   (stream.index, stream.line, stream.column, token_number)
        self.possible_simple_keys = {}

    # Public methods:

    def peek_token(self):
        """Get the current token without removing it.

        Returns None when the token buffer is empty after fetching.
        """
        while self.need_more_tokens():
            self.fetch_more_tokens()
        if self.tokens:
            return self.tokens[0]

    def get_token(self):
        """Get the current token and remove it from the list.

        Returns None when the token buffer is empty after fetching.
        """
        while self.need_more_tokens():
            self.fetch_more_tokens()
        if self.tokens:
            self.tokens_taken += 1
            return self.tokens.pop(0)

    # Private methods:

    def need_more_tokens(self):
        """Return True if more tokens must be fetched before emitting one."""
        if self.done:
            return False
        if not self.tokens:
            return True
        # The current token may be a potential simple key, so we
        # need to look further.
        return self.next_possible_simple_key() == self.tokens_taken

    def fetch_more_tokens(self):
        """Scan forward and dispatch to the `fetch_*` method for the next token."""

        # Eat whitespaces and comments until we reach the next token.
        self.find_next_token()

        # Compare the current indentation and column. It may add some tokens
        # and decrease the current indentation.
        # NOTE(review): this calls `unwind_indent`, while `fetch_end` and the
        # document indicator fetchers call `unwind_indents`; neither is defined
        # in the visible part of the changeset -- confirm which name is meant.
        self.unwind_indent(self.stream.column)

        # Peek the next character.
        ch = self.stream.peek()

        # Is it the end of stream?
        if ch is None:
            return self.fetch_end()

        # Is it a directive?
        if ch == u'%' and self.check_directive():
            return self.fetch_directive()

        # Is it the document start?
        if ch == u'-' and self.check_document_start():
            return self.fetch_document_start()

        # Is it the document end?
        if ch == u'.' and self.check_document_end():
            return self.fetch_document_end()

        # Note: the order of the following checks is NOT significant.

        # Is it the sequence indicator?
        if ch in u'-,' and self.check_entry():
            return self.fetch_entry()

        # Is it the flow sequence start indicator?
        if ch == u'[':
            return self.fetch_flow_sequence_start()

        # Is it the flow mapping start indicator?
        if ch == u'{':
            return self.fetch_flow_mapping_start()

        # Is it the flow sequence end indicator?
        if ch == u']':
            return self.fetch_flow_sequence_end()

        # Is it the flow mapping end indicator?
        if ch == u'}':
            return self.fetch_flow_mapping_end()

        # Is it the key indicator?
        if ch == u'?' and self.check_key():
            return self.fetch_key()

        # Is it the value indicator?
        if ch == u':' and self.check_value():
            return self.fetch_value()

        # Is it an alias?
        if ch == u'*':
            return self.fetch_alias()

        # Is it an anchor?
        if ch == u'&':
            return self.fetch_anchor()

        # Is it a tag?
        if ch == u'!':
            return self.fetch_tag()

        # Is it a literal scalar?
        if ch == u'|':
            return self.fetch_literal()

        # Is it a folded scalar?
        if ch == u'>':
            return self.fetch_folded()

        # Is it a single quoted scalar?
        if ch == u'\'':
            return self.fetch_single()

        # Is it a double quoted scalar?
        if ch == u'\"':
            return self.fetch_double()

        # It must be a plain scalar.
        if self.check_plain():
            return self.fetch_plain()

        # No? It's an error then. Let's produce a nice error message.
        self.invalid_token()

    def fetch_end(self):
        """Append the END token and mark the stream as finished."""

        # Set the current indentation to -1.
        self.unwind_indents(-1)

        # Reset everything (not really needed).
        self.allow_block_collection = False
        self.allow_flow_simple_keys = False
        self.possible_simple_keys = {}

        # Add END.
        marker = self.stream.get_marker()
        self.tokens.append(EndToken(marker))

        # The stream is ended.
        self.done = True

    def check_directive(self):
        """Return True if the '%' at the current position starts a directive."""

        # Checking for
        #   /* The beginning of the line */ '%'
        # The '%' indicator is already checked.
        return self.stream.column == 0

    def check_document_start(self):
        """Return True if the stream is positioned at a '---' indicator."""

        # Checking for
        #   /* The beginning of the line */ '---' /* Space or EOL */
        return self._check_document_indicator(u'---')

    def fetch_document_start(self):
        """Consume '---' and append a DOCUMENT-START token."""
        self._fetch_document_indicator(DocumentStartToken)

    def check_document_end(self):
        """Return True if the stream is positioned at a '...' indicator."""

        # Checking for
        #   /* The beginning of the line */ '...' /* Space or EOL */
        return self._check_document_indicator(u'...')

    def fetch_document_end(self):
        """Consume '...' and append a DOCUMENT-END token."""
        self._fetch_document_indicator(DocumentEndToken)

    def _check_document_indicator(self, indicator):
        """Check for a three-character `indicator` at column 0 followed by
        a space, a tab, or a line break."""
        if self.stream.column != 0:
            return False
        prefix = self.stream.peek(4)
        # The length guard keeps a short peek from raising IndexError; the
        # Stream is described as always ending the data with an EOL, so this
        # should only matter for unexpected input.
        return (len(prefix) > 3
                and prefix[:3] == indicator
                and prefix[3] in self._SPACE_OR_BREAK)

    def _fetch_document_indicator(self, token_class):
        """Shared body of `fetch_document_start` and `fetch_document_end`."""

        # Set the current indentation to -1.
        self.unwind_indents(-1)

        # No block collections after a document indicator.
        self.allow_block_collection = False

        # No flow simple keys (not needed -- we are in the block context).
        self.allow_flow_simple_keys = False

        # Reset possible simple keys (not needed -- EOL should have reset it).
        self.possible_simple_keys = {}

        start_marker = self.stream.get_marker()

        # The characters are already checked, just move forward.
        self.stream.read(3)

        end_marker = self.stream.get_marker()

        # Add DOCUMENT-START or DOCUMENT-END.
        self.tokens.append(token_class(start_marker, end_marker))


# Second hunk of the changeset (old lines 1-130 become new lines 277-408).
# The previous implementation is kept below the new class; the changeset
# viewer shows only this context:
#
#     # Tokens:
#     # YAML_DIRECTIVE: ^ '%' YAML ' '+ (version: \d+ '.' \d+) s-l-comments
#     ...
#             return error_position+error_pointer+error_message
#
#     class _Scanner:        # was `class Scanner:` in r37
#
#         def scan(self, source, data):
Note: See TracChangeset
for help on using the changeset viewer.
