| 1 | |
|---|
| 2 | from marker import Marker |
|---|
| 3 | from error import ParserError |
|---|
| 4 | from stream import Stream |
|---|
| 5 | |
|---|
class Scanner:
    """Incremental YAML tokenizer working on top of a `Stream`.

    Tokens are produced lazily: `peek_token` and `get_token` keep calling
    `fetch_more_tokens` until `need_more_tokens` reports that the head of
    the token queue is safe to hand out.

    NOTE(review): several names used below are not defined in the visible
    part of this file -- `find_next_token`, `unwind_indent`,
    `unwind_indents`, `next_possible_simple_key`, `invalid_token`, most of
    the `check_*`/`fetch_*` helpers and the token classes (`EndToken`,
    `DocumentStartToken`, `DocumentEndToken`).  Presumably they are
    provided elsewhere; confirm before relying on this class.
    """

    def __init__(self, source, data):
        """Initialize the scanner."""
        # The input stream. The Stream class does the dirty work of checking
        # for BOM and converting the input data to Unicode. It also adds LF
        # to the end if the data does not end with an EOL character.
        #
        # Stream supports the following methods
        #   self.stream.peek(k=1)   # peek the next k characters
        #   self.stream.read(k=1)   # read the next k characters and move the
        #                           # pointer
        self.stream = Stream(source, data)

        # Have we reached the end of the stream?
        self.done = False

        # The number of unclosed '{' and '['. `flow_level == 0` means block
        # context.
        self.flow_level = 0

        # List of processed tokens that are not yet emitted.
        self.tokens = []

        # Number of tokens that were emitted through the `get_token` method.
        self.tokens_taken = 0

        # The current indentation level.
        self.indent = -1

        # Past indentation levels.
        self.indents = []

        # Variables related to simple key treatment.

        # A simple key is a key that is not denoted by the '?' indicator.
        # Example of simple keys:
        #   ---
        #   block simple key: value
        #   ? not a simple key:
        #   : { flow simple key: value }
        # We emit the KEY token before all keys, so when we find a potential
        # simple key, we try to locate the corresponding ':' indicator.
        # Simple keys should be limited to a single line and 1024 characters.

        # Can a block collection start at the current position? A block
        # collection may start:
        #   - at the beginning of the line (not counting spaces),
        #   - after the block sequence indicator '-'.
        self.allow_block_collection = True

        # Can a simple key in flow context start at the current position? A
        # simple key may start after the '{', '[', and ',' indicators.
        self.allow_flow_simple_keys = False

        # Keep track of possible simple keys. This is a dictionary. The key
        # is `flow_level`; there can be no more than one possible simple key
        # for each level. The value is a record of
        #   (stream.index, stream.line, stream.column, token_number)
        self.possible_simple_keys = {}

    # Public methods:

    def peek_token(self):
        """Get the current token without removing it.

        Returns None when the token queue is empty after scanning.
        """
        while self.need_more_tokens():
            self.fetch_more_tokens()
        if self.tokens:
            return self.tokens[0]

    def get_token(self):
        """Get the current token and remove it from the list.

        Returns None when the token queue is empty after scanning.
        """
        while self.need_more_tokens():
            self.fetch_more_tokens()
        if self.tokens:
            self.tokens_taken += 1
            return self.tokens.pop(0)

    # Private methods:

    def need_more_tokens(self):
        """Tell whether more input must be scanned before emitting a token."""
        if self.done:
            return False
        if not self.tokens:
            return True
        # The current token may be a potential simple key, so we
        # need to look further.
        if self.next_possible_simple_key() == self.tokens_taken:
            return True
        return False

    def fetch_more_tokens(self):
        """Scan the next token(s) and append them to `self.tokens`."""

        # Eat whitespaces and comments until we reach the next token.
        self.find_next_token()

        # Compare the current indentation and column. It may add some tokens
        # and decrease the current indentation.
        # NOTE(review): this calls `unwind_indent` while the `fetch_*`
        # methods below call `unwind_indents`; neither is defined in the
        # visible code, so one of the two spellings is probably stale.
        self.unwind_indent(self.stream.column)

        # Peek the next character.
        ch = self.stream.peek()

        # Is it the end of stream?
        if ch is None:
            return self.fetch_end()

        # Is it a directive?
        if ch == u'%' and self.check_directive():
            return self.fetch_directive()

        # Is it the document start?
        if ch == u'-' and self.check_document_start():
            return self.fetch_document_start()

        # Is it the document end?
        if ch == u'.' and self.check_document_end():
            return self.fetch_document_end()

        # Note: the order of the following checks is NOT significant.

        # Is it the sequence indicator?
        if ch in u'-,' and self.check_entry():
            return self.fetch_entry()

        # Is it the flow sequence start indicator?
        if ch == u'[':
            return self.fetch_flow_sequence_start()

        # Is it the flow mapping start indicator?
        if ch == u'{':
            return self.fetch_flow_mapping_start()

        # Is it the flow sequence end indicator?
        if ch == u']':
            return self.fetch_flow_sequence_end()

        # Is it the flow mapping end indicator?
        if ch == u'}':
            return self.fetch_flow_mapping_end()

        # Is it the key indicator?
        if ch == u'?' and self.check_key():
            return self.fetch_key()

        # Is it the value indicator?
        if ch == u':' and self.check_value():
            return self.fetch_value()

        # Is it an alias?
        if ch == u'*':
            return self.fetch_alias()

        # Is it an anchor?
        if ch == u'&':
            return self.fetch_anchor()

        # Is it a tag?
        if ch == u'!':
            return self.fetch_tag()

        # Is it a literal scalar?
        if ch == u'|':
            return self.fetch_literal()

        # Is it a folded scalar?
        if ch == u'>':
            return self.fetch_folded()

        # Is it a single quoted scalar?
        if ch == u'\'':
            return self.fetch_single()

        # Is it a double quoted scalar?
        if ch == u'\"':
            return self.fetch_double()

        # It must be a plain scalar.
        if self.check_plain():
            return self.fetch_plain()

        # No? It's an error then. Let's produce a nice error message.
        self.invalid_token()

    def fetch_end(self):
        """Emit the END token and mark the scanner as finished."""

        # Set the current indentation to -1.
        self.unwind_indents(-1)

        # Reset everything (not really needed).
        self.allow_block_collection = False
        self.allow_flow_simple_keys = False
        self.possible_simple_keys = {}

        # Add END.
        marker = self.stream.get_marker()
        self.tokens.append(EndToken(marker))

        # The stream is ended.
        self.done = True

    def check_directive(self):
        """Return True if the already-seen '%' starts a directive."""

        # Checking for
        #   /* The beginning of the line */ '%'
        # The '%' indicator is already checked.
        return self.stream.column == 0

    def check_document_start(self):
        """Return True if the stream is positioned at a '---' marker."""

        # Checking for
        #   /* The beginning of the line */ '---' /* Space or EOL */
        if self.stream.column != 0:
            return False
        prefix = self.stream.peek(4)
        return prefix[:3] == u'---' and prefix[3] in u' \t\r\n\x85\u2028\u2029'

    def fetch_document_start(self):
        """Consume '---' and emit the DOCUMENT-START token."""

        # Set the current indentation to -1.
        self.unwind_indents(-1)

        # No block collections after '---'.
        self.allow_block_collection = False

        # No flow simple keys (not needed -- we are in the block context).
        self.allow_flow_simple_keys = False

        # Reset possible simple keys (not needed -- EOL should have reset it).
        self.possible_simple_keys = {}

        start_marker = self.stream.get_marker()

        # The characters are already checked, just move forward.
        self.stream.read(3)

        end_marker = self.stream.get_marker()

        # Add DOCUMENT-START.
        self.tokens.append(DocumentStartToken(start_marker, end_marker))

    def check_document_end(self):
        """Return True if the stream is positioned at a '...' marker."""

        # Checking for
        #   /* The beginning of the line */ '...' /* Space or EOL */
        if self.stream.column != 0:
            return False
        prefix = self.stream.peek(4)
        return prefix[:3] == u'...' and prefix[3] in u' \t\r\n\x85\u2028\u2029'

    def fetch_document_end(self):
        """Consume '...' and emit the DOCUMENT-END token."""
        # The same code as `fetch_document_start`.

        # Set the current indentation to -1.
        self.unwind_indents(-1)

        # Reset everything (not really needed).
        self.allow_block_collection = False
        self.allow_flow_simple_keys = False
        self.possible_simple_keys = {}

        start_marker = self.stream.get_marker()

        # The characters are already checked, just move forward.
        self.stream.read(3)

        end_marker = self.stream.get_marker()

        # Add DOCUMENT-END.
        self.tokens.append(DocumentEndToken(start_marker, end_marker))
|---|
| 274 | |
|---|
| 275 | |
|---|
| 276 | |
|---|
| 277 | # Tokens: |
|---|
| 278 | # YAML_DIRECTIVE: ^ '%' YAML ' '+ (version: \d+ '.' \d+) s-l-comments |
|---|
| 279 | # TAG_DIRECTIVE: ^ '%' TAG ' '+ (handle: '!' (word-char* '!')? ) (prefix: uri-char+) s-l-comments |
|---|
| 280 | # RESERVED_DIRECTIVE: ^ '%' (directive-name: ns-char+) (' '+ (directive-parameter: ns-char+))* s-l-comments |
|---|
| 281 | # DOCUMENT_START: ^ '---' (' ' | b-any) |
|---|
| 282 | # DOCUMENT_END: ^ '...' (' ' | b-any) |
|---|
| 283 | # TAG: '!' ( ('<' uri-char+ '>') | uri-char* ) (' ' | b-any) |
|---|
| 284 | # ANCHOR: '&' ns-char+ <-- bug |
|---|
| 285 | # ALIAS: '*' ns-char+ <-- bug |
|---|
| 286 | # ENTRY(block): '-' (' ' | b-any) |
|---|
| 287 | # KEY(block): '?' (' ' | b-any) |
|---|
| 288 | # VALUE(block): ':' (' ' | b-any) |
|---|
| 289 | # FLOW_SEQ_START: '[' |
|---|
| 290 | # FLOW_SEQ_END: ']' |
|---|
| 291 | # FLOW_MAP_START: '{' |
|---|
| 292 | # FLOW_MAP_END: '}' |
|---|
| 293 | # KEY(flow): '?' |
|---|
| 294 | # VALUE(flow): ':' |
|---|
| 295 | # ENTRY(flow): ',' |
|---|
| 296 | # PLAIN: (plain-char - indicator) | ([-?:] plain-char) ... <-- bugs |
|---|
| 297 | # DOUBLE_QUOTED: '"' ... |
|---|
| 298 | # SINGLE_QUOTED: ''' ... |
|---|
| 299 | # LITERAL: '|' ... |
|---|
| 300 | # FOLDED: '>' ... |
|---|
| 301 | # BLOCK_SEQ_START: indentation before '-'. |
|---|
| 302 | # BLOCK_MAP_START: indentation before '?' or a simple key. |
|---|
| 303 | # BLOCK_END: no indentation |
|---|
| 304 | # LINE: end of line |
|---|
| 305 | |
|---|
| 306 | # b-generic: \r \n | \r | \n | #x85 |
|---|
| 307 | # b-specific: #x2028 | #x2029 |
|---|
| 308 | # b-any: b-generic | b-specific |
|---|
| 309 | # hex-digit: [0-9A-Fa-f] |
|---|
| 310 | # word-char: [0-9A-Za-z-] |
|---|
| 311 | # uri-char: word-char | % hex-digit hex-digit | [;/?:@&=+$,_.!~*'()[]] |
|---|
| 312 | |
|---|
| 313 | # Production rules: |
|---|
| 314 | # stream :== implicit_document? explicit_document* END |
|---|
| 315 | # explicit_document :== DIRECTIVE* DOCUMENT_START block_node? DOCUMENT_END? |
|---|
| 316 | # implicit_document :== block_node DOCUMENT_END? |
|---|
| 317 | # block_node :== ALIAS | properties? block_content |
|---|
| 318 | # flow_node :== ALIAS | properties? flow_content |
|---|
| 319 | # properties :== TAG ANCHOR? | ANCHOR TAG? |
|---|
| 320 | # block_content :== block_collection | flow_collection | SCALAR |
|---|
| 321 | # flow_content :== flow_collection | SCALAR |
|---|
| 322 | # block_collection :== block_sequence | block_mapping |
|---|
| 323 | # block_sequence :== BLOCK_SEQ_START (ENTRY block_node?)* BLOCK_END |
|---|
| 324 | # block_mapping :== BLOCK_MAP_START ((KEY block_node_or_indentless_sequence?)? (VALUE block_node_or_indentless_sequence?)?)* BLOCK_END |
|---|
| 325 | # block_node_or_indentless_sequence :== ALIAS | properties? (block_content | indentless_block_sequence) |
|---|
| 326 | # indentless_block_sequence :== (ENTRY block_node?)+ |
|---|
| 327 | # flow_collection :== flow_sequence | flow_mapping |
|---|
| 328 | # flow_sequence :== FLOW_SEQ_START (flow_sequence_entry ENTRY)* flow_sequence_entry? FLOW_SEQ_END |
|---|
| 329 | # flow_sequence_entry :== flow_node | KEY flow_node (VALUE flow_node?)? |
|---|
| 330 | # flow_mapping :== FLOW_MAP_START (flow_mapping_entry ENTRY)* flow_mapping_entry? FLOW_MAP_END |
|---|
| 331 | # flow_mapping_entry :== flow_node | KEY flow_node (VALUE flow_node?)? |
|---|
| 332 | |
|---|
| 333 | # FIRST(rule) sets: |
|---|
| 334 | # stream: {} |
|---|
| 335 | # explicit_document: { DIRECTIVE DOCUMENT_START } |
|---|
| 336 | # implicit_document: block_node |
|---|
| 337 | # block_node: { ALIAS TAG ANCHOR SCALAR BLOCK_SEQ_START BLOCK_MAP_START FLOW_SEQ_START FLOW_MAP_START } |
|---|
| 338 | # flow_node: { ALIAS TAG ANCHOR SCALAR FLOW_SEQ_START FLOW_MAP_START } |
|---|
| 339 | # block_content: { BLOCK_SEQ_START BLOCK_MAP_START FLOW_SEQ_START FLOW_MAP_START SCALAR } |
|---|
| 340 | # flow_content: { FLOW_SEQ_START FLOW_MAP_START SCALAR } |
|---|
| 341 | # block_collection: { BLOCK_SEQ_START BLOCK_MAP_START } |
|---|
| 342 | # flow_collection: { FLOW_SEQ_START FLOW_MAP_START } |
|---|
| 343 | # block_sequence: { BLOCK_SEQ_START } |
|---|
| 344 | # block_mapping: { BLOCK_MAP_START } |
|---|
| 345 | # block_node_or_indentless_sequence: { ALIAS TAG ANCHOR SCALAR BLOCK_SEQ_START BLOCK_MAP_START FLOW_SEQ_START FLOW_MAP_START ENTRY } |
|---|
| 346 | # indentless_block_sequence: { ENTRY } |
|---|
| 347 | # flow_collection: { FLOW_SEQ_START FLOW_MAP_START } |
|---|
| 348 | # flow_sequence: { FLOW_SEQ_START } |
|---|
| 349 | # flow_mapping: { FLOW_MAP_START } |
|---|
| 350 | # flow_sequence_entry: { ALIAS TAG ANCHOR SCALAR FLOW_SEQ_START FLOW_MAP_START KEY } |
|---|
| 351 | # flow_mapping_entry: { ALIAS TAG ANCHOR SCALAR FLOW_SEQ_START FLOW_MAP_START KEY } |
|---|
| 352 | |
|---|
class Marker(object):
    """A position inside `data`, with lazily computed line and column.

    `source` and `length` are stored as given; only `data` and `index`
    take part in the line/column computation.  Only '\\n' is treated as a
    line separator.
    """

    def __init__(self, source, data, index, length=0):
        self.source = source
        self.data = data
        self.index = index
        self.length = length
        # Cached results of `_make_line_position`; None until first use.
        self._line = None
        self._position = None

    def line(self):
        """Return `(start, end)` offsets of the line containing `index`.

        `end` points just past the terminating '\\n', or at `len(data)`
        when the line is not terminated.
        """
        if self._line is None:
            self._make_line_position()
        return self._line

    def position(self):
        """Return the zero-based `(row, column)` of `index`."""
        if self._position is None:
            self._make_line_position()
        return self._position

    def _make_line_position(self):
        """Compute and cache both the line span and the (row, column)."""
        # rfind() yields -1 when there is no earlier newline, so +1 gives 0.
        line_start = self.data.rfind('\n', 0, self.index)+1
        # find() yields -1 when the line is unterminated, so +1 gives 0.
        line_end = self.data.find('\n', self.index)+1
        if line_end == 0:
            line_end = len(self.data)
        self._line = (line_start, line_end)
        row = self.data.count('\n', 0, line_start)
        col = self.index-line_start
        self._position = (row, col)
|---|
| 382 | |
|---|
class Error(Exception):
    """YAML error carrying an optional message and source marker.

    `marker`, when given, must provide `source`, `data`, `position()`
    returning `(row, col)` and `line()` returning `(start, end)` offsets
    into `data`.
    """

    def __init__(self, message=None, marker=None):
        Exception.__init__(self)
        self.message = message
        self.marker = marker

    def __str__(self):
        """Format the location, the offending line, a caret and the message."""
        if self.marker is not None:
            row, col = self.marker.position()
            start, end = self.marker.line()
            text = self.marker.data[start:end].rstrip()
            # Encode only text that is not already a native `str` (i.e. a
            # Python 2 `unicode`).  Encoding a native string would render
            # as b'...' on Python 3 and could fail on non-ASCII byte
            # strings on Python 2.
            if not isinstance(text, str):
                text = text.encode('utf-8')
            error_position = "source \"%s\", line %s, column %s:\n%s\n"  \
                    % (self.marker.source, row+1, col+1, text)
            error_pointer = " " * col + "^\n"
        else:
            error_position = ""
            error_pointer = ""
        if self.message is not None:
            error_message = self.message
        else:
            error_message = "YAML error"
        return error_position+error_pointer+error_message
|---|
| 405 | |
|---|
| 406 | class _Scanner: |
|---|
| 407 | |
|---|
def scan(self, source, data):
    """Reset all scanner state, tokenize `data` and return the token list.

    `source` is only stored on the instance.  The loop alternates between
    skipping ignorable input (`eat_ignored`) and fetching one token
    (`fetch_token`) until both report that nothing was consumed.
    """
    self.source = source
    self.data = data
    # Nesting depth of '[' / '{'; zero means block context.
    self.flow_level = 0
    # Stack of enclosing indentation levels and the current one.
    self.indents = []
    self.indent = -1
    # Cursor into `data` plus the matching line/column counters.
    self.index = 0
    self.line = 0
    self.column = 0
    self.allow_block_collection = True
    # Bookkeeping for a possible block simple key.
    self.guess_simple_key = False
    self.guess_simple_key_token = None
    self.guess_simple_key_indent = None
    # Bookkeeping for possible flow simple keys.
    self.allow_flow_key = False
    self.guess_flow_key_levels = []
    self.guess_flow_key_tokens = []
    self.tokens = []
    while self.eat_ignored() or self.fetch_token():
        pass
    return self.tokens
|---|
| 428 | |
|---|
def eat_ignored(self):
    """Skip spaces, comments and line breaks.

    Returns True if any of the three skipping helpers reported progress
    at least once, False otherwise.
    """
    result = False
    while self.eat_ignored_spaces() or self.eat_ignored_comment() or self.eat_ignored_newline():
        result = True
    return result
|---|
| 434 | |
|---|
def eat_ignored_spaces(self):
    """Advance over a run of ' ' characters.

    Returns True if at least one space was consumed.  Tabs are not
    skipped.
    """
    result = False
    while self.index < len(self.data) and self.data[self.index] == ' ':
        self.index += 1
        self.column += 1
        result = True
    return result
|---|
| 442 | |
|---|
def eat_ignored_comment(self):
    """Consume a '#' comment up to (not including) the line break.

    Always returns False, even when a comment was consumed.  The callers
    visible in this file combine it with `eat_ignored_newline` in an `or`
    chain, so the following line break is still picked up there.
    """
    if self.index < len(self.data) and self.data[self.index] == '#':
        self.eat_line()
    return False
|---|
| 447 | |
|---|
def eat_line(self):
    """Advance to the next '\\r' or '\\n' (or to the end of the data).

    The line break itself is not consumed.  Returns True if at least one
    character was consumed.
    """
    result = False
    while self.index < len(self.data) and self.data[self.index] not in '\r\n':
        self.index += 1
        self.column += 1
        result = True
    return result
|---|
| 455 | |
|---|
def eat_ignored_newline(self):
    """Consume one line break ('\\r\\n', '\\r' or '\\n') if present.

    On success the line counter is bumped, the column is reset to 0 and
    block collections are allowed again.  Returns True if a line break
    was consumed.
    """
    if self.index < len(self.data) and self.data[self.index] in '\r\n':
        # Treat CR LF as a single line break.
        if self.data[self.index:self.index+2] == '\r\n':
            self.index += 2
        else:
            self.index += 1
        self.line += 1
        self.column = 0
        self.allow_block_collection = True
        return True
    return False
|---|
| 467 | |
|---|
def eat_ns(self):
    """Advance over a run of non-space characters.

    Stops at ' ', tab, '\\r', '\\n' or the end of the data.  Returns True
    if at least one character was consumed.
    """
    result = False
    while self.index < len(self.data) and self.data[self.index] not in ' \t\r\n':
        self.index += 1
        self.column += 1
        result = True
    return result
|---|
| 475 | |
|---|
def eat_indent(self, indent=0):
    """Consume up to `indent` leading spaces at the start of a line.

    The requested width is raised to the current `self.indent` when that
    is larger.  Returns False immediately unless the cursor is at column
    0; otherwise returns True only if the full width was consumed.
    Spaces consumed before a shortfall are not given back.
    """
    if indent < self.indent:
        indent = self.indent
    if self.column != 0:
        return False
    count = 0
    while self.index < len(self.data) and self.data[self.index] == ' ' and count < indent:
        self.index += 1
        self.column += 1
        count += 1
    return count == indent
|---|
| 487 | |
|---|
def eat_double_quoted(self):
    """Consume a double quoted scalar starting at the cursor.

    Returns False (consuming nothing) if the cursor is not on '"'.
    Returns True once the closing quote has been consumed.  A missing
    closing quote, or a continuation line without at least one space of
    indentation, is reported through `self.error`; if that call returns,
    this method falls through and returns None.
    """
    if not (self.index < len(self.data) and self.data[self.index] == '"'):
        return False
    # Skip the opening quote.
    self.index += 1
    self.column += 1
    while self.index < len(self.data) and self.data[self.index] != '"':
        if self.data[self.index:self.index+2] in ['\\\\', '\\"']:
            # An escaped backslash or quote: skip both characters so the
            # escaped quote does not terminate the scalar.
            self.index += 2
            self.column += 2
        elif self.data[self.index] in '\r\n':
            self.eat_ignored_newline()
            if not self.eat_indent(1):
                self.error("Invalid indentation")
        else:
            self.index += 1
            self.column += 1
    if self.index < len(self.data) and self.data[self.index] == '"':
        # Skip the closing quote.
        self.index += 1
        self.column += 1
        return True
    self.error("unclosed double quoted scalar")
|---|
| 511 | |
|---|
def eat_single_quoted(self):
    """Consume a single quoted scalar starting at the cursor.

    Returns False (consuming nothing) if the cursor is not on "'".
    Returns True once the closing quote has been consumed.  A doubled
    quote ('') inside the scalar is an escaped quote and does not end it.
    A missing closing quote, or a continuation line without at least one
    space of indentation, is reported through `self.error`; if that call
    returns, this method falls through and returns None.
    """
    if not (self.index < len(self.data) and self.data[self.index] == '\''):
        return False
    # Skip the opening quote.
    self.index += 1
    self.column += 1
    while self.index < len(self.data) and \
            (self.data[self.index] != '\'' or self.data[self.index:self.index+2] == '\'\''):
        if self.data[self.index:self.index+2] == '\'\'':
            # Escaped quote: skip both characters.
            self.index += 2
            self.column += 2
        elif self.data[self.index] in '\r\n':
            self.eat_ignored_newline()
            if not self.eat_indent(1):
                self.error("Invalid indentation")
        else:
            self.index += 1
            self.column += 1
    if self.index < len(self.data) and self.data[self.index] == '\'':
        # Skip the closing quote.
        self.index += 1
        self.column += 1
        return True
    self.error("unclosed single quoted scalar")
|---|
| 536 | |
|---|
def eat_folded(self):
    """Consume a folded ('>') block scalar.

    Delegates to `eat_block_scalar` and passes its result through, in
    line with `eat_block_plain` / `eat_flow_plain`.
    """
    return self.eat_block_scalar()
|---|
| 539 | |
|---|
def eat_literal(self):
    """Consume a literal ('|') block scalar.

    Delegates to `eat_block_scalar` and passes its result through, in
    line with `eat_block_plain` / `eat_flow_plain`.
    """
    return self.eat_block_scalar()
|---|
| 542 | |
|---|
def eat_block_scalar(self):
    """Consume a block scalar ('>' or '|') header and its body lines.

    Returns False (consuming nothing) if the cursor is not on '>' or '|'.
    Otherwise the header line is consumed, followed by every line that is
    indented by at least `max(self.indent + 1, 1)` spaces or is a line
    break on its own, and True is returned.
    """
    if not (self.index < len(self.data) and self.data[self.index] in '>|'):
        return False
    # The header line (indicator and anything after it) is skipped whole.
    self.eat_line()
    if not self.eat_ignored_newline():
        # End of data right after the header: the scalar has no body.
        return True
    indent = self.indent+1
    if indent < 1:
        indent = 1
    # All the work happens in the condition.  Note that `eat_indent` may
    # consume some spaces even when it reports failure.
    while (self.eat_indent(indent) and ((self.eat_line() and self.eat_ignored_newline()) or (self.eat_ignored_newline()))) or  \
            (self.eat_ignored_comment() and self.eat_ignored_newline()) or  \
            self.eat_ignored_newline():
        pass
    return True
|---|
| 557 | |
|---|
def eat_block_plain(self):
    """Consume a plain scalar using the block-context rules."""
    return self.eat_plain(block=True)
|---|
| 560 | |
|---|
def eat_flow_plain(self):
    """Consume a plain scalar using the flow-context rules."""
    return self.eat_plain(block=False)
|---|
| 563 | |
|---|
def eat_plain(self, block):
    """Consume a plain (unquoted) scalar starting at the cursor.

    `block` selects block-context rules (True) or flow-context rules
    (False).  Returns True if a scalar was consumed, False if the cursor
    is at the end of the data or on a character that cannot start a
    plain scalar.

    As a side effect the start of the scalar is recorded as a possible
    simple key (block and/or flow bookkeeping).

    NOTE(review): the indentation of this method was lost in the source
    this was reconstructed from.  The nesting of the three statements
    marked "placement assumed" below is a best guess -- confirm against
    the upstream file.
    """
    indent = self.indent+1
    if indent < 1:
        indent = 1
    if self.index < len(self.data):
        # A plain scalar may start with any non-indicator character, or
        # (block context only) with '-', '?' or ':' when that character
        # is not followed by a space, a line break or the end of data.
        if self.data[self.index] not in ' \t\r\n-?:,[]{}#&*!|>\'"%@`' or \
                (block and self.data[self.index] == '-' and self.data[self.index:self.index+2] not in ['-', '- ', '-\r', '-\n']) or \
                (block and self.data[self.index] == '?' and self.data[self.index:self.index+2] not in ['?', '? ', '?\r', '?\n']) or \
                (block and self.data[self.index] == ':' and self.data[self.index:self.index+2] not in [':', ': ', ':\r', ':\n']):
            if block and self.allow_block_collection:
                self.guessing_simple_key()
            if self.flow_level and self.allow_flow_key:
                self.guess_flow_key_levels.append(self.flow_level)
                self.guess_flow_key_tokens.append(len(self.tokens))
            # Placement assumed: unconditional, as in the other token
            # branches of `fetch_token`.
            self.allow_flow_key = False
            self.index += 1
            self.column += 1
            # True when the previous character was NOT a space or tab
            # (despite the name); a '#' directly after a non-space
            # character is part of the scalar, not a comment.
            space = False
            while True:
                self.eat_ignored_spaces()
                while self.index < len(self.data) and (
                        self.data[self.index] not in '\r\n?:,[]{}#' or
                        (not space and self.data[self.index] == '#') or
                        (block and self.data[self.index] in '?,[]{}') or
                        (block and self.data[self.index] == ':' and self.data[self.index:self.index+2] not in [':', ': ', ':\r', ':\n'])):
                    space = self.data[self.index] not in ' \t'
                    self.index += 1
                    self.column += 1
                # Placement assumed: once per scanned line.
                self.allow_block_collection = False
                # Continue on the next line only if it is indented deeply
                # enough to belong to this scalar.
                if not (self.eat_ignored_newline() and self.eat_indent(indent)):
                    break
                # Placement assumed: inside the outer loop, after a
                # successful line continuation.
                space = True
            return True
    return False
|---|
| 598 | |
|---|
def no_simple_key(self):
    """Forget any pending block simple key guess."""
    self.guess_simple_key = False
    self.guess_simple_key_token = None
    self.guess_simple_key_indent = None
|---|
| 603 | |
|---|
def guessing_simple_key(self):
    """Record the current position as a possible block simple key.

    Remembers the index the next token will get in `self.tokens` and the
    current column, for use if a ':' indicator follows.
    """
    self.guess_simple_key = True
    self.guess_simple_key_token = len(self.tokens)
    self.guess_simple_key_indent = self.column
|---|
| 608 | |
|---|
def unwind_indents(self, level):
    """Close every block whose indentation is greater than `level`.

    For each such block a 'BLOCK_END' token is appended, the previous
    indentation is restored from `self.indents` and the pending simple
    key guess is dropped.  Unwinding while inside a flow collection is
    reported through `self.error`.

    NOTE(review): indentation was lost in the source this was
    reconstructed from.  `no_simple_key()` is placed inside the loop
    because `fetch_token` calls this method before every token; clearing
    the guess unconditionally would discard it before the ':' indicator
    is ever seen.
    """
    while self.indent > level:
        if self.flow_level:
            self.error("Invalid indentation")
        self.tokens.append('BLOCK_END')
        self.indent = self.indents.pop()
        self.no_simple_key()
|---|
| 616 | |
|---|
| 617 | def fetch_token(self): |
|---|
| 618 | self.unwind_indents(self.column) |
|---|
| 619 | if self.index < len(self.data): |
|---|
| 620 | if self.column == 0: |
|---|
| 621 | if self.data[self.index] == '%': |
|---|
| 622 | self.tokens.append('DIRECTIVE') |
|---|
| 623 | self.eat_line() |
|---|
| 624 | self.no_simple_key() |
|---|
| 625 | return True |
|---|
| 626 | if self.data[self.index:self.index+3] == '---' and \ |
|---|
| 627 | (not self.data[self.index+3:self.index+4] or self.data[self.index+3:self.index+4] in ' \r\n'): |
|---|
| 628 | self.unwind_indents(-1) |
|---|
| 629 | self.tokens.append('DOCUMENT_START') |
|---|
| 630 | self.index += 3 |
|---|
| 631 | self.column += 3 |
|---|
| 632 | self.allow_block_collection = False |
|---|
| 633 | self.allow_flow_key = False |
|---|
| 634 | self.guess_flow_keys = [] |
|---|
| 635 | self.no_simple_key() |
|---|
| 636 | return True |
|---|
| 637 | if self.data[self.index:self.index+3] == '...' and \ |
|---|
| 638 | (not self.data[self.index+3:self.index+4] or self.data[self.index+3:self.index+4] in ' \r\n'): |
|---|
| 639 | self.unwind_indents(-1) |
|---|
| 640 | self.tokens.append('DOCUMENT_END') |
|---|
| 641 | self.index += 3 |
|---|
| 642 | self.column += 3 |
|---|
| 643 | self.allow_block_collection = False |
|---|
| 644 | self.allow_flow_key = False |
|---|
| 645 | self.guess_flow_keys = [] |
|---|
| 646 | self.no_simple_key() |
|---|
| 647 | return True |
|---|
| 648 | if self.data[self.index] in '[]{}': |
|---|
| 649 | if self.data[self.index] == '[': |
|---|
| 650 | self.flow_level += 1 |
|---|
| 651 | self.allow_flow_key = True |
|---|
| 652 | self.tokens.append('FLOW_SEQ_START') |
|---|
| 653 | elif self.data[self.index] == '{': |
|---|
| 654 | self.flow_level += 1 |
|---|
| 655 | self.allow_flow_key = True |
|---|
| 656 | self.tokens.append('FLOW_MAP_START') |
|---|
| 657 | elif self.data[self.index] == ']': |
|---|
| 658 | if not self.flow_level: |
|---|
| 659 | self.error("Extra ]") |
|---|
| 660 | self.flow_level -= 1 |
|---|
| 661 | self.allow_flow_key = False |
|---|
| 662 | self.tokens.append('FLOW_SEQ_END') |
|---|
| 663 | else: |
|---|
| 664 | if not self.flow_level: |
|---|
| 665 | self.error("Extra }") |
|---|
| 666 | self.flow_level -= 1 |
|---|
| 667 | self.allow_flow_key = False |
|---|
| 668 | self.tokens.append('FLOW_MAP_END') |
|---|
| 669 | while self.guess_flow_key_levels and self.guess_flow_key_levels[-1] > self.flow_level: |
|---|
| 670 | self.guess_flow_key_levels.pop() |
|---|
| 671 | self.guess_flow_key_tokens.pop() |
|---|
| 672 | self.index += 1 |
|---|
| 673 | self.column += 1 |
|---|
| 674 | self.allow_block_collection = False |
|---|
| 675 | return True |
|---|
| 676 | if self.data[self.index] in '!&*': |
|---|
| 677 | if self.flow_level and self.allow_flow_key: |
|---|
| 678 | self.guess_flow_key_levels.append(self.flow_level) |
|---|
| 679 | self.guess_flow_key_tokens.append(len(self.tokens)) |
|---|
| 680 | if not self.flow_level and self.allow_block_collection: |
|---|
| 681 | self.guessing_simple_key() |
|---|
| 682 | if self.data[self.index] == '!': |
|---|
| 683 | self.tokens.append('TAG') |
|---|
| 684 | elif self.data[self.index] == '&': |
|---|
| 685 | self.tokens.append('ANCHOR') |
|---|
| 686 | else: |
|---|
| 687 | self.tokens.append('ALIAS') |
|---|
| 688 | self.eat_ns() |
|---|
| 689 | self.allow_flow_key = False |
|---|
| 690 | self.allow_block_collection = False |
|---|
| 691 | return True |
|---|
| 692 | if self.data[self.index] == '"': |
|---|
| 693 | if self.flow_level and self.allow_flow_key: |
|---|
| 694 | self.guess_flow_key_levels.append(self.flow_level) |
|---|
| 695 | self.guess_flow_key_tokens.append(len(self.tokens)) |
|---|
| 696 | if not self.flow_level and self.allow_block_collection: |
|---|
| 697 | self.guessing_simple_key() |
|---|
| 698 | self.tokens.append('SCALAR') |
|---|
| 699 | self.eat_double_quoted() |
|---|
| 700 | self.allow_flow_key = False |
|---|
| 701 | self.allow_block_collection = False |
|---|
| 702 | return True |
|---|
| 703 | if self.data[self.index] == '\'': |
|---|
| 704 | if self.flow_level and self.allow_flow_key: |
|---|
| 705 | self.guess_flow_key_levels.append(self.flow_level) |
|---|
| 706 | self.guess_flow_key_tokens.append(len(self.tokens)) |
|---|
| 707 | if not self.flow_level and self.allow_block_collection: |
|---|
| 708 | self.guessing_simple_key() |
|---|
| 709 | self.tokens.append('SCALAR') |
|---|
| 710 | self.eat_single_quoted() |
|---|
| 711 | self.allow_flow_key = False |
|---|
| 712 | self.allow_block_collection = False |
|---|
| 713 | return True |
|---|
| 714 | if not self.flow_level: |
|---|
| 715 | if self.data[self.index] in '-?:' and \ |
|---|
| 716 | (not self.data[self.index+1:self.index+2] or self.data[self.index+1:self.index+2] in ' \r\n'): |
|---|
| 717 | if self.guess_simple_key and self.data[self.index] == ':': |
|---|
| 718 | self.tokens.insert(self.guess_simple_key_token, 'KEY') |
|---|
| 719 | if self.guess_simple_key_indent > self.indent: |
|---|
| 720 | self.indents.append(self.indent) |
|---|
| 721 | self.indent = self.guess_simple_key_indent |
|---|
| 722 | self.tokens.insert(self.guess_simple_key_token, 'BLOCK_MAP_START') |
|---|
| 723 | self.tokens.append('VALUE') |
|---|
| 724 | self.no_simple_key() |
|---|
| 725 | self.index += 1 |
|---|
| 726 | self.column += 1 |
|---|
| 727 | self.allow_block_collection = False |
|---|
| 728 | return True |
|---|
| 729 | else: |
|---|
| 730 | if not self.allow_block_collection: |
|---|
| 731 | self.error("Block collection should start at the beginning of the line") |
|---|
| 732 | if self.column > self.indent: |
|---|
| 733 | self.indents.append(self.indent) |
|---|
| 734 | self.indent = self.column |
|---|
| 735 | if self.data[self.index] == '-': |
|---|
| 736 | self.tokens.append('BLOCK_SEQ_START') |
|---|
| 737 | else: |
|---|
| 738 | self.tokens.append('BLOCK_MAP_START') |
|---|
| 739 | if self.data[self.index] == '-': |
|---|
| 740 | self.tokens.append('ENTRY') |
|---|
| 741 | elif self.data[self.index] == '?': |
|---|
| 742 | self.tokens.append('KEY') |
|---|
| 743 | else: |
|---|
| 744 | self.tokens.append('VALUE') |
|---|
| 745 | self.index += 1 |
|---|
| 746 | self.column += 1 |
|---|
| 747 | #self.allow_block_collection = False |
|---|
| 748 | self.allow_block_collection = True |
|---|
| 749 | self.no_simple_key() |
|---|
| 750 | return True |
|---|
| 751 | if self.data[self.index] == '>': |
|---|
| 752 | self.no_simple_key() |
|---|
| 753 | self.tokens.append('SCALAR') |
|---|
| 754 | self.eat_folded() |
|---|
| 755 | self.allow_block_collection = True |
|---|
| 756 | return True |
|---|
| 757 | if self.data[self.index] == '|': |
|---|
| 758 | self.no_simple_key() |
|---|
| 759 | self.tokens.append('SCALAR') |
|---|
| 760 | self.eat_literal() |
|---|
| 761 | self.allow_block_collection = True |
|---|
| 762 | return True |
|---|
| 763 | if self.eat_block_plain(): |
|---|
| 764 | self.tokens.append('SCALAR') |
|---|
| 765 | return True |
|---|
| 766 | else: |
|---|
| 767 | if self.data[self.index] in ',?:': |
|---|
| 768 | if self.data[self.index] == ',': |
|---|
| 769 | self.tokens.append('ENTRY') |
|---|
| 770 | while self.guess_flow_key_levels and self.guess_flow_key_levels[-1] >= self.flow_level: |
|---|
| 771 | self.guess_flow_key_levels.pop() |
|---|
| 772 | self.guess_flow_key_tokens.pop() |
|---|
| 773 | self.allow_flow_key = True |
|---|
| 774 | elif self.data[self.index] == '?': |
|---|
| 775 | self.tokens.append('KEY') |
|---|
| 776 | while self.guess_flow_key_levels and self.guess_flow_key_levels[-1] >= self.flow_level: |
|---|
| 777 | self.guess_flow_key_levels.pop() |
|---|
| 778 | self.guess_flow_key_tokens.pop() |
|---|
| 779 | self.allow_flow_key = False |
|---|
| 780 | else: |
|---|
| 781 | self.tokens.append('VALUE') |
|---|
| 782 | if self.guess_flow_key_levels and self.guess_flow_key_levels[-1] == self.flow_level: |
|---|
| 783 | self.guess_flow_key_levels.pop() |
|---|
| 784 | index = self.guess_flow_key_tokens.pop() |
|---|
| 785 | self.tokens.insert(index, 'KEY') |
|---|
| 786 | self.allow_flow_key =False |
|---|
| 787 | self.index += 1 |
|---|
| 788 | self.column += 1 |
|---|
| 789 | return True |
|---|
| 790 | if self.eat_flow_plain(): |
|---|
| 791 | self.tokens.append('SCALAR') |
|---|
| 792 | return True |
|---|
| 793 | self.error("Invalid token") |
|---|
| 794 | else: |
|---|
| 795 | self.unwind_indents(-1) |
|---|
| 796 | |
|---|
def error(self, message):
    """Abort scanning by raising ``Error`` for ``message``.

    The exception is given a ``Marker`` built from the scanner's source
    name, input data and current index so that the failure can be
    reported together with its position in the input.
    """
    position = Marker(self.source, self.data, self.index)
    raise Error(message, position)
|---|
| 799 | |
|---|
class Parser:
    """Recursive-descent parser over the token names produced by ``Scanner``.

    The parser only validates structure; it does not build real values.
    The result is a skeleton of the document:

    * a scalar is represented by ``True``;
    * an alias is represented by ``'*'``;
    * an empty (missing) node is represented by ``None``;
    * a sequence is a ``list`` of nodes;
    * a mapping is a ``list`` of ``(key, value)`` tuples;
    * a single ``key: value`` pair inside a flow sequence is a one-element
      list ``[(key, value)]``.

    ``self.tokens`` is a list of token names (strings) consumed from the
    front; it is expected to be terminated by ``'END'``.
    """

    # Tokens that may follow a block mapping KEY/VALUE indicator, or an
    # indentless sequence ENTRY, when the node itself is empty.
    _BLOCK_EMPTY_NODE_FOLLOWERS = ('KEY', 'VALUE', 'BLOCK_END')

    def parse(self, source, data):
        """Scan ``data`` and parse the resulting tokens.

        Returns the single document skeleton when the stream holds exactly
        one document, otherwise a tuple of document skeletons (possibly
        empty).
        """
        scanner = Scanner()
        self.tokens = scanner.scan(source, data)
        # Sentinel so that ``self.tokens[0]`` is always available.
        self.tokens.append('END')
        documents = self.parse_stream()
        if len(documents) == 1:
            return documents[0]
        return documents

    def parse_stream(self):
        """Parse every document in the token stream.

        A leading document may be implicit (no DOCUMENT_START); every
        following document must be introduced by DOCUMENT_START, optionally
        preceded by DIRECTIVE tokens. Returns a tuple of document skeletons,
        with ``None`` for an empty document.
        """
        documents = []
        # Implicit first document: content appears before any marker.
        if self.tokens[0] not in ['DIRECTIVE', 'DOCUMENT_START', 'END']:
            documents.append(self.parse_block_node())
        while self.tokens[0] != 'END':
            while self.tokens[0] == 'DIRECTIVE':
                self.tokens.pop(0)
            if self.tokens[0] != 'DOCUMENT_START':
                self.error('DOCUMENT_START is expected')
            self.tokens.pop(0)
            if self.tokens[0] in ['DIRECTIVE', 'DOCUMENT_START', 'DOCUMENT_END', 'END']:
                # DOCUMENT_START immediately followed by a marker: empty document.
                documents.append(None)
            else:
                documents.append(self.parse_block_node())
            while self.tokens[0] == 'DOCUMENT_END':
                self.tokens.pop(0)
        if self.tokens[0] != 'END':
            self.error("END is expected")
        return tuple(documents)

    def _consume_alias_or_properties(self):
        """Consume a leading ALIAS, or skip the node's TAG/ANCHOR properties.

        TAG and ANCHOR may each appear at most once, in either order.
        Returns ``True`` when an ALIAS was consumed (the node is then
        complete), ``False`` otherwise.
        """
        if self.tokens[0] == 'ALIAS':
            self.tokens.pop(0)
            return True
        if self.tokens[0] == 'TAG':
            self.tokens.pop(0)
            if self.tokens[0] == 'ANCHOR':
                self.tokens.pop(0)
        elif self.tokens[0] == 'ANCHOR':
            self.tokens.pop(0)
            if self.tokens[0] == 'TAG':
                self.tokens.pop(0)
        return False

    def parse_block_node(self):
        """Parse a node in block context: an alias, or properties + block content."""
        if self._consume_alias_or_properties():
            return '*'
        return self.parse_block_content()

    def parse_flow_node(self):
        """Parse a node in flow context: an alias, or properties + flow content."""
        if self._consume_alias_or_properties():
            return '*'
        return self.parse_flow_content()

    def parse_block_node_or_indentless_sequence(self):
        """Parse a block mapping key or value.

        Same as ``parse_block_node``, except that a run of ENTRY tokens with
        no BLOCK_SEQ_START in front of it is accepted as a sequence.
        """
        if self._consume_alias_or_properties():
            return '*'
        if self.tokens[0] == 'ENTRY':
            # Bound method call: ``self`` must not be passed explicitly.
            return self.parse_indentless_sequence()
        return self.parse_block_content()

    def parse_block_content(self):
        """Parse the content of a block node.

        Returns ``True`` for a scalar, or the skeleton of a block/flow
        collection. Any other token is reported through ``self.error``.
        """
        if self.tokens[0] == 'SCALAR':
            self.tokens.pop(0)
            return True
        elif self.tokens[0] == 'BLOCK_SEQ_START':
            return self.parse_block_sequence()
        elif self.tokens[0] == 'BLOCK_MAP_START':
            return self.parse_block_mapping()
        elif self.tokens[0] == 'FLOW_SEQ_START':
            return self.parse_flow_sequence()
        elif self.tokens[0] == 'FLOW_MAP_START':
            return self.parse_flow_mapping()
        else:
            self.error('block content is expected')

    def parse_flow_content(self):
        """Parse the content of a flow node.

        Returns ``True`` for a scalar, or the skeleton of a flow collection.
        Any other token is reported through ``self.error``.
        """
        if self.tokens[0] == 'SCALAR':
            self.tokens.pop(0)
            return True
        elif self.tokens[0] == 'FLOW_SEQ_START':
            return self.parse_flow_sequence()
        elif self.tokens[0] == 'FLOW_MAP_START':
            return self.parse_flow_mapping()
        else:
            self.error('flow content is expected')

    def parse_block_sequence(self):
        """Parse ``BLOCK_SEQ_START (ENTRY node?)* BLOCK_END`` into a list.

        An ENTRY directly followed by another ENTRY or by BLOCK_END yields
        ``None`` for that item.
        """
        sequence = []
        if self.tokens[0] != 'BLOCK_SEQ_START':
            self.error('BLOCK_SEQ_START is expected')
        self.tokens.pop(0)
        while self.tokens[0] == 'ENTRY':
            self.tokens.pop(0)
            if self.tokens[0] not in ['ENTRY', 'BLOCK_END']:
                sequence.append(self.parse_block_node())
            else:
                sequence.append(None)
        if self.tokens[0] != 'BLOCK_END':
            self.error('BLOCK_END is expected')
        self.tokens.pop(0)
        return sequence

    def parse_indentless_sequence(self):
        """Parse ``(ENTRY node?)+`` with no surrounding START/END tokens.

        The sequence has no BLOCK_END of its own: it stops at the first
        token that is not ENTRY, which is left for the caller. An ENTRY
        followed by another ENTRY, or by a token that continues or closes
        the enclosing block mapping (KEY, VALUE, BLOCK_END), yields ``None``.
        """
        sequence = []
        while self.tokens[0] == 'ENTRY':
            self.tokens.pop(0)
            if (self.tokens[0] == 'ENTRY'
                    or self.tokens[0] in self._BLOCK_EMPTY_NODE_FOLLOWERS):
                sequence.append(None)
            else:
                sequence.append(self.parse_block_node())
        return sequence

    def parse_block_mapping(self):
        """Parse ``BLOCK_MAP_START ((KEY node?)? (VALUE node?)?)* BLOCK_END``.

        Returns a list of ``(key, value)`` tuples; a missing key or value is
        ``None``.
        """
        mapping = []
        if self.tokens[0] != 'BLOCK_MAP_START':
            self.error('BLOCK_MAP_START is expected')
        self.tokens.pop(0)
        while self.tokens[0] in ['KEY', 'VALUE']:
            key = None
            value = None
            if self.tokens[0] == 'KEY':
                self.tokens.pop(0)
                if self.tokens[0] not in self._BLOCK_EMPTY_NODE_FOLLOWERS:
                    key = self.parse_block_node_or_indentless_sequence()
            if self.tokens[0] == 'VALUE':
                self.tokens.pop(0)
                if self.tokens[0] not in self._BLOCK_EMPTY_NODE_FOLLOWERS:
                    value = self.parse_block_node_or_indentless_sequence()
            mapping.append((key, value))
        if self.tokens[0] != 'BLOCK_END':
            self.error('BLOCK_END is expected')
        self.tokens.pop(0)
        return mapping

    def _parse_flow_pair(self, end_token):
        """Parse ``KEY node? (VALUE node?)?`` inside a flow collection.

        ``end_token`` is the token that closes the enclosing collection; a
        VALUE followed by ENTRY or ``end_token`` has an empty value.
        The current token must be KEY. Returns a ``(key, value)`` tuple.
        """
        self.tokens.pop(0)  # the KEY token
        key = None
        value = None
        if self.tokens[0] != 'VALUE':
            key = self.parse_flow_node()
        if self.tokens[0] == 'VALUE':
            self.tokens.pop(0)
            if self.tokens[0] not in ['ENTRY', end_token]:
                value = self.parse_flow_node()
        return (key, value)

    def parse_flow_sequence(self):
        """Parse ``FLOW_SEQ_START (item ENTRY?)* FLOW_SEQ_END`` into a list.

        An item introduced by KEY is a single-pair mapping and is stored as
        ``[(key, value)]``; any other item is a plain flow node.
        """
        sequence = []
        if self.tokens[0] != 'FLOW_SEQ_START':
            self.error('FLOW_SEQ_START is expected')
        self.tokens.pop(0)
        while self.tokens[0] != 'FLOW_SEQ_END':
            if self.tokens[0] == 'KEY':
                sequence.append([self._parse_flow_pair('FLOW_SEQ_END')])
            else:
                sequence.append(self.parse_flow_node())
            if self.tokens[0] not in ['ENTRY', 'FLOW_SEQ_END']:
                self.error("ENTRY or FLOW_SEQ_END is expected")
            if self.tokens[0] == 'ENTRY':
                self.tokens.pop(0)
        if self.tokens[0] != 'FLOW_SEQ_END':
            self.error('FLOW_SEQ_END is expected')
        self.tokens.pop(0)
        return sequence

    def parse_flow_mapping(self):
        """Parse ``FLOW_MAP_START (item ENTRY?)* FLOW_MAP_END``.

        Returns a list of ``(key, value)`` tuples. An item that is not
        introduced by KEY is treated as a key with a ``None`` value.
        """
        mapping = []
        if self.tokens[0] != 'FLOW_MAP_START':
            self.error('FLOW_MAP_START is expected')
        self.tokens.pop(0)
        while self.tokens[0] != 'FLOW_MAP_END':
            if self.tokens[0] == 'KEY':
                mapping.append(self._parse_flow_pair('FLOW_MAP_END'))
            else:
                mapping.append((self.parse_flow_node(), None))
            if self.tokens[0] not in ['ENTRY', 'FLOW_MAP_END']:
                self.error("ENTRY or FLOW_MAP_END is expected")
            if self.tokens[0] == 'ENTRY':
                self.tokens.pop(0)
        if self.tokens[0] != 'FLOW_MAP_END':
            self.error('FLOW_MAP_END is expected')
        self.tokens.pop(0)
        return mapping

    def error(self, message):
        """Raise ``Error`` with ``message`` followed by the unconsumed tokens."""
        raise Error(message + ': ' + str(self.tokens))
|---|
| 1007 | |
|---|