| [37] | 1 | # Tokens: |
|---|
| 2 | # YAML_DIRECTIVE: ^ '%' YAML ' '+ (version: \d+ '.' \d+) s-l-comments |
|---|
| 3 | # TAG_DIRECTIVE: ^ '%' TAG ' '+ (handle: '!' (word-char* '!')? ) (prefix: uri-char+) s-l-comments |
|---|
| 4 | # RESERVED_DIRECTIVE: ^ '%' (directive-name: ns-char+) (' '+ (directive-parameter: ns-char+))* s-l-comments |
|---|
| 5 | # DOCUMENT_START: ^ '---' (' ' | b-any) |
|---|
| 6 | # DOCUMENT_END: ^ '...' (' ' | b-any) |
|---|
| 7 | # TAG: '!' ( ('<' uri-char+ '>') | uri-char* ) (' ' | b-any) |
|---|
| 8 | # ANCHOR: '&' ns-char+ <-- bug |
|---|
| 9 | # ALIAS: '*' ns-char+ <-- bug |
|---|
| 10 | # ENTRY(block): '-' (' ' | b-any) |
|---|
| 11 | # KEY(block): '?' (' ' | b-any) |
|---|
| 12 | # VALUE(block): ':' (' ' | b-any) |
|---|
| 13 | # FLOW_SEQ_START: '[' |
|---|
| 14 | # FLOW_SEQ_END: ']' |
|---|
| 15 | # FLOW_MAP_START: '{' |
|---|
| 16 | # FLOW_MAP_END: '}' |
|---|
| 17 | # KEY(flow): '?' |
|---|
| 18 | # VALUE(flow): ':' |
|---|
| 19 | # ENTRY(flow): ',' |
|---|
| 20 | # PLAIN: (plain-char - indicator) | ([-?:] plain-char) ... <-- bugs |
|---|
| 21 | # DOUBLE_QUOTED: '"' ... |
|---|
| 22 | # SINGLE_QUOTED: ''' ... |
|---|
| 23 | # LITERAL: '|' ... |
|---|
| 24 | # FOLDED: '>' ... |
|---|
| 25 | # BLOCK_SEQ_START: indentation before '-'. |
|---|
| 26 | # BLOCK_MAP_START: indentation before '?' or a simple key. |
|---|
| 27 | # BLOCK_END: no indentation |
|---|
| 28 | # LINE: end of line |
|---|
| 29 | |
|---|
| 30 | # b-generic: \r \n | \r | \n | #x85 |
|---|
| 31 | # b-specific: #x2028 | #x2029 |
|---|
| 32 | # b-any: b-generic | b-specific |
|---|
| 33 | # hex-digit: [0-9A-Fa-f] |
|---|
| 34 | # word-char: [0-9A-Za-z-] |
|---|
| 35 | # uri-char: word-char | % hex-digit hex-digit | [;/?:@&=+$,_.!~*'()[]] |
|---|
| 36 | |
|---|
| 37 | # Production rules: |
|---|
| 38 | # stream :== implicit_document? explicit_document* END |
|---|
| 39 | # explicit_document :== DIRECTIVE* DOCUMENT_START block_node? DOCUMENT_END? |
|---|
| 40 | # implicit_document :== block_node DOCUMENT_END? |
|---|
| 41 | # block_node :== ALIAS | properties? block_content |
|---|
| 42 | # flow_node :== ALIAS | properties? flow_content |
|---|
| 43 | # properties :== TAG ANCHOR? | ANCHOR TAG? |
|---|
| 44 | # block_content :== block_collection | flow_collection | SCALAR |
|---|
| 45 | # flow_content :== flow_collection | SCALAR |
|---|
| 46 | # block_collection :== block_sequence | block_mapping |
|---|
| 47 | # block_sequence :== BLOCK_SEQ_START (ENTRY block_node?)* BLOCK_END |
|---|
| 48 | # block_mapping :== BLOCK_MAP_START ((KEY block_node_or_indentless_sequence?)? (VALUE block_node_or_indentless_sequence?)?)* BLOCK_END |
|---|
| 49 | # block_node_or_indentless_sequence :== ALIAS | properties? (block_content | indentless_block_sequence) |
|---|
| 50 | # indentless_block_sequence :== (ENTRY block_node?)+ |
|---|
| 51 | # flow_collection :== flow_sequence | flow_mapping |
|---|
| 52 | # flow_sequence :== FLOW_SEQ_START (flow_sequence_entry ENTRY)* flow_sequence_entry? FLOW_SEQ_END |
|---|
| 53 | # flow_sequence_entry :== flow_node | KEY flow_node (VALUE flow_node?)? |
|---|
| 54 | # flow_mapping :== FLOW_MAP_START (flow_mapping_entry ENTRY)* flow_mapping_entry? FLOW_MAP_END |
|---|
| 55 | # flow_mapping_entry :== flow_node | KEY flow_node (VALUE flow_node?)? |
|---|
| 56 | |
|---|
| 57 | # FIRST(rule) sets: |
|---|
| 58 | # stream: {} |
|---|
| 59 | # explicit_document: { DIRECTIVE DOCUMENT_START } |
|---|
| 60 | # implicit_document: block_node |
|---|
| 61 | # block_node: { ALIAS TAG ANCHOR SCALAR BLOCK_SEQ_START BLOCK_MAP_START FLOW_SEQ_START FLOW_MAP_START } |
|---|
| 62 | # flow_node: { ALIAS TAG ANCHOR SCALAR FLOW_SEQ_START FLOW_MAP_START } |
|---|
| 63 | # block_content: { BLOCK_SEQ_START BLOCK_MAP_START FLOW_SEQ_START FLOW_MAP_START SCALAR } |
|---|
| 64 | # flow_content: { FLOW_SEQ_START FLOW_MAP_START SCALAR } |
|---|
| 65 | # block_collection: { BLOCK_SEQ_START BLOCK_MAP_START } |
|---|
| 66 | # flow_collection: { FLOW_SEQ_START FLOW_MAP_START } |
|---|
| 67 | # block_sequence: { BLOCK_SEQ_START } |
|---|
| 68 | # block_mapping: { BLOCK_MAP_START } |
|---|
| 69 | # block_node_or_indentless_sequence: { ALIAS TAG ANCHOR SCALAR BLOCK_SEQ_START BLOCK_MAP_START FLOW_SEQ_START FLOW_MAP_START ENTRY } |
|---|
| 70 | # indentless_block_sequence: { ENTRY } |
|---|
| 71 | # flow_collection: { FLOW_SEQ_START FLOW_MAP_START } |
|---|
| 72 | # flow_sequence: { FLOW_SEQ_START } |
|---|
| 73 | # flow_mapping: { FLOW_MAP_START } |
|---|
| 74 | # flow_sequence_entry: { ALIAS TAG ANCHOR SCALAR FLOW_SEQ_START FLOW_MAP_START KEY } |
|---|
| 75 | # flow_mapping_entry: { ALIAS TAG ANCHOR SCALAR FLOW_SEQ_START FLOW_MAP_START KEY } |
|---|
| 76 | |
|---|
class Marker(object):
    """A location (character offset) inside a named source text.

    The line span and the (row, column) pair are derived from `data` and
    `index` on first request and cached afterwards.
    """

    def __init__(self, source, data, index, length=0):
        self.source = source
        self.data = data
        self.index = index
        self.length = length
        # Both caches are filled together by _make_line_position().
        self._line = None
        self._position = None

    def line(self):
        """Return (start, end) offsets of the line that contains `index`.

        `end` is one past the terminating '\\n', or len(data) when the line
        is the last one and has no newline.
        """
        if self._line is None:
            self._make_line_position()
        return self._line

    def position(self):
        """Return the zero-based (row, column) of `index`."""
        if self._position is None:
            self._make_line_position()
        return self._position

    def _make_line_position(self):
        """Compute and cache both the line span and the (row, column)."""
        text = self.data
        offset = self.index
        # rfind() yields -1 when there is no earlier newline, so +1 gives 0.
        line_start = text.rfind('\n', 0, offset) + 1
        newline_at = text.find('\n', offset)
        if newline_at == -1:
            line_end = len(text)
        else:
            line_end = newline_at + 1
        self._line = (line_start, line_end)
        # Row is the number of newlines that precede the current line.
        self._position = (text.count('\n', 0, line_start), offset - line_start)
|---|
| 106 | |
|---|
class Error(Exception):
    """YAML error carrying an optional message and source marker.

    `marker` is expected to provide `source`, `data`, `position()` and
    `line()`; when present, str() shows the offending line with a caret
    under the error column.
    """

    def __init__(self, message=None, marker=None):
        Exception.__init__(self)
        self.message = message
        self.marker = marker

    def __str__(self):
        """Return the location header, caret pointer and message as text."""
        if self.marker is not None:
            row, col = self.marker.position()
            start, end = self.marker.line()
            line_text = self.marker.data[start:end].rstrip()
            # Only encode text that is not already a native `str`.
            # Encoding unconditionally turns the line into bytes on
            # Python 3, which would then be rendered as "b'...'".
            if not isinstance(line_text, str):
                line_text = line_text.encode('utf-8')
            error_position = "source \"%s\", line %s, column %s:\n%s\n" \
                    % (self.marker.source, row+1, col+1, line_text)
            error_pointer = " " * col + "^\n"
        else:
            error_position = ""
            error_pointer = ""
        if self.message is not None:
            error_message = self.message
        else:
            error_message = "YAML error"
        return error_position+error_pointer+error_message
|---|
| 129 | |
|---|
class Scanner:
    """Split YAML text into a flat list of token-type names.

    Tokens are plain strings such as 'SCALAR', 'KEY' or 'BLOCK_END'. The
    text of scalars, tags, anchors and aliases is skipped, not stored.
    All scanning state lives on the instance and is reset by scan().
    """

    def scan(self, source, data):
        """Tokenize `data` and return the list of token names.

        `source` is only kept for error reporting. Raises Error (through
        self.error) on input the scanner cannot tokenize.
        """
        self.source = source
        self.data = data
        # Nesting depth of '[' / '{' flow collections.
        self.flow_level = 0
        # Stack of enclosing block indentation columns; -1 means "none yet".
        self.indents = []
        self.indent = -1
        self.index = 0
        self.line = 0
        self.column = 0
        # Set after a newline or a block indicator, cleared after most
        # tokens; gates block indicators and simple-key guessing.
        self.allow_block_collection = True
        # Where to insert KEY (and BLOCK_MAP_START) if a ':' turns the
        # token scanned last into a simple key.
        self.guess_simple_key = False
        self.guess_simple_key_token = None
        self.guess_simple_key_indent = None
        # Same idea for flow context: parallel stacks of flow level and
        # token index for every possible implicit key.
        self.allow_flow_key = False
        self.guess_flow_key_levels = []
        self.guess_flow_key_tokens = []
        self.tokens = []
        while self.eat_ignored() or self.fetch_token():
            pass
        return self.tokens

    def eat_ignored(self):
        """Skip spaces, comments and line breaks; True if anything was skipped."""
        result = False
        while self.eat_ignored_spaces() or self.eat_ignored_comment() or self.eat_ignored_newline():
            result = True
        return result

    def eat_ignored_spaces(self):
        """Skip a run of ' ' characters; True if at least one was skipped."""
        result = False
        while self.index < len(self.data) and self.data[self.index] == ' ':
            self.index += 1
            self.column += 1
            result = True
        return result

    def eat_ignored_comment(self):
        """Skip a '#' comment up to (not including) the line break.

        Returns True when a comment was consumed. The original always
        returned False, which only worked because every caller falls
        through to eat_ignored_newline() afterwards.
        """
        if self.index < len(self.data) and self.data[self.index] == '#':
            return self.eat_line()
        return False

    def eat_line(self):
        """Skip to the next '\\r' or '\\n' (or end of data); True if moved."""
        result = False
        while self.index < len(self.data) and self.data[self.index] not in '\r\n':
            self.index += 1
            self.column += 1
            result = True
        return result

    def eat_ignored_newline(self):
        """Skip one line break ('\\r\\n', '\\r' or '\\n'); True if skipped.

        Also resets the column and re-enables block collections, since a
        new line has started.
        """
        if self.index < len(self.data) and self.data[self.index] in '\r\n':
            if self.data[self.index:self.index+2] == '\r\n':
                self.index += 2
            else:
                self.index += 1
            self.line += 1
            self.column = 0
            self.allow_block_collection = True
            return True
        return False

    def eat_ns(self):
        """Skip a run of non-whitespace characters; True if moved."""
        result = False
        while self.index < len(self.data) and self.data[self.index] not in ' \t\r\n':
            self.index += 1
            self.column += 1
            result = True
        return result

    def eat_indent(self, indent=0):
        """Consume indentation at the start of a line.

        The required width is max(indent, self.indent). Returns True only
        when called at column 0 and that many spaces were consumed. Spaces
        consumed before a failure are not given back.
        """
        if indent < self.indent:
            indent = self.indent
        if self.column != 0:
            return False
        count = 0
        while self.index < len(self.data) and self.data[self.index] == ' ' and count < indent:
            self.index += 1
            self.column += 1
            count += 1
        return count == indent

    def eat_double_quoted(self):
        """Consume a double-quoted scalar starting at the current '"'.

        Returns False when not positioned on '"'. Raises Error for a badly
        indented continuation line or a missing closing quote.
        """
        if self.index < len(self.data) and self.data[self.index] == '"':
            self.index += 1
            self.column += 1
            while self.index < len(self.data) and self.data[self.index] != '"':
                # Escaped backslash or quote: skip both characters so the
                # escaped quote does not end the scalar.
                if self.data[self.index:self.index+2] in ['\\\\', '\\"']:
                    self.index += 2
                    self.column += 2
                elif self.data[self.index] in '\r\n':
                    self.eat_ignored_newline()
                    if not self.eat_indent(1):
                        self.error("Invalid indentation")
                else:
                    self.index += 1
                    self.column += 1
            if self.index < len(self.data) and self.data[self.index] == '"':
                self.index += 1
                self.column += 1
                return True
            else:
                self.error("unclosed double quoted scalar")
        else:
            return False

    def eat_single_quoted(self):
        """Consume a single-quoted scalar starting at the current "'".

        A doubled quote ('') is an escaped quote and does not end the
        scalar. Returns False when not positioned on a quote. Raises Error
        for a badly indented continuation line or a missing closing quote.
        """
        if self.index < len(self.data) and self.data[self.index] == '\'':
            self.index += 1
            self.column += 1
            while self.index < len(self.data) and \
                    (self.data[self.index] != '\'' or self.data[self.index:self.index+2] == '\'\''):
                if self.data[self.index:self.index+2] == '\'\'':
                    self.index += 2
                    self.column += 2
                elif self.data[self.index] in '\r\n':
                    self.eat_ignored_newline()
                    if not self.eat_indent(1):
                        self.error("Invalid indentation")
                else:
                    self.index += 1
                    self.column += 1
            if self.index < len(self.data) and self.data[self.index] == '\'':
                self.index += 1
                self.column += 1
                return True
            else:
                self.error("unclosed single quoted scalar")
        else:
            return False

    def eat_folded(self):
        """Consume a '>' folded block scalar; see eat_block_scalar()."""
        return self.eat_block_scalar()

    def eat_literal(self):
        """Consume a '|' literal block scalar; see eat_block_scalar()."""
        return self.eat_block_scalar()

    def eat_block_scalar(self):
        """Consume a block scalar introduced by '>' or '|'.

        Skips the header line, then every following line that is either
        indented deeper than the current block, a comment line, or empty.
        Returns False when not positioned on '>' or '|'.
        """
        if self.index < len(self.data) and self.data[self.index] in '>|':
            self.eat_line()
            if not self.eat_ignored_newline():
                return True
            indent = self.indent+1
            if indent < 1:
                indent = 1
            while (self.eat_indent(indent) and ((self.eat_line() and self.eat_ignored_newline()) or (self.eat_ignored_newline()))) or \
                    (self.eat_ignored_comment() and self.eat_ignored_newline()) or \
                    self.eat_ignored_newline():
                pass
            return True
        return False

    def eat_block_plain(self):
        """Consume a plain scalar using block-context rules."""
        return self.eat_plain(block=True)

    def eat_flow_plain(self):
        """Consume a plain scalar using flow-context rules."""
        return self.eat_plain(block=False)

    def eat_plain(self, block):
        """Consume a plain (unquoted) scalar, possibly spanning lines.

        In block context '-', '?' and ':' may start or continue the scalar
        unless followed by a space, a line break or end of data, and the
        flow indicators '?,[]{}' are ordinary characters. A '#' belongs to
        the scalar only when the character before it is not whitespace.
        Continuation lines must be indented deeper than the current block.
        Returns True if a scalar was consumed, False otherwise.
        """
        indent = self.indent+1
        if indent < 1:
            indent = 1
        if self.index < len(self.data):
            if self.data[self.index] not in ' \t\r\n-?:,[]{}#&*!|>\'"%@`' or \
                    (block and self.data[self.index] == '-' and self.data[self.index:self.index+2] not in ['-', '- ', '-\r', '-\n']) or \
                    (block and self.data[self.index] == '?' and self.data[self.index:self.index+2] not in ['?', '? ', '?\r', '?\n']) or \
                    (block and self.data[self.index] == ':' and self.data[self.index:self.index+2] not in [':', ': ', ':\r', ':\n']):
                # The scalar may turn out to be an implicit mapping key;
                # remember where KEY would have to be inserted.
                if block and self.allow_block_collection:
                    self.guessing_simple_key()
                if self.flow_level and self.allow_flow_key:
                    self.guess_flow_key_levels.append(self.flow_level)
                    self.guess_flow_key_tokens.append(len(self.tokens))
                self.allow_flow_key = False
                self.index += 1
                self.column += 1
                # True when the previously consumed character was
                # whitespace, in which case '#' starts a comment. The
                # original updated this flag with inverted sense and did
                # not set it after eat_ignored_spaces(), so "b # c" pulled
                # the comment into the scalar.
                after_space = False
                while True:
                    if self.eat_ignored_spaces():
                        after_space = True
                    while self.index < len(self.data) and (
                            self.data[self.index] not in '\r\n?:,[]{}#' or
                            (not after_space and self.data[self.index] == '#') or
                            (block and self.data[self.index] in '?,[]{}') or
                            (block and self.data[self.index] == ':' and self.data[self.index:self.index+2] not in [':', ': ', ':\r', ':\n'])):
                        after_space = self.data[self.index] in ' \t'
                        self.index += 1
                        self.column += 1
                        self.allow_block_collection = False
                    # Continue on the next line only if it is indented
                    # enough to belong to this scalar.
                    if not (self.eat_ignored_newline() and self.eat_indent(indent)):
                        break
                    after_space = True
                return True
        return False

    def no_simple_key(self):
        """Forget any pending simple-key guess."""
        self.guess_simple_key = False
        self.guess_simple_key_token = None
        self.guess_simple_key_indent = None

    def guessing_simple_key(self):
        """Record that the token about to be scanned may be a simple key."""
        self.guess_simple_key = True
        self.guess_simple_key_token = len(self.tokens)
        self.guess_simple_key_indent = self.column

    def unwind_indents(self, level):
        """Emit BLOCK_END for every open block indented deeper than `level`.

        Raises Error if this happens inside a flow collection.
        """
        while self.indent > level:
            if self.flow_level:
                self.error("Invalid indentation")
            self.tokens.append('BLOCK_END')
            self.indent = self.indents.pop()
            self.no_simple_key()

    def fetch_token(self):
        """Scan one token at the current position and append it.

        Returns True when a token was produced. At end of data it closes
        all open blocks and returns None, which stops the scan() loop.
        Raises Error when no token matches.
        """
        self.unwind_indents(self.column)
        if self.index < len(self.data):
            if self.column == 0:
                # Directives and document markers are only recognised at
                # the very start of a line.
                if self.data[self.index] == '%':
                    self.tokens.append('DIRECTIVE')
                    self.eat_line()
                    self.no_simple_key()
                    return True
                if self.data[self.index:self.index+3] == '---' and \
                        (not self.data[self.index+3:self.index+4] or self.data[self.index+3:self.index+4] in ' \r\n'):
                    self.unwind_indents(-1)
                    self.tokens.append('DOCUMENT_START')
                    self.index += 3
                    self.column += 3
                    self.allow_block_collection = False
                    self.allow_flow_key = False
                    # Drop stale flow-key guesses. The original assigned
                    # an unused `guess_flow_keys` attribute instead.
                    self.guess_flow_key_levels = []
                    self.guess_flow_key_tokens = []
                    self.no_simple_key()
                    return True
                if self.data[self.index:self.index+3] == '...' and \
                        (not self.data[self.index+3:self.index+4] or self.data[self.index+3:self.index+4] in ' \r\n'):
                    self.unwind_indents(-1)
                    self.tokens.append('DOCUMENT_END')
                    self.index += 3
                    self.column += 3
                    self.allow_block_collection = False
                    self.allow_flow_key = False
                    self.guess_flow_key_levels = []
                    self.guess_flow_key_tokens = []
                    self.no_simple_key()
                    return True
            if self.data[self.index] in '[]{}':
                if self.data[self.index] == '[':
                    self.flow_level += 1
                    self.allow_flow_key = True
                    self.tokens.append('FLOW_SEQ_START')
                elif self.data[self.index] == '{':
                    self.flow_level += 1
                    self.allow_flow_key = True
                    self.tokens.append('FLOW_MAP_START')
                elif self.data[self.index] == ']':
                    if not self.flow_level:
                        self.error("Extra ]")
                    self.flow_level -= 1
                    self.allow_flow_key = False
                    self.tokens.append('FLOW_SEQ_END')
                else:
                    if not self.flow_level:
                        self.error("Extra }")
                    self.flow_level -= 1
                    self.allow_flow_key = False
                    self.tokens.append('FLOW_MAP_END')
                # Key guesses made inside a collection that has just been
                # closed can no longer be confirmed.
                while self.guess_flow_key_levels and self.guess_flow_key_levels[-1] > self.flow_level:
                    self.guess_flow_key_levels.pop()
                    self.guess_flow_key_tokens.pop()
                self.index += 1
                self.column += 1
                self.allow_block_collection = False
                return True
            if self.data[self.index] in '!&*':
                if self.flow_level and self.allow_flow_key:
                    self.guess_flow_key_levels.append(self.flow_level)
                    self.guess_flow_key_tokens.append(len(self.tokens))
                if not self.flow_level and self.allow_block_collection:
                    self.guessing_simple_key()
                if self.data[self.index] == '!':
                    self.tokens.append('TAG')
                elif self.data[self.index] == '&':
                    self.tokens.append('ANCHOR')
                else:
                    self.tokens.append('ALIAS')
                # NOTE: eat_ns() stops only at whitespace, so in flow
                # context a directly following ',' or ']' is consumed too.
                self.eat_ns()
                self.allow_flow_key = False
                self.allow_block_collection = False
                return True
            if self.data[self.index] == '"':
                if self.flow_level and self.allow_flow_key:
                    self.guess_flow_key_levels.append(self.flow_level)
                    self.guess_flow_key_tokens.append(len(self.tokens))
                if not self.flow_level and self.allow_block_collection:
                    self.guessing_simple_key()
                self.tokens.append('SCALAR')
                self.eat_double_quoted()
                self.allow_flow_key = False
                self.allow_block_collection = False
                return True
            if self.data[self.index] == '\'':
                if self.flow_level and self.allow_flow_key:
                    self.guess_flow_key_levels.append(self.flow_level)
                    self.guess_flow_key_tokens.append(len(self.tokens))
                if not self.flow_level and self.allow_block_collection:
                    self.guessing_simple_key()
                self.tokens.append('SCALAR')
                self.eat_single_quoted()
                self.allow_flow_key = False
                self.allow_block_collection = False
                return True
            if not self.flow_level:
                # Block context: '-', '?' and ':' are indicators only when
                # followed by a space, a line break or end of data.
                if self.data[self.index] in '-?:' and \
                        (not self.data[self.index+1:self.index+2] or self.data[self.index+1:self.index+2] in ' \r\n'):
                    if self.guess_simple_key and self.data[self.index] == ':':
                        # The previous token was a simple key: insert KEY
                        # (and BLOCK_MAP_START if this opens a deeper
                        # mapping) in front of it.
                        self.tokens.insert(self.guess_simple_key_token, 'KEY')
                        if self.guess_simple_key_indent > self.indent:
                            self.indents.append(self.indent)
                            self.indent = self.guess_simple_key_indent
                            self.tokens.insert(self.guess_simple_key_token, 'BLOCK_MAP_START')
                        self.tokens.append('VALUE')
                        self.no_simple_key()
                        self.index += 1
                        self.column += 1
                        self.allow_block_collection = False
                        return True
                    else:
                        if not self.allow_block_collection:
                            self.error("Block collection should start at the beginning of the line")
                        if self.column > self.indent:
                            self.indents.append(self.indent)
                            self.indent = self.column
                            if self.data[self.index] == '-':
                                self.tokens.append('BLOCK_SEQ_START')
                            else:
                                self.tokens.append('BLOCK_MAP_START')
                        if self.data[self.index] == '-':
                            self.tokens.append('ENTRY')
                        elif self.data[self.index] == '?':
                            self.tokens.append('KEY')
                        else:
                            self.tokens.append('VALUE')
                        self.index += 1
                        self.column += 1
                        # A nested collection may start right after the
                        # indicator (e.g. "- - a" or "- key: value").
                        self.allow_block_collection = True
                        self.no_simple_key()
                        return True
                if self.data[self.index] == '>':
                    self.no_simple_key()
                    self.tokens.append('SCALAR')
                    self.eat_folded()
                    self.allow_block_collection = True
                    return True
                if self.data[self.index] == '|':
                    self.no_simple_key()
                    self.tokens.append('SCALAR')
                    self.eat_literal()
                    self.allow_block_collection = True
                    return True
                if self.eat_block_plain():
                    self.tokens.append('SCALAR')
                    return True
            else:
                # Flow context: ',', '?' and ':' are always indicators.
                if self.data[self.index] in ',?:':
                    if self.data[self.index] == ',':
                        self.tokens.append('ENTRY')
                        while self.guess_flow_key_levels and self.guess_flow_key_levels[-1] >= self.flow_level:
                            self.guess_flow_key_levels.pop()
                            self.guess_flow_key_tokens.pop()
                        self.allow_flow_key = True
                    elif self.data[self.index] == '?':
                        self.tokens.append('KEY')
                        while self.guess_flow_key_levels and self.guess_flow_key_levels[-1] >= self.flow_level:
                            self.guess_flow_key_levels.pop()
                            self.guess_flow_key_tokens.pop()
                        self.allow_flow_key = False
                    else:
                        self.tokens.append('VALUE')
                        # Confirm a pending implicit key at this flow level
                        # by inserting KEY in front of it.
                        if self.guess_flow_key_levels and self.guess_flow_key_levels[-1] == self.flow_level:
                            self.guess_flow_key_levels.pop()
                            index = self.guess_flow_key_tokens.pop()
                            self.tokens.insert(index, 'KEY')
                        self.allow_flow_key = False
                    self.index += 1
                    self.column += 1
                    return True
                if self.eat_flow_plain():
                    self.tokens.append('SCALAR')
                    return True
            self.error("Invalid token")
        else:
            self.unwind_indents(-1)

    def error(self, message):
        """Raise Error with `message`, marked at the current position."""
        raise Error(message, Marker(self.source, self.data, self.index))
|---|
| 523 | |
|---|
class Parser:
    """Recursive-descent parser over the token names produced by Scanner.

    Because tokens carry no values, the result only mirrors the document
    structure: scalars become True, aliases '*', sequences lists, mappings
    lists of (key, value) tuples, and missing nodes None.
    """

    def parse(self, source, data):
        """Scan and parse `data`.

        Returns the single document's structure when the stream holds
        exactly one document, otherwise a tuple of all documents.
        Raises Error on scanning or parsing failures.
        """
        scanner = Scanner()
        self.tokens = scanner.scan(source, data)
        self.tokens.append('END')
        documents = self.parse_stream()
        if len(documents) == 1:
            return documents[0]
        return documents

    def parse_stream(self):
        """stream :== implicit_document? explicit_document* END

        Expects self.tokens to end with 'END'. Returns a tuple of documents.
        """
        documents = []
        if self.tokens[0] not in ['DIRECTIVE', 'DOCUMENT_START', 'END']:
            documents.append(self.parse_block_node())
            # implicit_document :== block_node DOCUMENT_END?
            # The original left the DOCUMENT_END in place and then failed
            # with 'DOCUMENT_START is expected'.
            while self.tokens[0] == 'DOCUMENT_END':
                self.tokens.pop(0)
        while self.tokens[0] != 'END':
            while self.tokens[0] == 'DIRECTIVE':
                self.tokens.pop(0)
            if self.tokens[0] != 'DOCUMENT_START':
                self.error('DOCUMENT_START is expected')
            self.tokens.pop(0)
            if self.tokens[0] in ['DIRECTIVE', 'DOCUMENT_START', 'DOCUMENT_END', 'END']:
                # Explicit document without content.
                documents.append(None)
            else:
                documents.append(self.parse_block_node())
            while self.tokens[0] == 'DOCUMENT_END':
                self.tokens.pop(0)
        return tuple(documents)

    def _skip_properties(self):
        """properties :== TAG ANCHOR? | ANCHOR TAG?  (tokens are discarded)"""
        if self.tokens[0] == 'TAG':
            self.tokens.pop(0)
            if self.tokens[0] == 'ANCHOR':
                self.tokens.pop(0)
        elif self.tokens[0] == 'ANCHOR':
            self.tokens.pop(0)
            if self.tokens[0] == 'TAG':
                self.tokens.pop(0)

    def parse_block_node(self):
        """block_node :== ALIAS | properties? block_content"""
        if self.tokens[0] == 'ALIAS':
            self.tokens.pop(0)
            return '*'
        self._skip_properties()
        return self.parse_block_content()

    def parse_flow_node(self):
        """flow_node :== ALIAS | properties? flow_content"""
        if self.tokens[0] == 'ALIAS':
            self.tokens.pop(0)
            return '*'
        self._skip_properties()
        return self.parse_flow_content()

    def parse_block_node_or_indentless_sequence(self):
        """ALIAS | properties? (block_content | indentless_block_sequence)"""
        if self.tokens[0] == 'ALIAS':
            self.tokens.pop(0)
            return '*'
        self._skip_properties()
        if self.tokens[0] == 'ENTRY':
            # The original passed `self` a second time here, which raised
            # TypeError for every indentless sequence.
            return self.parse_indentless_sequence()
        return self.parse_block_content()

    def parse_block_content(self):
        """block_content :== block_collection | flow_collection | SCALAR"""
        if self.tokens[0] == 'SCALAR':
            self.tokens.pop(0)
            return True
        elif self.tokens[0] == 'BLOCK_SEQ_START':
            return self.parse_block_sequence()
        elif self.tokens[0] == 'BLOCK_MAP_START':
            return self.parse_block_mapping()
        elif self.tokens[0] == 'FLOW_SEQ_START':
            return self.parse_flow_sequence()
        elif self.tokens[0] == 'FLOW_MAP_START':
            return self.parse_flow_mapping()
        else:
            self.error('block content is expected')

    def parse_flow_content(self):
        """flow_content :== flow_collection | SCALAR"""
        if self.tokens[0] == 'SCALAR':
            self.tokens.pop(0)
            return True
        elif self.tokens[0] == 'FLOW_SEQ_START':
            return self.parse_flow_sequence()
        elif self.tokens[0] == 'FLOW_MAP_START':
            return self.parse_flow_mapping()
        else:
            self.error('flow content is expected')

    def parse_block_sequence(self):
        """block_sequence :== BLOCK_SEQ_START (ENTRY block_node?)* BLOCK_END"""
        sequence = []
        if self.tokens[0] != 'BLOCK_SEQ_START':
            self.error('BLOCK_SEQ_START is expected')
        self.tokens.pop(0)
        while self.tokens[0] == 'ENTRY':
            self.tokens.pop(0)
            if self.tokens[0] not in ['ENTRY', 'BLOCK_END']:
                sequence.append(self.parse_block_node())
            else:
                sequence.append(None)
        if self.tokens[0] != 'BLOCK_END':
            self.error('BLOCK_END is expected')
        self.tokens.pop(0)
        return sequence

    def parse_indentless_sequence(self):
        """indentless_block_sequence :== (ENTRY block_node?)+

        The sequence has no BLOCK_END of its own; it sits inside a block
        mapping, so KEY, VALUE and BLOCK_END of that mapping also mark an
        empty entry (the original only checked for ENTRY).
        """
        sequence = []
        while self.tokens[0] == 'ENTRY':
            self.tokens.pop(0)
            if self.tokens[0] not in ['ENTRY', 'KEY', 'VALUE', 'BLOCK_END']:
                sequence.append(self.parse_block_node())
            else:
                sequence.append(None)
        return sequence

    def parse_block_mapping(self):
        """block_mapping :== BLOCK_MAP_START ((KEY node?)? (VALUE node?)?)* BLOCK_END

        Returns a list of (key, value) tuples; a missing side is None.
        """
        mapping = []
        if self.tokens[0] != 'BLOCK_MAP_START':
            self.error('BLOCK_MAP_START is expected')
        self.tokens.pop(0)
        while self.tokens[0] in ['KEY', 'VALUE']:
            key = None
            value = None
            if self.tokens[0] == 'KEY':
                self.tokens.pop(0)
                if self.tokens[0] not in ['KEY', 'VALUE', 'BLOCK_END']:
                    key = self.parse_block_node_or_indentless_sequence()
            if self.tokens[0] == 'VALUE':
                self.tokens.pop(0)
                if self.tokens[0] not in ['KEY', 'VALUE', 'BLOCK_END']:
                    value = self.parse_block_node_or_indentless_sequence()
            mapping.append((key, value))
        if self.tokens[0] != 'BLOCK_END':
            self.error('BLOCK_END is expected')
        self.tokens.pop(0)
        return mapping

    def parse_flow_sequence(self):
        """flow_sequence :== FLOW_SEQ_START (entry ENTRY)* entry? FLOW_SEQ_END

        A 'KEY ...' entry is returned as a one-pair mapping [(key, value)].
        """
        sequence = []
        if self.tokens[0] != 'FLOW_SEQ_START':
            self.error('FLOW_SEQ_START is expected')
        self.tokens.pop(0)
        while self.tokens[0] != 'FLOW_SEQ_END':
            if self.tokens[0] == 'KEY':
                self.tokens.pop(0)
                key = None
                value = None
                if self.tokens[0] != 'VALUE':
                    key = self.parse_flow_node()
                if self.tokens[0] == 'VALUE':
                    self.tokens.pop(0)
                    if self.tokens[0] not in ['ENTRY', 'FLOW_SEQ_END']:
                        value = self.parse_flow_node()
                sequence.append([(key, value)])
            else:
                sequence.append(self.parse_flow_node())
            if self.tokens[0] not in ['ENTRY', 'FLOW_SEQ_END']:
                self.error("ENTRY or FLOW_SEQ_END is expected")
            if self.tokens[0] == 'ENTRY':
                self.tokens.pop(0)
        if self.tokens[0] != 'FLOW_SEQ_END':
            self.error('FLOW_SEQ_END is expected')
        self.tokens.pop(0)
        return sequence

    def parse_flow_mapping(self):
        """flow_mapping :== FLOW_MAP_START (entry ENTRY)* entry? FLOW_MAP_END

        Returns a list of (key, value) tuples; an entry without KEY is
        stored as (node, None).
        """
        mapping = []
        if self.tokens[0] != 'FLOW_MAP_START':
            self.error('FLOW_MAP_START is expected')
        self.tokens.pop(0)
        while self.tokens[0] != 'FLOW_MAP_END':
            if self.tokens[0] == 'KEY':
                self.tokens.pop(0)
                key = None
                value = None
                if self.tokens[0] != 'VALUE':
                    key = self.parse_flow_node()
                if self.tokens[0] == 'VALUE':
                    self.tokens.pop(0)
                    if self.tokens[0] not in ['ENTRY', 'FLOW_MAP_END']:
                        value = self.parse_flow_node()
                mapping.append((key, value))
            else:
                mapping.append((self.parse_flow_node(), None))
            if self.tokens[0] not in ['ENTRY', 'FLOW_MAP_END']:
                self.error("ENTRY or FLOW_MAP_END is expected")
            if self.tokens[0] == 'ENTRY':
                self.tokens.pop(0)
        if self.tokens[0] != 'FLOW_MAP_END':
            self.error('FLOW_MAP_END is expected')
        self.tokens.pop(0)
        return mapping

    def error(self, message):
        """Raise Error with `message` followed by the remaining tokens."""
        raise Error(message+': '+str(self.tokens))
|---|
| 731 | |
|---|