| [43] | 1 | |
|---|
| 2 | # Production rules: |
|---|
| 3 | # stream ::= implicit_document? explicit_document* END |
|---|
| 4 | # explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END? |
|---|
| 5 | # implicit_document ::= block_node DOCUMENT-END? |
|---|
| 6 | # block_node ::= ALIAS | properties? block_content |
|---|
| 7 | # flow_node ::= ALIAS | properties? flow_content |
|---|
| 8 | # properties ::= TAG ANCHOR? | ANCHOR TAG? |
|---|
| 9 | # block_content ::= block_collection | flow_collection | SCALAR |
|---|
| 10 | # flow_content ::= flow_collection | SCALAR |
|---|
| 11 | # block_collection ::= block_sequence | block_mapping |
|---|
| 12 | # block_sequence ::= BLOCK-SEQUENCE-START (ENTRY block_node?)* BLOCK-END |
|---|
| 13 | # block_mapping ::= BLOCK-MAPPING-START ((KEY block_node_or_indentless_sequence?)? (VALUE block_node_or_indentless_sequence?)?)* BLOCK-END |
|---|
| 14 | # block_node_or_indentless_sequence ::= ALIAS | properties? (block_content | indentless_block_sequence) |
|---|
| 15 | # indentless_block_sequence ::= (ENTRY block_node?)+ |
|---|
| 16 | # flow_collection ::= flow_sequence | flow_mapping |
|---|
| 17 | # flow_sequence ::= FLOW-SEQUENCE-START (flow_sequence_entry ENTRY)* flow_sequence_entry? FLOW-SEQUENCE-END |
|---|
| 18 | # flow_mapping ::= FLOW-MAPPING-START (flow_mapping_entry ENTRY)* flow_mapping_entry? FLOW-MAPPING-END |
|---|
| 19 | # flow_sequence_entry ::= flow_node | KEY flow_node (VALUE flow_node?)? |
|---|
| 20 | # flow_mapping_entry ::= flow_node | KEY flow_node (VALUE flow_node?)? |
|---|
| 21 | |
|---|
| 22 | # FIRST(rule) sets: |
|---|
| 23 | # stream: {} |
|---|
| 24 | # explicit_document: { DIRECTIVE DOCUMENT-START } |
|---|
| 25 | # implicit_document: block_node |
|---|
| 26 | # block_node: { ALIAS TAG ANCHOR SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START } |
|---|
| 27 | # flow_node: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START } |
|---|
| 28 | # block_content: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR } |
|---|
| 29 | # flow_content: { FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR } |
|---|
| 30 | # block_collection: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START } |
|---|
| 31 | # flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START } |
|---|
| 32 | # block_sequence: { BLOCK-SEQUENCE-START } |
|---|
| 33 | # block_mapping: { BLOCK-MAPPING-START } |
|---|
| 34 | # block_node_or_indentless_sequence: { ALIAS ANCHOR TAG SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START ENTRY } |
|---|
| 35 | # indentless_block_sequence: { ENTRY } |
|---|
| 36 | # flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START } |
|---|
| 37 | # flow_sequence: { FLOW-SEQUENCE-START } |
|---|
| 38 | # flow_mapping: { FLOW-MAPPING-START } |
|---|
| 39 | # flow_sequence_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY } |
|---|
| 40 | # flow_mapping_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY } |
|---|
| 41 | |
|---|
| [44] | 42 | from scanner import * |
|---|
| 43 | |
|---|
class Error(Exception):
    """Exception raised by the parser when the token stream is malformed."""
|---|
| 46 | |
|---|
class Node:
    """Common base for parse-tree nodes; provides a shared ``repr``."""

    def __repr__(self):
        """Return ``ClassName(...)`` listing the attributes this node has.

        Only ``anchor``, ``tag`` and ``value`` are considered, in that order,
        and an attribute is skipped when the instance does not define it.
        """
        shown = [
            repr(getattr(self, name))
            for name in ('anchor', 'tag', 'value')
            if hasattr(self, name)
        ]
        return "%s(%s)" % (self.__class__.__name__, ', '.join(shown))
|---|
| 54 | |
|---|
class AliasNode(Node):
    """Node produced for an ALIAS token; it carries only the anchor name."""

    def __init__(self, anchor):
        """Remember the *anchor* this alias refers to."""
        self.anchor = anchor
|---|
| 58 | |
|---|
class ScalarNode(Node):
    """Node holding a scalar value together with its optional properties."""

    def __init__(self, anchor, tag, value):
        """Store the node properties (*anchor*, *tag*) and the scalar *value*."""
        self.anchor, self.tag, self.value = anchor, tag, value
|---|
| 64 | |
|---|
class SequenceNode(Node):
    """Node holding a sequence together with its optional properties."""

    def __init__(self, anchor, tag, value):
        """Store the node properties (*anchor*, *tag*) and the entries in *value*."""
        self.anchor, self.tag, self.value = anchor, tag, value
|---|
| 70 | |
|---|
class MappingNode(Node):
    """Node holding a mapping together with its optional properties."""

    def __init__(self, anchor, tag, value):
        """Store the node properties (*anchor*, *tag*) and the pairs in *value*."""
        self.anchor, self.tag, self.value = anchor, tag, value
|---|
| 76 | |
|---|
class Parser:
    """Recursive-descent parser that builds node trees from scanner tokens.

    The parser looks one token ahead (``is_token``) to decide which grammar
    production applies and consumes tokens with ``get_token``.  Each
    ``parse_*`` method corresponds to one production of the grammar listed
    in the comment at the top of this file.  Any syntax problem is reported
    through ``fail``, which raises ``Error``.
    """

    def __init__(self, source, data):
        """Create the scanner that supplies tokens.

        *source* and *data* are passed to ``Scanner`` unchanged.
        """
        self.scanner = Scanner(source, data)

    def is_token(self, *choices):
        """Return True if the next token is an instance of any of *choices*.

        The token is only peeked at, never consumed.  With no *choices* the
        result is always False.
        """
        # isinstance() accepts a tuple of classes, so no explicit loop is
        # needed to test every candidate.
        return isinstance(self.scanner.peek_token(), choices)

    def get_token(self):
        """Consume and return the next token from the scanner."""
        return self.scanner.get_token()

    def parse(self):
        """Parse the whole stream; see ``parse_stream`` for the result."""
        return self.parse_stream()

    def parse_stream(self):
        """Parse ``implicit_document? explicit_document* END``.

        Returns a list with one item per document: the root node of the
        document, or None for an explicit document without content.
        Raises ``Error`` when a document does not start with DOCUMENT-START
        where one is required.
        """
        documents = []
        # implicit_document ::= block_node DOCUMENT-END?
        if not self.is_token(DirectiveToken, DocumentStartToken, EndToken):
            documents.append(self.parse_block_node())
            # The grammar allows DOCUMENT-END after an implicit document too;
            # without this the loop below would demand DOCUMENT-START.
            while self.is_token(DocumentEndToken):
                self.get_token()
        # explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END?
        while not self.is_token(EndToken):
            # Directives are recognised but their content is ignored.
            while self.is_token(DirectiveToken):
                self.get_token()
            if not self.is_token(DocumentStartToken):
                self.fail('DOCUMENT-START is expected')
            self.get_token()
            if self.is_token(DirectiveToken,
                    DocumentStartToken, DocumentEndToken, EndToken):
                # Nothing between the document markers: empty document.
                documents.append(None)
            else:
                documents.append(self.parse_block_node())
            while self.is_token(DocumentEndToken):
                self.get_token()
        return documents

    def parse_block_node(self):
        """Parse ``block_node`` and return the resulting node."""
        return self.parse_node(block=True)

    def parse_flow_node(self):
        """Parse ``flow_node`` and return the resulting node."""
        return self.parse_node()

    def parse_block_node_or_indentless_sequence(self):
        """Parse ``block_node_or_indentless_sequence`` and return the node."""
        return self.parse_node(block=True, indentless_sequence=True)

    def parse_node(self, block=False, indentless_sequence=False):
        """Parse an alias, or optional properties followed by content.

        *block* selects block content (which also admits flow collections)
        instead of flow content.  *indentless_sequence* additionally allows
        the content to be a run of ENTRY tokens without a
        BLOCK-SEQUENCE-START.

        Returns an ``AliasNode``, ``ScalarNode``, ``SequenceNode`` or
        ``MappingNode``.  Raises ``Error`` (via the content parsers) when no
        content can start at the current token.
        """
        if self.is_token(AliasToken):
            token = self.get_token()
            return AliasNode(token.value)
        # properties ::= TAG ANCHOR? | ANCHOR TAG?
        anchor = None
        tag = None
        if self.is_token(AnchorToken):
            anchor = self.get_token().value
            if self.is_token(TagToken):
                tag = self.get_token().value
        elif self.is_token(TagToken):
            tag = self.get_token().value
            if self.is_token(AnchorToken):
                anchor = self.get_token().value
        if indentless_sequence and self.is_token(EntryToken):
            NodeClass = SequenceNode
            value = self.parse_indentless_sequence()
        else:
            # Pick the node class from the lookahead token.  If none of the
            # branches match, the content parser below raises before
            # NodeClass is ever used.
            if self.is_token(ScalarToken):
                NodeClass = ScalarNode
            elif self.is_token(BlockSequenceStartToken, FlowSequenceStartToken):
                NodeClass = SequenceNode
            elif self.is_token(BlockMappingStartToken, FlowMappingStartToken):
                NodeClass = MappingNode
            if block:
                value = self.parse_block_content()
            else:
                value = self.parse_flow_content()
        return NodeClass(anchor, tag, value)

    def parse_block_content(self):
        """Parse ``block_content`` and return its raw value.

        The result is the scalar token's value, a list of entries for a
        sequence, or a list of ``(key, value)`` pairs for a mapping.
        Raises ``Error`` if the next token cannot start block content.
        """
        if self.is_token(ScalarToken):
            return self.get_token().value
        elif self.is_token(BlockSequenceStartToken):
            return self.parse_block_sequence()
        elif self.is_token(BlockMappingStartToken):
            return self.parse_block_mapping()
        elif self.is_token(FlowSequenceStartToken):
            return self.parse_flow_sequence()
        elif self.is_token(FlowMappingStartToken):
            return self.parse_flow_mapping()
        else:
            self.fail('block content is expected')

    def parse_flow_content(self):
        """Parse ``flow_content`` and return its raw value.

        Same result shapes as ``parse_block_content`` but block collections
        are not accepted.  Raises ``Error`` if the next token cannot start
        flow content.
        """
        if self.is_token(ScalarToken):
            return self.get_token().value
        elif self.is_token(FlowSequenceStartToken):
            return self.parse_flow_sequence()
        elif self.is_token(FlowMappingStartToken):
            return self.parse_flow_mapping()
        else:
            self.fail('flow content is expected')

    def parse_block_sequence(self):
        """Parse ``BLOCK-SEQUENCE-START (ENTRY block_node?)* BLOCK-END``.

        Returns the list of entry nodes; an entry without content is None.
        Raises ``Error`` if the start or end token is missing.
        """
        sequence = []
        if not self.is_token(BlockSequenceStartToken):
            self.fail('BLOCK-SEQUENCE-START is expected')
        self.get_token()
        while self.is_token(EntryToken):
            self.get_token()
            if not self.is_token(EntryToken, BlockEndToken):
                sequence.append(self.parse_block_node())
            else:
                # ENTRY directly followed by ENTRY or BLOCK-END: empty entry.
                sequence.append(None)
        if not self.is_token(BlockEndToken):
            self.fail('BLOCK-END is expected')
        self.get_token()
        return sequence

    def parse_indentless_sequence(self):
        """Parse ``(ENTRY block_node?)+`` and return the list of entries.

        An indentless sequence has no BLOCK-SEQUENCE-START/BLOCK-END of its
        own; it simply ends at the first token that is not ENTRY.  An entry
        without content is None.
        """
        sequence = []
        while self.is_token(EntryToken):
            self.get_token()
            # The sequence is a key or value of a block mapping, so an empty
            # entry can be followed by the next KEY/VALUE or by the mapping's
            # BLOCK-END as well as by another ENTRY.
            if not self.is_token(EntryToken, KeyToken, ValueToken,
                    BlockEndToken):
                sequence.append(self.parse_block_node())
            else:
                sequence.append(None)
        return sequence

    def parse_block_mapping(self):
        """Parse a block mapping and return a list of ``(key, value)`` pairs.

        Each pair comes from an optional KEY part followed by an optional
        VALUE part; a missing key or value is None.  Raises ``Error`` if the
        start or end token is missing.
        """
        mapping = []
        if not self.is_token(BlockMappingStartToken):
            self.fail('BLOCK-MAPPING-START is expected')
        self.get_token()
        while self.is_token(KeyToken, ValueToken):
            key = None
            value = None
            if self.is_token(KeyToken):
                self.get_token()
                if not self.is_token(KeyToken, ValueToken, BlockEndToken):
                    key = self.parse_block_node_or_indentless_sequence()
            if self.is_token(ValueToken):
                self.get_token()
                if not self.is_token(KeyToken, ValueToken, BlockEndToken):
                    value = self.parse_block_node_or_indentless_sequence()
            mapping.append((key, value))
        if not self.is_token(BlockEndToken):
            self.fail('BLOCK-END is expected')
        self.get_token()
        return mapping

    def parse_flow_sequence(self):
        """Parse a flow sequence and return the list of entry nodes.

        An entry introduced by KEY becomes a single-pair ``MappingNode``
        without anchor or tag.  Raises ``Error`` if the start token is
        missing or an entry is not followed by ENTRY or FLOW-SEQUENCE-END.
        """
        sequence = []
        if not self.is_token(FlowSequenceStartToken):
            self.fail('FLOW-SEQUENCE-START is expected')
        self.get_token()
        while not self.is_token(FlowSequenceEndToken):
            if self.is_token(KeyToken):
                self.get_token()
                key = None
                value = None
                if not self.is_token(ValueToken):
                    key = self.parse_flow_node()
                if self.is_token(ValueToken):
                    self.get_token()
                    if not self.is_token(EntryToken, FlowSequenceEndToken):
                        value = self.parse_flow_node()
                node = MappingNode(None, None, [(key, value)])
                sequence.append(node)
            else:
                sequence.append(self.parse_flow_node())
            if not self.is_token(EntryToken, FlowSequenceEndToken):
                self.fail("ENTRY or FLOW-SEQUENCE-END are expected")
            if self.is_token(EntryToken):
                self.get_token()
        if not self.is_token(FlowSequenceEndToken):
            self.fail('FLOW-SEQUENCE-END is expected')
        self.get_token()
        return sequence

    def parse_flow_mapping(self):
        """Parse a flow mapping and return a list of ``(key, value)`` pairs.

        An entry that is not introduced by KEY is stored as ``(node, None)``.
        Raises ``Error`` if the start token is missing or an entry is not
        followed by ENTRY or FLOW-MAPPING-END.
        """
        mapping = []
        if not self.is_token(FlowMappingStartToken):
            self.fail('FLOW-MAPPING-START is expected')
        self.get_token()
        while not self.is_token(FlowMappingEndToken):
            if self.is_token(KeyToken):
                self.get_token()
                key = None
                value = None
                if not self.is_token(ValueToken):
                    key = self.parse_flow_node()
                if self.is_token(ValueToken):
                    self.get_token()
                    if not self.is_token(EntryToken, FlowMappingEndToken):
                        value = self.parse_flow_node()
                mapping.append((key, value))
            else:
                mapping.append((self.parse_flow_node(), None))
            if not self.is_token(EntryToken, FlowMappingEndToken):
                self.fail("ENTRY or FLOW-MAPPING-END are expected")
            if self.is_token(EntryToken):
                self.get_token()
        if not self.is_token(FlowMappingEndToken):
            self.fail('FLOW-MAPPING-END is expected')
        self.get_token()
        return mapping

    def fail(self, message):
        """Raise ``Error`` with *message* and a snippet of the input.

        The snippet is taken from the start marker of the next unconsumed
        token, i.e. the place where parsing could not continue.
        """
        marker = self.scanner.peek_token().start_marker
        raise Error(message+':\n'+marker.get_snippet())
|---|
| [43] | 287 | |
|---|