Index: /trunk/sandbox/my-parser/parser2_test.py
===================================================================
--- /trunk/sandbox/my-parser/parser2_test.py	(revision 37)
+++ /trunk/sandbox/my-parser/parser2_test.py	(revision 37)
@@ -0,0 +1,829 @@
+
+import unittest
+import parser2
+
+EX1 = r"""
+- Mark McGwire
+- Sammy Sosa
+- Ken Griffey
+"""
+
+TOKENS1 = """
+BLOCK_SEQ_START
+ENTRY SCALAR
+ENTRY SCALAR
+ENTRY SCALAR
+BLOCK_END
+"""
+
+NODES1 = [True, True, True]
+
+EX2 = r"""
+hr:  65    # Home runs
+avg: 0.278 # Batting average
+rbi: 147   # Runs Batted In
+"""
+
+TOKENS2 = """
+BLOCK_MAP_START
+KEY SCALAR VALUE SCALAR
+KEY SCALAR VALUE SCALAR
+KEY SCALAR VALUE SCALAR
+BLOCK_END
+"""
+
+NODES2 = [(True, True), (True, True), (True, True)]
+
+EX3 = r"""
+american:
+  - Boston Red Sox
+  - Detroit Tigers
+  - New York Yankees
+national:
+  - New York Mets
+  - Chicago Cubs
+  - Atlanta Braves
+"""
+
+TOKENS3 = """
+BLOCK_MAP_START
+KEY SCALAR VALUE
+    BLOCK_SEQ_START
+    ENTRY SCALAR
+    ENTRY SCALAR
+    ENTRY SCALAR
+    BLOCK_END
+KEY SCALAR VALUE
+    BLOCK_SEQ_START
+    ENTRY SCALAR
+    ENTRY SCALAR
+    ENTRY SCALAR
+    BLOCK_END
+BLOCK_END
+"""
+
+NODES3 = [(True, [True, True, True]), (True, [True, True, True])]
+
+EX4 = r"""
+-
+  name: Mark McGwire
+  hr:   65
+  avg:  0.278
+-
+  name: Sammy Sosa
+  hr:   63
+  avg:  0.288
+"""
+
+TOKENS4 = """
+BLOCK_SEQ_START
+ENTRY
+    BLOCK_MAP_START
+    KEY SCALAR VALUE SCALAR
+    KEY SCALAR VALUE SCALAR
+    KEY SCALAR VALUE SCALAR
+    BLOCK_END
+ENTRY
+    BLOCK_MAP_START
+    KEY SCALAR VALUE SCALAR
+    KEY SCALAR VALUE SCALAR
+    KEY SCALAR VALUE SCALAR
+    BLOCK_END
+BLOCK_END
+"""
+
+NODES4 = [[(True, True), (True, True), (True, True)], [(True, True), (True, True), (True, True)]]
+
+EX5 = r"""
+- [name        , hr, avg  ]
+- [Mark McGwire, 65, 0.278]
+- [Sammy Sosa  , 63, 0.288]
+"""
+
+TOKENS5 = """
+BLOCK_SEQ_START
+ENTRY FLOW_SEQ_START SCALAR ENTRY SCALAR ENTRY SCALAR FLOW_SEQ_END
+ENTRY FLOW_SEQ_START SCALAR ENTRY SCALAR ENTRY SCALAR FLOW_SEQ_END
+ENTRY FLOW_SEQ_START SCALAR ENTRY SCALAR ENTRY SCALAR FLOW_SEQ_END
+BLOCK_END
+"""
+
+NODES5 = [[True, True, True], [True, True, True], [True, True, True]]
+
+EX6 = r"""
+Mark McGwire: {hr: 65, avg: 0.278}
+Sammy Sosa: {
+    hr: 63,
+    avg: 0.288
+  }
+"""
+
+TOKENS6 = """
+BLOCK_MAP_START
+KEY SCALAR VALUE
+    FLOW_MAP_START KEY SCALAR VALUE SCALAR ENTRY KEY SCALAR VALUE SCALAR FLOW_MAP_END
+KEY SCALAR VALUE
+    FLOW_MAP_START KEY SCALAR VALUE SCALAR ENTRY KEY SCALAR VALUE SCALAR FLOW_MAP_END
+BLOCK_END    
+"""
+
+NODES6 = [(True, [(True, True), (True, True)]), (True, [(True, True), (True, True)])]
+
+EX7 = r"""
+# Ranking of 1998 home runs
+---
+- Mark McGwire
+- Sammy Sosa
+- Ken Griffey
+
+# Team ranking
+---
+- Chicago Cubs
+- St Louis Cardinals
+"""
+
+TOKENS7 = """
+DOCUMENT_START 
+BLOCK_SEQ_START
+ENTRY SCALAR
+ENTRY SCALAR
+ENTRY SCALAR
+BLOCK_END
+
+DOCUMENT_START 
+BLOCK_SEQ_START
+ENTRY SCALAR
+ENTRY SCALAR
+BLOCK_END
+"""
+
+NODES7 = ([True, True, True], [True, True])
+
+EX8 = r"""
+---
+time: 20:03:20
+player: Sammy Sosa
+action: strike (miss)
+...
+---
+time: 20:03:47
+player: Sammy Sosa
+action: grand slam
+...
+"""
+
+TOKENS8 = """
+DOCUMENT_START
+BLOCK_MAP_START
+KEY SCALAR VALUE SCALAR
+KEY SCALAR VALUE SCALAR
+KEY SCALAR VALUE SCALAR
+BLOCK_END
+DOCUMENT_END
+
+DOCUMENT_START
+BLOCK_MAP_START
+KEY SCALAR VALUE SCALAR
+KEY SCALAR VALUE SCALAR
+KEY SCALAR VALUE SCALAR
+BLOCK_END
+DOCUMENT_END
+"""
+
+NODES8 = ([(True, True), (True, True), (True, True)], [(True, True), (True, True), (True, True)])
+
+EX9 = r"""
+---
+hr: # 1998 hr ranking
+  - Mark McGwire
+  - Sammy Sosa
+rbi:
+  # 1998 rbi ranking
+  - Sammy Sosa
+  - Ken Griffey
+"""
+
+TOKENS9 = """
+DOCUMENT_START
+BLOCK_MAP_START
+KEY SCALAR VALUE
+    BLOCK_SEQ_START
+    ENTRY SCALAR
+    ENTRY SCALAR
+    BLOCK_END
+KEY SCALAR VALUE
+    BLOCK_SEQ_START
+    ENTRY SCALAR
+    ENTRY SCALAR
+    BLOCK_END
+BLOCK_END
+"""
+
+NODES9 = [(True, [True, True]), (True, [True, True])]
+
+EX10 = r"""
+---
+hr:
+  - Mark McGwire
+  # Following node labeled SS
+  - &SS Sammy Sosa
+rbi:
+  - *SS # Subsequent occurrence
+  - Ken Griffey
+"""
+
+TOKENS10 = """
+DOCUMENT_START
+BLOCK_MAP_START
+KEY SCALAR VALUE
+    BLOCK_SEQ_START
+    ENTRY SCALAR
+    ENTRY ANCHOR SCALAR
+    BLOCK_END
+KEY SCALAR VALUE
+    BLOCK_SEQ_START
+    ENTRY ALIAS
+    ENTRY SCALAR
+    BLOCK_END
+BLOCK_END
+"""
+
+NODES10 = [(True, [True, True]), (True, ['*', True])]
+
+EX11 = r"""
+? - Detroit Tigers
+  - Chicago cubs
+:
+  - 2001-07-23
+
+? [ New York Yankees,
+    Atlanta Braves ]
+: [ 2001-07-02, 2001-08-12,
+    2001-08-14 ]
+"""
+
+TOKENS11 = """
+BLOCK_MAP_START
+KEY
+    BLOCK_SEQ_START
+    ENTRY SCALAR
+    ENTRY SCALAR
+    BLOCK_END
+VALUE
+    BLOCK_SEQ_START
+    ENTRY SCALAR
+    BLOCK_END
+KEY
+    FLOW_SEQ_START SCALAR ENTRY SCALAR FLOW_SEQ_END
+VALUE
+    FLOW_SEQ_START SCALAR ENTRY SCALAR ENTRY SCALAR FLOW_SEQ_END
+BLOCK_END
+"""
+
+NODES11 = [([True, True], [True]), ([True, True], [True, True, True])]
+
+EX12 = r"""
+---
+# products purchased
+- item    : Super Hoop
+  quantity: 1
+- item    : Basketball
+  quantity: 4
+- item    : Big Shoes
+  quantity: 1
+"""
+
+TOKENS12 = """
+DOCUMENT_START
+BLOCK_SEQ_START
+ENTRY
+    BLOCK_MAP_START
+    KEY SCALAR VALUE SCALAR
+    KEY SCALAR VALUE SCALAR
+    BLOCK_END
+ENTRY
+    BLOCK_MAP_START
+    KEY SCALAR VALUE SCALAR
+    KEY SCALAR VALUE SCALAR
+    BLOCK_END
+ENTRY
+    BLOCK_MAP_START
+    KEY SCALAR VALUE SCALAR
+    KEY SCALAR VALUE SCALAR
+    BLOCK_END
+BLOCK_END
+"""
+
+NODES12 = [[(True, True), (True, True)], [(True, True), (True, True)], [(True, True), (True, True)]]
+
+EX13 = r"""
+# ASCII Art
+--- |
+  \//||\/||
+  // ||  ||__
+"""
+
+TOKENS13 = """
+DOCUMENT_START SCALAR
+"""
+
+NODES13 = True
+
+EX14 = r"""
+---
+  Mark McGwire's
+  year was crippled
+  by a knee injury.
+"""
+
+TOKENS14 = """
+DOCUMENT_START SCALAR
+"""
+
+NODES14 = True
+
+EX15 = r"""
+>
+ Sammy Sosa completed another
+ fine season with great stats.
+
+   63 Home Runs
+   0.288 Batting Average
+
+ What a year!
+"""
+
+TOKENS15 = """
+SCALAR
+"""
+
+NODES15 = True
+
+EX16 = r"""
+name: Mark McGwire
+accomplishment: >
+  Mark set a major league
+  home run record in 1998.
+stats: |
+  65 Home Runs
+  0.278 Batting Average
+"""
+
+TOKENS16 = """
+BLOCK_MAP_START
+KEY SCALAR VALUE SCALAR
+KEY SCALAR VALUE SCALAR
+KEY SCALAR VALUE SCALAR
+BLOCK_END
+"""
+
+NODES16 = [(True, True), (True, True), (True, True)]
+
+EX17 = r"""
+unicode: "Sosa did fine.\u263A"
+control: "\b1998\t1999\t2000\n"
+hexesc:  "\x13\x10 is \r\n"
+
+single: '"Howdy!" he cried.'
+quoted: ' # not a ''comment''.'
+tie-fighter: '|\-*-/|'
+"""
+
+TOKENS17 = """
+BLOCK_MAP_START
+KEY SCALAR VALUE SCALAR
+KEY SCALAR VALUE SCALAR
+KEY SCALAR VALUE SCALAR
+KEY SCALAR VALUE SCALAR
+KEY SCALAR VALUE SCALAR
+KEY SCALAR VALUE SCALAR
+BLOCK_END
+"""
+
+NODES17 = [(True, True), (True, True), (True, True), (True, True), (True, True), (True, True)]
+
+EX18 = r"""
+plain:
+  This unquoted scalar
+  spans many lines.
+
+quoted: "So does this
+  quoted scalar.\n"
+"""
+
+TOKENS18 = """
+BLOCK_MAP_START
+KEY SCALAR VALUE SCALAR
+KEY SCALAR VALUE SCALAR
+BLOCK_END
+"""
+
+NODES18 = [(True, True), (True, True)]
+
+EX19 = r"""
+canonical: 12345
+decimal: +12,345
+sexagesimal: 3:25:45
+octal: 014
+hexadecimal: 0xC
+"""
+
+TOKENS19 = """
+BLOCK_MAP_START
+KEY SCALAR VALUE SCALAR
+KEY SCALAR VALUE SCALAR
+KEY SCALAR VALUE SCALAR
+KEY SCALAR VALUE SCALAR
+KEY SCALAR VALUE SCALAR
+BLOCK_END
+"""
+
+NODES19 = [(True, True), (True, True), (True, True), (True, True), (True, True)]
+
+EX20 = r"""
+canonical: 1.23015e+3
+exponential: 12.3015e+02
+sexagesimal: 20:30.15
+fixed: 1,230.15
+negative infinity: -.inf
+not a number: .NaN
+"""
+
+TOKENS20 = """
+BLOCK_MAP_START
+KEY SCALAR VALUE SCALAR
+KEY SCALAR VALUE SCALAR
+KEY SCALAR VALUE SCALAR
+KEY SCALAR VALUE SCALAR
+KEY SCALAR VALUE SCALAR
+KEY SCALAR VALUE SCALAR
+BLOCK_END
+"""
+
+NODES20 = [(True, True), (True, True), (True, True), (True, True), (True, True), (True, True)]
+
+EX21 = r"""
+null: ~
+true: y
+false: n
+string: '12345'
+"""
+
+TOKENS21 = """
+BLOCK_MAP_START
+KEY SCALAR VALUE SCALAR
+KEY SCALAR VALUE SCALAR
+KEY SCALAR VALUE SCALAR
+KEY SCALAR VALUE SCALAR
+BLOCK_END
+"""
+
+NODES21 = [(True, True), (True, True), (True, True), (True, True)]
+
+EX22 = r"""
+canonical: 2001-12-15T02:59:43.1Z
+iso8601: 2001-12-14t21:59:43.10-05:00
+spaced: 2001-12-14 21:59:43.10 -5
+date: 2002-12-14
+"""
+
+TOKENS22 = """
+BLOCK_MAP_START
+KEY SCALAR VALUE SCALAR
+KEY SCALAR VALUE SCALAR
+KEY SCALAR VALUE SCALAR
+KEY SCALAR VALUE SCALAR
+BLOCK_END
+"""
+
+NODES22 = [(True, True), (True, True), (True, True), (True, True)]
+
+EX23 = r"""
+---
+not-date: !!str 2002-04-28
+
+picture: !!binary |
+ R0lGODlhDAAMAIQAAP//9/X
+ 17unp5WZmZgAAAOfn515eXv
+ Pz7Y6OjuDg4J+fn5OTk6enp
+ 56enmleECcgggoBADs=
+
+application specific tag: !something |
+ The semantics of the tag
+ above may be different for
+ different documents.
+"""
+
+TOKENS23 = """
+DOCUMENT_START
+BLOCK_MAP_START
+KEY SCALAR VALUE TAG SCALAR
+KEY SCALAR VALUE TAG SCALAR
+KEY SCALAR VALUE TAG SCALAR
+BLOCK_END
+"""
+
+NODES23 = [(True, True), (True, True), (True, True)]
+
+EX24 = r"""
+%TAG ! tag:clarkevans.com,2002:
+--- !shape
+  # Use the ! handle for presenting
+  # tag:clarkevans.com,2002:circle
+- !circle
+  center: &ORIGIN {x: 73, y: 129}
+  radius: 7
+- !line
+  start: *ORIGIN
+  finish: { x: 89, y: 102 }
+- !label
+  start: *ORIGIN
+  color: 0xFFEEBB
+  text: Pretty vector drawing.
+"""
+
+TOKENS24 = """
+DIRECTIVE
+DOCUMENT_START TAG
+BLOCK_SEQ_START
+ENTRY TAG
+    BLOCK_MAP_START
+    KEY SCALAR VALUE ANCHOR
+        FLOW_MAP_START KEY SCALAR VALUE SCALAR ENTRY KEY SCALAR VALUE SCALAR FLOW_MAP_END
+    KEY SCALAR VALUE SCALAR
+    BLOCK_END
+ENTRY TAG
+    BLOCK_MAP_START
+    KEY SCALAR VALUE ALIAS
+    KEY SCALAR VALUE
+        FLOW_MAP_START KEY SCALAR VALUE SCALAR ENTRY KEY SCALAR VALUE SCALAR FLOW_MAP_END
+    BLOCK_END
+ENTRY TAG
+    BLOCK_MAP_START
+    KEY SCALAR VALUE ALIAS
+    KEY SCALAR VALUE SCALAR
+    KEY SCALAR VALUE SCALAR
+    BLOCK_END
+BLOCK_END
+"""
+
+NODES24 = [[(True, [(True, True), (True, True)]), (True, True)],
+    [(True, '*'), (True, [(True, True), (True, True)])],
+    [(True, '*'), (True, True), (True, True)]]
+
+EX25 = r"""
+# sets are represented as a
+# mapping where each key is
+# associated with the empty string
+--- !!set
+? Mark McGwire
+? Sammy Sosa
+? Ken Griff
+"""
+
+TOKENS25 = """
+DOCUMENT_START TAG
+BLOCK_MAP_START
+KEY SCALAR
+KEY SCALAR
+KEY SCALAR
+BLOCK_END
+"""
+
+NODES25 = [(True, None), (True, None), (True, None)]
+
+EX26 = r"""
+# ordered maps are represented as
+# a sequence of mappings, with
+# each mapping having one key
+--- !!omap
+- Mark McGwire: 65
+- Sammy Sosa: 63
+- Ken Griffy: 58
+"""
+
+TOKENS26 = """
+DOCUMENT_START TAG
+BLOCK_SEQ_START
+ENTRY
+    BLOCK_MAP_START
+    KEY SCALAR VALUE SCALAR
+    BLOCK_END
+ENTRY
+    BLOCK_MAP_START
+    KEY SCALAR VALUE SCALAR
+    BLOCK_END
+ENTRY
+    BLOCK_MAP_START
+    KEY SCALAR VALUE SCALAR
+    BLOCK_END
+BLOCK_END
+"""
+
+NODES26 = [[(True, True)], [(True, True)], [(True, True)]]
+
+EX27 = r"""
+--- !<tag:clarkevans.com,2002:invoice>
+invoice: 34843
+date   : 2001-01-23
+bill-to: &id001
+    given  : Chris
+    family : Dumars
+    address:
+        lines: |
+            458 Walkman Dr.
+            Suite #292
+        city    : Royal Oak
+        state   : MI
+        postal  : 48046
+ship-to: *id001
+product:
+    - sku         : BL394D
+      quantity    : 4
+      description : Basketball
+      price       : 450.00
+    - sku         : BL4438H
+      quantity    : 1
+      description : Super Hoop
+      price       : 2392.00
+tax  : 251.42
+total: 4443.52
+comments:
+    Late afternoon is best.
+    Backup contact is Nancy
+    Billsmer @ 338-4338.
+"""
+
+TOKENS27 = """
+DOCUMENT_START TAG
+BLOCK_MAP_START
+KEY SCALAR VALUE SCALAR
+KEY SCALAR VALUE SCALAR
+KEY SCALAR VALUE ANCHOR
+    BLOCK_MAP_START
+    KEY SCALAR VALUE SCALAR
+    KEY SCALAR VALUE SCALAR
+    KEY SCALAR VALUE
+        BLOCK_MAP_START
+        KEY SCALAR VALUE SCALAR
+        KEY SCALAR VALUE SCALAR
+        KEY SCALAR VALUE SCALAR
+        KEY SCALAR VALUE SCALAR
+        BLOCK_END
+    BLOCK_END
+KEY SCALAR VALUE ALIAS
+KEY SCALAR VALUE
+    BLOCK_SEQ_START
+    ENTRY
+        BLOCK_MAP_START
+        KEY SCALAR VALUE SCALAR
+        KEY SCALAR VALUE SCALAR
+        KEY SCALAR VALUE SCALAR
+        KEY SCALAR VALUE SCALAR
+        BLOCK_END
+    ENTRY
+        BLOCK_MAP_START
+        KEY SCALAR VALUE SCALAR
+        KEY SCALAR VALUE SCALAR
+        KEY SCALAR VALUE SCALAR
+        KEY SCALAR VALUE SCALAR
+        BLOCK_END
+    BLOCK_END
+KEY SCALAR VALUE SCALAR
+KEY SCALAR VALUE SCALAR
+KEY SCALAR VALUE SCALAR
+BLOCK_END
+"""
+
+NODES27 = [
+    (True, True), (True, True), (True, [(True, True), (True, True), (True, [(True, True), (True, True), (True, True), (True, True)])]), (True, '*'),
+    (True, [[(True, True), (True, True), (True, True), (True, True)], [(True, True), (True, True), (True, True), (True, True)]]), (True, True), (True, True), (True, True),
+]
+
+EX28 = r"""
+---
+Time: 2001-11-23 15:01:42 -5
+User: ed
+Warning:
+  This is an error message
+  for the log file
+---
+Time: 2001-11-23 15:02:31 -5
+User: ed
+Warning:
+  A slightly different error
+  message.
+---
+Date: 2001-11-23 15:03:17 -5
+User: ed
+Fatal:
+  Unknown variable "bar"
+Stack:
+  - file: TopClass.py
+    line: 23
+    code: |
+      x = MoreObject("345\n")
+  - file: MoreClass.py
+    line: 58
+    code: |-
+      foo = bar
+"""
+
+TOKENS28 = """
+DOCUMENT_START
+BLOCK_MAP_START
+KEY SCALAR VALUE SCALAR
+KEY SCALAR VALUE SCALAR
+KEY SCALAR VALUE SCALAR
+BLOCK_END
+
+DOCUMENT_START
+BLOCK_MAP_START
+KEY SCALAR VALUE SCALAR
+KEY SCALAR VALUE SCALAR
+KEY SCALAR VALUE SCALAR
+BLOCK_END
+
+DOCUMENT_START
+BLOCK_MAP_START
+KEY SCALAR VALUE SCALAR
+KEY SCALAR VALUE SCALAR
+KEY SCALAR VALUE SCALAR
+KEY SCALAR VALUE
+    BLOCK_SEQ_START
+    ENTRY
+        BLOCK_MAP_START
+        KEY SCALAR VALUE SCALAR
+        KEY SCALAR VALUE SCALAR
+        KEY SCALAR VALUE SCALAR
+        BLOCK_END
+    ENTRY
+        BLOCK_MAP_START
+        KEY SCALAR VALUE SCALAR
+        KEY SCALAR VALUE SCALAR
+        KEY SCALAR VALUE SCALAR
+        BLOCK_END
+    BLOCK_END
+BLOCK_END
+"""
+
+NODES28 = (
+    [(True, True), (True, True), (True, True)], [(True, True), (True, True), (True, True)],
+    [(True, True), (True, True), (True, True), (True, [[(True, True), (True, True), (True, True)], [(True, True), (True, True), (True, True)]])],
+)
+
+MAX_TESTS = 100  # exclusive upper bound: add_tests() probes fixture indexes 1..MAX_TESTS-1
+
+class TestParser2(unittest.TestCase):  # test methods are attached by the add_tests() calls at module level
+
+    def _testTokens(self, index, EX, TOKENS):  # Scanner.scan() of EX must equal TOKENS split on whitespace
+        try:
+            tokens = None  # pre-set so the diagnostics below still work if scan() raises
+            scanner = parser2.Scanner()
+            tokens = scanner.scan('EX'+str(index), EX)
+            self.failUnlessEqual(tokens, TOKENS.split())
+        except:  # catch-all on purpose: print context for any failure, then re-raise it unchanged
+            print "EXAMPLE #%s" % index
+            print "EX:"
+            print EX
+            print "TOKENS:"
+            print TOKENS
+            print "RESULT:", tokens
+            print "EXPECT:", TOKENS.split()
+            raise
+
+    def _testNodes(self, index, EX, NODES):  # Parser.parse() of EX must equal the NODES structure
+        try:
+            nodes = None  # pre-set so the diagnostics below still work if parse() raises
+            parser = parser2.Parser()
+            nodes = parser.parse('EX'+str(index), EX)
+            self.failUnlessEqual(nodes, NODES)
+        except:  # catch-all on purpose: print context for any failure, then re-raise it unchanged
+            print "EXAMPLE #%s" % index
+            print "EX:"
+            print EX
+            print "RESULT:", nodes
+            print "EXPECT:", NODES
+            raise
+
+    @classmethod
+    def add_tests(cls, test_method_name, *tests):  # `tests` are global-name prefixes, e.g. 'EX', 'TOKENS'
+        for index in range(1, MAX_TESTS):
+            args = []
+            for name in tests:
+                if name+str(index) in globals():
+                    args.append(globals()[name+str(index)])
+                else:
+                    break  # a fixture is missing for this index: generate nothing for it
+            else:  # for/else: reached only when every prefix had a fixture for this index
+                def test_method(self, index=index, args=args):  # defaults freeze the current loop values
+                    getattr(self, '_'+test_method_name)(index, *args)
+                test_method.__name__ = '%s%02d' % (test_method_name, index)  # e.g. testTokens07
+                setattr(cls, test_method.__name__, test_method)
+
+TestParser2.add_tests('testTokens', 'EX', 'TOKENS')  # testTokensNN for each N where EXN and TOKENSN exist
+TestParser2.add_tests('testNodes', 'EX', 'NODES')  # testNodesNN for each N where EXN and NODESN exist
+
+if __name__ == '__main__':
+    unittest.main()  # run the generated tests when executed as a script
+
Index: /trunk/sandbox/my-parser/parser2.py
===================================================================
--- /trunk/sandbox/my-parser/parser2.py	(revision 37)
+++ /trunk/sandbox/my-parser/parser2.py	(revision 37)
@@ -0,0 +1,731 @@
+# Tokens:
+# YAML_DIRECTIVE: ^ '%' YAML ' '+ (version: \d+ '.' \d+) s-l-comments
+# TAG_DIRECTIVE: ^ '%' TAG ' '+ (handle: '!' (word-char* '!')? )  (prefix: uri-char+) s-l-comments
+# RESERVED_DIRECTIVE: ^ '%' (directive-name: ns-char+) (' '+ (directive-parameter: ns-char+))* s-l-comments
+# DOCUMENT_START: ^ '---' (' ' | b-any)
+# DOCUMENT_END: ^ '...' (' ' | b-any)
+# TAG: '!' ( ('<' uri-char+ '>') | uri-char* ) (' ' | b-any)
+# ANCHOR: '&' ns-char+      <-- bug
+# ALIAS: '*' ns-char+       <-- bug
+# ENTRY(block): '-' (' ' | b-any)
+# KEY(block): '?' (' ' | b-any)
+# VALUE(block): ':' (' ' | b-any)
+# FLOW_SEQ_START: '['
+# FLOW_SEQ_END: ']'
+# FLOW_MAP_START: '{'
+# FLOW_MAP_END: '}'
+# KEY(flow): '?'
+# VALUE(flow): ':'
+# ENTRY(flow): ','
+# PLAIN: (plain-char - indicator) | ([-?:] plain-char) ...  <-- bugs
+# DOUBLE_QUOTED: '"' ...
+# SINGLE_QUOTED: ''' ...
+# LITERAL: '|' ...
+# FOLDED: '>' ...
+# BLOCK_SEQ_START: indentation before '-'.
+# BLOCK_MAP_START: indentation before '?' or a simple key.
+# BLOCK_END: no indentation
+# LINE: end of line
+
+# b-generic: \r \n | \r | \n | #x85
+# b-specific: #x2028 | #x2029
+# b-any: b-generic | b-specific
+# hex-digit: [0-9A-Fa-f]
+# word-char: [0-9A-Za-z-]
+# uri-char: word-char | % hex-digit hex-digit | [;/?:@&=+$,_.!~*'()[]]
+
+# Production rules:
+# stream :== implicit_document? explicit_document* END
+# explicit_document :== DIRECTIVE* DOCUMENT_START block_node? DOCUMENT_END?
+# implicit_document :== block_node DOCUMENT_END?
+# block_node :== ALIAS | properties? block_content
+# flow_node :== ALIAS | properties? flow_content
+# properties :== TAG ANCHOR? | ANCHOR TAG?
+# block_content :== block_collection | flow_collection | SCALAR
+# flow_content :== flow_collection | SCALAR
+# block_collection :== block_sequence | block_mapping
+# block_sequence :== BLOCK_SEQ_START (ENTRY block_node?)* BLOCK_END
+# block_mapping :== BLOCK_MAP_START ((KEY block_node_or_indentless_sequence?)? (VALUE block_node_or_indentless_sequence?)?)* BLOCK_END
+# block_node_or_indentless_sequence :== ALIAS | properties? (block_content | indentless_block_sequence)
+# indentless_block_sequence :== (ENTRY block_node?)+
+# flow_collection :== flow_sequence | flow_mapping
+# flow_sequence :== FLOW_SEQ_START (flow_sequence_entry ENTRY)* flow_sequence_entry? FLOW_SEQ_END
+# flow_sequence_entry :== flow_node | KEY flow_node (VALUE flow_node?)?
+# flow_mapping :== FLOW_MAP_START (flow_mapping_entry ENTRY)* flow_mapping_entry? FLOW_MAP_END
+# flow_mapping_entry :== flow_node | KEY flow_node (VALUE flow_node?)?
+
+# FIRST(rule) sets:
+# stream: {}
+# explicit_document: { DIRECTIVE DOCUMENT_START }
+# implicit_document: block_node
+# block_node: { ALIAS TAG ANCHOR SCALAR BLOCK_SEQ_START BLOCK_MAP_START FLOW_SEQ_START FLOW_MAP_START }
+# flow_node: { ALIAS TAG ANCHOR SCALAR FLOW_SEQ_START FLOW_MAP_START }
+# block_content: { BLOCK_SEQ_START BLOCK_MAP_START FLOW_SEQ_START FLOW_MAP_START SCALAR }
+# flow_content: { FLOW_SEQ_START FLOW_MAP_START SCALAR }
+# block_collection: { BLOCK_SEQ_START BLOCK_MAP_START }
+# flow_collection: { FLOW_SEQ_START FLOW_MAP_START }
+# block_sequence: { BLOCK_SEQ_START }
+# block_mapping: { BLOCK_MAP_START }
+# block_node_or_indentless_sequence: { ALIAS TAG ANCHOR SCALAR BLOCK_SEQ_START BLOCK_MAP_START FLOW_SEQ_START FLOW_MAP_START ENTRY }
+# indentless_block_sequence: { ENTRY }
+# flow_collection: { FLOW_SEQ_START FLOW_MAP_START }
+# flow_sequence: { FLOW_SEQ_START }
+# flow_mapping: { FLOW_MAP_START }
+# flow_sequence_entry: { ALIAS TAG ANCHOR SCALAR FLOW_SEQ_START FLOW_MAP_START KEY }
+# flow_mapping_entry: { ALIAS TAG ANCHOR SCALAR FLOW_SEQ_START FLOW_MAP_START KEY }
+
+class Marker(object):  # an offset into a named text, resolved lazily to line bounds and (row, col)
+
+    def __init__(self, source, data, index, length=0):
+        self.source, self.data = source, data    # label of the text and the text itself
+        self.index, self.length = index, length  # offset into data; length is only stored here
+        # Both caches are filled together by _make_line_position():
+        self._line = self._position = None
+        #   _line     -> (start, end) offsets of the line containing index
+        #   _position -> zero-based (row, col) of index
+
+    def line(self):  # returns the (start, end) offsets of the marked line
+        if self._line is None:
+            self._make_line_position()
+        return self._line
+
+    def position(self):  # returns the zero-based (row, col) of index
+        if self._position is None:
+            self._make_line_position()
+        return self._position
+
+    def _make_line_position(self):
+        # Only '\n' counts as a line separator here; the end offset includes it.
+        line_start = self.data.rfind('\n', 0, self.index) + 1
+        line_end = self.data.find('\n', self.index) + 1
+        if not line_end:  # no later '\n': the line runs to the end of data
+            line_end = len(self.data)
+        row, col = self.data.count('\n', 0, line_start), self.index - line_start
+        self._line = (line_start, line_end)
+        self._position = (row, col)
+
+class Error(Exception):  # exception whose str() can show the offending source line with a caret
+
+    def __init__(self, message=None, marker=None):
+        Exception.__init__(self)
+        self.marker = marker    # optional object providing source, data, position() and line()
+        self.message = message  # optional text; __str__ falls back to "YAML error"
+
+    def __str__(self):
+        # Layout: location header with the source line, a caret line, then the message.
+        text = self.message
+        if text is None:
+            text = "YAML error"
+        if self.marker is None:
+            return text
+        row, col = self.marker.position()
+        first, last = self.marker.line()
+        snippet = self.marker.data[first:last].rstrip().encode('utf-8')
+        header = "source \"%s\", line %s, column %s:\n%s\n"  \
+                % (self.marker.source, row+1, col+1, snippet)
+        # The caret is placed under the zero-based column of the marker.
+        caret = " " * col + "^\n"
+        return header+caret+text
+
+class Scanner:
+
+    def scan(self, source, data):  # reset all scanner state, tokenize `data`, return the token list
+        self.source, self.data = source, data
+        # Cursor: offset into data plus zero-based line/column counters.
+        self.index = self.line = self.column = 0
+        # Block context starts at indent -1 with an empty stack of outer indents.
+        self.indent, self.indents = -1, []
+        self.flow_level = 0
+        self.allow_block_collection = True
+        # Simple-key bookkeeping: block-context guess first, then flow-context guesses.
+        self.guess_simple_key = False
+        self.guess_simple_key_token = self.guess_simple_key_indent = None
+        self.allow_flow_key = False
+        self.guess_flow_key_levels, self.guess_flow_key_tokens = [], []
+        self.tokens = []
+        # Alternate between skipping ignorable text and fetching one token until
+        # neither step reports progress.
+        more = True
+        while more:
+            more = self.eat_ignored() or self.fetch_token()
+        return self.tokens
+
+    def eat_ignored(self):  # skip spaces, comments and line breaks; True if any skipper reported progress
+        rounds = 0
+        while self.eat_ignored_spaces() or self.eat_ignored_comment() or self.eat_ignored_newline():
+            rounds += 1
+        return rounds > 0
+
+    def eat_ignored_spaces(self):  # advance past a run of ' ' (tabs are not skipped); True if moved
+        start = self.index
+        stop = start
+        while stop < len(self.data) and self.data[stop] == ' ':
+            stop += 1
+        self.index, self.column = stop, self.column + (stop - start)
+        return stop > start
+
+    def eat_ignored_comment(self):  # consume a '#' comment up to (not including) the line break; True if one was consumed
+        if self.index < len(self.data) and self.data[self.index] == '#':
+            return self.eat_line()  # always True here, since at least the '#' itself is consumed
+        return False
+
+    def eat_line(self):  # advance to the next CR or LF (or the end of data); True if moved
+        start = self.index
+        stop = start
+        while stop < len(self.data) and self.data[stop] not in '\r\n':
+            stop += 1
+        self.index, self.column = stop, self.column + (stop - start)
+        return stop > start
+
+    def eat_ignored_newline(self):  # consume one line break (CRLF, CR or LF); True if consumed
+        if self.index >= len(self.data) or self.data[self.index] not in '\r\n':
+            return False
+        # A CRLF pair counts as a single break.
+        width = 1
+        if self.data[self.index:self.index+2] == '\r\n':
+            width = 2
+        self.index += width
+        self.line, self.column = self.line + 1, 0
+        self.allow_block_collection = True  # re-enabled at the start of every line
+        return True
+
+    def eat_ns(self):  # advance past a run of non-whitespace (stops at space, tab, CR, LF); True if moved
+        start = self.index
+        stop = start
+        while stop < len(self.data) and self.data[stop] not in ' \t\r\n':
+            stop += 1
+        self.index, self.column = stop, self.column + (stop - start)
+        return stop > start
+
+    def eat_indent(self, indent=0):  # at column 0, consume up to max(indent, self.indent) spaces; True only on a full match
+        if self.column != 0:
+            return False
+        wanted = max(indent, self.indent)
+        taken = 0
+        while taken < wanted and self.index + taken < len(self.data)  \
+                and self.data[self.index + taken] == ' ':
+            taken += 1
+        self.index += taken  # spaces stay consumed even when the match falls short
+        self.column += taken
+        return taken == wanted
+
+    def eat_double_quoted(self):  # consume a "..." scalar; False if not at '"', True once the closing quote is eaten
+        if self.index < len(self.data) and self.data[self.index] == '"':
+            self.index += 1
+            self.column += 1
+            while self.index < len(self.data) and self.data[self.index] != '"':
+                if self.data[self.index:self.index+2] in ['\\\\', '\\"']:  # step over \\ and \" as pairs so an escaped quote does not end the scalar
+                    self.index += 2
+                    self.column += 2
+                elif self.data[self.index] in '\r\n':
+                    self.eat_ignored_newline()
+                    if not self.eat_indent(1):  # continuation lines must start with max(1, self.indent) spaces
+                        self.error("Invalid indentation")
+                else:
+                    self.index += 1
+                    self.column += 1
+            if self.index < len(self.data) and self.data[self.index] == '"':
+                self.index += 1
+                self.column += 1
+                return True
+            else:
+                self.error("unclosed double quoted scalar")  # error() is defined outside this view; presumably it raises
+        else:
+            return False
+
+    def eat_single_quoted(self):  # consume a '...' scalar; False if not at a single quote, True once it is closed
+        if self.index < len(self.data) and self.data[self.index] == '\'':
+            self.index += 1
+            self.column += 1
+            while self.index < len(self.data) and   \
+                    (self.data[self.index] != '\'' or self.data[self.index:self.index+2] == '\'\''):
+                if self.data[self.index:self.index+2] == '\'\'':  # a doubled quote is content, not the terminator
+                    self.index += 2
+                    self.column += 2
+                elif self.data[self.index] in '\r\n':
+                    self.eat_ignored_newline()
+                    if not self.eat_indent(1):  # continuation lines must start with max(1, self.indent) spaces
+                        self.error("Invalid indentation")
+                else:
+                    self.index += 1
+                    self.column += 1
+            if self.index < len(self.data) and self.data[self.index] == '\'':
+                self.index += 1
+                self.column += 1
+                return True
+            else:
+                self.error("unclosed single quoted scalar")  # error() is defined outside this view; presumably it raises
+        else:
+            return False
+
+    def eat_folded(self):  # consume a block scalar via eat_block_scalar(); True if one was consumed, else False
+        return self.eat_block_scalar()
+
+    def eat_literal(self):  # consume a block scalar via eat_block_scalar(); True if one was consumed, else False
+        return self.eat_block_scalar()
+
+    def eat_block_scalar(self):  # consume a '|' or '>' block scalar; False if not positioned at one
+        if self.index < len(self.data) and self.data[self.index] in '>|':
+            self.eat_line()  # the rest of the header line is skipped without being inspected
+            if not self.eat_ignored_newline():
+                return True  # no line break after the header: data ends here
+            indent = self.indent+1  # content must be indented deeper than the current block, and by at least 1
+            if indent < 1:
+                indent = 1
+            while (self.eat_indent(indent) and ((self.eat_line() and self.eat_ignored_newline()) or (self.eat_ignored_newline()))) or  \
+                    (self.eat_ignored_comment() and self.eat_ignored_newline()) or  \
+                    self.eat_ignored_newline():
+                pass  # each successful round consumes one more line: indented content, a comment, or a bare line break
+            return True
+        return False
+
+    def eat_block_plain(self):  # plain scalar scanned with the block-context rules of eat_plain()
+        return self.eat_plain(True)
+
+    def eat_flow_plain(self):  # plain scalar scanned with the flow-context rules of eat_plain()
+        return self.eat_plain(False)
+
+    def eat_plain(self, block):  # consume a plain (unquoted) scalar; `block` picks block- vs flow-context rules; True if consumed
+        indent = self.indent+1  # continuation lines must be indented deeper than the current block, by at least 1
+        if indent < 1:
+            indent = 1
+        if self.index < len(self.data):
+            if self.data[self.index] not in ' \t\r\n-?:,[]{}#&*!|>\'"%@`' or    \
+                    (block and self.data[self.index] == '-' and self.data[self.index:self.index+2] not in ['-', '- ', '-\r', '-\n']) or \
+                    (block and self.data[self.index] == '?' and self.data[self.index:self.index+2] not in ['?', '? ', '?\r', '?\n']) or \
+                    (block and self.data[self.index] == ':' and self.data[self.index:self.index+2] not in [':', ': ', ':\r', ':\n']):  # first char: a non-indicator, or (block only) '-', '?' or ':' not followed by space, line break or end of data
+                if block and self.allow_block_collection:
+                    self.guessing_simple_key()  # record this spot as a simple-key candidate
+                if self.flow_level and self.allow_flow_key:
+                    self.guess_flow_key_levels.append(self.flow_level)
+                    self.guess_flow_key_tokens.append(len(self.tokens))
+                self.allow_flow_key = False
+                self.index += 1
+                self.column += 1
+                space = False  # True when the previously consumed char was a space/tab (or a line was just joined)
+                while True:
+                    self.eat_ignored_spaces()
+                    while self.index < len(self.data) and (
+                            self.data[self.index] not in '\r\n?:,[]{}#' or
+                            (not space and self.data[self.index] == '#') or  # '#' is content unless preceded by whitespace
+                            (block and self.data[self.index] in '?,[]{}') or
+                            (block and self.data[self.index] == ':' and self.data[self.index:self.index+2] not in [':', ': ', ':\r', ':\n'])):
+                        space = self.data[self.index] in ' \t'  # fix: this test used to be inverted ("not in")
+                        self.index += 1
+                        self.column += 1
+                        self.allow_block_collection = False
+                    if not (self.eat_ignored_newline() and self.eat_indent(indent)):
+                        break  # end of data, or the next line is not indented enough to continue the scalar
+                    space = True  # a '#' opening a continuation line counts as preceded by whitespace
+                return True
+        return False
+
+    def no_simple_key(self):
+        # Drop the pending simple-key guess: clear the flag and both recorded positions.
+        self.guess_simple_key = False
+        self.guess_simple_key_token = self.guess_simple_key_indent = None
+
+    def guessing_simple_key(self):
+        # Record a simple-key candidate: the next token slot and the current column.
+        self.guess_simple_key_token, self.guess_simple_key_indent = len(self.tokens), self.column
+        self.guess_simple_key = True
+
+    def unwind_indents(self, level):  # emit BLOCK_END and pop the indent stack while self.indent is deeper than `level`
+        while level < self.indent:
+            if self.flow_level:  # closing a block while inside a flow collection is reported as an error
+                self.error("Invalid indentation")
+            self.tokens += ['BLOCK_END']
+            self.indent = self.indents.pop()
+            self.no_simple_key()
+
+    def fetch_token(self):  # Scan one token at the current position into self.tokens; returns True on success, None at end of input.
+        self.unwind_indents(self.column)  # close blocks that are indented deeper than the current column
+        if self.index < len(self.data):
+            if self.column == 0:  # directives and document markers are only recognised at column 0
+                if self.data[self.index] == '%':
+                    self.tokens.append('DIRECTIVE')
+                    self.eat_line()
+                    self.no_simple_key()
+                    return True
+                if self.data[self.index:self.index+3] == '---' and  \
+                        (not self.data[self.index+3:self.index+4] or self.data[self.index+3:self.index+4] in ' \r\n'):
+                    self.unwind_indents(-1)  # level -1: close every open block before the marker
+                    self.tokens.append('DOCUMENT_START')
+                    self.index += 3
+                    self.column += 3
+                    self.allow_block_collection = False
+                    self.allow_flow_key = False
+                    self.guess_flow_keys = []  # NOTE(review): the rest of this method uses guess_flow_key_levels/_tokens; confirm this name is not stale
+                    self.no_simple_key()
+                    return True
+                if self.data[self.index:self.index+3] == '...' and   \
+                        (not self.data[self.index+3:self.index+4] or self.data[self.index+3:self.index+4] in ' \r\n'):
+                    self.unwind_indents(-1)  # level -1: close every open block before the marker
+                    self.tokens.append('DOCUMENT_END')
+                    self.index += 3
+                    self.column += 3
+                    self.allow_block_collection = False
+                    self.allow_flow_key = False
+                    self.guess_flow_keys = []  # NOTE(review): same possibly stale attribute name as in the '---' branch
+                    self.no_simple_key()
+                    return True
+            if self.data[self.index] in '[]{}':  # flow collection brackets adjust flow_level
+                if self.data[self.index] == '[':
+                    self.flow_level += 1
+                    self.allow_flow_key = True
+                    self.tokens.append('FLOW_SEQ_START')
+                elif self.data[self.index] == '{':
+                    self.flow_level += 1
+                    self.allow_flow_key = True
+                    self.tokens.append('FLOW_MAP_START')
+                elif self.data[self.index] == ']':
+                    if not self.flow_level:
+                        self.error("Extra ]")
+                    self.flow_level -= 1
+                    self.allow_flow_key = False
+                    self.tokens.append('FLOW_SEQ_END')
+                else:
+                    if not self.flow_level:
+                        self.error("Extra }")
+                    self.flow_level -= 1
+                    self.allow_flow_key = False
+                    self.tokens.append('FLOW_MAP_END')
+                while self.guess_flow_key_levels and self.guess_flow_key_levels[-1] > self.flow_level:  # drop guesses made at deeper flow levels
+                    self.guess_flow_key_levels.pop()
+                    self.guess_flow_key_tokens.pop()
+                self.index += 1
+                self.column += 1
+                self.allow_block_collection = False
+                return True
+            if self.data[self.index] in '!&*':  # tag, anchor or alias
+                if self.flow_level and self.allow_flow_key:  # flow context: remember this token slot as a possible implicit key
+                    self.guess_flow_key_levels.append(self.flow_level)
+                    self.guess_flow_key_tokens.append(len(self.tokens))
+                if not self.flow_level and self.allow_block_collection:  # block context: possible simple key
+                    self.guessing_simple_key()
+                if self.data[self.index] == '!':
+                    self.tokens.append('TAG')
+                elif self.data[self.index] == '&':
+                    self.tokens.append('ANCHOR')
+                else:
+                    self.tokens.append('ALIAS')
+                self.eat_ns()
+                self.allow_flow_key = False
+                self.allow_block_collection = False
+                return True
+            if self.data[self.index] == '"':  # double-quoted scalar
+                if self.flow_level and self.allow_flow_key:
+                    self.guess_flow_key_levels.append(self.flow_level)
+                    self.guess_flow_key_tokens.append(len(self.tokens))
+                if not self.flow_level and self.allow_block_collection:
+                    self.guessing_simple_key()
+                self.tokens.append('SCALAR')
+                self.eat_double_quoted()
+                self.allow_flow_key = False
+                self.allow_block_collection = False
+                return True
+            if self.data[self.index] == '\'':  # single-quoted scalar
+                if self.flow_level and self.allow_flow_key:
+                    self.guess_flow_key_levels.append(self.flow_level)
+                    self.guess_flow_key_tokens.append(len(self.tokens))
+                if not self.flow_level and self.allow_block_collection:
+                    self.guessing_simple_key()
+                self.tokens.append('SCALAR')
+                self.eat_single_quoted()
+                self.allow_flow_key = False
+                self.allow_block_collection = False
+                return True
+            if not self.flow_level:  # block context
+                if self.data[self.index] in '-?:' and \
+                        (not self.data[self.index+1:self.index+2] or self.data[self.index+1:self.index+2] in ' \r\n'):  # indicator followed by space, line break or end of data
+                    if self.guess_simple_key and self.data[self.index] == ':':  # ':' confirms the guessed simple key
+                        self.tokens.insert(self.guess_simple_key_token, 'KEY')  # KEY goes in front of the guessed key token
+                        if self.guess_simple_key_indent > self.indent:  # key is deeper than the current block: open a new mapping
+                            self.indents.append(self.indent)
+                            self.indent = self.guess_simple_key_indent
+                            self.tokens.insert(self.guess_simple_key_token, 'BLOCK_MAP_START')  # same index, so it lands before KEY
+                        self.tokens.append('VALUE')
+                        self.no_simple_key()
+                        self.index += 1
+                        self.column += 1
+                        self.allow_block_collection = False
+                        return True
+                    else:
+                        if not self.allow_block_collection:
+                            self.error("Block collection should start at the beginning of the line")
+                        if self.column > self.indent:  # indicator is deeper than the current block: open a new collection
+                            self.indents.append(self.indent)
+                            self.indent = self.column
+                            if self.data[self.index] == '-':
+                                self.tokens.append('BLOCK_SEQ_START')
+                            else:
+                                self.tokens.append('BLOCK_MAP_START')
+                        if self.data[self.index] == '-':
+                            self.tokens.append('ENTRY')
+                        elif self.data[self.index] == '?':
+                            self.tokens.append('KEY')
+                        else:
+                            self.tokens.append('VALUE')
+                        self.index += 1
+                        self.column += 1
+                        #self.allow_block_collection = False
+                        self.allow_block_collection = True  # another block collection may start right after the indicator
+                        self.no_simple_key()
+                        return True
+                if self.data[self.index] == '>':  # '>' indicator: scalar consumed by eat_folded()
+                    self.no_simple_key()
+                    self.tokens.append('SCALAR')
+                    self.eat_folded()
+                    self.allow_block_collection = True
+                    return True
+                if self.data[self.index] == '|':  # '|' indicator: scalar consumed by eat_literal()
+                    self.no_simple_key()
+                    self.tokens.append('SCALAR')
+                    self.eat_literal()
+                    self.allow_block_collection = True
+                    return True
+                if self.eat_block_plain():  # plain scalar; the helper reports whether it consumed one
+                    self.tokens.append('SCALAR')
+                    return True
+            else:  # flow context
+                if self.data[self.index] in ',?:':
+                    if self.data[self.index] == ',':
+                        self.tokens.append('ENTRY')
+                        while self.guess_flow_key_levels and self.guess_flow_key_levels[-1] >= self.flow_level:  # unconfirmed guesses end with the entry
+                            self.guess_flow_key_levels.pop()
+                            self.guess_flow_key_tokens.pop()
+                        self.allow_flow_key = True
+                    elif self.data[self.index] == '?':
+                        self.tokens.append('KEY')
+                        while self.guess_flow_key_levels and self.guess_flow_key_levels[-1] >= self.flow_level:  # explicit key: discard guesses
+                            self.guess_flow_key_levels.pop()
+                            self.guess_flow_key_tokens.pop()
+                        self.allow_flow_key = False
+                    else:
+                        self.tokens.append('VALUE')
+                        if self.guess_flow_key_levels and self.guess_flow_key_levels[-1] == self.flow_level:  # a guess at this level becomes a KEY
+                            self.guess_flow_key_levels.pop()
+                            index = self.guess_flow_key_tokens.pop()
+                            self.tokens.insert(index, 'KEY')  # inserted before the token that was guessed to be a key
+                        self.allow_flow_key =False
+                    self.index += 1
+                    self.column += 1
+                    return True
+                if self.eat_flow_plain():  # plain scalar; the helper reports whether it consumed one
+                    self.tokens.append('SCALAR')
+                    return True
+            self.error("Invalid token")  # nothing above matched at this position
+        else:
+            self.unwind_indents(-1)  # end of input: close every open block; None is returned implicitly
+
+    def error(self, message):  # Raise Error for `message`, with a Marker built from the current source, data and index.
+        raise Error(message, Marker(self.source, self.data, self.index))
+
+class Parser:  # Recursive-descent parser over the token-name list produced by Scanner.scan().
+
+    def parse(self, source, data):  # Returns the document itself if there is exactly one, else a tuple of documents.
+        scanner = Scanner()
+        self.tokens = scanner.scan(source, data)
+        self.tokens.append('END')  # sentinel, so self.tokens[0] is always safe to inspect
+        documents = self.parse_stream()
+        if len(documents) == 1:
+            return documents[0]
+        return documents
+
+    def parse_stream(self):  # An implicit first document, then any number of explicit ('---') documents.
+        documents = []
+        if self.tokens[0] not in ['DIRECTIVE', 'DOCUMENT_START', 'END']:
+            documents.append(self.parse_block_node())
+        while self.tokens[0] != 'END':
+            while self.tokens[0] == 'DIRECTIVE':  # directives are skipped, not interpreted
+                self.tokens.pop(0)
+            if self.tokens[0] != 'DOCUMENT_START':
+                self.error('DOCUMENT_START is expected')
+            self.tokens.pop(0)
+            if self.tokens[0] in ['DIRECTIVE', 'DOCUMENT_START', 'DOCUMENT_END', 'END']:
+                documents.append(None)  # empty document
+            else:
+                documents.append(self.parse_block_node())
+            while self.tokens[0] == 'DOCUMENT_END':
+                self.tokens.pop(0)
+        if self.tokens[0] != 'END':  # defensive: the loop above only exits on END
+            self.error("END is expected")
+        return tuple(documents)
+
+    def parse_block_node(self):  # ALIAS -> '*'; otherwise optional TAG/ANCHOR (either order) then block content.
+        if self.tokens[0] == 'ALIAS':
+            self.tokens.pop(0)
+            return '*'
+        if self.tokens[0] == 'TAG':
+            self.tokens.pop(0)
+            if self.tokens[0] == 'ANCHOR':
+                self.tokens.pop(0)
+        elif self.tokens[0] == 'ANCHOR':
+            self.tokens.pop(0)
+            if self.tokens[0] == 'TAG':
+                self.tokens.pop(0)
+        return self.parse_block_content()
+
+    def parse_flow_node(self):  # Same as parse_block_node, but only flow content is accepted.
+        if self.tokens[0] == 'ALIAS':
+            self.tokens.pop(0)
+            return '*'
+        if self.tokens[0] == 'TAG':
+            self.tokens.pop(0)
+            if self.tokens[0] == 'ANCHOR':
+                self.tokens.pop(0)
+        elif self.tokens[0] == 'ANCHOR':
+            self.tokens.pop(0)
+            if self.tokens[0] == 'TAG':
+                self.tokens.pop(0)
+        return self.parse_flow_content()
+
+    def parse_block_node_or_indentless_sequence(self):  # Used for mapping keys/values, where '-' entries need no BLOCK_SEQ_START.
+        if self.tokens[0] == 'ALIAS':
+            self.tokens.pop(0)
+            return '*'
+        if self.tokens[0] == 'TAG':
+            self.tokens.pop(0)
+            if self.tokens[0] == 'ANCHOR':
+                self.tokens.pop(0)
+        elif self.tokens[0] == 'ANCHOR':
+            self.tokens.pop(0)
+            if self.tokens[0] == 'TAG':
+                self.tokens.pop(0)
+        if self.tokens[0] == 'ENTRY':
+            return self.parse_indentless_sequence()  # bound call: passing self again raised TypeError
+        return self.parse_block_content()
+
+    def parse_block_content(self):  # Scalars become True; collections are delegated to their parsers.
+        if self.tokens[0] == 'SCALAR':
+            self.tokens.pop(0)
+            return True
+        elif self.tokens[0] == 'BLOCK_SEQ_START':
+            return self.parse_block_sequence()
+        elif self.tokens[0] == 'BLOCK_MAP_START':
+            return self.parse_block_mapping()
+        elif self.tokens[0] == 'FLOW_SEQ_START':
+            return self.parse_flow_sequence()
+        elif self.tokens[0] == 'FLOW_MAP_START':
+            return self.parse_flow_mapping()
+        else:
+            self.error('block content is expected')
+
+    def parse_flow_content(self):  # Like parse_block_content, minus the block collections.
+        if self.tokens[0] == 'SCALAR':
+            self.tokens.pop(0)
+            return True
+        elif self.tokens[0] == 'FLOW_SEQ_START':
+            return self.parse_flow_sequence()
+        elif self.tokens[0] == 'FLOW_MAP_START':
+            return self.parse_flow_mapping()
+        else:
+            self.error('flow content is expected')
+
+    def parse_block_sequence(self):  # BLOCK_SEQ_START (ENTRY node?)* BLOCK_END -> list; empty entries become None.
+        sequence = []
+        if self.tokens[0] != 'BLOCK_SEQ_START':
+            self.error('BLOCK_SEQ_START is expected')
+        self.tokens.pop(0)
+        while self.tokens[0] == 'ENTRY':
+            self.tokens.pop(0)
+            if self.tokens[0] not in ['ENTRY', 'BLOCK_END']:
+                sequence.append(self.parse_block_node())
+            else:
+                sequence.append(None)
+        if self.tokens[0] != 'BLOCK_END':
+            self.error('BLOCK_END is expected')
+        self.tokens.pop(0)
+        return sequence
+
+    def parse_indentless_sequence(self):  # (ENTRY node?)+ with no start/end tokens; stops at the first non-ENTRY token.
+        sequence = []
+        while self.tokens[0] == 'ENTRY':
+            self.tokens.pop(0)
+            if self.tokens[0] not in ['ENTRY', 'KEY', 'VALUE', 'BLOCK_END']:  # tokens of the enclosing mapping also end an empty entry
+                sequence.append(self.parse_block_node())
+            else:
+                sequence.append(None)
+        return sequence
+
+    def parse_block_mapping(self):  # BLOCK_MAP_START ((KEY node?)? (VALUE node?)?)* BLOCK_END -> list of (key, value).
+        mapping = []
+        if self.tokens[0] != 'BLOCK_MAP_START':
+            self.error('BLOCK_MAP_START is expected')
+        self.tokens.pop(0)
+        while self.tokens[0] in ['KEY', 'VALUE']:
+            key = None
+            value = None
+            if self.tokens[0] == 'KEY':
+                self.tokens.pop(0)
+                if self.tokens[0] not in ['KEY', 'VALUE', 'BLOCK_END']:
+                    key = self.parse_block_node_or_indentless_sequence()
+            if self.tokens[0] == 'VALUE':
+                self.tokens.pop(0)
+                if self.tokens[0] not in ['KEY', 'VALUE', 'BLOCK_END']:
+                    value = self.parse_block_node_or_indentless_sequence()
+            mapping.append((key, value))
+        if self.tokens[0] != 'BLOCK_END':
+            self.error('BLOCK_END is expected')
+        self.tokens.pop(0)
+        return mapping
+
+    def parse_flow_sequence(self):  # '[' ... ']' -> list; a KEY entry becomes a one-pair list [(key, value)].
+        sequence = []
+        if self.tokens[0] != 'FLOW_SEQ_START':
+            self.error('FLOW_SEQ_START is expected')
+        self.tokens.pop(0)
+        while self.tokens[0] != 'FLOW_SEQ_END':
+            if self.tokens[0] == 'KEY':
+                self.tokens.pop(0)
+                key = None
+                value = None
+                if self.tokens[0] != 'VALUE':
+                    key = self.parse_flow_node()
+                if self.tokens[0] == 'VALUE':
+                    self.tokens.pop(0)
+                    if self.tokens[0] not in ['ENTRY', 'FLOW_SEQ_END']:
+                        value = self.parse_flow_node()
+                sequence.append([(key, value)])
+            else:
+                sequence.append(self.parse_flow_node())
+            if self.tokens[0] not in ['ENTRY', 'FLOW_SEQ_END']:
+                self.error("ENTRY or FLOW_SEQ_END is expected")
+            if self.tokens[0] == 'ENTRY':
+                self.tokens.pop(0)
+        if self.tokens[0] != 'FLOW_SEQ_END':
+            self.error('FLOW_SEQ_END is expected')
+        self.tokens.pop(0)
+        return sequence
+
+    def parse_flow_mapping(self):  # '{' ... '}' -> list of (key, value); an entry without KEY becomes (node, None).
+        mapping = []
+        if self.tokens[0] != 'FLOW_MAP_START':
+            self.error('FLOW_MAP_START is expected')
+        self.tokens.pop(0)
+        while self.tokens[0] != 'FLOW_MAP_END':
+            if self.tokens[0] == 'KEY':
+                self.tokens.pop(0)
+                key = None
+                value = None
+                if self.tokens[0] != 'VALUE':
+                    key = self.parse_flow_node()
+                if self.tokens[0] == 'VALUE':
+                    self.tokens.pop(0)
+                    if self.tokens[0] not in ['ENTRY', 'FLOW_MAP_END']:
+                        value = self.parse_flow_node()
+                mapping.append((key, value))
+            else:
+                mapping.append((self.parse_flow_node(), None))
+            if self.tokens[0] not in ['ENTRY', 'FLOW_MAP_END']:
+                self.error("ENTRY or FLOW_MAP_END is expected")
+            if self.tokens[0] == 'ENTRY':
+                self.tokens.pop(0)
+        if self.tokens[0] != 'FLOW_MAP_END':
+            self.error('FLOW_MAP_END is expected')
+        self.tokens.pop(0)
+        return mapping
+
+    def error(self, message):  # Raise Error carrying the message plus the tokens that remain unparsed.
+        raise Error(message+': '+str(self.tokens))
+
+
Index: /trunk/sandbox/my-parser/canonical.py
===================================================================
--- /trunk/sandbox/my-parser/canonical.py	(revision 37)
+++ /trunk/sandbox/my-parser/canonical.py	(revision 37)
@@ -0,0 +1,363 @@
+
+import re, unittest
+
+class Marker(object):  # A position (index, optional length) inside the text `data` of a named `source`.
+
+    def __init__(self, source, data, index, length=0):
+        self.source, self.data = source, data
+        self.index, self.length = index, length
+        self._line = self._position = None  # both are computed lazily on first request
+
+    def line(self):
+        # (start, end) offsets of the line holding `index`; `end` is just past its newline, if any.
+        if self._line:
+            return self._line
+        self._make_line_position()
+        return self._line
+
+    def position(self):
+        # Zero-based (row, col) of `index` within `data`.
+        if self._position:
+            return self._position
+        self._make_line_position()
+        return self._position
+
+    def _make_line_position(self):
+        text, at = self.data, self.index
+        begin = text.rfind('\n', 0, at)+1
+        finish = text.find('\n', at)+1
+        if finish == 0:  # no newline after `index`: the line runs to the end of the data
+            finish = len(text)
+        self._line = (begin, finish)
+        self._position = (text.count('\n', 0, begin), at-begin)
+
+class Error(Exception):  # YAML error carrying an optional message and an optional position marker.
+
+    def __init__(self, message=None, marker=None):
+        Exception.__init__(self)
+        self.message = message
+        if isinstance(marker, list):
+            if not marker:
+                marker = None
+            else:
+                marker = marker[0].marker  # a list argument contributes the marker of its first item
+        self.marker = marker
+
+    def __str__(self):
+        # Location header and caret line are present only when a marker is known.
+        where = ""
+        pointer = ""
+        if self.marker is not None:
+            row, col = self.marker.position()
+            start, end = self.marker.line()
+            snippet = self.marker.data[start:end].rstrip().encode('utf-8')
+            where = "source \"%s\", line %s, column %s:\n%s\n"  \
+                    % (self.marker.source, row+1, col+1, snippet)
+            pointer = " " * col + "^\n"
+        text = self.message
+        if text is None:
+            text = "YAML error"
+        return where+pointer+text
+
+def scanner_rule(pattern):  # Decorator factory: store the regex `pattern` on the decorated function.
+    def attach(rule):
+        setattr(rule, 'pattern', pattern)
+        return rule  # the function itself is returned, not a wrapper
+    return attach
+
+class Token:  # One scanner token: a name, a value and an optional marker.
+
+    def __init__(self, name, value, marker=None):
+        self.marker = marker
+        self.value = value
+        self.name = name
+
+class YAMLScanner:  # Regex scanner: each @scanner_rule method is a token rule, and the method name is the token name.
+
+    @scanner_rule(r"\s+")
+    def WHITESPACE(self, tokens, token):  # whitespace is matched but produces no token
+        pass
+
+    @scanner_rule(r"%YAML")
+    def DIRECTIVE_NAME(self, tokens, token):
+        tokens.append(token)
+
+    @scanner_rule(r"\d+\.\d+")
+    def DIRECTIVE_VALUE(self, tokens, token):
+        token.value = float(token.value)  # the matched text is stored as a float
+        tokens.append(token)
+
+    @scanner_rule(r"---")
+    def DOCUMENT_SEPARATOR(self, tokens, token):
+        tokens.append(token)
+
+    @scanner_rule(r"\[")
+    def SEQ_START(self, tokens, token):
+        tokens.append(token)
+
+    @scanner_rule(r"\]")
+    def SEQ_END(self, tokens, token):
+        tokens.append(token)
+
+    @scanner_rule(r"\{")
+    def MAP_START(self, tokens, token):
+        tokens.append(token)
+
+    @scanner_rule(r"\}")
+    def MAP_END(self, tokens, token):
+        tokens.append(token)
+
+    @scanner_rule(r"\?")
+    def MAP_KEY(self, tokens, token):
+        tokens.append(token)
+
+    @scanner_rule(r":")
+    def MAP_VALUE(self, tokens, token):
+        tokens.append(token)
+
+    @scanner_rule(r",")
+    def COLL_ENTRY(self, tokens, token):
+        tokens.append(token)
+
+    @scanner_rule(r"!\S*")
+    def TAG(self, tokens, token):  # normalises the tag text: "!" -> "", "!<x>" -> "x", "!!x" -> "tag:yaml.org,2002:x"
+        if token.value == "!":
+            token.value = ""
+        elif token.value.startswith(r"!<") and token.value.endswith(r">"):
+            token.value = token.value[2:-1]
+        elif token.value.startswith(r"!!"):
+            token.value = "tag:yaml.org,2002:" + token.value[2:]
+        tokens.append(token)
+
+    escapes_re = re.compile(r"\\(?P<value>[\\\"abefnrtvNLP_0 ]|x[0-9A-Fa-f]{2}|u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})", re.U)
+    escapes = {  # single-character escapes allowed inside double-quoted scalars
+        "\\": u"\\",
+        "\"": u"\"",
+        " ": u" ",
+        "a": u"\x07",
+        "b": u"\x08",
+        "e": u"\x1B",
+        "f": u"\x0C",
+        "n": u"\x0A",
+        "r": u"\x0D",
+        "t": u"\x09",
+        "v": u"\x0B",
+        "N": u"\u0085",
+        "L": u"\u2028",
+        "P": u"\u2029",
+        "_": u"\xA0",  # YAML 1.1 defines "\_" as the non-breaking space, not a literal underscore
+        "0": u"\x00",
+    }
+
+    def escapes_replace(self, match):  # re.sub callback: turn one escape sequence into the character it denotes
+        value = match.group('value')
+        if len(value) == 1:
+            return self.escapes[value]
+        else:
+            return unichr(int(value[1:], 16))  # x/u/U followed by hex digits: numeric code point
+
+    @scanner_rule(r"\"(?:[^\"\\]|\\[\\\"abefnrtvNLP_0 ]|\\x[0-9A-Fa-f]{2}|\\u[0-9A-Fa-f]{4}|\\U[0-9A-Fa-f]{8})*\"")
+    def SCALAR(self, tokens, token):
+        token.value = self.escapes_re.sub(self.escapes_replace, token.value[1:-1])  # strip the quotes, then unescape
+        tokens.append(token)
+
+    @scanner_rule(r"&\S+")
+    def ANCHOR(self, tokens, token):
+        token.value = token.value[1:]  # drop the leading '&'
+        tokens.append(token)
+
+    @scanner_rule(r"\*\S+")
+    def ALIAS(self, tokens, token):
+        token.value = token.value[1:]  # drop the leading '*'
+        tokens.append(token)
+
+    def __init__(self):  # Combine every rule pattern into one alternation of named groups.
+        rules = []
+        for name, function in vars(self.__class__).items():
+            if hasattr(function, 'pattern'):
+                rules.append((name, function.pattern))
+        patterns = [r'(?P<%s>%s)' % (name, pattern) for name, pattern in rules]
+        self.scanner_re = re.compile('|'.join(patterns), re.U)
+
+    def scan(self, source, data):  # Decode `data` as UTF-8 and return the Token list; raises Error("invalid token") where nothing matches.
+        data = unicode(data, 'utf-8')
+        tokens = []
+        index = 0
+        while index < len(data):
+            match = self.scanner_re.match(data, index)
+            if not match:
+                raise Error("invalid token", Marker(source, data, index))
+            name = match.lastgroup  # the group name doubles as the rule (method) name
+            value = match.group()
+            marker = Marker(source, data, index, len(value))
+            token = Token(name, value, marker)
+            processor = getattr(self, name)
+            processor(tokens, token)  # the rule method decides whether and how the token is stored
+            index += len(value)
+        return tokens
+
+class Value:  # Base class of parsed nodes: holds a tag, an anchor and the node's value.
+    def __init__(self, tag, anchor, value):
+        self.value = value
+        self.anchor = anchor
+        self.tag = tag
+    def __eq__(self, other):  # equal only when both the class and every attribute match
+        return (other.__class__, other.__dict__) == (self.__class__, self.__dict__)
+
+class Scalar(Value):  # Node built by YAMLParser.parse_node from a SCALAR token; `value` is the token's value.
+    pass
+
+class Sequence(Value):  # Node built by YAMLParser.parse_node; `value` is the list returned by parse_sequence.
+    pass
+
+class Mapping(Value):  # Node built by YAMLParser.parse_node; `value` is the list of (key, value) pairs from parse_mapping.
+    pass
+
+class Alias:  # Reference node; `link` is the alias name with its leading '*' already removed by the scanner.
+    def __init__(self, link):
+        self.link = link
+    def __eq__(self, other):  # equal only when both the class and every attribute match
+        return (other.__class__, other.__dict__) == (self.__class__, self.__dict__)
+
+class YAMLParser:  # Recursive-descent parser for canonical YAML; consumes the Token list in place.
+
+    # stream: document*
+    def parse_stream(self, tokens):
+        docs = []
+        while tokens:
+            if not self.check_token(tokens, ['DIRECTIVE_NAME', 'DOCUMENT_SEPARATOR']):
+                raise Error("document is expected", tokens)
+            # Every document opens with a %YAML directive or directly with '---'.
+            docs.append(self.parse_document(tokens))
+        return docs
+
+    # document: (DIRECTIVE_NAME DIRECTIVE_VALUE)? DOCUMENT_SEPARATOR node?
+    def parse_document(self, tokens):
+        if self.check_token(tokens, ['DIRECTIVE_NAME']):
+            self.eat_token(tokens, 'DIRECTIVE_NAME')
+            self.eat_token(tokens, 'DIRECTIVE_VALUE')
+        self.eat_token(tokens, 'DOCUMENT_SEPARATOR')
+        node_starts = ['TAG', 'ANCHOR', 'ALIAS', 'SCALAR', 'SEQ_START', 'MAP_START']
+        if not self.check_token(tokens, node_starts):
+            return None  # a document without a root node
+        return self.parse_node(tokens)
+
+    # node: TAG? ANCHOR? (SCALAR|sequence|mapping) | ALIAS
+    def parse_node(self, tokens):
+        if self.check_token(tokens, ['ALIAS']):
+            return Alias(self.eat_token(tokens, 'ALIAS'))
+        # Optional properties come in a fixed order: tag first, then anchor.
+        tag = anchor = None
+        if self.check_token(tokens, ['TAG']):
+            tag = self.eat_token(tokens, 'TAG')
+        if self.check_token(tokens, ['ANCHOR']):
+            anchor = self.eat_token(tokens, 'ANCHOR')
+        if self.check_token(tokens, ['SCALAR']):
+            content = self.eat_token(tokens, 'SCALAR')
+            return Scalar(tag, anchor, content)
+        if self.check_token(tokens, ['SEQ_START']):
+            items = self.parse_sequence(tokens)
+            return Sequence(tag, anchor, items)
+        if self.check_token(tokens, ['MAP_START']):
+            return Mapping(tag, anchor, self.parse_mapping(tokens))
+        raise Error("SCALAR, sequence or mapping is expected", tokens)
+
+    # sequence: SEQ_START (node (COLL_ENTRY node)*)? SEQ_END
+    def parse_sequence(self, tokens):
+        self.eat_token(tokens, 'SEQ_START')
+        items = []
+        while not self.check_token(tokens, ['SEQ_END']):
+            # Every node after the first must be preceded by a separator.
+            if items:
+                self.eat_token(tokens, 'COLL_ENTRY')
+            items.append(self.parse_node(tokens))
+        self.eat_token(tokens, 'SEQ_END')
+        return items
+
+    # mapping: MAP_START (map_entry (COLL_ENTRY map_entry)*)? MAP_END
+    def parse_mapping(self, tokens):
+        self.eat_token(tokens, 'MAP_START')
+        pairs = []
+        while not self.check_token(tokens, ['MAP_END']):
+            # Every entry after the first must be preceded by a separator.
+            if pairs:
+                self.eat_token(tokens, 'COLL_ENTRY')
+            pairs.append(self.parse_map_entry(tokens))
+        self.eat_token(tokens, 'MAP_END')
+        return pairs
+
+    # map_entry: MAP_KEY node MAP_VALUE node
+    def parse_map_entry(self, tokens):
+        self.eat_token(tokens, 'MAP_KEY')
+        key = self.parse_node(tokens)
+        self.eat_token(tokens, 'MAP_VALUE')
+        return (key, self.parse_node(tokens))
+
+    def check_token(self, tokens, names):  # Truthy when the next token's name is in `names`; an empty list is returned as is.
+        return tokens and tokens[0].name in names
+
+    def eat_token(self, tokens, name):  # Pop the next token and return its value; it must be named `name`.
+        if not tokens:
+            raise Error("%s is expected, EOF is found" % name, tokens)
+        found = tokens[0].name
+        if found != name:
+            raise Error("%s is expected, %s is found" % (name, found), tokens)
+        return tokens.pop(0).value
+
+    def __init__(self):
+        self.scanner = YAMLScanner()
+
+    def parse(self, source, data):
+        # Scan `data` into tokens, then parse them into a list of documents.
+        return self.parse_stream(self.scanner.scan(source, data))
+
+class Test(unittest.TestCase):  # Checks YAMLParser.parse() against hand-built node trees.
+
+    def testScalar(self):  # a tagged scalar; "!!str" is expanded to the full yaml.org tag
+        parser = YAMLParser()
+        documents = parser.parse('testScalar', """--- !!str "foo"\n""")
+        self.failUnlessEqual(documents, [Scalar('tag:yaml.org,2002:str', None, "foo")])
+
+    def testSequence(self):  # a tagged flow sequence of three untagged scalars
+        parser = YAMLParser()
+        documents = parser.parse('testSequence', """%YAML 1.1\n--- !!seq\n["foo", "bar", "baz"]\n""")
+        self.failUnlessEqual(documents, [
+            Sequence('tag:yaml.org,2002:seq', None, [
+                Scalar(None, None, "foo"),
+                Scalar(None, None, "bar"),
+                Scalar(None, None, "baz"),
+            ])
+        ])
+
+    def testMapping(self):  # a tagged mapping; every entry is written as "? key : value"
+        parser = YAMLParser()
+        documents = parser.parse('testMapping', """%YAML 1.1\n--- !!map\n{ ? "foo" : "bar", ? "baz" : "bat" }\n""")
+        self.failUnlessEqual(documents, [
+            Mapping('tag:yaml.org,2002:map', None, [
+                (Scalar(None, None, "foo"), Scalar(None, None, "bar")),
+                (Scalar(None, None, "baz"), Scalar(None, None, "bat")),
+            ])
+        ])
+
+    def testAlias(self):  # an anchored scalar followed by an alias that refers to it
+        parser = YAMLParser()
+        documents = parser.parse('testAlias', """%YAML 1.1\n--- !!seq\n[ &id "foo", *id ]\n""")
+        self.failUnlessEqual(documents, [
+            Sequence('tag:yaml.org,2002:seq', None, [
+                Scalar(None, 'id', "foo"),
+                Alias('id'),
+            ])
+        ])
+
+    def testMultiplyDocuments(self):  # three '---' separated documents after a single directive
+        parser = YAMLParser()
+        documents = parser.parse('testMultiplyDocuments', """%YAML 1.1\n--- "foo"\n--- "bar"\n--- "baz"\n""")
+        self.failUnlessEqual(documents, [
+            Scalar(None, None, "foo"),
+            Scalar(None, None, "bar"),
+            Scalar(None, None, "baz"),
+        ])
+
+if __name__ == '__main__':  # run the unit tests above when this file is executed as a script
+    unittest.main()
+
