Index: /branches/pyyaml3000/tests/test_structure.py
===================================================================
--- /branches/pyyaml3000/tests/test_structure.py	(revision 44)
+++ /branches/pyyaml3000/tests/test_structure.py	(revision 46)
@@ -2,4 +2,6 @@
 import test_appliance
 
+from yaml.reader import Reader
+from yaml.scanner import Scanner
 from yaml.parser import *
 
@@ -10,5 +12,5 @@
         node2 = eval(file(structure_filename, 'rb').read())
         try:
-            parser = Parser(data_filename, file(data_filename, 'rb').read())
+            parser = Parser(Scanner(Reader(file(data_filename, 'rb'))))
             node1 = parser.parse()
             node1 = [self._convert(n) for n in node1]
@@ -50,5 +52,5 @@
         documents2 = None
         try:
-            parser = Parser(data_filename, file(data_filename, 'rb').read())
+            parser = Parser(Scanner(Reader(file(data_filename, 'rb'))))
             documents1 = parser.parse()
             canonical = test_appliance.CanonicalParser(canonical_filename, file(canonical_filename, 'rb').read())
Index: /branches/pyyaml3000/tests/test_marker.py
===================================================================
--- /branches/pyyaml3000/tests/test_marker.py	(revision 45)
+++ /branches/pyyaml3000/tests/test_marker.py	(revision 46)
@@ -2,5 +2,5 @@
 import test_appliance
 
-from yaml.stream import Marker
+from yaml.reader import Marker
 
 class TestMarker(test_appliance.TestAppliance):
Index: /branches/pyyaml3000/tests/test_reader.py
===================================================================
--- /branches/pyyaml3000/tests/test_reader.py	(revision 46)
+++ /branches/pyyaml3000/tests/test_reader.py	(revision 46)
@@ -0,0 +1,44 @@
+
+import test_appliance
+from yaml.reader import Reader, ReaderError
+
+import codecs
+
+class TestReaderErrors(test_appliance.TestAppliance):
+    # Each test hands one data file to Reader and expects ReaderError.
+    def _testReaderUnicodeErrors(self, test_name, stream_filename):
+        for encoding in ['utf-8', 'utf-16-le', 'utf-16-be']:
+            try:
+                data = unicode(file(stream_filename, 'rb').read(), encoding)
+                break
+            except UnicodeError:
+                pass    # not valid in this encoding; try the next one
+        else:
+            return      # not decodable in any encoding: nothing to test as unicode
+        # The decoded text, passed as a unicode string, must be rejected ...
+        self.failUnlessRaises(ReaderError,
+                lambda: self._load(data))
+        # ... and so must a file object opened with the encoding that decoded it.
+        self.failUnlessRaises(ReaderError,
+                lambda: self._load(codecs.open(stream_filename, encoding=encoding)))
+
+    def _testReaderStringErrors(self, test_name, stream_filename):
+        data = file(stream_filename, 'rb').read()
+        # Input given as a raw byte string.
+        self.failUnlessRaises(ReaderError, lambda: self._load(data))
+
+    def _testReaderFileErrors(self, test_name, stream_filename):
+        data = file(stream_filename, 'rb')
+        # Input given as an open binary file object.
+        self.failUnlessRaises(ReaderError, lambda: self._load(data))
+
+    def _load(self, data):
+        stream = Reader(data)   # construction may already raise ReaderError
+        while stream.peek() != u'\0':   # u'\0' is the value this loop treats as end of input
+            stream.forward()
+
+TestReaderErrors.add_tests('testReaderUnicodeErrors', '.stream-error')
+TestReaderErrors.add_tests('testReaderStringErrors', '.stream-error')
+TestReaderErrors.add_tests('testReaderFileErrors', '.stream-error')
+
+
Index: /branches/pyyaml3000/tests/test_yaml.py
===================================================================
--- /branches/pyyaml3000/tests/test_yaml.py	(revision 45)
+++ /branches/pyyaml3000/tests/test_yaml.py	(revision 46)
@@ -3,5 +3,5 @@
 
 from test_marker import *
-from test_stream import *
+from test_reader import *
 from test_canonical import *
 from test_tokens import *
Index: /branches/pyyaml3000/tests/test_tokens.py
===================================================================
--- /branches/pyyaml3000/tests/test_tokens.py	(revision 44)
+++ /branches/pyyaml3000/tests/test_tokens.py	(revision 46)
@@ -2,4 +2,6 @@
 import test_appliance
 
+from yaml.reader import *
+from yaml.tokens import *
 from yaml.scanner import *
 
@@ -51,5 +53,5 @@
         tokens2 = file(tokens_filename, 'rb').read().split()
         try:
-            scanner = Scanner(data_filename, file(data_filename, 'rb').read())
+            scanner = Scanner(Reader(file(data_filename, 'rb')))
             tokens1 = []
             while not isinstance(scanner.peek_token(), EndToken):
@@ -73,5 +75,5 @@
             tokens = None
             try:
-                scanner = Scanner(filename, file(filename, 'rb').read())
+                scanner = Scanner(Reader(file(filename, 'rb')))
                 tokens = []
                 while not isinstance(scanner.peek_token(), EndToken):
Index: /branches/pyyaml3000/tests/test_stream.py
===================================================================
--- /branches/pyyaml3000/tests/test_stream.py	(revision 45)
+++ 	(revision )
@@ -1,31 +1,0 @@
-
-import test_appliance
-from yaml.stream import Stream, StreamError
-
-class TestStreamErrors(test_appliance.TestAppliance):
-
-    def _testStreamUnicodeErrors(self, test_name, stream_filename):
-        try:
-            data = unicode(file(stream_filename, 'rb').read(), 'utf-8')
-        except:
-            return
-        self.failUnlessRaises(StreamError, lambda: self._load(stream_filename, data))
-
-    def _testStreamStringErrors(self, test_name, stream_filename):
-        data = file(stream_filename, 'rb').read()
-        self.failUnlessRaises(StreamError, lambda: self._load(stream_filename, data))
-
-    def _testStreamFileErrors(self, test_name, stream_filename):
-        data = file(stream_filename, 'rb')
-        self.failUnlessRaises(StreamError, lambda: self._load(stream_filename, data))
-
-    def _load(self, stream_filename, data):
-        stream = Stream(stream_filename, data)
-        while stream.peek() != u'\0':
-            stream.forward()
-
-TestStreamErrors.add_tests('testStreamUnicodeErrors', '.stream-error')
-TestStreamErrors.add_tests('testStreamStringErrors', '.stream-error')
-TestStreamErrors.add_tests('testStreamFileErrors', '.stream-error')
-
-
Index: /branches/pyyaml3000/lib/yaml/scanner.py
===================================================================
--- /branches/pyyaml3000/lib/yaml/scanner.py	(revision 45)
+++ /branches/pyyaml3000/lib/yaml/scanner.py	(revision 46)
@@ -9,100 +9,20 @@
 # ALIAS(name), ANCHOR(name), TAG(value), SCALAR(value, plain)
 
-
-from marker import Marker
-#from error import YAMLError
-from stream import Stream
-
-#class ScannerError(YAMLError):
-class ScannerError(Exception):
+__all__ = ['Scanner', 'ScannerError']
+
+from error import YAMLError
+from tokens import *
+
+class ScannerError(YAMLError):
+    # TODO:
+    # ScannerError: while reading a quoted string
+    #         in '...', line 5, column 10:
+    # key: "valu\?e"
+    #      ^
+    # got unknown quote character '?'
+    #         in '...', line 5, column 15:
+    # key: "valu\?e"
+    #            ^
     pass
-
-class Token:
-    def __init__(self, start_marker, end_marker):
-        self.start_marker = start_marker
-        self.end_marker = end_marker
-
-class DirectiveToken(Token):
-    pass
-
-class YAMLDirectiveToken(DirectiveToken):
-    def __init__(self, major_version, minor_version, start_marker, end_marker):
-        self.major_version = major_version
-        self.minor_version = minor_version
-        self.start_marker = start_marker
-        self.end_marker = end_marker
-
-class TagDirectiveToken(DirectiveToken):
-    pass
-
-class ReservedDirectiveToken(DirectiveToken):
-    def __init__(self, name, start_marker, end_marker):
-        self.name = name
-        self.start_marker = start_marker
-        self.end_marker = end_marker
-
-class DocumentStartToken(Token):
-    pass
-
-class DocumentEndToken(Token):
-    pass
-
-class EndToken(Token):
-    pass
-
-class BlockSequenceStartToken(Token):
-    pass
-
-class BlockMappingStartToken(Token):
-    pass
-
-class BlockEndToken(Token):
-    pass
-
-class FlowSequenceStartToken(Token):
-    pass
-
-class FlowMappingStartToken(Token):
-    pass
-
-class FlowSequenceEndToken(Token):
-    pass
-
-class FlowMappingEndToken(Token):
-    pass
-
-class KeyToken(Token):
-    pass
-
-class ValueToken(Token):
-    pass
-
-class EntryToken(Token):
-    pass
-
-class AliasToken(Token):
-    def __init__(self, value, start_marker, end_marker):
-        self.value = value
-        self.start_marker = start_marker
-        self.end_marker = end_marker
-
-class AnchorToken(Token):
-    def __init__(self, value, start_marker, end_marker):
-        self.value = value
-        self.start_marker = start_marker
-        self.end_marker = end_marker
-
-class TagToken(Token):
-    def __init__(self, value, start_marker, end_marker):
-        self.value = value
-        self.start_marker = start_marker
-        self.end_marker = end_marker
-
-class ScalarToken(Token):
-    def __init__(self, value, plain, start_marker, end_marker):
-        self.value = value
-        self.plain = plain
-        self.start_marker = start_marker
-        self.end_marker = end_marker
 
 class SimpleKey:
@@ -117,15 +37,16 @@
 class Scanner:
 
-    def __init__(self, source, data):
+
+    def __init__(self, reader):
         """Initialize the scanner."""
-        # The input stream. The Stream class do the dirty work of checking for
+        # The input stream. The Reader class do the dirty work of checking for
         # BOM and converting the input data to Unicode. It also adds NUL to
         # the end.
         #
-        # Stream supports the following methods
-        #   self.stream.peek(k=1)   # peek the next k characters
-        #   self.stream.forward(k=1)   # read the next k characters and move the
+        # Reader supports the following methods
+        #   self.reader.peek(k=1)   # peek the next k characters
+        #   self.reader.forward(k=1)   # read the next k characters and move the
         #                           # pointer
-        self.stream = Stream(source, data)
+        self.reader = reader
 
         # Had we reached the end of the stream?
@@ -218,13 +139,13 @@
         # Compare the current indentation and column. It may add some tokens
         # and decrease the current indentation level.
-        self.unwind_indent(self.stream.column)
+        self.unwind_indent(self.reader.column)
 
         #print
-        #print self.stream.get_marker().get_snippet()
+        #print self.reader.get_marker().get_snippet()
 
         # Peek the next character.
-        ch = self.stream.peek()
-
-        # Is it the end of stream?
+        ch = self.reader.peek()
+
+        # Is it the end of reader?
         if ch == u'\0':
             return self.fetch_end()
@@ -333,6 +254,6 @@
         for level in self.possible_simple_keys.keys():
             key = self.possible_simple_keys[level]
-            if key.line != self.stream.line  \
-                    or self.stream.index-key.index > 1024:
+            if key.line != self.reader.line  \
+                    or self.reader.index-key.index > 1024:
                 if key.required:
                     self.fail("simple key is required")
@@ -345,5 +266,5 @@
 
         # Check if a simple key is required at the current position.
-        required = not self.flow_level and self.indent == self.stream.column
+        required = not self.flow_level and self.indent == self.reader.column
 
         # The next token might be a simple key. Let's save it's number and
@@ -352,8 +273,8 @@
             self.remove_possible_simple_key()
             token_number = self.tokens_taken+len(self.tokens)
-            index = self.stream.index
-            line = self.stream.line
-            column = self.stream.column
-            marker = self.stream.get_marker()
+            index = self.reader.index
+            line = self.reader.line
+            column = self.reader.column
+            marker = self.reader.get_marker()
             key = SimpleKey(token_number, required,
                     index, line, column, marker)
@@ -381,5 +302,5 @@
         # In block context, we may need to issue the BLOCK-END tokens.
         while self.indent > column:
-            marker = self.stream.get_marker()
+            marker = self.reader.get_marker()
             self.indent = self.indents.pop()
             self.tokens.append(BlockEndToken(marker, marker))
@@ -405,10 +326,10 @@
 
         # Read the token.
-        marker = self.stream.get_marker()
+        marker = self.reader.get_marker()
         
         # Add END.
         self.tokens.append(EndToken(marker, marker))
 
-        # The stream is ended.
+        # The reader is ended.
         self.done = True
 
@@ -442,7 +363,7 @@
 
         # Add DOCUMENT-START or DOCUMENT-END.
-        start_marker = self.stream.get_marker()
-        self.stream.forward(3)
-        end_marker = self.stream.get_marker()
+        start_marker = self.reader.get_marker()
+        self.reader.forward(3)
+        end_marker = self.reader.get_marker()
         self.tokens.append(TokenClass(start_marker, end_marker))
 
@@ -465,7 +386,7 @@
 
         # Add FLOW-SEQUENCE-START or FLOW-MAPPING-START.
-        start_marker = self.stream.get_marker()
-        self.stream.forward()
-        end_marker = self.stream.get_marker()
+        start_marker = self.reader.get_marker()
+        self.reader.forward()
+        end_marker = self.reader.get_marker()
         self.tokens.append(TokenClass(start_marker, end_marker))
 
@@ -488,7 +409,7 @@
 
         # Add FLOW-SEQUENCE-END or FLOW-MAPPING-END.
-        start_marker = self.stream.get_marker()
-        self.stream.forward()
-        end_marker = self.stream.get_marker()
+        start_marker = self.reader.get_marker()
+        self.reader.forward()
+        end_marker = self.reader.get_marker()
         self.tokens.append(TokenClass(start_marker, end_marker))
 
@@ -503,6 +424,6 @@
 
             # We may need to add BLOCK-SEQUENCE-START.
-            if self.add_indent(self.stream.column):
-                marker = self.stream.get_marker()
+            if self.add_indent(self.reader.column):
+                marker = self.reader.get_marker()
                 self.tokens.append(BlockSequenceStartToken(marker, marker))
 
@@ -514,7 +435,7 @@
 
         # Add ENTRY.
-        start_marker = self.stream.get_marker()
-        self.stream.forward()
-        end_marker = self.stream.get_marker()
+        start_marker = self.reader.get_marker()
+        self.reader.forward()
+        end_marker = self.reader.get_marker()
         self.tokens.append(EntryToken(start_marker, end_marker))
 
@@ -529,6 +450,6 @@
 
             # We may need to add BLOCK-MAPPING-START.
-            if self.add_indent(self.stream.column):
-                marker = self.stream.get_marker()
+            if self.add_indent(self.reader.column):
+                marker = self.reader.get_marker()
                 self.tokens.append(BlockMappingStartToken(marker, marker))
 
@@ -540,7 +461,7 @@
 
         # Add KEY.
-        start_marker = self.stream.get_marker()
-        self.stream.forward()
-        end_marker = self.stream.get_marker()
+        start_marker = self.reader.get_marker()
+        self.reader.forward()
+        end_marker = self.reader.get_marker()
         self.tokens.append(KeyToken(start_marker, end_marker))
 
@@ -576,7 +497,7 @@
 
         # Add VALUE.
-        start_marker = self.stream.get_marker()
-        self.stream.forward()
-        end_marker = self.stream.get_marker()
+        start_marker = self.reader.get_marker()
+        self.reader.forward()
+        end_marker = self.reader.get_marker()
         self.tokens.append(ValueToken(start_marker, end_marker))
 
@@ -667,5 +588,5 @@
         # DIRECTIVE:        ^ '%' ...
         # The '%' indicator is already checked.
-        if self.stream.column == 0:
+        if self.reader.column == 0:
             return True
 
@@ -673,6 +594,6 @@
 
         # DOCUMENT-START:   ^ '---' (' '|'\n')
-        if self.stream.column == 0:
-            prefix = self.stream.peek(4)
+        if self.reader.column == 0:
+            prefix = self.reader.peek(4)
             if prefix[:3] == u'---' and prefix[3] in u'\0 \t\r\n\x85\u2028\u2029':
                 return True
@@ -681,6 +602,6 @@
 
         # DOCUMENT-END:     ^ '...' (' '|'\n')
-        if self.stream.column == 0:
-            prefix = self.stream.peek(4)
+        if self.reader.column == 0:
+            prefix = self.reader.peek(4)
             if prefix[:3] == u'...' and prefix[3] in u'\0 \t\r\n\x85\u2028\u2029':
                 return True
@@ -690,9 +611,9 @@
         # ENTRY(flow context):      ','
         if self.flow_level:
-            return self.stream.peek() == u','
+            return self.reader.peek() == u','
 
         # ENTRY(block context):     '-' (' '|'\n')
         else:
-            prefix = self.stream.peek(2)
+            prefix = self.reader.peek(2)
             return prefix[0] == u'-' and prefix[1] in u'\0 \t\r\n\x85\u2028\u2029'
 
@@ -705,5 +626,5 @@
         # KEY(block context):   '?' (' '|'\n')
         else:
-            prefix = self.stream.peek(2)
+            prefix = self.reader.peek(2)
             return prefix[1] in u'\0 \t\r\n\x85\u2028\u2029'
 
@@ -716,5 +637,5 @@
         # VALUE(block context): ':' (' '|'\n')
         else:
-            prefix = self.stream.peek(2)
+            prefix = self.reader.peek(2)
             return prefix[1] in u'\0 \t\r\n\x85\u2028\u2029'
 
@@ -727,11 +648,11 @@
         found = False
         while not found:
-            while self.stream.peek() == u' ':
-                self.stream.forward()
-            if self.stream.peek() == u'#':
-                while self.stream.peek() not in u'\r\n':
-                    self.stream.forward()
-            if self.stream.peek() in u'\r\n':
-                self.stream.forward()
+            while self.reader.peek() == u' ':
+                self.reader.forward()
+            if self.reader.peek() == u'#':
+                while self.reader.peek() not in u'\r\n':
+                    self.reader.forward()
+            if self.reader.peek() in u'\r\n':
+                self.reader.forward()
                 if not self.flow_level:
                     self.allow_simple_key = True
@@ -740,59 +661,59 @@
 
     def scan_directive(self):
-        marker = self.stream.get_marker()
-        if self.stream.peek(5) == u'%YAML ':
+        marker = self.reader.get_marker()  # the directive token gets this position as both start and end
+        if self.reader.peek(6) == u'%YAML ':  # 6 characters, trailing space included
             self.tokens.append(YAMLDirectiveToken(1, 1, marker, marker))
-        elif self.stream.peek(4) == u'%TAG ':
+        elif self.reader.peek(5) == u'%TAG ':  # 5 characters, trailing space included
             self.tokens.append(TagDirectiveToken(marker, marker))
         else:
             self.tokens.append(ReservedDirectiveToken('', marker, marker))
-        while self.stream.peek() not in u'\0\r\n':
-            self.stream.forward()
-        self.stream.forward()
+        while self.reader.peek() not in u'\0\r\n':  # skip the rest of the directive line
+            self.reader.forward()
+        self.reader.forward()  # step over the terminator; NOTE(review): also steps past NUL at end of input
 
     def scan_anchor(self, TokenClass):
-        start_marker = self.stream.get_marker()
-        while self.stream.peek() not in u'\0 \t\r\n,:':
-            self.stream.forward()
-        end_marker = self.stream.get_marker()
+        start_marker = self.reader.get_marker()  # position where the token begins
+        while self.reader.peek() not in u'\0 \t\r\n,:':  # stop at NUL, blank, line break, ',' or ':'
+            self.reader.forward()
+        end_marker = self.reader.get_marker()  # the skipped text is not stored: value is ''
         self.tokens.append(TokenClass('', start_marker, end_marker))
 
     def scan_tag(self):
-        start_marker = self.stream.get_marker()
-        while self.stream.peek() not in u'\0 \t\r\n':
-            self.stream.forward()
-        end_marker = self.stream.get_marker()
+        start_marker = self.reader.get_marker()  # position where the tag begins
+        while self.reader.peek() not in u'\0 \t\r\n':  # stop at NUL, blank or line break
+            self.reader.forward()
+        end_marker = self.reader.get_marker()  # the skipped text is not stored: value is ''
         self.tokens.append(TagToken('', start_marker, end_marker))
 
     def scan_block_scalar(self, folded):
-        start_marker = self.stream.get_marker()
+        start_marker = self.reader.get_marker()  # used as both start and end of the emitted token
         indent = self.indent+1
         if indent < 1:
             indent = 1
         while True:
-            while self.stream.peek() and self.stream.peek() and self.stream.peek() not in u'\0\r\n\x85\u2028\u2029':
-                self.stream.forward()
-            if self.stream.peek() != u'\0':
-                self.stream.forward()
+            while self.reader.peek() not in u'\0\r\n\x85\u2028\u2029':  # skip to the end of the line
+                self.reader.forward()
+            if self.reader.peek() != u'\0':  # consume the line break, but never the final NUL
+                self.reader.forward()
             count = 0
-            while count < indent and self.stream.peek() == u' ':
-                self.stream.forward()
+            while count < indent and self.reader.peek() == u' ':  # consume at most `indent` leading spaces
+                self.reader.forward()
                 count += 1
-            if count < indent and self.stream.peek() not in u'#\r\n\x85\u2028\u2029':
+            if count < indent and self.reader.peek() not in u'#\r\n\x85\u2028\u2029':  # under-indented content ends the scalar
                 break
         self.tokens.append(ScalarToken('', False, start_marker, start_marker))
 
     def scan_flow_scalar(self, double):
-        marker = self.stream.get_marker()
-        quote = self.stream.peek()
-        self.stream.forward()
-        while self.stream.peek() != quote:
-            if double and self.stream.peek() == u'\\':
-                self.stream.forward(2)
-            elif not double and self.stream.peek(3)[1:] == u'\'\'':
-                self.stream.forward(3)
+        marker = self.reader.get_marker()  # used as both start and end of the emitted token
+        quote = self.reader.peek()  # the opening quote character; the scalar ends at its next occurrence
+        self.reader.forward()
+        while self.reader.peek() != quote:
+            if double and self.reader.peek() == u'\\':  # backslash: skip it together with the next character
+                self.reader.forward(2)
+            elif not double and self.reader.peek(3)[1:] == u'\'\'':  # two quotes after the current character
+                self.reader.forward(3)
             else:
-                self.stream.forward(1)
-        self.stream.forward(1)
+                self.reader.forward(1)
+        self.reader.forward(1)  # step over the closing quote
         self.tokens.append(ScalarToken('', False, marker, marker))
 
@@ -802,25 +723,25 @@
             indent = 1
         space = False
-        marker = self.stream.get_marker()
+        marker = self.reader.get_marker()
         while True:
-            while self.stream.peek() == u' ':
-                self.stream.forward()
+            while self.reader.peek() == u' ':
+                self.reader.forward()
                 space = True
-            while self.stream.peek() not in u'\0\r\n?:,[]{}#'   \
-                    or (not space and self.stream.peek() == '#')    \
-                    or (not self.flow_level and self.stream.peek() in '?,[]{}') \
-                    or (not self.flow_level and self.stream.peek() == ':' and self.stream.peek(2)[1] not in u' \0\r\n'):
-                space = self.stream.peek() not in u' \t'
-                self.stream.forward()
+            while self.reader.peek() not in u'\0\r\n?:,[]{}#'   \
+                    or (not space and self.reader.peek() == '#')    \
+                    or (not self.flow_level and self.reader.peek() in '?,[]{}') \
+                    or (not self.flow_level and self.reader.peek() == ':' and self.reader.peek(2)[1] not in u' \0\r\n'):
+                space = self.reader.peek() not in u' \t'
+                self.reader.forward()
                 self.allow_simple_key = False
-            if self.stream.peek() not in u'\r\n':
+            if self.reader.peek() not in u'\r\n':
                 break
-            while self.stream.peek() in u'\r\n':
-                self.stream.forward()
+            while self.reader.peek() in u'\r\n':
+                self.reader.forward()
                 if not self.flow_level:
                     self.allow_simple_key = True
             count = 0
-            while self.stream.peek() == u' ' and count < indent:
-                self.stream.forward()
+            while self.reader.peek() == u' ' and count < indent:
+                self.reader.forward()
                 count += 1
             if count < indent:
Index: /branches/pyyaml3000/lib/yaml/parser.py
===================================================================
--- /branches/pyyaml3000/lib/yaml/parser.py	(revision 44)
+++ /branches/pyyaml3000/lib/yaml/parser.py	(revision 46)
@@ -40,7 +40,8 @@
 # flow_mapping_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY }
 
-from scanner import *
-
-class Error(Exception):
+from error import YAMLError
+from tokens import *
+
+class ParserError(YAMLError):
     pass
 
@@ -77,6 +78,6 @@
 class Parser:
 
-    def __init__(self, source, data):
-        self.scanner = Scanner(source, data)
+    def __init__(self, scanner):
+        self.scanner = scanner
 
     def is_token(self, *choices):
Index: /branches/pyyaml3000/lib/yaml/reader.py
===================================================================
--- /branches/pyyaml3000/lib/yaml/reader.py	(revision 46)
+++ /branches/pyyaml3000/lib/yaml/reader.py	(revision 46)
@@ -0,0 +1,244 @@
+# This module contains abstractions for the input stream. You don't have to
+# look any further; there is no pretty code here.
+#
+# We define two classes here (plus the ReaderError exception).
+#
+#   Marker(name, line, column, buffer, pointer)
+# It's just a record and its only use is producing nice error messages.
+# Parser does not use it for any other purposes.
+#
+#   Reader(data)
+# Reader determines the encoding of `data` and converts it to unicode.
+# Reader provides the following methods and attributes:
+#   reader.peek(length=1) - return the next `length` characters.
+#   reader.forward(length=1) - move the current position forward by `length` characters.
+#   reader.index - the number of the current character.
+#   reader.line, reader.column - the line and the column of the current character.
+
+__all__ = ['Marker', 'Reader', 'ReaderError']
+
+from error import YAMLError
+
+import codecs, re
+
+# Unfortunately, the codec functions in Python 2.3 do not support the third
+# (`final`) argument, so we have to write our own wrappers (`finish` below).
+
+try:
+    codecs.utf_8_decode('', 'strict', False)  # probe: TypeError if the third argument is unsupported
+    from codecs import utf_8_decode, utf_16_le_decode, utf_16_be_decode
+
+except TypeError:
+    # Fallbacks: unless `finish` is set, trim a possibly incomplete trailing character.
+    def utf_16_le_decode(data, errors, finish=False):
+        if not finish and len(data) % 2 == 1:  # odd length: the last code unit is cut in half
+            data = data[:-1]
+        return codecs.utf_16_le_decode(data, errors)
+
+    def utf_16_be_decode(data, errors, finish=False):
+        if not finish and len(data) % 2 == 1:  # odd length: the last code unit is cut in half
+            data = data[:-1]
+        return codecs.utf_16_be_decode(data, errors)
+
+    def utf_8_decode(data, errors, finish=False):
+        if not finish:
+            # We are trying to remove a possibly incomplete multibyte character
+            # from the suffix of the data.
+            # The first byte of a multi-byte sequence is in the range 0xc0 to 0xfd.
+            # All further bytes are in the range 0x80 to 0xbf.
+            # UTF-8 encoded UCS characters may be up to six bytes long.
+            count = 0  # number of trailing continuation bytes seen so far
+            while count < 5 and count < len(data)   \
+                    and '\x80' <= data[-count-1] <= '\xBF':
+                count += 1  # walk backwards from the end of the data
+            if count < 5 and count < len(data)  \
+                    and '\xC0' <= data[-count-1] <= '\xFD':
+                data = data[:-count-1]  # cut from the lead byte; the returned length excludes the cut bytes
+        return codecs.utf_8_decode(data, errors)
+
+class Marker:
+    # A recorded input position; get_snippet() renders the line it points into.
+    def __init__(self, name, line, column, buffer, pointer):
+        self.name = name
+        self.line = line
+        self.column = column
+        self.buffer = buffer    # text the position refers to, or None (then no snippet)
+        self.pointer = pointer  # offset of the marked character within buffer
+
+    def get_snippet(self, max_length=79):
+        if self.buffer is None:  # no text available to quote
+            return None
+        head = ''
+        start = self.pointer
+        while start > 0 and self.buffer[start-1] not in u'\0\r\n\x85\u2028\u2029':  # back up to the line start
+            start -= 1
+            if self.pointer-start > max_length/2-1:  # too far left: truncate and mark with ' ... '
+                head = ' ... '
+                start += 5
+                break
+        tail = ''
+        end = self.pointer
+        while end < len(self.buffer) and self.buffer[end] not in u'\0\r\n\x85\u2028\u2029':  # advance to the line end
+            end += 1
+            if end-self.pointer > max_length/2-1:  # too far right: truncate and mark with ' ... '
+                tail = ' ... '
+                end -= 5
+                break
+        snippet = self.buffer[start:end].encode('utf-8')
+        return head + snippet + tail + '\n'  \
+                + ' '*(self.pointer-start+len(head)) + '^' + '\n'  # second line: caret under the marked character
+
+class ReaderError(YAMLError):
+    # Carries the details of an input character or byte that Reader rejected.
+    def __init__(self, name, position, character, encoding, reason):
+        self.name = name
+        self.character = character  # the offending byte (`str`) or character (`unicode`)
+        self.position = position
+        self.encoding = encoding
+        self.reason = reason
+
+    def __str__(self):
+        if isinstance(self.character, str):  # a raw byte: report it as a decoding failure
+            return "'%s' codec can't decode byte #x%02x: %s\n"  \
+                    "\tin '%s', position %d."   \
+                    % (self.encoding, ord(self.character), self.reason,
+                            self.name, self.position)
+        else:  # otherwise report an unacceptable character
+            return "unacceptable character #x%04x: %s\n"    \
+                    "\tin '%s', position %d."   \
+                    % (ord(self.character), self.reason,
+                            self.name, self.position)
+
+class Reader:
+    # Reader:
+    # - determines the data encoding and converts it to unicode,
+    # - checks if characters are in allowed range,
+    # - adds '\0' to the end.
+
+    # Reader accepts
+    #  - a `str` object,
+    #  - a `unicode` object,
+    #  - a file-like object with its `read` method returning `str`,
+    #  - a file-like object with its `read` method returning `unicode`.
+
+    # Yeah, it's ugly and slow.
+
+    def __init__(self, data):
+        self.name = None            # label used in markers and error messages
+        self.stream = None          # file-like source, or None for string input
+        self.stream_pointer = 0     # amount of raw data read from the stream so far
+        self.eof = True             # True once no more raw data can be read
+        self.buffer = u''           # decoded characters not yet discarded
+        self.pointer = 0            # offset of the current character in buffer
+        self.raw_buffer = None      # raw data not yet decoded; None when nothing is left
+        self.raw_decode = None      # decoder for raw_buffer; None if it is already unicode
+        self.index = 0              # number of characters consumed so far
+        self.line = 0
+        self.column = 0
+        if isinstance(data, unicode):
+            self.name = "<unicode string>"
+            self.check_printable(data)
+            self.buffer = data+u'\0'
+        elif isinstance(data, str):
+            self.name = "<string>"
+            self.raw_buffer = data
+            self.determine_encoding()
+        else:
+            self.stream = data
+            self.name = getattr(data, 'name', "<file>")
+            self.eof = False
+            self.raw_buffer = ''
+            self.determine_encoding()
+
+    def peek(self, length=1):
+        if self.pointer+length >= len(self.buffer):  # no more than `length` characters left: refill
+            self.update(length)
+        return self.buffer[self.pointer:self.pointer+length]
+
+    def forward(self, length=1):
+        if self.pointer+length+1 >= len(self.buffer):  # keep one character of lookahead for the CR check
+            self.update(length+1)
+        for k in range(length):
+            ch = self.buffer[self.pointer]
+            self.pointer += 1
+            self.index += 1
+            if ch in u'\n\x85\u2028\u2029'  \
+                    or (ch == u'\r' and self.buffer[self.pointer] != u'\n'):  # pointer is already on the next character
+                self.line += 1
+                self.column = 0
+            elif ch != u'\uFEFF':  # a BOM does not advance the column
+                self.column += 1
+
+    def get_marker(self):
+        if self.stream is None:  # string input: the marker also gets the buffer and pointer
+            return Marker(self.name, self.line, self.column,
+                    self.buffer, self.pointer)
+        else:
+            return Marker(self.name, self.line, self.column, None, None)
+
+    def determine_encoding(self):
+        while not self.eof and len(self.raw_buffer) < 2:  # the BOM checks below need two bytes
+            self.update_raw()
+        if not isinstance(self.raw_buffer, unicode):
+            if self.raw_buffer.startswith(codecs.BOM_UTF16_LE):
+                self.raw_decode = utf_16_le_decode
+            elif self.raw_buffer.startswith(codecs.BOM_UTF16_BE):
+                self.raw_decode = utf_16_be_decode
+            else:
+                self.raw_decode = utf_8_decode  # no UTF-16 BOM: treat the data as UTF-8
+        self.update(1)
+
+    NON_PRINTABLE = re.compile(u'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD]')
+    def check_printable(self, data):
+        match = self.NON_PRINTABLE.search(data)
+        if match:
+            character = match.group()
+            position = self.index+(len(self.buffer)-self.pointer)+match.start()  # `data` follows the unread buffer
+            raise ReaderError(self.name, position, character,
+                    'unicode', "special characters are not allowed")
+
+    def update(self, length):
+        if self.raw_buffer is None:  # nothing left to decode
+            return
+        self.buffer = self.buffer[self.pointer:]  # drop the characters already consumed
+        self.pointer = 0
+        while len(self.buffer) < length:
+            if not self.eof:
+                self.update_raw()
+            if self.raw_decode is not None:
+                try:
+                    data, converted = self.raw_decode(self.raw_buffer,
+                            'strict', self.eof)
+                except UnicodeDecodeError, exc:
+                    character = exc.object[exc.start]
+                    if self.stream is not None:
+                        position = self.stream_pointer-len(self.raw_buffer)+exc.start
+                    else:
+                        position = exc.start
+                    raise ReaderError(self.name, position, character,
+                            exc.encoding, exc.reason)
+            else:
+                data = self.raw_buffer
+                converted = len(data)
+            self.check_printable(data)
+            self.buffer += data
+            self.raw_buffer = self.raw_buffer[converted:]  # keep whatever was not decoded this time
+            if self.eof:
+                self.buffer += u'\0'  # NUL marks the end of input
+                self.raw_buffer = None
+                break
+
+    def update_raw(self, size=1024):
+        data = self.stream.read(size)
+        if data:
+            self.raw_buffer += data
+            self.stream_pointer += len(data)
+        else:
+            self.eof = True
+
+#try:
+#    import psyco
+#    psyco.bind(Reader)
+#except ImportError:
+#    pass
+
Index: /branches/pyyaml3000/lib/yaml/tokens.py
===================================================================
--- /branches/pyyaml3000/lib/yaml/tokens.py	(revision 46)
+++ /branches/pyyaml3000/lib/yaml/tokens.py	(revision 46)
@@ -0,0 +1,89 @@
+
+class Token:  # Base class of every token class in this module.
+    def __init__(self, start_marker, end_marker):  # stores both arguments unchanged
+        self.start_marker = start_marker
+        self.end_marker = end_marker
+
+class DirectiveToken(Token):  # Common base of the *DirectiveToken classes; adds nothing to Token.
+    pass
+
+class YAMLDirectiveToken(DirectiveToken):  # Directive token that also carries major_version and minor_version.
+    def __init__(self, major_version, minor_version, start_marker, end_marker):  # note: markers come last
+        self.major_version = major_version
+        self.minor_version = minor_version
+        self.start_marker = start_marker  # assigned directly; Token.__init__ is not called
+        self.end_marker = end_marker
+
+class TagDirectiveToken(DirectiveToken):  # No fields of its own; uses the inherited Token.__init__(start_marker, end_marker).
+    pass
+
+class ReservedDirectiveToken(DirectiveToken):  # Directive token that also carries a name.
+    def __init__(self, name, start_marker, end_marker):  # note: markers come after name
+        self.name = name
+        self.start_marker = start_marker  # assigned directly; Token.__init__ is not called
+        self.end_marker = end_marker
+
+class DocumentStartToken(Token):  # Position-only token: nothing beyond the inherited start_marker/end_marker.
+    pass
+
+class DocumentEndToken(Token):  # Position-only token: nothing beyond the inherited start_marker/end_marker.
+    pass
+
+class EndToken(Token):  # Position-only token; presumably marks the end of the token stream -- confirm in the scanner.
+    pass
+
+class BlockSequenceStartToken(Token):  # Position-only token: nothing beyond the inherited start_marker/end_marker.
+    pass
+
+class BlockMappingStartToken(Token):  # Position-only token: nothing beyond the inherited start_marker/end_marker.
+    pass
+
+class BlockEndToken(Token):  # Position-only token; presumably closes either kind of block collection -- confirm in the scanner.
+    pass
+
+class FlowSequenceStartToken(Token):  # Position-only token: nothing beyond the inherited start_marker/end_marker.
+    pass
+
+class FlowMappingStartToken(Token):  # Position-only token: nothing beyond the inherited start_marker/end_marker.
+    pass
+
+class FlowSequenceEndToken(Token):  # Position-only token: nothing beyond the inherited start_marker/end_marker.
+    pass
+
+class FlowMappingEndToken(Token):  # Position-only token: nothing beyond the inherited start_marker/end_marker.
+    pass
+
+class KeyToken(Token):  # Position-only token: nothing beyond the inherited start_marker/end_marker.
+    pass
+
+class ValueToken(Token):  # Position-only token: nothing beyond the inherited start_marker/end_marker.
+    pass
+
+class EntryToken(Token):  # Position-only token: nothing beyond the inherited start_marker/end_marker.
+    pass
+
+class AliasToken(Token):  # Token that carries a value in addition to its markers.
+    def __init__(self, value, start_marker, end_marker):  # same signature as AnchorToken and TagToken
+        self.value = value
+        self.start_marker = start_marker  # assigned directly; Token.__init__ is not called
+        self.end_marker = end_marker
+
+class AnchorToken(Token):  # Token that carries a value in addition to its markers.
+    def __init__(self, value, start_marker, end_marker):  # same signature as AliasToken and TagToken
+        self.value = value
+        self.start_marker = start_marker  # assigned directly; Token.__init__ is not called
+        self.end_marker = end_marker
+
+class TagToken(Token):  # Token that carries a value in addition to its markers.
+    def __init__(self, value, start_marker, end_marker):  # same signature as AliasToken and AnchorToken
+        self.value = value
+        self.start_marker = start_marker  # assigned directly; Token.__init__ is not called
+        self.end_marker = end_marker
+
+class ScalarToken(Token):  # Token that carries a value and a `plain` attribute in addition to its markers.
+    def __init__(self, value, plain, start_marker, end_marker):  # note: markers come last
+        self.value = value
+        self.plain = plain  # presumably a flag for plain (unquoted) scalars -- confirm in the scanner
+        self.start_marker = start_marker  # assigned directly; Token.__init__ is not called
+        self.end_marker = end_marker
+
Index: /branches/pyyaml3000/lib/yaml/stream.py
===================================================================
--- /branches/pyyaml3000/lib/yaml/stream.py	(revision 45)
+++ 	(revision )
@@ -1,229 +1,0 @@
-# This module contains abstractions for the input stream. You don't have to
-# looks further, there are no pretty code.
-#
-# We define two classes here.
-#
-#   Marker(source, line, column)
-# It's just a record and its only use is producing nice error messages.
-# Parser does not use it for any other purposes.
-#
-#   Stream(source, data)
-# Stream determines the encoding of `data` and converts it to unicode.
-# Stream provides the following methods and attributes:
-#   stream.peek(length=1) - return the next `length` characters
-#   stream.forward(length=1) - move the current position to `length` characters.
-#   stream.index - the number of the current character.
-#   stream.line, stream.column - the line and the column of the current character.
-
-
-from error import YAMLError
-
-import codecs, re
-
-# Unfortunately, codec functions in Python 2.3 does not support the `finish`
-# arguments, so we have to write our own wrappers.
-
-try:
-    codecs.utf_8_decode('', 'strict', False)
-    from codecs import utf_8_decode, utf_16_le_decode, utf_16_be_decode
-
-except TypeError:
-
-    def utf_16_le_decode(data, errors, finish=False):
-        if not finish and len(data) % 2 == 1:
-            data = data[:-1]
-        return codecs.utf_16_le_decode(data, errors)
-
-    def utf_16_be_decode(data, errors, finish=False):
-        if not finish and len(data) % 2 == 1:
-            data = data[:-1]
-        return codecs.utf_16_be_decode(data, errors)
-
-    def utf_8_decode(data, errors, finish=False):
-        if not finish:
-            # We are trying to remove a possible incomplete multibyte character
-            # from the suffix of the data.
-            # The first byte of a multi-byte sequence is in the range 0xc0 to 0xfd.
-            # All further bytes are in the range 0x80 to 0xbf.
-            # UTF-8 encoded UCS characters may be up to six bytes long.
-            count = 0
-            while count < 5 and count < len(data)   \
-                    and '\x80' <= data[-count-1] <= '\xBF':
-                count -= 1
-            if count < 5 and count < len(data)  \
-                    and '\xC0' <= data[-count-1] <= '\xFD':
-                data = data[:-count-1]
-        return codecs.utf_8_decode(data, errors)
-
-class Marker:
-
-    def __init__(self, source, line, column, buffer, pointer):
-        self.source = source
-        self.line = line
-        self.column = column
-        self.buffer = buffer
-        self.pointer = pointer
-
-    def get_snippet(self, max_length=79):
-        if self.buffer is None:
-            return None
-        head = ''
-        start = self.pointer
-        while start > 0 and self.buffer[start-1] not in u'\0\r\n\x85\u2028\u2029':
-            start -= 1
-            if self.pointer-start > max_length/2-1:
-                head = ' ... '
-                start += 5
-                break
-        tail = ''
-        end = self.pointer
-        while end < len(self.buffer) and self.buffer[end] not in u'\0\r\n\x85\u2028\u2029':
-            end += 1
-            if end-self.pointer > max_length/2-1:
-                tail = ' ... '
-                end -= 5
-                break
-        snippet = self.buffer[start:end].encode('utf-8')
-        return head + snippet + tail + '\n'  \
-                + ' '*(self.pointer-start+len(head)) + '^' + '\n'
-
-class StreamError(YAMLError):
-
-    def __init__(self, source, encoding, character, position, reason):
-        self.source = source
-        self.encoding = encoding
-        self.character = character
-        self.position = position
-        self.reason = reason
-
-    def __str__(self):
-        if isinstance(self.character, str):
-            return "'%s' codec can't decode byte #x%02x: %s\n"  \
-                    "\tin file '%s', position %d."   \
-                    % (self.encoding, ord(self.character), self.reason,
-                            self.source, self.position)
-        else:
-            return "unacceptable character #x%04x: %s\n"    \
-                    "\tin file '%s', position %d."   \
-                    % (ord(self.character), self.reason,
-                            self.source, self.position)
-
-class Stream:
-    # Stream:
-    # - determines the data encoding and converts it to unicode,
-    # - checks if characters are in allowed range,
-    # - adds '\0' to the end.
-
-    # Yeah, it's ugly and slow.
-
-    def __init__(self, source, data):
-        self.source = source
-        self.stream = None
-        self.stream_pointer = 0
-        self.eof = True
-        self.buffer = u''
-        self.pointer = 0
-        self.raw_buffer = None
-        self.raw_decoder = None
-        self.index = 0
-        self.line = 0
-        self.column = 0
-        if isinstance(data, unicode):
-            self.check_printable(data)
-            self.buffer = data+u'\0'
-        elif isinstance(data, str):
-            self.raw_buffer = data
-            self.determine_encoding()
-        else:
-            self.stream = data
-            self.eof = False
-            self.raw_buffer = ''
-            self.determine_encoding()
-
-    def peek(self, length=1):
-        if self.pointer+length >= len(self.buffer):
-            self.update(length)
-        return self.buffer[self.pointer:self.pointer+length]
-
-    def forward(self, length=1):
-        if self.pointer+length+1 >= len(self.buffer):
-            self.update(length+1)
-        for k in range(length):
-            ch = self.buffer[self.pointer]
-            self.pointer += 1
-            self.index += 1
-            if ch in u'\n\x85\u2028\u2029'  \
-                    or (ch == u'\r' and self.buffer[self.pointer+1] != u'\n'):
-                self.line += 1
-                self.column = 0
-            elif ch != u'\uFEFF':
-                self.column += 1
-
-    def get_marker(self):
-        if self.stream is None:
-            return Marker(self.source, self.line, self.column,
-                    self.buffer, self.pointer)
-        else:
-            return Marker(self.source, self.line, self.column, None, None)
-
-    def determine_encoding(self):
-        while not self.eof and len(self.raw_buffer) < 2:
-            self.update_raw()
-        if self.raw_buffer.startswith(codecs.BOM_UTF16_LE):
-            self.raw_decode = utf_16_le_decode
-        elif self.raw_buffer.startswith(codecs.BOM_UTF16_BE):
-            self.raw_decode = utf_16_be_decode
-        else:
-            self.raw_decode = utf_8_decode
-        self.update(1)
-
-    NON_PRINTABLE = re.compile(u'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD]')
-    def check_printable(self, data):
-        match = self.NON_PRINTABLE.search(data)
-        if match:
-            character = match.group()
-            position = self.index+(len(self.buffer)-self.pointer)+match.start()
-            raise StreamError(self.source, 'unicode', character, position,
-                    "control characters are not allowed")
-
-    def update(self, length):
-        if self.raw_buffer is None:
-            return
-        self.buffer = self.buffer[self.pointer:]
-        self.pointer = 0
-        while len(self.buffer) < length:
-            if not self.eof:
-                self.update_raw()
-            try:
-                data, converted = self.raw_decode(self.raw_buffer,
-                        'strict', self.eof)
-            except UnicodeDecodeError, exc:
-                character = exc.object[exc.start]
-                if self.stream is not None:
-                    position = self.stream_pointer-len(self.raw_buffer)+exc.start
-                else:
-                    position = exc.start
-                raise StreamError(self.source, exc.encoding,
-                        character, position, exc.reason)
-            self.check_printable(data)
-            self.buffer += data
-            self.raw_buffer = self.raw_buffer[converted:]
-            if self.eof:
-                self.buffer += u'\0'
-                self.raw_buffer = None
-                break
-
-    def update_raw(self, size=1024):
-        data = self.stream.read(size)
-        if data:
-            self.raw_buffer += data
-            self.stream_pointer += len(data)
-        else:
-            self.eof = True
-
-#try:
-#    import psyco
-#    psyco.bind(Stream)
-#except ImportError:
-#    pass
-
Index: /branches/pyyaml3000/lib/yaml/marker.py
===================================================================
--- /branches/pyyaml3000/lib/yaml/marker.py	(revision 43)
+++ 	(revision )
@@ -1,35 +1,0 @@
-
-class Marker:
-
-    def __init__(self, source, data, index, line, column):
-        self.source = source
-        self.data = data
-        self.index = index
-        self.line = line
-        self.column = column
-
-    def get_snippet(self, max_length=79):
-        if not isinstance(self.data, basestring):
-            return None
-        head = ''
-        start = self.index
-        while start > 0 and self.data[start-1] not in '\r\n':
-            start -= 1
-            if self.index-start > max_length/2-1:
-                head = ' ... '
-                start += 5
-                break
-        tail = ''
-        end = self.index
-        while end < len(self.data) and self.data[end] not in '\r\n':
-            end += 1
-            if end-self.index > max_length/2-1:
-                tail = ' ... '
-                end -= 5
-                break
-        snippet = self.data[start:end]
-        if isinstance(snippet, unicode):
-            snippet = snippet.encode('utf-8')
-        return head + snippet + tail + '\n'  \
-                + ' '*(self.index-start+len(head)) + '^' + '\n'
-
