"""
Schema-driven validating parse of YAML documents.

Part of the parse/pull experimental code.

This does a schema-driven validating parse of YAML documents, but
it's built on top of a crufty interim solution.

The schema-driven parser requires a pull parser interface.  Ideally
a pull parser would be pulling nodes from a YAML document on an
as-needed basis, and this is the eventual goal.

But, I don't have a pull parser yet, so I simulate one by reading
the entire YAML document into a Python data structure, then I do
a push-based dump of the data structure to a mock emitter that stores
up a list of parser events that a mock parser then serves up to the
schema-driven parser on an as-needed basis.

Sounds complex, but there's really not much code involved.

Nothing fancy is supported yet--just lists, dictionaries, and
scalars; no aliases, class transformations, multiple docs, etc.

Also, we lose the sort order on map keys, so you will notice that
all the examples have alphabetically sorted keys.

Also, once we go to a true pull parser, we could have more metadata,
such as line numbers for nodes, attached comments, etc., that
can help with error reporting and round-tripping issues.
"""

import YamlTest
from here import flushLeft
from test import assertEquals, assertError
from TestPullParser import mockParser, Loader
from yaml import load

# Each test case is a map with:
#   data   -- the YAML document to parse (as a block scalar)
#   schema -- the schema the document is validated against
#   error  -- (optional) the exact message of the expected failure
testCases = """
-
    data: |
        --- foo
    schema:
        type: scalar
-
    data: |
        --- foo
    schema:
        type: seq
    error: |
        Wanted seq, got scalar
-
    data: &list123 |
        ---
        - 1
        - 2
        - 3
    schema:
        type: seq
        child:
            type: scalar
-
    data: *list123
    schema:
        type: seq
        max: 2
        child:
            type: scalar
    error: |
        Seq has max 2 elements
-
    data: |
        ---
        city: New Orleans
        state: LA
        street: Bourbon
    schema: &StreetCityState
        type: map
        items:
            -
                name: city
                value:
                    type: scalar
            -
                name: state
                value:
                    type: scalar
            -
                name: street
                value:
                    type: scalar
-
    data: |
        ---
        city: New Orleans
        state: LA
        where ya got ya shoes: on ya feet, on Bourbon St.
    schema: *StreetCityState
    error: |
        Expected key 'street', got 'where ya got ya shoes'
-
    data: |
        ---
        banana: yellow
        carrot: orange
        people:
            -
                fname: al
                salary: 44
            -
                fname: bob
                salary: 33
    schema:
        type: map
        items:
            -
                name: banana
                value:
                    type: scalar
            -
                name: carrot
                value:
                    type: scalar
            -
                name: people
                value:
                    type: seq
                    child:
                        type: map
                        items:
                            -
                                name: fname
                                value:
                                    type: scalar
                            -
                                name: salary
                                value:
                                    type: scalar
"""


class ValidatingLoader:
    """Build Python data from parser events while checking it against a schema.

    A schema is a dictionary with a 'type' key ('scalar', 'seq' or 'map').
    A 'seq' schema carries a 'child' schema and an optional 'max' count;
    a 'map' schema carries an ordered 'items' list of
    {'name': ..., 'value': <schema>} entries.
    """

    def load(self, data, schema):
        """Parse the YAML text `data` and return it, validated against `schema`.

        Raises Exception with a descriptive message on the first mismatch.
        """
        self.simulateParser(data)
        return self.loadData(schema)

    def loadData(self, schema):
        """Load the next node offered by the parser against `schema`."""
        typ = self.parser.getType()
        return self._load(typ, schema)

    def _load(self, typ, schema):
        """Load a node whose type `typ` has already been read from the parser.

        Raises Exception when `typ` is not the type the schema asks for.
        """
        if typ != schema['type']:
            raise Exception("Wanted %s, got %s\n" % (schema['type'], typ))
        if typ == 'seq':
            return self._loadSeq(schema)
        if typ == 'map':
            return self._loadMap(schema)
        return self.parser.getScalar()

    def _loadSeq(self, schema):
        """Load sequence elements until the parser reports a type of None.

        Every element is validated against schema['child']; the optional
        schema['max'] caps the number of elements.
        """
        results = []
        cnt = 0
        maxCount = schema.get('max', None)
        childSchema = schema['child']
        while 1:
            typ = self.parser.getType()
            if typ is None:
                # A type of None is treated as the end of the sequence.
                return results
            cnt += 1
            # Check the limit before loading, so an over-long sequence
            # fails on its first surplus element.
            self.checkMax(cnt, maxCount)
            results.append(self._load(typ, childSchema))

    def _loadMap(self, schema):
        """Load a map whose keys must appear in the order of schema['items'].

        Raises Exception (via checkName) when a key differs from the name
        expected at that position.
        """
        results = {}
        for item in schema['items']:
            # The type of the key node is read and discarded; keys are
            # then fetched as scalars.
            self.parser.getType()
            name = self.parser.getScalar()
            self.checkName(name, item)
            value = self.loadData(item['value'])
            results[name] = value
        # NOTE(review): presumably this consumes the end-of-map marker;
        # the result is ignored, so surplus keys in the document are not
        # reported here -- confirm against mockParser.
        self.parser.getType()
        return results

    def checkMax(self, cnt, max):
        """Raise Exception if `cnt` exceeds `max`; a `max` of None means no limit."""
        if max is not None and cnt > max:
            raise Exception("Seq has max %d elements\n" % max)

    def checkName(self, name, item):
        """Raise Exception unless `name` equals the schema item's 'name'."""
        if name != item['name']:
            raise Exception("Expected key '%s', got '%s'\n" %
                            (item['name'], name))

    def simulateParser(self, data):
        """Install self.parser, a mock pull parser over the loaded document."""
        # This is the huge hack to work around
        # not having a true pull parser
        self.parser = mockParser(oldYamlLoad(data))


def testRoundTrip(data, schema):
    """Assert that the validating load yields the same data as a plain load."""
    expected = oldYamlLoad(data)
    obj = ValidatingLoader().load(data, schema)
    assertEquals(expected, obj)


def oldYamlLoad(data):
    """Return the first document of `data` using the regular (push) loader."""
    return load(data).next()


def testOneCase(test):
    """Run one entry of testCases, expecting test['error'] when it is given."""
    data = test['data']
    schema = test['schema']
    if 'error' in test:
        assertError(lambda: testRoundTrip(data, schema), test['error'])
    else:
        testRoundTrip(data, schema)


class Test(YamlTest.YamlTest):
    """Drives every case described in the testCases YAML document."""

    def testFromYaml(self):
        for test in load(testCases).next():
            testOneCase(test)


if __name__ == '__main__':
    import unittest
    unittest.main()