Changeset 208 for libyaml/trunk/src/reader.c
- Timestamp: 07/21/06 09:50:32 (7 years ago)
- File: 1 edited
libyaml/trunk/src/reader.c (modified) (19 diffs)
Legend:
- Unmodified
- Added
- Removed
libyaml/trunk/src/reader.c
r200 r208 1 1 2 #if HAVE_CONFIG_H 3 #include <config.h> 4 #endif 5 6 #include <yaml.h> 7 8 #include <assert.h> 2 #include "yaml_private.h" 3 4 /* 5 * Declarations. 6 */ 7 8 static int 9 yaml_parser_set_reader_error(yaml_parser_t *parser, const char *problem, 10 size_t offset, int value); 11 12 static int 13 yaml_parser_update_raw_buffer(yaml_parser_t *parser); 14 15 static int 16 yaml_parser_determine_encoding(yaml_parser_t *parser); 17 18 YAML_DECLARE(int) 19 yaml_parser_update_buffer(yaml_parser_t *parser, size_t length); 9 20 10 21 /* … … 25 36 26 37 /* 38 * Byte order marks. 39 */ 40 41 #define BOM_UTF8 "\xef\xbb\xbf" 42 #define BOM_UTF16LE "\xff\xfe" 43 #define BOM_UTF16BE "\xfe\xff" 44 45 /* 46 * Determine the input stream encoding by checking the BOM symbol. If no BOM is 47 * found, the UTF-8 encoding is assumed. Return 1 on success, 0 on failure. 48 */ 49 50 static int 51 yaml_parser_determine_encoding(yaml_parser_t *parser) 52 { 53 /* Ensure that we had enough bytes in the raw buffer. */ 54 55 while (!parser->eof 56 && parser->raw_buffer.last - parser->raw_buffer.pointer < 3) { 57 if (!yaml_parser_update_raw_buffer(parser)) { 58 return 0; 59 } 60 } 61 62 /* Determine the encoding. 
*/ 63 64 if (parser->raw_buffer.last - parser->raw_buffer.pointer >= 2 65 && !memcmp(parser->raw_buffer.pointer, BOM_UTF16LE, 2)) { 66 parser->encoding = YAML_UTF16LE_ENCODING; 67 parser->raw_buffer.pointer += 2; 68 parser->offset += 2; 69 } 70 else if (parser->raw_buffer.last - parser->raw_buffer.pointer >= 2 71 && !memcmp(parser->raw_buffer.pointer, BOM_UTF16BE, 2)) { 72 parser->encoding = YAML_UTF16BE_ENCODING; 73 parser->raw_buffer.pointer += 2; 74 parser->offset += 2; 75 } 76 else if (parser->raw_buffer.last - parser->raw_buffer.pointer >= 3 77 && !memcmp(parser->raw_buffer.pointer, BOM_UTF8, 3)) { 78 parser->encoding = YAML_UTF8_ENCODING; 79 parser->raw_buffer.pointer += 3; 80 parser->offset += 3; 81 } 82 else { 83 parser->encoding = YAML_UTF8_ENCODING; 84 } 85 86 return 1; 87 } 88 89 /* 27 90 * Update the raw buffer. 28 91 */ … … 35 98 /* Return if the raw buffer is full. */ 36 99 37 if (parser->raw_unread == YAML_RAW_BUFFER_SIZE) return 1; 100 if (parser->raw_buffer.start == parser->raw_buffer.pointer 101 && parser->raw_buffer.last == parser->raw_buffer.end) 102 return 1; 38 103 39 104 /* Return on EOF. */ … … 43 108 /* Move the remaining bytes in the raw buffer to the beginning. */ 44 109 45 if (parser->raw_unread && parser->raw_buffer < parser->raw_pointer) { 46 memmove(parser->raw_buffer, parser->raw_pointer, parser->raw_unread); 47 } 48 parser->raw_pointer = parser->raw_buffer; 110 if (parser->raw_buffer.start < parser->raw_buffer.pointer 111 && parser->raw_buffer.pointer < parser->raw_buffer.last) { 112 memmove(parser->raw_buffer.start, parser->raw_buffer.pointer, 113 parser->raw_buffer.last - parser->raw_buffer.pointer); 114 } 115 parser->raw_buffer.last -= 116 parser->raw_buffer.pointer - parser->raw_buffer.start; 117 parser->raw_buffer.pointer = parser->raw_buffer.start; 49 118 50 119 /* Call the read handler to fill the buffer. 
*/ 51 120 52 if (!parser->read_handler(parser->read_handler_data, 53 parser->raw_buffer + parser->raw_unread, 54 YAML_RAW_BUFFER_SIZE - parser->raw_unread, 55 &size_read)) { 121 if (!parser->read_handler(parser->read_handler_data, parser->raw_buffer.last, 122 parser->raw_buffer.end - parser->raw_buffer.last, &size_read)) { 56 123 return yaml_parser_set_reader_error(parser, "Input error", 57 124 parser->offset, -1); 58 125 } 59 parser->raw_ unread+= size_read;126 parser->raw_buffer.last += size_read; 60 127 if (!size_read) { 61 128 parser->eof = 1; … … 66 133 67 134 /* 68 * Determine the input stream encoding by checking the BOM symbol. If no BOM is 69 * found, the UTF-8 encoding is assumed. Return 1 on success, 0 on failure. 70 */ 71 72 #define BOM_UTF8 "\xef\xbb\xbf" 73 #define BOM_UTF16LE "\xff\xfe" 74 #define BOM_UTF16BE "\xfe\xff" 75 76 static int 77 yaml_parser_determine_encoding(yaml_parser_t *parser) 78 { 79 /* Ensure that we had enough bytes in the raw buffer. */ 80 81 while (!parser->eof && parser->raw_unread < 3) { 82 if (!yaml_parser_update_raw_buffer(parser)) { 83 return 0; 84 } 85 } 86 87 /* Determine the encoding. */ 88 89 if (parser->raw_unread >= 2 90 && !memcmp(parser->raw_pointer, BOM_UTF16LE, 2)) { 91 parser->encoding = YAML_UTF16LE_ENCODING; 92 parser->raw_pointer += 2; 93 parser->raw_unread -= 2; 94 parser->offset += 2; 95 } 96 else if (parser->raw_unread >= 2 97 && !memcmp(parser->raw_pointer, BOM_UTF16BE, 2)) { 98 parser->encoding = YAML_UTF16BE_ENCODING; 99 parser->raw_pointer += 2; 100 parser->raw_unread -= 2; 101 parser->offset += 2; 102 } 103 else if (parser->raw_unread >= 3 104 && !memcmp(parser->raw_pointer, BOM_UTF8, 3)) { 105 parser->encoding = YAML_UTF8_ENCODING; 106 parser->raw_pointer += 3; 107 parser->raw_unread -= 3; 108 parser->offset += 3; 109 } 110 else { 111 parser->encoding = YAML_UTF8_ENCODING; 112 } 113 114 return 1; 115 } 116 117 /* 118 * Ensure that the buffer contains at least length characters. 
135 * Ensure that the buffer contains at least `length` characters. 119 136 * Return 1 on success, 0 on failure. 120 137 * … … 125 142 yaml_parser_update_buffer(yaml_parser_t *parser, size_t length) 126 143 { 144 assert(parser->read_handler); /* Read handler must be set. */ 145 127 146 /* If the EOF flag is set and the raw buffer is empty, do nothing. */ 128 147 129 if (parser->eof && !parser->raw_unread)148 if (parser->eof && parser->raw_buffer.pointer == parser->raw_buffer.last) 130 149 return 1; 131 150 … … 144 163 /* Move the unread characters to the beginning of the buffer. */ 145 164 146 if (parser->buffer < parser->pointer147 && parser-> pointer < parser->buffer_end) {148 size_t size = parser->buffer _end - parser->pointer;149 memmove(parser->buffer , parser->pointer, size);150 parser-> pointer = parser->buffer;151 parser->buffer _end = parser->buffer+ size;152 } 153 else if (parser-> pointer == parser->buffer_end) {154 parser-> pointer = parser->buffer;155 parser->buffer _end = parser->buffer;165 if (parser->buffer.start < parser->buffer.pointer 166 && parser->buffer.pointer < parser->buffer.last) { 167 size_t size = parser->buffer.last - parser->buffer.pointer; 168 memmove(parser->buffer.start, parser->buffer.pointer, size); 169 parser->buffer.pointer = parser->buffer.start; 170 parser->buffer.last = parser->buffer.start + size; 171 } 172 else if (parser->buffer.pointer == parser->buffer.last) { 173 parser->buffer.pointer = parser->buffer.start; 174 parser->buffer.last = parser->buffer.start; 156 175 } 157 176 … … 166 185 /* Decode the raw buffer. */ 167 186 168 while (parser->raw_ unread)187 while (parser->raw_buffer.pointer != parser->raw_buffer.last) 169 188 { 170 189 unsigned int value, value2; … … 173 192 unsigned int width; 174 193 int k, low, high; 194 int raw_unread = parser->raw_buffer.last - parser->raw_buffer.pointer; 175 195 176 196 /* Decode the next character. */ … … 202 222 /* Determine the length of the UTF-8 sequence. 
*/ 203 223 204 octet = parser->raw_ pointer[0];224 octet = parser->raw_buffer.pointer[0]; 205 225 width = (octet & 0x80) == 0x00 ? 1 : 206 226 (octet & 0xE0) == 0xC0 ? 2 : … … 217 237 /* Check if the raw buffer contains an incomplete character. */ 218 238 219 if (width > parser->raw_unread) {239 if (width > raw_unread) { 220 240 if (parser->eof) { 221 241 return yaml_parser_set_reader_error(parser, … … 238 258 for (k = 1; k < width; k ++) 239 259 { 240 octet = parser->raw_ pointer[k];260 octet = parser->raw_buffer.pointer[k]; 241 261 242 262 /* Check if the octet is valid. */ … … 305 325 /* Check for incomplete UTF-16 character. */ 306 326 307 if ( parser->raw_unread < 2) {327 if (raw_unread < 2) { 308 328 if (parser->eof) { 309 329 return yaml_parser_set_reader_error(parser, … … 317 337 /* Get the character. */ 318 338 319 value = parser->raw_ pointer[low]320 + (parser->raw_ pointer[high] << 8);339 value = parser->raw_buffer.pointer[low] 340 + (parser->raw_buffer.pointer[high] << 8); 321 341 322 342 /* Check for unexpected low surrogate area. */ … … 335 355 /* Check for incomplete surrogate pair. */ 336 356 337 if ( parser->raw_unread < 4) {357 if (raw_unread < 4) { 338 358 if (parser->eof) { 339 359 return yaml_parser_set_reader_error(parser, … … 347 367 /* Get the next character. */ 348 368 349 unsigned int value2 = parser->raw_ pointer[low+2]350 + (parser->raw_ pointer[high+2] << 8);369 unsigned int value2 = parser->raw_buffer.pointer[low+2] 370 + (parser->raw_buffer.pointer[high+2] << 8); 351 371 352 372 /* Check for a low surrogate area. */ … … 391 411 /* Move the raw pointers. 
*/ 392 412 393 parser->raw_pointer += width; 394 parser->raw_unread -= width; 413 parser->raw_buffer.pointer += width; 395 414 parser->offset += width; 396 415 … … 399 418 /* 0000 0000-0000 007F -> 0xxxxxxx */ 400 419 if (value <= 0x7F) { 401 *(parser->buffer _end++) = value;420 *(parser->buffer.last++) = value; 402 421 } 403 422 /* 0000 0080-0000 07FF -> 110xxxxx 10xxxxxx */ 404 423 else if (value <= 0x7FF) { 405 *(parser->buffer _end++) = 0xC0 + (value >> 6);406 *(parser->buffer _end++) = 0x80 + (value & 0x3F);424 *(parser->buffer.last++) = 0xC0 + (value >> 6); 425 *(parser->buffer.last++) = 0x80 + (value & 0x3F); 407 426 } 408 427 /* 0000 0800-0000 FFFF -> 1110xxxx 10xxxxxx 10xxxxxx */ 409 428 else if (value <= 0xFFFF) { 410 *(parser->buffer _end++) = 0xE0 + (value >> 12);411 *(parser->buffer _end++) = 0x80 + ((value >> 6) & 0x3F);412 *(parser->buffer _end++) = 0x80 + (value & 0x3F);429 *(parser->buffer.last++) = 0xE0 + (value >> 12); 430 *(parser->buffer.last++) = 0x80 + ((value >> 6) & 0x3F); 431 *(parser->buffer.last++) = 0x80 + (value & 0x3F); 413 432 } 414 433 /* 0001 0000-0010 FFFF -> 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ 415 434 else { 416 *(parser->buffer _end++) = 0xF0 + (value >> 18);417 *(parser->buffer _end++) = 0x80 + ((value >> 12) & 0x3F);418 *(parser->buffer _end++) = 0x80 + ((value >> 6) & 0x3F);419 *(parser->buffer _end++) = 0x80 + (value & 0x3F);435 *(parser->buffer.last++) = 0xF0 + (value >> 18); 436 *(parser->buffer.last++) = 0x80 + ((value >> 12) & 0x3F); 437 *(parser->buffer.last++) = 0x80 + ((value >> 6) & 0x3F); 438 *(parser->buffer.last++) = 0x80 + (value & 0x3F); 420 439 } 421 440 … … 426 445 427 446 if (parser->eof) { 428 *(parser->buffer _end++) = '\0';447 *(parser->buffer.last++) = '\0'; 429 448 parser->unread ++; 430 449 return 1;
Note: See TracChangeset for help on using the changeset viewer.
