JSON Parser in Scheme
JavaScript Object Notation (JSON) ist ein einfach zu lesendes und zu parsendes Datenformat. Es basiert auf der Syntax von Array- und Objectdeclarationen in Sprachen wie JavaScript und Python.
Hier ist ein Parser für dieses Datenformat in der Programmiersprache Scheme.
;;; parser for json syntax (http://json.org/) ;;; ;;; tested with sisc (http://sisc.sf.net/) ;;; ;;; (c) Jörn Horstmann (http://blog.planetxml.de/) (module json (json/tokenize json/tokenize-string json/parse json/parse-string json/parse-file) (import string-io) (define (json/parse p) (with-input-from-port p parse-object)) (define (json/parse-string str) (with-input-from-string str parse-object)) (define (json/parse-file file) (with-input-from-file file parse-object)) (define (json/tokenize p) (with-input-from-port p tokenize)) (define (json/tokenize-string str) (with-input-from-string str tokenize)) (define (tokenize) (let loop ((res '()) (token (next-token))) (if (eof-object? token) (reverse res) (loop (cons token res) (next-token))))) ;;; helper for error reporting (define (->string x) (cond ((char? x) (string x)) ((symbol? x) (symbol->string x)) ((eof-object? x) "<EOF>") (else ""))) ;;; testing for control characters, handles only ascii and iso-8859-1 characters (define (char-control? ch) (let ((i (char->integer ch))) (or (< i 32) (< 127 i 160)))) (define (lexer-error ch) (error (string-append "unexpected character " (->string ch)))) (define (parse-error token) (error (string-append "unexpected token " (->string token)))) ;;; reads a character and signals an error if it does not match the expected character (define (consume-char expect) (let ((ch (read-char))) (if (eqv? ch expect) ch (lexer-error ch)))) (define (next-token) (let ((ch (read-char))) (case ch ((#\space #\newline #\tab #\return) (next-token)) ((#\" #\') (parse-string ch)) ((#\- #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9) (parse-number ch)) ((#\[) 'open-brace) ((#\]) 'close-brace) ((#\{) 'open-curly) ((#\}) 'close-curly) ((#\:) 'colon) ((#\,) 'comma) ((#\t) (parse-true)) ((#\f) (parse-false)) ((#\n) (parse-null)) (else (if (eof-object? ch) ch (lexer-error ch)))))) ;;; XXX parsing of numbers is not really correct (define (parse-number ch) (let loop ((res (string ch))) (let ((ch (peek-char))) (case ch ((#\- #\+ #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9 #\. #\e #\E) (begin (read-char) (loop (string-append res (string ch))))) (else (string->number res)))))) ;;; reads the symbol 'true, the first character is already read by next-token (define (parse-true) (begin (consume-char #\r) (consume-char #\u) (consume-char #\e) #t)) ;;; reads the symbol 'false the first character is already read by next-token (define (parse-false) (begin (consume-char #\a) (consume-char #\l) (consume-char #\s) (consume-char #\e) #f)) ;;; reads the symbol 'null the first character is already read by next-token (define (parse-null) (begin (consume-char #\u) (consume-char #\l) (consume-char #\l) '())) (define (parse-string q) (let loop ((res "")) (let ((ch (read-char))) (cond ((eqv? ch #\\) (loop (string-append res (string (parse-escape q))))) ((eqv? ch q) res) ((eqv? ch q) res) ((not (char-control? ch)) (loop (string-append res (string ch)))) (else (lexer-error ch)))))) (define (parse-escape q) (let ((ch (read-char))) (case ch ((#\b) #\backspace) ((#\f) #\page) ((#\n) #\newline) ((#\r) #\return) ((#\t) #\tab) ((#\\) #\\) ((#\u) (parse-unicode)) (else (if (eqv? ch q) q (lexer-error ch)))))) (define (numeric-char-value ch) (- (char->integer ch) (char->integer #\0))) (define (hex-char-value ch) (let ((i (char->integer ch))) (cond ((<= 97 i 102) (- i 87)) ; a-f ((<= 65 i 70) (- i 55)) ; A-F ((<= 48 i 57) (- i 48)) ; 0-9 (else (lexer-error ch))))) ;;; parse an unicode escape consisting of four hexadecimal characters (define (parse-unicode) (let* ((a (hex-char-value (read-char))) (b (hex-char-value (read-char))) (c (hex-char-value (read-char))) (d (hex-char-value (read-char)))) (integer->char (+ (* 4096 a) (* 256 b) (* 16 c) d)))) (define (parse-object) (parse-object-helper (next-token))) (define (parse-object-helper token) (cond ((eqv? token 'open-curly) (parse-map)) ((eqv? token 'open-brace) (parse-list)) ((null? token) token) ((string? token) token) ((number? token) token) ((boolean? token) token) (else (parse-error token)))) (define (parse-map) (let loop ((res '(map))) (let ((token (next-token))) (cond ((eqv? token 'close-curly) (reverse res)) ((string? token) (let* ((res (cons (list (string->symbol token) (parse-map-value)) res)) (next (next-token))) (cond ((eqv? next 'close-curly) (reverse res)) ((eqv? next 'comma) (loop res)) (else (parse-error next))))) (else (parse-error token)))))) (define (parse-map-value) (let ((token (next-token))) (if (eqv? token 'colon) (parse-object) (parse-error token)))) (define (parse-list) (let loop ((res '(list))) (let ((token (next-token))) (cond ((eqv? token 'close-brace) (reverse res)) (else (let* ((res (cons (list 'item (parse-object-helper token)) res)) (next (next-token))) (cond ((eqv? next 'close-brace) (reverse res)) ((eqv? next 'comma) (loop res)) (else (parse-error next))))))))) )