PlanetXML

JSON Parser in Scheme

JavaScript Object Notation (JSON) ist ein einfach zu lesendes und zu parsendes Datenformat. Es basiert auf der Syntax von Array- und Objectdeclarationen in Sprachen wie JavaScript und Python.

Hier ist ein Parser für dieses Datenformat in der Programmiersprache Scheme.

;;; parser for json syntax (http://json.org/)
;;;
;;; tested with sisc (http://sisc.sf.net/)
;;;
;;; (c) Jörn Horstmann (http://blog.planetxml.de/)
(module json
  (json/tokenize json/tokenize-string json/parse json/parse-string json/parse-file)

  (import string-io)

  (define (json/parse p)
    (with-input-from-port p parse-object))

  (define (json/parse-string str)
    (with-input-from-string str parse-object))

  (define (json/parse-file file)
    (with-input-from-file file parse-object))

  (define (json/tokenize p)
    (with-input-from-port p tokenize))

  (define (json/tokenize-string str)
    (with-input-from-string str tokenize))

  (define (tokenize)
    (let loop ((res '())
               (token (next-token)))
      (if (eof-object? token)
          (reverse res)
          (loop (cons token res) (next-token)))))

  ;;; helper for error reporting
  (define (->string x)
    (cond
      ((char? x) (string x))
      ((symbol? x) (symbol->string x))
      ((eof-object? x) "<EOF>")
      (else "")))

  ;;; testing for control characters, handles only ascii and iso-8859-1 characters
  (define (char-control? ch)
    (let ((i (char->integer ch)))
      (or (< i 32)
          (< 127 i 160))))

  (define (lexer-error ch)
    (error (string-append "unexpected character " (->string ch))))

  (define (parse-error token)
    (error (string-append "unexpected token " (->string token))))

  ;;; reads a character and signals an error if it does not match the expected character
  (define (consume-char expect)
    (let ((ch (read-char)))
      (if (eqv? ch expect)
          ch
          (lexer-error ch))))

  (define (next-token)
    (let ((ch (read-char)))
      (case ch
        ((#\space #\newline #\tab #\return) (next-token))
        ((#\" #\') (parse-string ch))
        ((#\- #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9) (parse-number ch))
        ((#\[) 'open-brace)
        ((#\]) 'close-brace)
        ((#\{) 'open-curly)
        ((#\}) 'close-curly)
        ((#\:) 'colon)
        ((#\,) 'comma)
        ((#\t) (parse-true))
        ((#\f) (parse-false))
        ((#\n) (parse-null))
        (else (if (eof-object? ch)
                  ch
                  (lexer-error ch))))))

  ;;; XXX parsing of numbers is not really correct
  (define (parse-number ch)
    (let loop ((res (string ch)))
      (let ((ch (peek-char)))
        (case ch
          ((#\- #\+ #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9 #\. #\e #\E)
           (begin (read-char)
                  (loop (string-append res (string ch)))))
          (else (string->number res))))))

  ;;; reads the symbol 'true, the first character is already read by next-token
  (define (parse-true)
    (begin
      (consume-char #\r)
      (consume-char #\u)
      (consume-char #\e)
      #t))

  ;;; reads the symbol 'false the first character is already read by next-token
  (define (parse-false)
    (begin
      (consume-char #\a)
      (consume-char #\l)
      (consume-char #\s)
      (consume-char #\e)
      #f))

  ;;; reads the symbol 'null the first character is already read by next-token
  (define (parse-null)
    (begin
      (consume-char #\u)
      (consume-char #\l)
      (consume-char #\l)
      '()))

  (define (parse-string q)
    (let loop ((res ""))
      (let ((ch (read-char)))
        (cond
          ((eqv? ch #\\) (loop (string-append res (string (parse-escape q)))))
          ((eqv? ch q) res)
          ((eqv? ch q) res)
          ((not (char-control? ch)) (loop (string-append res (string ch))))
          (else (lexer-error ch))))))

  (define (parse-escape q)
    (let ((ch (read-char)))
      (case ch
        ((#\b) #\backspace)
        ((#\f) #\page)
        ((#\n) #\newline)
        ((#\r) #\return)
        ((#\t) #\tab)
        ((#\\) #\\)
        ((#\u) (parse-unicode))
        (else (if (eqv? ch q)
                  q
                  (lexer-error ch))))))

  (define (numeric-char-value ch)
    (- (char->integer ch) (char->integer #\0)))

  (define (hex-char-value ch)
    (let ((i (char->integer ch)))
      (cond ((<= 97 i 102) (- i 87)) ; a-f
            ((<= 65 i 70)  (- i 55)) ; A-F
            ((<= 48 i 57)  (- i 48)) ; 0-9
            (else (lexer-error ch)))))

  ;;; parse an unicode escape consisting of four hexadecimal characters
  (define (parse-unicode)
    (let* ((a (hex-char-value (read-char)))
           (b (hex-char-value (read-char)))
           (c (hex-char-value (read-char)))
           (d (hex-char-value (read-char))))
      (integer->char (+ (* 4096 a)
                        (*  256 b)
                        (*   16 c)
                        d))))

  (define (parse-object)
    (parse-object-helper (next-token)))

  (define (parse-object-helper token)
    (cond
      ((eqv? token 'open-curly) (parse-map))
      ((eqv? token 'open-brace) (parse-list))
      ((null? token) token)
      ((string? token) token)
      ((number? token) token)
      ((boolean? token) token)
      (else (parse-error token))))

  (define (parse-map)
    (let loop ((res '(map)))
      (let ((token (next-token)))
        (cond
          ((eqv? token 'close-curly) (reverse res))
          ((string? token) (let* ((res (cons (list (string->symbol token) (parse-map-value))
                                             res))
                                  (next (next-token)))
                             (cond
                               ((eqv? next 'close-curly) (reverse res))
                               ((eqv? next 'comma) (loop res))
                               (else (parse-error next)))))
          (else (parse-error token))))))

  (define (parse-map-value)
    (let ((token (next-token)))
        (if (eqv? token 'colon)
            (parse-object)
            (parse-error token))))

  (define (parse-list)
    (let loop ((res '(list)))
      (let ((token (next-token)))
        (cond
          ((eqv? token 'close-brace) (reverse res))
          (else (let* ((res (cons (list 'item (parse-object-helper token)) res))
                       (next (next-token)))
                  (cond
                    ((eqv? next 'close-brace) (reverse res))
                    ((eqv? next 'comma) (loop res))
                    (else (parse-error next)))))))))

  )