;;; Monadic Parser Combinators Library ;;; ;;; Works cited: ;;; http://legacy.cs.uu.nl/daan/download/parsec/parsec.html ;;; http://cvs.haskell.org/cgi-bin/cvsweb.cgi/fptools/libraries/parsec/Text/ParserCombinators/Parsec/ ;;; http://www.cs.york.ac.uk/fp/HaXml/ ;;; http://www.fh-wedel.de/~si/HXmlToolbox/ ;;; http://haskell.org/onlinereport/standard-prelude.html ;;; ;;; References: ;;; http://www.cs.nott.ac.uk/~gmh/pearl.pdf ;;; http://research.microsoft.com/users/daan/download/papers/parsec-paper.pdf ;;; http://practical-scheme.net/wiliki/wiliki.cgi?Rui%3aParsingExpressionGrammar #lang scheme/base (provide (all-from-out scheme/function) disjoin conjoin constantly concat foldr1 debug parse run-through skip-through fmap sequence >>= >> return doP mzero mplus try fail unexpected many many1 skip-many skip-many1 sep-by sep-by1 count between option optional chainl chainl1 chainr chainr1 eop followed-by not-followed-by many-till skip-till one-of none-of any-char char chars char-ic chars-ic letter upper lower alphanum digit hex-digit space spaces satisfy white-space lexeme symbol parens braces angles squares comma-sep allbut allbut1) (require scheme/function srfi/8 srfi/40 ;;srfi/41 ; seems much slower than 40 (for-syntax scheme/base)) ;;; Prelude #| (define-syntax disjoin (syntax-rules () ((disjoin f ...) (lambda (x) (or (f x) ...))))) (define-syntax conjoin (syntax-rules () ((conjoin f ...) (lambda (x) (and (f x) ...))))) |# (define (disjoin f . fs) (if (null? fs) f (let ((chain (apply disjoin fs))) (lambda (x) (or (f x) (chain x)))))) (define (conjoin f . fs) (if (null? fs) f (let ((chain (apply conjoin fs))) (lambda (x) (and (f x) (chain x)))))) (define ((constantly x) _) x) (define-syntax concat (syntax-rules () ((concat l) (apply append l)))) (define (foldr1 f l) (let ((r (reverse l))) (foldl f (car r) (cdr r)))) (define (replicate n x) (for/list ((i (in-range n))) x)) (define (debug x (msg "DBG")) (printf "[~a] ~s~%" msg x) (flush-output (current-output-port)) x) ;;; Input Stream (define (sequence->stream seq) (cond ((stream? seq) seq) ((list? seq) (list->stream seq)) ((string? seq) (string->stream seq)) ((input-port? seq) (port->stream seq)) (else (error 'sequence->stream "Invalid type: ~s" seq)))) (define (list->stream l) (let loop ((l l)) (if (null? l) stream-null (stream-cons (car l) (loop (cdr l)))))) (define (port->stream in) (let loop ((c (read-char in))) (if (eof-object? c) stream-null (stream-cons c (loop (read-char in)))))) #| (define (string->stream s) (port->stream (open-input-string s))) |# (define (string->stream s) (let ((len (string-length s))) (let loop ((i 0)) (if (>= i len) stream-null (stream-cons (string-ref s i) (loop (add1 i))))))) (define (parse p input) (receive (err x input2) (p (sequence->stream input)) (if err (begin (unless (stream-null? input2) (display "Unparsed: ") (print-stream input2)) (list err x)) x))) (define (run-through input) (stream-for-each void (sequence->stream input))) (define (skip-through input) (parse (skip-many any-char) input)) (define (print-stream s) (stream-for-each display s) (newline)) ;;; Functor (define (fmap f p) (>>= p (lambda (x) (return (f x))))) ;;; Monad ;; Return x, without consuming input (define (return x) (lambda (input) (values #f x input))) (define (>>= p f) (lambda (input) (receive (err x input2) (p input) (if err (values err x input2) ((f x) input2))))) #| (define (>> . ps) (lambda (input) (let loop ((x '()) (input input) (ps ps)) (if (null? ps) (values #f x input) (receive (err y input2) ((car ps) input) (if err (values err y input2) (loop y input2 (cdr ps)))))))) (define (>> . ps) (define (bind p q) (lambda (input) (receive (err x input2) (p input) (if err (values err x input2) (q input2))))) (foldr1 bind ps)) (define (>> p q) (lambda (input) (receive (err x input2) (p input) (if err (values err x input2) (q input2))))) |# (define-syntax >> (syntax-rules () ((>> p q) (>>= p (lambda (_) q))) ((>> p q r ...) (>> p (>> q r ...))))) (define-syntax doP (syntax-rules (guard <-) ((doP e) e) ((doP (v <- p) e ...) (>>= p (lambda (v) (doP e ...)))) ((doP (guard p?) e ...) (if p? (doP e ...) mzero)) ((doP p e ...) (>> p (doP e ...))))) #| ;; From "The Haskell 98 Report: Standard Prelude" (define (sequence ps) (define (mcons p q) (doP (x <- p) (y <- q) (return (cons x y)))) (foldr mcons (return '()) ps)) (define (sequence ps) (lambda (input) (let loop ((xs '()) (input input) (ps ps)) (if (null? ps) (values #f (reverse xs) input) (receive (err x input2) ((car ps) input) (if err (values err x input2) (loop (cons x xs) input2 (cdr ps)))))))) |# (define (mcons p q) (lambda (input) (receive (err1 x input1) (p input) (if err1 (values err1 x input1) (receive (err2 y input2) (q input1) (if err2 (values err2 y input2) (values #f (cons x y) input2))))))) (define (sequence ps) (foldr mcons (return '()) ps)) (define (fail msg) (lambda (input) (values 'error msg input))) (define (unexpected x) (lambda (input) (values 'unexpected x input))) ;;; MonadPlus (define mzero (fail #f)) (define (mplus p q) (lambda (input) (receive (err x input2) (p input) (if err (q input) (values #f x input2))))) #| ;; TODO: benchmark (define ( . ps) (lambda (input) (let loop ((p1 (car ps)) (pmore (cdr ps))) (if (null? pmore) (p1 input) (receive (err x input2) (p1 input) (if err (loop (car pmore) (cdr pmore)) (values #f x input2))))))) (define-syntax (syntax-rules () (( p) p) (( p q) (mplus p q)) (( p q r ...) ;;( ( p q) r ...) ( p ( q r ...))))) |# (define ( . pp) (foldr1 mplus pp)) ;;; Utility Parsers (define (satisfy test) (lambda (input) (if (stream-null? input) (values 'eof "end of input" input) (let ((x (stream-car input))) (if (test x) (values #f x (stream-cdr input)) ((unexpected x) input)))))) (define (try p) (lambda (input) (receive (err x input2) (p input) (if err (values #f x input) (values #f x input2))))) ;; (From Parsec) To avoid stack overflow, we accumulate parse ;; results tail-recursively with left-fold. (define (many p) (fmap reverse (many-fold cons p))) (define (many-fold accum p) (lambda (input) (let loop ((input input) (xs '())) (receive (err x input2) (p input) (cond (err (values #f xs input)) ((null? x) (loop input2 xs)) (else (loop input2 (accum x xs)))))))) (define (many1 p) (doP (x <- p) (xs <- (many p)) (return (cons x xs)))) (define (skip-many p) (many-fold (lambda (x _) '()) p)) (define (skip-many1 p) (>> p (skip-many p))) (define (sep-by p sep) (option '() (sep-by1 p sep))) (define (sep-by1 p sep) (doP (x <- p) (xs <- (many (>> sep p))) (return (cons x xs)))) #| http://cvs.haskell.org/cgi-bin/cvsweb.cgi/fptools/libraries/parsec/Text/ParserCombinators/Parsec/Combinator.hs?rev=1.2 count :: Int -> GenParser tok st a -> GenParser tok st [a] count n p | n <= 0 = return [] | otherwise = sequence (replicate n p) |# (define (count n p) (sequence (replicate n p))) (define ((between open close) p) (doP open (x <- p) close (return x))) (define (option default p) (mplus p (return default))) (define (optional p) (mplus (>> p (return '())) (return '()))) (define (one-of l #:member (member memq)) (satisfy (curryr member l))) (define (none-of l #:member (member memq)) (satisfy (negate (curryr member l)))) (define digit (satisfy char-numeric?)) (define hex-digit (satisfy (disjoin char-numeric? (lambda (c) (or (char<=? #\a c #\f) (char<=? #\A c #\F)))))) (define letter (satisfy char-alphabetic?)) (define upper (satisfy char-upper-case?)) (define lower (satisfy char-lower-case?)) (define alphanum (satisfy (disjoin char-alphabetic? char-numeric?))) (define space (satisfy char-whitespace?)) (define spaces (many space)) (define any-char (satisfy (constantly #t))) (define eop ;(skip-many any-char) (lambda (_) (values #f 'eop stream-null))) (define (char c) (satisfy (curry char=? c))) ;; Prasec's `string' renamed, to avoid conflict with Scheme's string (define (chars str) (>> (sequence (map char (string->list str))) (return str))) (define (char-ic c) (satisfy (curry char-ci=? c))) (define (chars-ic str) (fmap list->string (sequence (map char-ic (string->list str))))) #| (if (zero? (string-length str)) (return str) (doP (char (string-ref str 0)) (chars (substring str 1)) (return str))) |# ;; chain* functions taken from HuttonMeijerWallace.hs ;; accompanying HaXml (define (chainl p op v) (option v (chainl1 p op))) (define (chainl1 p op) (letrec ((rest (lambda (x) (mplus (doP (f <- op) (y <- p) (rest (f x y))) (return x))))) (>>= p rest))) (define (chainr p op v) (option v (chainr1 p op))) (define (chainr1 p op) (doP (x <- p) (mplus (doP (f <- op) (y <- (chainr1 p op)) (return (f x y))) (return x)))) (define (many-till p end) (fmap reverse (till-fold cons p end))) (define (till-fold accum p end) (lambda (input) (let loop ((input input) (xs '())) (receive (err1 x input1) (end input) (if err1 (receive (err2 y input2) (p input) (cond (err2 (values err2 y input2)) ((null? y) (loop input2 xs)) (else (loop input2 (accum y xs))))) (values #f xs input1)))))) (define (skip-till p end) (till-fold (lambda (x _) '()) p end)) ;; Syntactic predicates (define (followed-by p) (lambda (input) (receive (err x _) (p input) (if err ((unexpected x) input) (values #f x input))))) (define (not-followed-by p) (lambda (input) (receive (err x _) (p input) (if err (values #f '() input) ((unexpected x) input))))) ;;; Lexical Analysis (define white-space (skip-many space)) (define (lexeme p) (doP (x <- p) white-space (return x))) (define (symbol cs) (lexeme (chars cs))) (define-syntax define-enclosed (syntax-rules () ((define-enclosed (name < >) ...) (begin (define name (between (symbol <) (symbol >))) ...)))) (define-enclosed (parens "(" ")") (braces "{" "}") (angles "<" ">") (squares "[" "]")) (define (comma-sep p) (sep-by p (lexeme (char #\,)))) ;; From Haskell XML Toolbox (define (allbut p str) (allbut1 p (constantly #t) str)) (define (allbut1 p pred? str) (let ((c (string-ref str 0)) (tail (substring str 1))) (p (mplus (satisfy (lambda (x) (and (pred? x) (not (char=? x c))))) (doP (char c) (not-followed-by (chars tail)) (return c))))))