;;; read-scheme-source --- Read a file, recognizing scheme forms and comments
;; Copyright (C) 2001, 2006, 2011 Free Software Foundation, Inc.
;;
;; This program is free software; you can redistribute it and/or
;; modify it under the terms of the GNU Lesser General Public License
;; as published by the Free Software Foundation; either version 3, or
;; (at your option) any later version.
;;
;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;; Lesser General Public License for more details.
;;
;; You should have received a copy of the GNU Lesser General Public
;; License along with this software; see the file COPYING.LESSER. If
;; not, write to the Free Software Foundation, Inc., 51 Franklin
;; Street, Fifth Floor, Boston, MA 02110-1301 USA
;;; Author: Thien-Thi Nguyen
;;; Commentary:
;; Usage: read-scheme-source FILE1 FILE2 ...
;;
;; This program parses each FILE and writes to stdout sexps that describe the
;; top-level structures of the file: scheme forms, single-line comments, and
;; hash-bang comments. You can further process these (to associate comments
;; w/ scheme forms as a kind of documentation, for example).
;;
;; The output sexps have one of these forms:
;;
;; (quote (filename FILENAME))
;;
;; (quote (comment :leading-semicolons N
;; :text LINE))
;;
;; (quote (whitespace :text LINE))
;;
;; (quote (hash-bang-comment :line LINUM
;; :line-count N
;; :text-list (LINE1 LINE2 ...)))
;;
;; (quote (following-form-properties :line LINUM
;; :line-count N)
;; :type TYPE
;; :signature SIGNATURE
;; :std-int-doc DOCSTRING))
;;
;; SEXP
;;
;; The first four are straightforward (both FILENAME and LINE are strings sans
;; newline, while LINUM and N are integers). The last two always go together,
;; in that order. SEXP is scheme code processed only by `read' and then
;; `write'.
;;
;; The :type field may be omitted if the form is not recognized. Otherwise,
;; TYPE may be one of: procedure, alias, define-module, variable.
;;
;; The :signature field may be omitted if the form is not a procedure.
;; Otherwise, SIGNATURE is a list showing the procedure's signature.
;;
;; If the type is `procedure' and the form has a standard internal docstring
;; (first body form a string), that is extracted in full -- including any
;; embedded newlines -- and recorded by field :std-int-doc.
;;
;;
;; Usage from a program: The output list of sexps can be retrieved by scheme
;; programs w/o having to capture stdout, like so:
;;
;; (use-modules (scripts read-scheme-source))
;; (define source-forms (read-scheme-source-silently "FILE1" "FILE2" ...))
;;
;; There are also two convenience procs exported for use by Scheme programs:
;;
;; (clump FORMS) --- filter FORMS combining contiguous comment forms that
;; have the same number of leading semicolons.
;;
;; (quoted? SYM FORM) --- see if FORM looks like: "(quote (SYM ...))", parse
;; the ":tags", and return alist of (TAG . VAL) elems.
;;
;; TODO: Add option "--clump-comments", maybe w/ different clumping styles.
;; Make `annotate!' extensible.
;;; Code:
(define-module (scripts read-scheme-source)
:use-module (ice-9 rdelim)
:export (read-scheme-source
read-scheme-source-silently
quoted?
clump))
(define %include-in-guild-list #f)
(define %summary "Print a parsed representation of a Scheme file.")
;; Try to figure out what FORM is and its various attributes.
;; Call proc NOTE! with key (a symbol) and value.
;;
(define (annotate! form note!)
(cond ((and (list? form)
(< 2 (length form))
(eq? 'define (car form))
(pair? (cadr form))
(symbol? (caadr form)))
(note! ':type 'procedure)
(note! ':signature (cadr form))
(and (< 3 (length form))
(string? (caddr form))
(note! ':std-int-doc (caddr form))))
((and (list? form)
(< 2 (length form))
(eq? 'define (car form))
(symbol? (cadr form))
(list? (caddr form))
(< 3 (length (caddr form)))
(eq? 'lambda (car (caddr form)))
(string? (caddr (caddr form))))
(note! ':type 'procedure)
(note! ':signature (cons (cadr form) (cadr (caddr form))))
(note! ':std-int-doc (caddr (caddr form))))
((and (list? form)
(= 3 (length form))
(eq? 'define (car form))
(symbol? (cadr form))
(symbol? (caddr form)))
(note! ':type 'alias))
((and (list? form)
(eq? 'define-module (car form)))
(note! ':type 'define-module))
;; Add other types here.
(else (note! ':type 'variable))))
;; Process FILE, calling NB! on parsed top-level elements.
;; Recognized: #!-!# and regular comments in addition to normal forms.
;;
(define (process file nb!)
(nb! `'(filename ,file))
(let ((hash-bang-rx (make-regexp "^#!"))
(bang-hash-rx (make-regexp "^!#"))
(all-comment-rx (make-regexp "^[ \t]*(;+)"))
(all-whitespace-rx (make-regexp "^[ \t]*$"))
(p (open-input-file file)))
(let loop ((n (1+ (port-line p))) (line (read-line p)))
(or (not n)
(eof-object? line)
(begin
(cond ((regexp-exec hash-bang-rx line)
(let loop ((line (read-line p))
(text (list line)))
(if (or (eof-object? line)
(regexp-exec bang-hash-rx line))
(nb! `'(hash-bang-comment
:line ,n
:line-count ,(1+ (length text))
:text-list ,(reverse
(cons line text))))
(loop (read-line p)
(cons line text)))))
((regexp-exec all-whitespace-rx line)
(nb! `'(whitespace :text ,line)))
((regexp-exec all-comment-rx line)
=> (lambda (m)
(nb! `'(comment
:leading-semicolons
,(let ((m1 (vector-ref m 1)))
(- (cdr m1) (car m1)))
:text ,line))))
(else
(unread-string line p)
(let* ((form (read p))
(count (- (port-line p) n))
(props (let* ((props '())
(prop+ (lambda args
(set! props
(append props args)))))
(annotate! form prop+)
props)))
(or (= count 1) ; ugh
(begin
(read-line p)
(set! count (1+ count))))
(nb! `'(following-form-properties
:line ,n
:line-count ,count
,@props))
(nb! form))))
(loop (1+ (port-line p)) (read-line p)))))))
;;; entry points
(define (read-scheme-source-silently . files)
"See commentary in module (scripts read-scheme-source)."
(let* ((res '()))
(for-each (lambda (file)
(process file (lambda (e) (set! res (cons e res)))))
files)
(reverse res)))
(define (read-scheme-source . files)
"See commentary in module (scripts read-scheme-source)."
(for-each (lambda (file)
(process file (lambda (e) (write e) (newline))))
files))
;; Recognize: (quote (SYM :TAG1 VAL1 :TAG2 VAL2 ...))
;; and return alist: ((TAG1 . VAL1) (TAG2 . VAL2) ...)
;; where the tags are symbols.
;;
(define (quoted? sym form)
(and (list? form)
(= 2 (length form))
(eq? 'quote (car form))
(let ((inside (cadr form)))
(and (list? inside)
(< 0 (length inside))
(eq? sym (car inside))
(let loop ((ls (cdr inside)) (alist '()))
(if (null? ls)
alist ; retval
(let ((first (car ls)))
(or (symbol? first)
(error "bad list!"))
(loop (cddr ls)
(acons (string->symbol
(substring (symbol->string first) 1))
(cadr ls)
alist)))))))))
;; Filter FORMS, combining contiguous comment forms that have the same number
;; of leading semicolons. Do not include in them whitespace lines.
;; Whitespace lines outside of such comment groupings are ignored, as are
;; hash-bang comments. All other forms are passed through unchanged.
;;
(define (clump forms)
(let loop ((forms forms) (acc '()) (pass-this-one-through? #f))
(if (null? forms)
(reverse acc) ; retval
(let ((form (car forms)))
(cond (pass-this-one-through?
(loop (cdr forms) (cons form acc) #f))
((quoted? 'following-form-properties form)
(loop (cdr forms) (cons form acc) #t))
((quoted? 'whitespace form) ;;; ignore
(loop (cdr forms) acc #f))
((quoted? 'hash-bang-comment form) ;;; ignore for now
(loop (cdr forms) acc #f))
((quoted? 'comment form)
=> (lambda (alist)
(let cloop ((inner-forms (cdr forms))
(level (assq-ref alist 'leading-semicolons))
(text (list (assq-ref alist 'text))))
(let ((up (lambda ()
(loop inner-forms
(cons (cons level (reverse text))
acc)
#f))))
(if (null? inner-forms)
(up)
(let ((inner-form (car inner-forms)))
(cond ((quoted? 'comment inner-form)
=> (lambda (inner-alist)
(let ((new-level
(assq-ref
inner-alist
'leading-semicolons)))
(if (= new-level level)
(cloop (cdr inner-forms)
level
(cons (assq-ref
inner-alist
'text)
text))
(up)))))
(else (up)))))))))
(else (loop (cdr forms) (cons form acc) #f)))))))
;;; script entry point
(define main read-scheme-source)
;;; read-scheme-source ends here