Uname: Linux web3.us.cloudlogin.co 5.10.226-xeon-hst #2 SMP Fri Sep 13 12:28:44 UTC 2024 x86_64
Software: Apache
PHP version: 8.1.31 [ PHP INFO ] PHP os: Linux
Server Ip: 162.210.96.117
Your Ip: 3.133.134.63
User: edustar (269686) | Group: tty (888)
Safe Mode: OFF
Disable Function:
NONE

name : tokenize.scm
;;; ECMAScript for Guile

;; Copyright (C) 2009, 2010, 2011 Free Software Foundation, Inc.

;;;; This library is free software; you can redistribute it and/or
;;;; modify it under the terms of the GNU Lesser General Public
;;;; License as published by the Free Software Foundation; either
;;;; version 3 of the License, or (at your option) any later version.
;;;; 
;;;; This library is distributed in the hope that it will be useful,
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;;;; Lesser General Public License for more details.
;;;; 
;;;; You should have received a copy of the GNU Lesser General Public
;;;; License along with this library; if not, write to the Free Software
;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

;;; Code:

(define-module (language ecmascript tokenize)
  #:use-module (ice-9 rdelim)
  #:use-module ((srfi srfi-1) #:select (unfold-right))
  #:use-module (system base lalr)
  #:export (next-token make-tokenizer make-tokenizer/1 tokenize tokenize/1))

(define (syntax-error what loc form . args)
  (throw 'syntax-error #f what
         (and=> loc source-location->source-properties)
         form #f args))

(define (port-source-location port)
  (make-source-location (port-filename port)
                        (port-line port)
                        (port-column port)
                        (false-if-exception (ftell port))
                        #f))

;; taken from SSAX, sorta
(define (read-until delims port loc)
  (if (eof-object? (peek-char port))
      (syntax-error "EOF while reading a token" loc #f)
      (let ((token (read-delimited delims port 'peek)))
        (if (eof-object? (peek-char port))
            (syntax-error "EOF while reading a token" loc token)
            token))))

(define (char-hex? c)
  (and (not (eof-object? c))
       (or (char-numeric? c)
           (memv c '(#\a #\b #\c #\d #\e #\f))
           (memv c '(#\A #\B #\C #\D #\E #\F)))))

(define (digit->number c)
  (- (char->integer c) (char->integer #\0)))

(define (hex->number c)
  (if (char-numeric? c)
      (digit->number c)
      (+ 10 (- (char->integer (char-downcase c)) (char->integer #\a)))))

(define (read-slash port loc div?)
  (let ((c1 (begin
              (read-char port)
              (peek-char port))))
    (cond
     ((eof-object? c1)
      ;; hmm. error if we're not looking for a div? ?
      (make-lexical-token '/ loc #f))
     ((char=? c1 #\/)
      (read-line port)
      (next-token port div?))
     ((char=? c1 #\*)
      (read-char port)
      (let lp ((c (read-char port)))
        (cond
         ((eof-object? c)
          (syntax-error "EOF while in multi-line comment" loc #f))
         ((char=? c #\*)
          (if (eqv? (peek-char port) #\/)
              (begin
                (read-char port)
                (next-token port div?))
              (lp (read-char port))))
         (else
          (lp (read-char port))))))
     (div?
      (case c1
        ((#\=) (read-char port) (make-lexical-token '/= loc #f))
        (else (make-lexical-token '/ loc #f))))
     (else
      (read-regexp port loc)))))

(define (read-regexp port loc)
  ;; first slash already read
  (let ((terms (string #\/ #\\ #\nl #\cr)))
    (let lp ((str (read-until terms port loc)) (head ""))
      (let ((terminator (peek-char port)))
        (cond
         ((char=? terminator #\/)
          (read-char port)
          ;; flags
          (let lp ((c (peek-char port)) (flags '()))
            (if (or (eof-object? c)
                    (not (or (char-alphabetic? c)
                             (char-numeric? c)
                             (char=? c #\$)
                             (char=? c #\_))))
                (make-lexical-token 'RegexpLiteral loc
                                    (cons (string-append head str)
                                          (reverse flags)))
                (begin (read-char port)
                       (lp (peek-char port) (cons c flags))))))
         ((char=? terminator #\\)
          (read-char port)
          (let ((echar (read-char port)))
            (lp (read-until terms port loc)
                (string-append head str (string #\\ echar)))))
         (else
          (syntax-error "regexp literals may not contain newlines"
                        loc str)))))))

(define (read-string port loc)
  (let ((c (read-char port)))
    (let ((terms (string c #\\ #\nl #\cr)))
      (define (read-escape port)
        (let ((c (read-char port)))
          (case c
            ((#\' #\" #\\) c)
            ((#\b) #\bs)
            ((#\f) #\np)
            ((#\n) #\nl)
            ((#\r) #\cr)
            ((#\t) #\tab)
            ((#\v) #\vt)
            ((#\0)
             (let ((next (peek-char port)))
               (cond
                ((eof-object? next) #\nul)
                ((char-numeric? next)
                 (syntax-error "octal escape sequences are not supported"
                               loc #f))
                (else #\nul))))
            ((#\x)
             (let* ((a (read-char port))
                    (b (read-char port)))
               (cond
                ((and (char-hex? a) (char-hex? b))
                 (integer->char (+ (* 16 (hex->number a)) (hex->number b))))
                (else
                 (syntax-error "bad hex character escape" loc (string a b))))))
            ((#\u)
             (let* ((a (read-char port))
                    (b (read-char port))
                    (c (read-char port))
                    (d (read-char port)))
               (integer->char (string->number (string a b c d) 16))))
            (else
             c))))
      (let lp ((str (read-until terms port loc)))
        (let ((terminator (peek-char port)))
          (cond
           ((char=? terminator c)
            (read-char port)
            (make-lexical-token 'StringLiteral loc str))
           ((char=? terminator #\\)
            (read-char port)
            (let ((echar (read-escape port)))
              (lp (string-append str (string echar)
                                 (read-until terms port loc)))))
           (else
            (syntax-error "string literals may not contain newlines"
                          loc str))))))))

(define *keywords*
  '(("break" . break)
    ("else" . else)
    ("new" . new)
    ("var" . var)
    ("case" . case)
    ("finally" . finally)
    ("return" . return)
    ("void" . void)
    ("catch" . catch)
    ("for" . for)
    ("switch" . switch)
    ("while" . while)
    ("continue" . continue)
    ("function" . function)
    ("this" . this)
    ("with" . with)
    ("default" . default)
    ("if" . if)
    ("throw" . throw)
    ("delete" . delete)
    ("in" . in)
    ("try" . try)
    ("do" . do)
    ("instanceof" . instanceof)
    ("typeof" . typeof)

    ;; these aren't exactly keywords, but hey
    ("null" . null)
    ("true" . true)
    ("false" . false)))

(define *future-reserved-words*
  '(("abstract" . abstract)
    ("enum" . enum)
    ("int" . int)
    ("short" . short)
    ("boolean" . boolean)
    ("export" . export)
    ("interface" . interface)
    ("static" . static)
    ("byte" . byte)
    ("extends" . extends)
    ("long" . long)
    ("super" . super)
    ("char" . char)
    ("final" . final)
    ("native" . native)
    ("synchronized" . synchronized)
    ("class" . class)
    ("float" . float)
    ("package" . package)
    ("throws" . throws)
    ("const" . const)
    ("goto" . goto)
    ("private" . private)
    ("transient" . transient)
    ("debugger" . debugger)
    ("implements" . implements)
    ("protected" . protected)
    ("volatile" . volatile)
    ("double" . double)
    ("import" . import)
    ("public" . public)))

(define (read-identifier port loc)
  (let lp ((c (peek-char port)) (chars '()))
    (if (or (eof-object? c)
            (not (or (char-alphabetic? c)
                     (char-numeric? c)
                     (char=? c #\$)
                     (char=? c #\_))))
        (let ((word (list->string (reverse chars))))
          (cond ((assoc-ref *keywords* word)
                 => (lambda (x) (make-lexical-token x loc #f)))
                ((assoc-ref *future-reserved-words* word)
                 (syntax-error "word is reserved for the future, dude."
                               loc word))
                (else (make-lexical-token 'Identifier loc
                                          (string->symbol word)))))
        (begin (read-char port)
               (lp (peek-char port) (cons c chars))))))

(define (read-numeric port loc)
  (let* ((c0 (if (char=? (peek-char port) #\.)
                 #\0
                 (read-char port)))
         (c1 (peek-char port)))
    (cond
     ((eof-object? c1) (digit->number c0))
     ((and (char=? c0 #\0) (or (char=? c1 #\x) (char=? c1 #\X)))
      (read-char port)
      (let ((c (peek-char port)))
        (if (not (char-hex? c))
            (syntax-error "bad digit reading hexadecimal number"
                          loc c))
        (let lp ((c c) (acc 0))
          (cond ((char-hex? c)
                 (read-char port)
                 (lp (peek-char port)
                     (+ (* 16 acc) (hex->number c))))
                (else
                 acc)))))
     ((and (char=? c0 #\0) (char-numeric? c1))
      (let lp ((c c1) (acc 0))
        (cond ((eof-object? c) acc)
              ((char-numeric? c)
               (if (or (char=? c #\8) (char=? c #\9))
                   (syntax-error "invalid digit in octal sequence"
                                 loc c))
               (read-char port)
               (lp (peek-char port)
                   (+ (* 8 acc) (digit->number c))))
              (else
               acc))))
     (else
      (let lp ((c1 c1) (acc (digit->number c0)))
        (cond
         ((eof-object? c1) acc)
         ((char-numeric? c1)
          (read-char port)
          (lp (peek-char port)
              (+ (* 10 acc) (digit->number c1))))
         ((or (char=? c1 #\e) (char=? c1 #\E))
          (read-char port)
          (let ((add (let ((c (peek-char port)))
                       (cond ((eof-object? c)
                              (syntax-error "error reading exponent: EOF"
                                            loc #f))
                             ((char=? c #\+) (read-char port) +)
                             ((char=? c #\-) (read-char port) -)
                             ((char-numeric? c) +)
                             (else
                              (syntax-error "error reading exponent: non-digit"
                                            loc c))))))
            (let lp ((c (peek-char port)) (e 0))
              (cond ((and (not (eof-object? c)) (char-numeric? c))
                     (read-char port)
                     (lp (peek-char port) (add (* 10 e) (digit->number c))))
                    (else
                     (* (if (negative? e) (* acc 1.0) acc) (expt 10 e)))))))
         ((char=? c1 #\.)
          (read-char port)
          (let lp2 ((c (peek-char port)) (dec 0.0) (n -1))
            (cond ((and (not (eof-object? c)) (char-numeric? c))
                   (read-char port)
                   (lp2 (peek-char port)
                        (+ dec (* (digit->number c) (expt 10 n)))
                        (1- n)))
                  (else
                   ;; loop back to catch an exponential part
                   (lp c (+ acc dec))))))
         (else
          acc)))))))
           
(define *punctuation*
  '(("{" . lbrace)
    ("}" . rbrace)
    ("(" . lparen)
    (")" . rparen)
    ("[" . lbracket)
    ("]" . rbracket)
    ("." . dot)
    (";" . semicolon)
    ("," . comma)
    ("<" . <)
    (">" . >)
    ("<=" . <=)
    (">=" . >=)
    ("==" . ==)
    ("!=" . !=)
    ("===" . ===)
    ("!==" . !==)
    ("+" . +)
    ("-" . -)
    ("*" . *)
    ("%" . %)
    ("++" . ++)
    ("--" . --)
    ("<<" . <<)
    (">>" . >>)
    (">>>" . >>>)
    ("&" . &)
    ("|" . bor)
    ("^" . ^)
    ("!" . !)
    ("~" . ~)
    ("&&" . &&)
    ("||" . or)
    ("?" . ?)
    (":" . colon)
    ("=" . =)
    ("+=" . +=)
    ("-=" . -=)
    ("*=" . *=)
    ("%=" . %=)
    ("<<=" . <<=)
    (">>=" . >>=)
    (">>>=" . >>>=)
    ("&=" . &=)
    ("|=" . bor=)
    ("^=" . ^=)))

(define *div-punctuation*
  '(("/" . /)
    ("/=" . /=)))

;; node ::= (char (symbol | #f) node*)
(define read-punctuation
  (let ((punc-tree (let lp ((nodes '()) (puncs *punctuation*))
                     (cond ((null? puncs)
                            nodes)
                           ((assv-ref nodes (string-ref (caar puncs) 0))
                            => (lambda (node-tail)
                                 (if (= (string-length (caar puncs)) 1)
                                     (set-car! node-tail (cdar puncs))
                                     (set-cdr! node-tail
                                               (lp (cdr node-tail)
                                                   `((,(substring (caar puncs) 1)
                                                      . ,(cdar puncs))))))
                                 (lp nodes (cdr puncs))))
                           (else
                            (lp (cons (list (string-ref (caar puncs) 0) #f) nodes)
                                puncs))))))
    (lambda (port loc)
      (let lp ((c (peek-char port)) (tree punc-tree) (candidate #f))
        (cond
         ((assv-ref tree c)
          => (lambda (node-tail)
               (read-char port)
               (lp (peek-char port) (cdr node-tail) (car node-tail))))
         (candidate
          (make-lexical-token candidate loc #f))
         (else
          (syntax-error "bad syntax: character not allowed" loc c)))))))

(define (next-token port div?)
  (let ((c   (peek-char port))
        (loc (port-source-location port)))
    (case c
      ((#\ht #\vt #\np #\space #\x00A0) ; whitespace
       (read-char port)
       (next-token port div?))
      ((#\newline #\cr)                 ; line break
       (read-char port)
       (next-token port div?))
      ((#\/)
       ;; division, single comment, double comment, or regexp
       (read-slash port loc div?))
      ((#\" #\')                        ; string literal
       (read-string port loc))
      (else
       (cond
        ((eof-object? c)
         '*eoi*)
        ((or (char-alphabetic? c)
             (char=? c #\$)
             (char=? c #\_))
         ;; reserved word or identifier
         (read-identifier port loc))
        ((char-numeric? c)
         ;; numeric -- also accept . FIXME, requires lookahead
         (make-lexical-token 'NumericLiteral loc (read-numeric port loc)))
        (else
         ;; punctuation
         (read-punctuation port loc)))))))

(define (make-tokenizer port)
  (let ((div? #f))
    (lambda ()
      (let ((tok (next-token port div?)))
        (set! div? (and (lexical-token? tok)
                        (let ((cat (lexical-token-category tok)))
                          (or (eq? cat 'Identifier)
                              (eq? cat 'NumericLiteral)
                              (eq? cat 'StringLiteral)))))
        tok))))

(define (make-tokenizer/1 port)
  (let ((div? #f)
        (eoi? #f)
        (stack '()))
    (lambda ()
      (if eoi?
          '*eoi*
          (let ((tok (next-token port div?)))
            (case (if (lexical-token? tok) (lexical-token-category tok) tok)
              ((lparen)
               (set! stack (cons tok stack)))
              ((rparen)
               (if (and (pair? stack)
                        (eq? (lexical-token-category (car stack)) 'lparen))
                   (set! stack (cdr stack))
                   (syntax-error "unexpected right parenthesis"
                                 (lexical-token-source tok)
                                 #f)))
              ((lbracket)
               (set! stack (cons tok stack)))
              ((rbracket)
               (if (and (pair? stack)
                        (eq? (lexical-token-category (car stack)) 'lbracket))
                   (set! stack (cdr stack))
                   (syntax-error "unexpected right bracket"
                                 (lexical-token-source tok)
                                 #f)))
              ((lbrace)
               (set! stack (cons tok stack)))
              ((rbrace)
               (if (and (pair? stack)
                        (eq? (lexical-token-category (car stack)) 'lbrace))
                   (set! stack (cdr stack))
                   (syntax-error "unexpected right brace"
                                 (lexical-token-source tok)
                                 #f)))
              ((semicolon)
               (set! eoi? (null? stack))))
            (set! div? (and (lexical-token? tok)
                            (let ((cat (lexical-token-category tok)))
                              (or (eq? cat 'Identifier)
                                  (eq? cat 'NumericLiteral)
                                  (eq? cat 'StringLiteral)))))
            tok)))))

(define (tokenize port)
  (let ((next (make-tokenizer port)))
    (let lp ((out '()))
      (let ((tok (next)))
        (if (eq? tok '*eoi*)
            (reverse! out)
            (lp (cons tok out)))))))

(define (tokenize/1 port)
  (let ((next (make-tokenizer/1 port)))
    (let lp ((out '()))
      (let ((tok (next)))
        (if (eq? tok '*eoi*)
            (reverse! out)
            (lp (cons tok out)))))))

© 2025 GrazzMean