Текст пузомерки на Лисп - dim13/lor GitHub Wiki
Программа писана больше для личного употребления, поэтому у нее практически нет интерфейса. Файл для подсчета захардкоден в самом конце, в виде пути к файлу типа "advlib.lisp".
Для разбора различных языков используются макросы типа make-lisp-classifier, использование конкретного макроса тоже хардкодидся в последней строке.
Примечание: текст слегка не влез по ширине, поэтому советую скопипастить его для изучения и/или использования. Если кто-нибудь отформатирует, буду очень благодарен --Евгений Косенко 05-Jul-2008 17:25 MSD
(require 'lexer)
(in-package lexer)
(defun lex-class (class-def)
(mapcar
#'(lambda (regex) `(,regex (return (values ,(car class-def) %0))))
(cdr class-def)))
(defmacro defclassifier (name class-list skip-list)
`(deflexer ,name
:flex-compatible
,@(mapcan #'lex-class class-list)
,@(mapcar #'list skip-list)))
(defclassifier make-ocaml-classifier
((:keyword
.("!=" "%" "&" "&&" "," ":" "::" ";" ";;" "<" "<\\-" "=" ">" ">=" "@" "Array"
"List" "Queue" "\\(" "\\)" "\\*" "\\+" "\\-" "\\->" "\\." "\\[" "\\]" "\\{"
"\\|" "\\|\\|" "\\}" "assoc" "create" "do" "done" "else" "flush" "for" "fun"
"function" "if" "in" "iter" "length" "let" "loop" "make_matrix" "map" "not"
"pop" "push" "raise" "then" "to" "true" "try" "with" "filter" "make" "init"
"string" "String" "index" "ignore" "rec" "open" "val" "bool" "int" "array"
"when" "copy"))
(:meanword
.("\"[^\"]*\"" "'[^']*'" "(-)?[0-9]+" "[A-Za-z_][A-Za-z_]*(')?")))
;skipword
("//.*$" "[:space:]+"))
(defclassifier make-c++-classifier
((:keyword
.("!=" "#include" "%" "&" "," ":" "::" ";" "<" "<<" "=" ">" ">=" ">>" "\\("
"\\)" "\\*" "\\+" "\\+\\+" "\\->" "\\." "\\[" "\\]" "\\{" "\\|\\|" "\\}"
"bool" "class" "const" "for" "if" "int" "namespace" "operator" "return" "std"
"struct" "try" "typedef" "using" "void"))
(:meanword
.("\"[^\"]*\"" "(-)?[0-9]+" "[A-Za-z_][A-Za-z_]*")))
;skipword
("//.*$" "[:space:]+"))
(defclassifier make-python-classifier
((:keyword
.("\\|" "\\&" "\\\\" "\\+=" "==" "\\-" "\\{" "\\}" "!=" "%" "," ":" "<" "=" ">" ">=" "\\(" "\\)" "\\*" "\\+" "\\."
"\\[" "\\]"
"__class__" "__init__" "__name__" "__unicode__" "append" "class" "cond" "def"
"dict" "except" "for" "if" "import" "in" "join" "lambda" "len" "list" "print"
"return" "setattr" "try" "unicode" "yield" "raise" "or" "pass" "xrange" "__str__" "break"
"None" "is" "while" "has_key" "from"))
(:meanword
.("`[^']+`" "\"[^\"]*\"" "(-)?[0-9]+" "[A-Za-z_][A-Za-z_]*" "u?'[^']*'")))
;skipword
("#.*$" "[:space:]+"))
(defclassifier make-lisp-classifier
((:keyword
.("&body" "#\\." "\\-" "#[cC]" "1-" "\\." "#'" "%" "&key" "&optional" "'" "," ",@" "/" "1\\+" ":" ":test" "<" "<>" "="
">" ">=" "\\(" "\\)" "\\*" "\\+" "`" "and" "break" "caar" "cond" "defun" "eq"
"equal" "gethash" "if" "in-package" "lambda" "length" "list" "make-hash-table"
"make-instance" "not" "print" "progn" "push" "setf" "setq" "slot-value"
"string=" "t" "terpri" "append" "reverse" "cadr" "min" "reduce" "assoc" "throw"
"mapcan" "remove-if" "defmacro" "remove" "pairlis" "defconstant" "across" "read-line" "listen"
"while" "upfrom" "with-open-file" "finally" "format" "princ" "complex" "let" "do" "to" "from" "for"
"loop" "eql" "member" "null" "or" "imagpart" "realpart" "cons" "car" "cdr" "nil" "defvar"
"eval" "eval" "mapcar" "&rest" "read" "with-open-stream" "return" "unless" "read-from-string"
"string" "concatenate" "remove-dublicates" "#c" ":conc-name" "defctruct" "max" "case" "require"
"initially" "remove-duplicates" "quote" ":count" "when" "upto" "\\(\\)" "count" "numberp"))
(:meanword
.("#\\\\." "\"[^\"]*\"" "(\\-)?[0-9]+" "[A-Za-z][A-Za-z0-9\\-\\+!]*" "[\\[]" "[\\]]")))
;skipword
(";.*$" "[:space:]+"))
(defun collect-stats (token-stream keywords meanwords trace-lex)
(multiple-value-bind
(class value) (funcall token-stream)
(cond ((eq class nil))
(t (cond ((eq class :keyword)
(setf (gethash value keywords) (1+ (gethash value keywords 0))))
((eq class :meanword)
(setf (gethash value meanwords) (1+ (gethash value meanwords 0))))
(t (throw :invalid-class (values nil class))))
(cond
(trace-lex
(prin1 class)
(prin1 " ")
(prin1 value)
(terpri)))
(collect-stats token-stream keywords meanwords trace-lex)))))
(defun hash-sum (hash)
(let ((sum 0))
(maphash #'(lambda (k v) (declare (ignore k)) (incf sum v)) hash)
sum))
(defun hash-keys (hash)
(let ((keys ()))
(maphash #'(lambda (k v) (declare (ignore v)) (push k keys)) hash) keys))
(defun % (part total)
(values (round (float (* 100 (/ part total))))))
(defun lo-stats (token-stream trace-lex show-classes)
(let
((keywords (make-hash-table :test #'equal)) (meanwords (make-hash-table :test #'equal)))
(collect-stats token-stream keywords meanwords trace-lex)
(cond
(show-classes
(princ "Keywords: ")
(prin1 (hash-keys keywords))
(terpri)
(princ "Meanwords: ")
(prin1 (hash-keys meanwords))
(terpri)))
(values
(hash-sum keywords)
(hash-sum meanwords)
(hash-table-count keywords)
(hash-table-count meanwords))))
(defun hi-stats (token-stream &key (trace-lex nil) (show-classes nil))
(multiple-value-bind
(keyword-length meaning-length keyword-thesaurus meaning-thesaurus)
(lo-stats token-stream trace-lex show-classes)
(let ((total-length (+ keyword-length meaning-length))
(total-thesaurus (+ keyword-thesaurus meaning-thesaurus)))
(list
(cons "Total length" total-length)
(cons "Meaning length" meaning-length)
(cons "Total thesaurus" total-thesaurus)
(cons "Meaning thesaurus" meaning-thesaurus)
(cons "Total saturation (%)" (% meaning-length total-length))
(cons "Thesaurus saturation (%)" (% meaning-thesaurus total-thesaurus))
(cons "Total expressiveness (%)" (% total-thesaurus total-length))
(cons "Meaning expressiveness (%)"
(if (> meaning-length 0) (% meaning-thesaurus meaning-length) 0))))))
(defun print-result (result-table)
(mapcar
#'(lambda (pair) (format t "~A~50T~7D~%" (car pair) (cdr pair)))
result-table))
(defun read-content (file-name)
(with-open-file
(s file-name)
(let
((data (make-string (file-length s))))
(read-sequence data s) data)))
(print-result (hi-stats (make-lisp-classifier (read-content "advworld.lisp"))))