summaryrefslogtreecommitdiff
path: root/pict
diff options
context:
space:
mode:
Diffstat (limited to 'pict')
-rw-r--r--pict/sxml.scm180
1 files changed, 180 insertions, 0 deletions
diff --git a/pict/sxml.scm b/pict/sxml.scm
new file mode 100644
index 0000000..99628bd
--- /dev/null
+++ b/pict/sxml.scm
@@ -0,0 +1,180 @@
+;;; pict.scm --- A simple picture language for Guile
+
+;;; Copyright © 2019 Ricardo Wurmus <rekado@elephly.net>
+;;;
+;;; This library is free software; you can redistribute it and/or
+;;; modify it under the terms of the GNU Lesser General Public License
+;;; as published by the Free Software Foundation; either version 3 of
+;;; the License, or (at your option) any later version.
+;;;
+;;; This library is distributed in the hope that it will be useful,
+;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;;; Lesser General Public License for more details.
+;;;
+;;; You should have received a copy of the GNU Lesser General Public
+;;; License along with this library. If not, see
+;;; <http://www.gnu.org/licenses/>.
+
+;;; Commentary:
+;;;
+;;; This is an adaptation of Guile's xml->sxml from (sxml simple), as
+;;; released in version 2.2.4. The difference is in the handling of
+;;; namespaces, which is important for round trips XML->SXML->XML
+;;; again. For more information see Guile bug 20339
+;;; <https://bugs.gnu.org/20339>
+
+(define-module (pict sxml)
+ #:use-module (sxml simple)
+ #:use-module (sxml ssax input-parse)
+ #:use-module (sxml ssax)
+ #:use-module (sxml transform)
+ #:use-module (ice-9 match)
+ #:use-module (srfi srfi-1)
+ #:use-module (srfi srfi-13)
+ #:export (xml->sxml))
+
+(define* (xml->sxml #:optional (string-or-port (current-input-port)) #:key
+ (namespaces '())
+ (declare-namespaces? #t)
+ (trim-whitespace? #f)
+ (entities '())
+ (default-entity-handler #f)
+ (doctype-handler #f))
+ "Use SSAX to parse an XML document into SXML. Takes one optional
+argument, @var{string-or-port}, which defaults to the current input
+port."
+ ;; NAMESPACES: alist of PREFIX -> URI. Specifies the symbol prefix
+ ;; that the user wants on elements of a given namespace in the
+ ;; resulting SXML, regardless of the abbreviated namespaces defined in
+ ;; the document by xmlns attributes. If DECLARE-NAMESPACES? is true,
+ ;; these namespaces are treated as if they were declared in the DTD.
+
+ ;; ENTITIES: alist of SYMBOL -> STRING.
+
+ ;; NAMESPACES: list of (DOC-PREFIX . (USER-PREFIX . URI)).
+ ;; A DOC-PREFIX of #f indicates that it comes from the user.
+ ;; Otherwise, prefixes are symbols.
+ (define (munge-namespaces namespaces)
+ (map (lambda (el)
+ (match el
+ ((prefix . uri-string)
+ (cons* (and declare-namespaces? prefix)
+ prefix
+ (ssax:uri-string->symbol uri-string)))))
+ namespaces))
+
+ (define (user-namespaces)
+ (munge-namespaces namespaces))
+
+ (define (user-entities)
+ (if (and default-entity-handler
+ (not (assq '*DEFAULT* entities)))
+ (acons '*DEFAULT* default-entity-handler entities)
+ entities))
+
+ (define (name->sxml name namespaces)
+ (match name
+ ((prefix . local-part)
+ (let ((abbrev (and=> (find (match-lambda
+ ((abbrev uri . rest)
+ (and (eq? uri prefix) abbrev)))
+ namespaces)
+ first)))
+ (symbol-append abbrev (string->symbol ":") local-part)))
+ (_ name)))
+
+ (define (doctype-continuation seed)
+ (lambda* (#:key (entities '()) (namespaces '()))
+ (values #f
+ (append entities (user-entities))
+ (append (munge-namespaces namespaces) (user-namespaces))
+ seed)))
+
+ ;; The SEED in this parser is the SXML: initialized to '() at each new
+ ;; level by the fdown handlers; built in reverse by the fhere parsers;
+ ;; and reverse-collected by the fup handlers.
+ (define parser
+ (ssax:make-parser
+ NEW-LEVEL-SEED ; fdown
+ (lambda (elem-gi attributes namespaces expected-content seed)
+ '())
+
+ FINISH-ELEMENT ; fup
+ (lambda (elem-gi attributes namespaces parent-seed seed)
+ (let ((seed (if trim-whitespace?
+ ((@@ (sxml simple) ssax:reverse-collect-str-drop-ws) seed)
+ ((@@ (sxml simple) ssax:reverse-collect-str) seed)))
+ (attrs (append
+ ;; Namespace declarations
+ (filter-map (match-lambda
+ (('*DEFAULT* . _) #f)
+ ((abbrev uri . _)
+ (list (symbol-append 'xmlns: abbrev)
+ (symbol->string uri))))
+ namespaces)
+ (attlist-fold
+ (lambda (attr accum)
+ (cons (list (name->sxml (car attr) namespaces)
+ (cdr attr))
+ accum))
+ '() attributes))))
+ (acons (name->sxml elem-gi namespaces)
+ (if (null? attrs)
+ seed
+ (cons (cons '@ attrs) seed))
+ parent-seed)))
+
+ CHAR-DATA-HANDLER ; fhere
+ (lambda (string1 string2 seed)
+ (if (string-null? string2)
+ (cons string1 seed)
+ (cons* string2 string1 seed)))
+
+ DOCTYPE
+ ;; -> ELEMS ENTITIES NAMESPACES SEED
+ ;;
+ ;; ELEMS is for validation and currently unused.
+ ;;
+ ;; ENTITIES is an alist of parsed entities (symbol -> string).
+ ;;
+ ;; NAMESPACES is as above.
+ ;;
+ ;; SEED builds up the content.
+ (lambda (port docname systemid internal-subset? seed)
+ (call-with-values
+ (lambda ()
+ (cond
+ (doctype-handler
+ (doctype-handler docname systemid
+ (and internal-subset?
+ ((@@ (sxml simple) read-internal-doctype-as-string) port))))
+ (else
+ (when internal-subset?
+ (ssax:skip-internal-dtd port))
+ (values))))
+ (doctype-continuation seed)))
+
+ UNDECL-ROOT
+ ;; This is like the DOCTYPE handler, but for documents that do not
+ ;; have a <!DOCTYPE!> entry.
+ (lambda (elem-gi seed)
+ (call-with-values
+ (lambda ()
+ (if doctype-handler
+ (doctype-handler #f #f #f)
+ (values)))
+ (doctype-continuation seed)))
+
+ PI
+ ((*DEFAULT*
+ . (lambda (port pi-tag seed)
+ (cons
+ (list '*PI* pi-tag (ssax:read-pi-body-as-string port))
+ seed))))))
+
+ (let* ((port (if (string? string-or-port)
+ (open-input-string string-or-port)
+ string-or-port))
+ (elements (reverse (parser port '()))))
+ `(*TOP* ,@elements)))