Here's a simple example of an XML parser written in AFL, and its use:
# xmlparsers.afl

include "patterns.afl";

# This sample illustrates the use of different kinds of XML parsers:
# "push", "pull" and "DOM(-like)", and how a single parser implementation
# can be used in all three modes given appropriate language facilities.

# ----------------------------------------------------------------------

# This is a very primitive XML parser.  It's written as a "push" parser.
# That is, it invokes a client-provided method (passed as "yield") for
# each XML event encountered.  Note that events can be yielded when
# deeply nested within the XML parser, making it difficult to use it
# as a "pull" parser if written in most conventional programming languages.
# (It would have been easy to put all the "yield" calls at the parser's
# top level in such a small and simple parser, but I've nested the calls
# to make its implementation reflect the implementation of real-world
# push parsers.  The point being made here is that how the parser is
# implemented doesn't impact how it can be used.)

# Note that pattern matching is used extensively to simplify the task
# of parsing.  Although the XML parser is provided only with string
# input in the sample, it equally well accepts a file or other streaming
# input.

# Character sets (plain strings, turned into patterns with "anyOf") and
# the patterns built from them.

def xmlLetter: "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
def xmlDigit: "0123456789";

# A name: one letter followed by any number of letters, digits, "-", "." or "_".
def xmlName: anyOf xmlLetter &: anyOf (xmlLetter ++ "-._" ++ xmlDigit):*;

# One or more space, tab, CR or LF characters.
def xmlWs: anyOf " \{tab}\{cr}\{lf}":+;

# One or more characters that the displayer can show as-is inside quotes.
def xmlData : anyOf (xmlLetter ++ xmlDigit ++ " ~`!@#$%^*()_-+={}|[]\\:;',>.?/"):+;

# xmlParser (sourceText) (yield)
#
#   sourceText  the input handed to "withSource" for pattern matching.
#   yield       called once per event with a single tuple argument.
#
# Events passed to "yield", identified by their first field:
#   '("startTag", name, attributes)     attributes is a hashtable of name -> value
#   '("emptyTag", name, attributes)     for <name ... />
#   '("endTag", name)                   yielded after the content of a startTag
#   '("entity", name)                   for &name;
#   '("numericCharacter", number)       for &#digits;
#   '("comment", text)                  for <!--text-->
#   '("processingInstruction", text)    for <?text?>
#   '("data", text)                     a run of characters other than "<" and "&"
#   '("error", message)                 parsing continues after an error
#   '("done")                           always the last event

def xmlParser (sourceText) (yield):
{
   # Parses content up to the end tag matching "elementName", or up to
   # end of input when "elementName" is nil (the top level).
   def parseContent (elementName):
      loop (exit):
         if ~: ="<" &: xmlName :> "gi" then
         {
            # Start tag or empty tag: collect name="value" / name='value'
            # attributes.  A value may contain the other kind of quote.
            def args: hashtable ();
            while ~: xmlWs &: xmlName :> "aname" &: xmlWs:? &: ="=" &: xmlWs:?
                     &: anyOf "\"'" :> "quote"
                     &: (noneOf "\"'":+ |: !: another "quote" &: anyOf "\"'"):* :> "avalue"
                     &: another "quote" do
               args [matched ["aname"]] = matched ["avalue"];
            ~: xmlWs:?;
            def tag : '((if ~: ="/" then "emptyTag" else "startTag"), matched ["gi"], args);
            if ! (~: =">") then
               yield ('("error", "missing > on <\{matched ["gi"]} ..."));
            # The tag is reported even when its closing ">" was missing.
            yield (tag);
            if tag.1 =="startTag" then
            {
               # The nested call returns once the matching end tag (or end
               # of input) is reached; the endTag event is issued here.
               parseContent (tag.2);
               yield ('("endTag", tag.2))
            };
         }
         else if ~: ="</" &: xmlName :> "gi" &: xmlWs:? &: =">" then
            if matched ["gi"] == elementName then
               exit '{}
            else
               yield ('("error", "out-of-place end tag: </\{matched ["gi"]}>"))
         else if ~: ="&" &: xmlName :> "entityName" &: =";" then
            yield ('("entity", matched ["entityName"]))
         # A numeric character reference is "&#", one or more digits, ";".
         # ("xmlDigit" is a plain string, so it has to be wrapped in "anyOf".)
         else if ~: ="&#" &: anyOf xmlDigit:+ :> "number" &: =";" then
            yield ('("numericCharacter", toNumber matched ["number"]))
         else if ~: ="<!--" &: (noneOf "-":+ |: ="-" &: !: ="->"):* :> "comment" &: ="-->" then
            yield ('("comment", matched ["comment"]))
         # A processing instruction is terminated by "?>", not by "-->".
         else if ~: ="<?" &: (noneOf "?":+ |: ="?" &: !: =">"):* :> "pi" &: ="?>" then
            yield ('("processingInstruction", matched ["pi"]))
         else if ~: anyOf "<&" then
            # A "<" or "&" that didn't start any of the constructs above.
            yield ('("error", "found unmached \"\{matched []}\""))
         else if ~: noneOf "<&":+ then
            yield ('("data", matched []))
         else
         (
            # Nothing matched at all: end of input.
            if elementName != nil then
               yield ('("error", "missing </\{elementName}>"));
            exit '{}
         );

   withSource sourceText do
      parseContent (nil);
   yield ('("done"))
};

# A simple displayer, just so that it's easy to see what's produced
# by the XML parser.
# Prints each node of "nodeSet" on its own line, prefixed by " " ** depth
# and the node's first field (its kind).
#   field 2, if present, is scanned as text: runs matching "xmlData" are
#            shown in quotes, any other character as "#" and its "ord" value;
#   field 3, if present, is iterated as key/value pairs, one line per pair;
#   field 4, if present, is displayed recursively as a nested node set.
def displayNodeSubset (nodeSet, depth):
   for each nodeSet do (node):
   {
      printinline " " ** depth ++ node.1;
      if node hasField "2" then
      (
         printinline ":";
         withSource node.2 do
            while ~: ?: any do
               printinline if ~: xmlData then " \"\{matched []}\""
                           else " #\{ord (+: any)}";
      );
      print "";
      if node hasField "3" then
         for each node.3 do (k, v):
            print " " ** depth + 2 ++ "\"\{k}\"=\"\{v}\"";
      if node hasField "4" then
         displayNodeSubset (node.4, depth + 1);
   };

# Displays a single event by wrapping it in a one-element list.
# NOTE(review): "[180 event]" presumably declares a prefix-style parameter
# (it is invoked below as "displayEvent event") -- confirm the meaning of 180.
def displayEvent [180 event]:
   displayNodeSubset ('[event], 0);

# Displays a whole node set starting at depth 0.
def displayNodeSet (nodeSet):
   displayNodeSubset (nodeSet, 0);

# Prints a heading for each numbered example, preceded by an empty line.
# NOTE(review): the string literal here was unbalanced in the source
# (print " "Case ...); the empty line before the heading is a reconstruction.
def case [180 n]:
{
   print "";
   print "Case #\{n}:"
};

# ----------------------------------------------------------------------

# Now for using the parser.

# The issues of how processing is associated with individual XML events,
# method selection, rule selection, simple testing etc., are not dealt
# with in these implementations.  Note that any such technique can be
# applied in all of the cases.

# The document parsed by every case below.
# NOTE(review): the surrounding quotes were missing in the source and have
# been restored; the whitespace between elements may originally have been
# line breaks.
def xmlDocument: "<doc><title>title text</title> <para>para text</para> <para>more para text</para> </doc>";

case 1;

# As a "push" parser (a.k.a. "SAX-like").  The XML parser is
# passed "processEvent" to be called when each XML event is encountered:

{
   def processEvent (event):
      displayEvent event;
   xmlParser (xmlDocument) (processEvent);
};

case 2;

# As a "generator" of XML events:

for xmlParser (xmlDocument) do (event):
   displayEvent event;

case 3;

# As a "pull" parser.  Each invocation of "pullParser" returns
# the next event.  In this example, the end of parsing is indicated
# by the parser returning a "done" event.

{
   def pullParser : ungenerate (xmlParser (xmlDocument), nil);
   loop (exit):
   {
      def event : pullParser (nil).1;
      displayEvent event;
      if event.1 == "done" then
         exit '{};
   };
};

case 4;

# As a "pull" parser again, with the parser exiting to "exit" once
# it's finished parsing (i.e. after the "done" event is returned).
# The second argument given to "ungenerate" here is "exit", the name bound
# by the enclosing "catch"; the loop itself never terminates on its own.
catch exit:
{
   def pullParser : ungenerate (xmlParser (xmlDocument), exit);
   while true do
      displayEvent pullParser (nil).1;
};

case 5;

# As a "pull" parser yet again, with the parser signalling an
# exception when it's finished.

# Here the second argument to "ungenerate" is a method that passes
# '{"EndDocument"} to the current exception handler; the "except" clause
# below prints whatever it receives.
try
{
   def handler : exceptionHandler;
   def pullParser : ungenerate (xmlParser (xmlDocument),
                                '{}: handler '{"EndDocument"});
   while true do
      displayEvent pullParser (nil).1
}
except (e):
   print ">> exception: " ++ e;

case 6;

# As a "DOM-like" parser, returning a tree-like data structure
# representing the whole parsed document.  "captureXMLDocument" uses
# the "push" parser and builds a tree of what it pushes:

{
   def captureXMLDocument (sourceText):
   {
      # Stack of open elements.  It starts with a "dummy" element whose
      # fourth field collects the top-level nodes of the document.
      def startTagStack : '['("startTag", "dummy", arraylist (0), arraylist (0))];

      # Event callback handed to the push parser.
      # NOTE(review): "x [] = v" appears to append to x and "*x []" to read
      # its most recently added item -- confirm against the language reference.
      def captureNodes (event):
         if event.1 == "startTag" then
            # Open a new element with an empty child list as its fourth field.
            startTagStack [] = '("startTag", event.2, event.3, arraylist (0))
         else if event.1 == "endTag" then
         {
            # Close the current element and attach it to its parent's children.
            def e : *startTagStack [];
            startTagStack [].remove ();
            (*startTagStack []).4 [] = e;
         }
         else
            # Every other event (including "emptyTag", "error" and the final
            # "done") is stored as a child of the current element.
            (*startTagStack []).4 [] = event;

      xmlParser (sourceText) (captureNodes);

      # The result is the child list of whatever is on top of the stack
      # once parsing has finished.
      (*startTagStack []).4
   };

   displayNodeSet (captureXMLDocument (xmlDocument));
};

case 7;

# As a "DOM-like" parser again, but this time "captureXMLDocument" uses
# the XML parser as a "pull" parser.  Implementation is a bit cleaner
# than when using the "push" parser.

{
   def captureXMLDocument (sourceText):
   {
      def pullParser : ungenerate (xmlParser (sourceText), nil);

      # Collects events into a node set until an "endTag" or "done" event
      # is pulled; neither of those two events is stored.  A "startTag"
      # event is stored with a fourth field holding the node set gathered
      # by a recursive call.
      def captureNodeSet ():
      {
         def nodeSet : arraylist (0);
         loop (exit):
         {
            def event : pullParser (nil).1;
            if event.1 == "endTag" || event.1 == "done" then
               exit '{};
            nodeSet [] = if event.1 == "startTag" then
                            '("startTag", event.2, event.3, captureNodeSet ())
                         else
                            event;
         };
         nodeSet
      };

      captureNodeSet ();
   };

   displayNodeSet (captureXMLDocument (xmlDocument));
};
17 November 2005