Парсер XML

Парсер XML

6.7.2016 18:38

Это парсер XML-документов, написанный на Libretto и использующий библиотеки com/teacode/xrunner (навигация и построение XML-документов) и com/teacode/sinput (парсинг текстов).

use com/teacode/sinput as i
use com/teacode/xrunner as x

use libretto/text
use libretto/util

// Reports a parse error through error().
// The report is built from this.top, this.left and this.ch of the input
// (presumably the current line, column and character - confirm against
// com/teacode/sinput), followed by the caller-supplied text.
//   msg - short text or code describing what went wrong
def i/Input err(msg:String!) = {
  fix report = <<%{this.top}/%{this.left}:[%{this.ch}] %{msg}>>!
  error(report)
}

// Parses one complete element (opening tag, contents, closing tag) from the
// input and attaches it to `parent`.
//   parent - element that receives the newly parsed element
// Returns whatever parent.add(...) yields for the new element.
// Calls err() when the opening and closing tag names differ.
def i/Input element(parent:x/Elem!):x/Elem! = {
  // The element is created with an empty name; the real name is only known
  // once the opening tag has been read.
  fix node = x/elem("")
  fix opened = this.opentag(node)

  node.name( opened )

  // contents() consumes everything up to and including the closing tag and
  // hands back the name found in that closing tag.
  fix closed = this.contents(node)
  if (opened != closed)
    this.err('namesnotcoincide + "-" + opened + "/" + closed?(""))

  parent.add(node)
}

// Parses an opening tag: "<", the tag name, zero or more attributes, ">".
//   parent - element that receives the attributes found inside the tag
// Returns the tag name.
// Calls err() when "<", the tag name or ">" is missing.
def i/Input opentag(parent:x/Elem!):String! = {
  // The result of reading "<" used to be ignored, so input that did not
  // start with "<" was silently accepted as a tag. It is now checked the
  // same way as the closing ">" below.
  if (not this.read("<")) this.err('opentaglangle)
  this.skipspaceln
  fix name = if (this.readid("-")) as n {n} else this.err('noname)
  // NOTE(review): probably unreachable, because the line above already
  // fails when no name was read - confirm before removing.
  if (not name) this.err('opentagname)
  this.skipspaceln

  // Attributes are read for as long as this.letter holds
  // (presumably "the current character is a letter" - confirm).
  util/while (this.letter): this.attribute(parent)

  if (not this.read(">")) this.err('opentagrangle)

  name
}

// Parses the contents of an element: character data and nested elements,
// up to and including the closing tag.
//   parent - element that receives the text and the child elements
// Returns the name read from the closing tag that ends these contents.
// Calls err() when the input ends before a closing tag is found.
def i/Input contents(parent:x/Elem!): String! = {
  // Collect characters one by one until the next "<" or i/eof.
  var chunk: String*
  util/while(not this.ch in ("<", i/eof)): {
    chunk += this.ch
    this.next
  }
  if (this.ch == i/eof) this.err('unexpectedend)

  // The collected text is added to the parent even when nothing was read.
  parent.add(chunk.*text/join)

  // Either the closing tag follows, which ends the recursion, or a child
  // element does; after a child, the remaining contents of the same parent
  // are parsed.
  if (this.closetag) as closing {closing}
  else {
    this.element(parent)
    this.contents(parent)
  }
}

// Tries to parse a closing tag: "</", the tag name, ">".
// Returns the tag name, or an empty result when the input does not continue
// with "</".
// Calls err() when the name or the final ">" is missing.
def i/Input closetag:String? = {
  // Not a closing tag at all: signal that with an empty result.
  if (not this.readstring("</")) return ()
  this.skipspaceln
  fix closing = this.readid("-")
  if (not closing) this.err('closetagname)
  this.skipspaceln

  if (not this.read(">")) this.err('closetagrangle)

  closing
}

// Parses one attribute of the form key="value" and stores it on `parent`.
//   parent - element that receives the attribute through addAttr
// Calls err() when the key, the "=" sign or the opening quote is missing.
// NOTE(review): tag names are read with readid("-"), while keys use plain
// readid - confirm whether this difference is intended.
def i/Input attribute(parent:x/Elem!) = {
  fix attrKey = if (this.readid) as k {k} else this.err('nokey)
  this.skipspaceln
  if (not this.read("=")) this.err('attribute)
  this.skipspaceln
  // Only a double quote is accepted as the start of the value.
  if (not this.read(<<">>)) this.err('attrvalue)
  // this.string reads the rest of the value, including the closing quote.
  fix attrValue = this.string
  parent.addAttr(attrKey, attrValue)
}

// Reads the remainder of a double-quoted string; the opening quote has
// already been consumed by the caller.
// Returns the characters before the closing quote, joined into one string.
// Calls err() when i/eol or i/eof is reached before the closing quote.
def i/Input string = {
  var chars:String*
  util/while (not this.ch in (<<">>, i/eol, i/eof)): {
    chars += this.ch
    this.next
  }
  // The loop stops on a quote, i/eol or i/eof; only a quote is valid here.
  if (this.ch != <<">>) this.err('string)
  // Step over the closing quote and whatever skipspaceln skips after it.
  this.next.skipspaceln
  chars.*text/join
}


// PROGRAM ENTRY POINT
// Parses the sample document `xml` into a fresh element named 'root and
// yields the `string` form of what element() returns.
def main = {
  fix root = x/elem('root)
  // skipspaceln is applied before parsing; the sample document begins with
  // a line break in front of the first tag.
  fix source = i/Input(xml).skipspaceln
  source.element(root).string
}



// Sample document parsed by main: a CATALOG of four PLANT entries.
// Some COMMON elements carry attributes (lang, ha), and two of the values
// contain Cyrillic text.
def xml =
<<
<CATALOG>
  <PLANT>
    <COMMON lang="en">Bloodroot</COMMON>
    <COMMON lang="ru" ha="115">Лапчатка</COMMON>
    <BOTANICAL>Sanguinaria canadensis</BOTANICAL>
    <ZONE>4</ZONE>
    <LIGHT>Mostly Shady</LIGHT>
    <PRICE>2.44</PRICE>
    <AVAILABILITY>031599</AVAILABILITY>
  </PLANT>
  <PLANT>
    <COMMON>Columbine</COMMON>
    <BOTANICAL>Aquilegia canadensis</BOTANICAL>
    <ZONE>3</ZONE>
    <LIGHT>Mostly Shady</LIGHT>
    <PRICE>9.37</PRICE>
    <AVAILABILITY>030699</AVAILABILITY>
  </PLANT>
  <PLANT>
    <COMMON lang="en">Marsh Marigold</COMMON>
    <COMMON lang="ru">Калужница болотная</COMMON>
    <BOTANICAL>Caltha palustris</BOTANICAL>
    <ZONE>4</ZONE>
    <LIGHT>Mostly Sunny</LIGHT>
    <PRICE>6.81</PRICE>
    <AVAILABILITY>051799</AVAILABILITY>
  </PLANT>
  <PLANT>
    <COMMON>Cowslip</COMMON>
    <BOTANICAL>Caltha palustris</BOTANICAL>
    <ZONE>4</ZONE>
    <LIGHT>Mostly Shady</LIGHT>
    <PRICE>9.90</PRICE>
    <AVAILABILITY>030699</AVAILABILITY>
  </PLANT>
</CATALOG>
>>

// Second sample document: a single PLANT whose contents mix plain text
// (aaa, bbb, ccc) with child elements.
// Not referenced by main in the visible source.
def xml1 =
<<
<CATALOG>
  <PLANT>
    aaa
    <COMMON lang="en">Bloodroot</COMMON>
    <BOTANICAL>Sanguinaria canadensis</BOTANICAL>
    bbb
    <ZONE>4</ZONE>
    ccc
  </PLANT>
</CATALOG>
>>