#!/usr/bin/ruby
#
# Converts an XML article (XMLDOCPATH) plus an XML bibliography (BIBPATH)
# into three HTML files: Japanese (HTMLDOCPATH_J), English (HTMLDOCPATH)
# and a combined rendering (HTMLDOCPATH_EJ). See `main` at the bottom.
#
# Layout:
#   Obaq::Xml  - small DOM-like tree built from XMLParser events.
#   FowlerXml  - document-specific element classes and the HTML generator.

BIBPATH="./bib.xml"
XMLDOCPATH="./newMethodology.xml"
HTMLDOCPATH="./newMethodology.html"
HTMLDOCPATH_J="./newMethodology_j.html"
HTMLDOCPATH_EJ="./newMethodology_ej.html"

require 'xmlparser'
require 'kconv'
include Kconv
require 'uconv'
include Uconv
require 'obaq/htmlgen'

module Obaq
  module Xml
    # Base class of every tree node. The name is converted with
    # Uconv.u8toeuc when one is given.
    class Node
      attr_reader :name

      def initialize(parent, name)
        @parent = parent
        @name = Uconv.u8toeuc(name) if name
      end

      def to_s
        @name
      end

      # Leaf nodes have no children; Parent overrides this.
      def each_child(&block)
      end
    end

    # Character-data node. Its name is always "#TEXT".
    class Text < Node
      attr_reader :text

      def initialize(parent,text)
        super(parent, nil)
        @text = Uconv.u8toeuc(text)
      end

      def name
        "#TEXT"
      end

      def to_s
        @text
      end

      # Returns the text; tabs and newlines are removed when +trim+ is true.
      def get_text(trim=true)
        if trim
          @text.tr("\t\n", "")
        else
          @text
        end
      end

      def dump(indent=0)
        print @text
      end
    end

    # Node that owns children and a table of registered accessors.
    class Parent < Node
      def initialize(parent, name)
        super(parent, name)
        @children = []
        @attributes = {}
      end

      def add_child(child)
        @children << child
      end

      # Registers a dynamic accessor +name+ (a Symbol) that method_missing
      # resolves to a descendant node.
      #
      # xml_name:: a String gives the element name to search for; +true+
      #            means "use the accessor's own name".
      # block::    optional filter; a candidate is accepted only when the
      #            block returns true for it.
      #
      # The captured block is stored directly. The previous
      # `iterator?` / block-less `Proc.new` form raises on Ruby 3.0+.
      def register_attribute(name, xml_name=true, &block)
        @attributes[name] = [xml_name, block]
      end

      # Prints this node and its subtree, one "-" per nesting level.
      def dump(indent=0)
        print "-" * indent, to_s, "\n"
        @children.each {|c| c.dump(indent+1) }
      end

      # Depth-first search for the first descendant that is a +klass+,
      # whose name equals +name+ ignoring case (when given) and for which
      # +proc_+ returns true (when given). Returns nil when nothing matches.
      def find_child(klass, name=nil, proc_=nil)
        ret = nil
        @children.each {|c|
          found = true
          found = false unless c.kind_of?(klass)
          found = false if found and name and name.downcase != c.name.downcase
          found = false if found and proc_ and (not proc_.call(c))
          return c if found
          next unless c.kind_of?(Parent)
          ret = c.find_child(klass, name, proc_)
          break if ret
        }
        ret
      end

      def each_child(&block)
        @children.each {|c| yield(c) }
      end

      # Yields every descendant that is a +klass+ (and, when +name+ is
      # given, whose name matches ignoring case), recursing into matching
      # nodes and into every Parent.
      def each(klass=Element, name=nil, &block)
        @children.each {|c|
          case c
          when klass
            if name
              if name.downcase == c.name.downcase
                yield c
              end
            else
              yield c
            end
            c.each(klass, name, &block)
          when Parent
            c.each(klass, name, &block)
          else
            next
          end
        }
      end

      # Concatenated text of all children. +trim+ is now forwarded to the
      # children; previously it was ignored for them, so they were always
      # trimmed even when the caller passed false.
      def get_text(trim=true)
        ret = ""
        # NOTE(review): `data` is not defined anywhere in this file, so this
        # guard raises NoMethodError whenever `name` is nil (possible for
        # Ref, whose `name` is an XML attribute). Intent unclear - confirm.
        return data unless name
        @children.each {|c| ret += c.get_text(trim) }
        ret.tr!("\n\t","") if trim
        ret
      end

      # Resolves accessors declared with register_attribute.
      def method_missing(mid, *args)
        xml_name, proc_ = @attributes[mid]
        case xml_name
        when String
          find_child(Node, xml_name, proc_)
        when TrueClass
          find_child(Node, mid.id2name, proc_)
        else
          super
        end
      end
    end

    # XML element. @attributes is replaced by the attribute hash handed to
    # the constructor; accessors registered later are stored in that same
    # hash under Symbol keys, while get_attr/set_attr and method_missing
    # here look up String keys.
    class Element < Parent
      def initialize(parent, name, attr)
        super(parent, name)
        @attributes = attr
      end

      # Hook invoked by XmlParser after the element was pushed on the stack.
      def start_element(stack)
      end

      # Hook invoked by XmlParser just before the element is popped.
      def end_element
      end

      def get_attr(nm)
        @attributes[nm]
      end

      def set_attr(nm, val)
        @attributes[nm] = val
      end

      # An XML attribute named like the method wins; otherwise fall back to
      # the registered accessors handled by Parent#method_missing.
      def method_missing(mid, *args)
        val = @attributes[mid.id2name]
        if val
          val
        else
          super
        end
      end
    end

    # Builds a Node tree from the events produced by XMLParser.
    class XmlParser
      def initialize(io)
        @io = io
        @stack = []
        @root = nil
        @node_generator = {}
        @default_node_class = Element
      end

      def top
        @stack[-1]
      end

      def push(a)
        @stack.push(a) if a
      end

      def pop
        @stack.pop
      end

      # Reads the whole input into @contents. When the first line is an XML
      # declaration naming EUC-JP, Shift_JIS or ISO-2022-JP, the declaration
      # is rewritten to UTF-8 and the contents are converted with
      # euctou8 (going through kconv to EUC first for the latter two).
      def read
        @contents = @io.gets
        if @contents =~ /^<\?xml\sversion=.+\sencoding=.EUC-JP./i
          @contents.sub!(/EUC-JP/i, "UTF-8")
          encoding = 'EUC-JP'
        elsif @contents =~ /^<\?xml\sversion=.+\sencoding=.Shift_JIS./i
          @contents.sub!(/Shift_JIS/i, "UTF-8")
          encoding = "Shift_JIS"
        elsif @contents =~ /^<\?xml\sversion=.+\sencoding=.ISO-2022-JP./i
          @contents.sub!(/ISO-2022-JP/i, "UTF-8")
          encoding = "ISO-2022-JP"
        end
        @contents += @io.read
        if encoding == "EUC-JP"
          @contents = euctou8(@contents)
        elsif encoding == "Shift_JIS"
          @contents = euctou8(kconv(@contents, EUC, SJIS))
        elsif encoding == "ISO-2022-JP"
          @contents = euctou8(kconv(@contents, EUC, JIS))
        end
      end

      # Parses @contents and returns the root node. A parse error is
      # printed and whatever root exists so far (possibly nil) is returned.
      def parse
        parser = XMLParser.new
        def parser.default
        end
        begin
          parser.parse(@contents) do |type, name, data|
            process_token(type, name, data)
          end
        rescue XMLParserError
          line = parser.line
          print "Parse error(#{line}): #{$!}\n"
        end
        @root
      end

      # Dispatches one parser event; PI and other events are ignored.
      def process_token(type, name, data)
        case type
        when XMLParser::START_ELEM
          push(generate_node(name, data))
          top.start_element(@stack)
        when XMLParser::END_ELEM
          top.end_element()
          pop
        when XMLParser::CDATA
          generate_node(nil, data)
        when XMLParser::PI
          #data.gsub!("\n", "\\n")
          #print Uconv.u8toeuc("?#{name} #{data}\n")
        else
          #data.gsub!("\n", "\\n")
          #print Uconv.u8toeuc("//#{data}\n")
        end
      end

      # Creates an element (when +name+ is given, using the class
      # registered for that name or the default class) or a Text node,
      # attaches it to the element on top of the stack and records the
      # first node created as the root.
      def generate_node(name, data)
        parent = top
        if name
          klass = (@node_generator[name.downcase] or @default_node_class)
          newnode = klass.new(parent, name, data)
        else
          newnode = Text.new(parent, data)
        end
        parent.add_child(newnode) if parent
        @root = newnode unless @root
        newnode
      end

      # Element names are matched case-insensitively (stored downcased).
      def register_generator(name, generator_class)
        @node_generator[name.downcase] = generator_class
      end

      def dump
        @root.dump
      end
    end
  end
end

module FowlerXml
  include Obaq::Xml

  # Root[0] holds the most recently created Document.
  Root = []

  # Element whose language is its own "lang" attribute or, failing that,
  # the language of its nearest ancestor that has one (nil at the top).
  class Elem < Obaq::Xml::Element
    def lang
      if @attributes["lang"]
        @attributes["lang"]
      elsif @parent
        @parent.lang
      else
        nil
      end
    end
  end

  # Gives an object an anchor id ("A1", "A2", ...) drawn from a counter
  # shared by everything that includes this module.
  module Label
    ID = [1]

    def init_label
      @id = "A#{ID[0]}"
      ID[0] = ID[0] + 1
    end

    # Builds an <a href="path#id"> element around the block's content.
    def linkto(path = "", &block)
      #Obaq::HtmlGenLong::link(path + "#" + @id, &block)
      ret = E(:a, A(:href, path + "#" + @id), &block)
      ret
    end

    # Builds the <a name="id"> anchor that linkto points at.
    def html_label(&block)
      E(:a, A(:name, @id), &block)
    end
  end

  # Behaviour shared by Document and SectionElement: a list of
  # sub-sections, dotted numbering and index generation.
  module Section
    include Label
    include Obaq::HtmlGen
    def_tag :ul
    def_tag :li
    attr_reader :label, :level

    def init_sec
      @sections = []
      @parent_section = nil
      init_label
    end

    def add_section(sec)
      @sections << sec
    end

    # Assigns +label+/+level+ to this section and numbers the sub-sections
    # "1", "2", ... (prefixed with "<own label>." when this section has a
    # label), recursing with level + 1.
    def set_section_label(label=nil, level=0)
      @label = label
      @level = level
      i = 1
      @sections.each { |s|
        if @label
          label = @label + "." + i.to_s
        else
          label = i.to_s
        end
        s.set_section_label(label, level+1)
        i = i + 1
      }
    end

    # Yields (depth, section) for every descendant section, depth first.
    def each_section(lvl=0, &block)
      @sections.each { |s|
        yield(lvl, s)
        s.each_section(lvl+1, &block)
      }
    end

    # Index entry of this section followed by a <ul> of its sub-sections.
    # NOTE(review): without sub-sections the `if` yields nil, so this is
    # `title + nil`; whether that is accepted depends on obaq/htmlgen.
    def html_index(lang)
      html_index_title(lang) +
        if @sections.size > 0
          ul { @sections.collect { |s| s.html_index(lang) } }
        end
    end
  end

  # Root element of the article (registered for the element name "XML").
  class Document < Elem
    include Section

    def initialize(parent ,name, attr)
      super(parent, name, attr)
      register_attribute(:title)
      register_attribute(:title_jp, "title") { |e| e.lang == "jp"}
      register_attribute(:author)
      register_attribute(:author_jp, "Author") { |e| e.lang == "jp"}
      register_attribute(:abstract)
      register_attribute(:abstract_jp, "abstract") { |e| e.lang == "jp"}
      register_attribute(:translation_note)
      register_attribute(:original)
      Root[0] = self
      init_sec
    end

    # Title text: the "jp" title for lang "jp", otherwise the first title.
    def doc_title(lang)
      if lang == "jp"
        title_jp.get_text
      else
        title.get_text
      end
    end

    # The document itself contributes no entry to the index.
    def html_index_title(lang)
      Obaq::HtmlGenLong::NullNode.new
    end
  end

  class Author < Elem
    def initialize(parent ,name, attr)
      super(parent, name, attr)
      # `name` is already the node-name reader, hence the a_name alias.
      register_attribute(:a_name, "name")
      register_attribute(:job)
      register_attribute(:org)
      register_attribute(:url)
    end
  end

  class TranslationNote < Elem
    def initialize(parent ,name, attr)
      super(parent, name, attr)
      register_attribute(:translator)
      register_attribute(:job)
      register_attribute(:email)
      register_attribute(:org)
      register_attribute(:url)
      register_attribute(:date)
      register_attribute(:version)
      register_attribute(:note)
    end
  end

  # <h> element; remembers the innermost enclosing Section on the parser
  # stack so the generator can derive the heading level and label.
  class HeaderElement < Elem
    attr_reader :section

    def start_element(stack)
      stack.reverse.each { |s|
        if s.kind_of?(Section)
          @section = s
          break
        end
      }
    end
  end

  # <section> element.
  class SectionElement < Elem
    include Section

    def initialize(parent ,name, attr)
      super(parent, name, attr)
      register_attribute(:h)
      register_attribute(:h_jp, "h") { |e| e.lang == "jp"}
      init_sec
    end

    # Heading text: the "jp" heading when lang is "jp" and one exists,
    # otherwise the first heading.
    def section_title(lang="jp")
      if lang == "jp" and h_jp
        h_jp.get_text
      else
        h.get_text
      end
    end

    # Registers this section with the nearest enclosing Section on the
    # parser stack, or with the current Document when there is none.
    def start_element(stack)
      stack.reverse.each { |s|
        if s.kind_of?(Section) and s != self
          @parent_section = s
          break
        end
      }
      if @parent_section
        @parent_section.add_section(self)
      else
        Root[0].add_section(self)
      end
    end

    # Index line for this section. For "both": label, linked default title
    # and the "en" title in parentheses. Single quotes are used inside the
    # interpolation because escaped double quotes (\"en\") are not valid
    # there on current Ruby versions.
    def html_index_title(lang)
      if lang == "both"
        li { [ "#{@label} " , linkto {section_title}, " (#{section_title('en')})" ] }
      else
        li { linkto {section_title(lang)} }
      end
    end
  end

  # Bibliography entry (<ref>). Collects the citations that point at it.
  class Ref < Elem
    include Label

    def initialize(parent ,name, attr)
      super(parent, name, attr)
      init_label
      register_attribute(:cite)
      register_attribute(:isbn)
      register_attribute(:author)
      register_attribute(:title)
      register_attribute(:pub)
      register_attribute(:date)
      register_attribute(:url){ |e| e.lang != "jp"}
      register_attribute(:url_j, "url") { |e| e.lang == "jp"}
      clear_reference
    end

    # The entry's key is its "name" XML attribute, not the element name.
    def name
      get_attr("name")
    end

    # Explicit url if present, else a bookshop URL built from the isbn,
    # else nil.
    def get_url
      if url
        url.get_text
      elsif isbn
        "http://www1.fatbrain.com/asp/bookinfo/bookinfo.asp?theisbn=#{isbn.get_text}"
      else
        nil
      end
    end

    def get_url_j
      if url_j
        url_j.get_text
      else
        nil
      end
    end

    def clear_reference
      @reference = []
    end

    def add_reference(ref)
      @reference << ref
    end

    def each_ref(&block)
      @reference.each(&block)
    end
  end

  # XmlParser preconfigured with the element classes above.
  class Parser < XmlParser
    def initialize(io)
      super
      register_generator("XML", Document)
      register_generator("author", Author)
      register_generator("translation_note", TranslationNote)
      register_generator("section", SectionElement)
      register_generator("h", HeaderElement)
      register_generator("ref", Ref)
      @default_node_class = Elem
    end
  end

  # Mirrors the XML tree with generator nodes and renders them to HTML
  # for one language ("jp", "en" or "both").
  class HtmlGenerator
    # Base generator: wraps one XML node and builds generators for all of
    # its children.
    class GeneratorNode
      attr_reader :xmlnode

      def initialize(generator, xmlnode, htmltag=nil)
        @generator = generator
        @xmlnode = xmlnode
        @htmltag = (htmltag or xmlnode.name)
        @children = []
        xmlnode.each_child { |xmlchild|
          @children << generator.make_generator(xmlchild)
        }
      end

      def lang
        @generator.lang
      end
    end

    # Emits nothing and does not descend into the node's children.
    class NullGenerator < GeneratorNode
      def initialize(generator, xmlnode)
        @generator = generator
        @xmlnode = xmlnode
        @children = []
      end

      def to_html
        nil
      end
    end

    class TextGenerator < GeneratorNode
      def to_html
        @xmlnode.get_text
      end
    end

    # Emits the children wrapped in the element's HTML tag.
    class ElemGenerator < GeneratorNode
      def to_html
        E(@htmltag) { @children.collect { |c| c.to_html } }
      end
    end

    # Emits the section's anchor followed by its children (no wrapper tag).
    class SectionGenerator < GeneratorNode
      def to_html
        [ xmlnode.html_label { " " } , @children.collect { |c| c.to_html } ]
      end
    end

    # Emits <hN>, N being the enclosing section's level + 1. Rendered when
    # the heading's language equals the output language, or in "both" mode
    # for every heading that is not "jp" (then prefixed with the section
    # label).
    class HeaderGenerator < ElemGenerator
      def to_html
        if xmlnode.lang == lang or (lang=="both" and xmlnode.lang != "jp")
          h = "h#{xmlnode.section.level+1}"
          E(h) {
            [
              if lang == "both"
                xmlnode.section.label
              end,
              @children.collect { |c| c.to_html }
            ]
          }
        else
          nil
        end
      end
    end

    # In "both" mode every paragraph is emitted and "jp" ones are followed
    # by <hr>; otherwise only paragraphs in the output language are emitted.
    class ParagraphGenerator < ElemGenerator
      def to_html
        if lang == "both"
          if xmlnode.lang == "jp"
            super + E(:hr)
          else
            super
          end
        else
          if xmlnode.lang == lang
            super
          else
            nil
          end
        end
      end
    end

    # Citation: registers itself with the bibliography entry named by the
    # "name" attribute and links both to that entry and to its URL.
    class CiteGenerator < ElemGenerator
      include Label

      def initialize(generator, xmlnode, htmltag=nil)
        super
        init_label
      end

      # Raises RuntimeError when the bibliography has no such entry.
      def to_html
        nm = xmlnode.get_attr("name")
        ref = @generator.bib[nm]
        raise "unknown ref #{nm}" unless ref
        ref.add_reference(self)
        #S { L(ref.get_url) { xmlnode.get_text } }
        #E(:a, A(:href, ref.get_url), A(:name, @id)){ xmlnode.get_text }
        html_label {
          ref.linkto { "*" } + L(ref.get_url) { xmlnode.get_text }
        }
      end
    end

    # Renders the bibliography as a table: entries with an isbn ("books")
    # or the remaining ones, selected by the node's "reftype" attribute.
    class ReferenceGenerator < ElemGenerator
      def initialize(generator, xmlnode, htmltag=nil)
        super
        @books = []
        @urls = []
      end

      # Splits the bibliography into entries with and without an isbn.
      # The lists are reset first so that calling to_html more than once
      # does not duplicate rows.
      def collect_urls
        @books = []
        @urls = []
        @generator.bib.each_value { |b|
          if b.isbn
            @books << b
          else
            @urls << b
          end
        }
      end

      def to_html
        collect_urls
        E(:div, A(:align, "center")) {
          if xmlnode.reftype == "books"
            to_html_books
          else
            to_html_urls
          end
        }
      end

      # NOTE(review): as written, the "reference" cell is built inside the
      # "url" cell, and when get_url_j is nil the `if` contributes nil to
      # the `+` chain. Kept as is; confirm against obaq/htmlgen.
      def to_html_books
        E(:table, A(:border, "1")) {
          E(:tr) {
            E(:th) { "title" } +
            E(:th) { "author" } +
            E(:th) { "isbn" } +
            E(:th) { "url" } +
            E(:th) { "reference" }
          } +
          @books.collect { |b|
            E(:tr) {
              E(:td) {
                b.html_label {
                  if b.title
                    b.title.get_text
                  else
                    b.name
                  end
                }
              } +
              E(:td) { text_or_space(b.author) } +
              E(:td) { text_or_space(b.isbn) } +
              E(:td) {
                L(b.get_url) { b.get_url } +
                if b.get_url_j
                  E(:br) + "(Japanese)" + L(b.get_url_j) { b.get_url_j }
                end +
                E(:td) {
                  i = 1
                  refs = []
                  b.each_ref { |r|
                    refs << r.linkto { "[#{i}] " }
                    i = i + 1
                  }
                  refs
                }
              }
            }
          }
        }
      end

      # Same structure as to_html_books, without author/isbn columns; the
      # NOTE(review) there applies here too.
      def to_html_urls
        E(:table, A(:border, "1")) {
          E(:tr) {
            E(:th) { "name" } +
            E(:th) { "url" } +
            E(:th) { "reference" }
          } +
          @urls.collect { |u|
            E(:tr) {
              E(:td) { u.html_label { u.name } } +
              E(:td) {
                L(u.get_url) { u.get_url } +
                if u.get_url_j
                  E(:br) + "(Japanese)" + L(u.get_url_j) { u.get_url_j }
                end +
                E(:td) {
                  i = 1
                  refs = []
                  u.each_ref { |r|
                    refs << r.linkto { "[#{i}] " }
                    i = i + 1
                  }
                  refs
                }
              }
            }
          }
        }
      end

      # Text of +e+, or an unescaped blank when +e+ is nil or has no text.
      def text_or_space(e)
        if e
          e.get_text or noescape { " " }
        else
          noescape { " " }
        end
      end
    end

    # Renders the whole page for a Document node.
    class DocGenerator < GeneratorNode
      # NOTE(review): the title is wrapped in a <header> element; HTML's
      # metadata container is <head>. Left unchanged - confirm intent.
      def to_html
        set_lang
        E(:html) {
          [
            E(:header) { header },
            E(:body) {
              [
                body_header,
                body_index,
                body,
                body_footer,
              ]
            }
          ]
        }
      end

      # Picks the author block for the output language; the translation
      # note is only used for "jp".
      def set_lang
        if lang == "jp"
          @author = xmlnode.author_jp
          @tr_note = xmlnode.translation_note
        else
          @author = xmlnode.author
        end
      end

      # <title> element; "both" uses the "jp" title.
      def header
        l = lang
        l = "jp" if l == "both"
        E(:title) { xmlnode.doc_title(l) }
      end

      # Title, link to the original (jp only), author line and abstract(s).
      def body_header
        [
          E(:hr),
          E(:br),
          E(:h1, A(:align, "center")) { xmlnode.doc_title(lang) },
          if lang == "jp"
            E(:div, A(:align, "center")) {
              E(:br) + "(original: " +
              L(xmlnode.original.url) { xmlnode.original.get_text } +
              ")" + E(:br)
            }
          end,
          E(:hr),
          E(:p, A(:align, "center")) {
            L(@author.url.get_text) { E(:big) { @author.a_name.get_text } } +
            E(:br) +
            @author.job.get_text + " , " +
            L(@author.org.href) { @author.org.get_text }
          },
          if lang == "jp"
            E(:p) { E(:i) { xmlnode.abstract_jp.get_text } }
          elsif lang == "en"
            E(:p) { E(:i) { xmlnode.abstract.get_text } }
          else
            E(:p) { E(:i) { xmlnode.abstract.get_text } } +
            E(:p) { E(:i) { xmlnode.abstract_jp.get_text } }
          end
        ]
      end

      def body_index
        xmlnode.html_index(lang)
      end

      def body
        @children.collect { |c| c.to_html }
      end

      # Copyright line, plus translator details when a translation note
      # was selected by set_lang.
      def body_footer
        E(:hr) +
        E(:small) {
          noescape { "Copyright " } +
          L(@author.url.get_text) { @author.a_name.get_text } +
          noescape { ", all rights reserved"} +
          if @tr_note
            E(:br) + "Japanese translation by " +
            L(@tr_note.email.get_text) { @tr_note.translator.get_text } +
            " ," +
            L(@tr_note.org.href) { @tr_note.org.get_text } +
            "(" + @tr_note.date.get_text + "version:" +
            @tr_note.version.get_text + ")" + E(:br) +
            @tr_note.note.get_text
          end
        }
      end
    end

    # HtmlGenerator
    attr_reader :lang, :bib

    # xmlroot:: root of the parsed document tree.
    # bib::     Hash of bibliography entries keyed by their name.
    # lang::    "jp", "en" or "both".
    def initialize(xmlroot, bib, lang="jp")
      @root = make_generator(xmlroot)
      @lang = lang
      @bib = bib
    end

    # Chooses the generator class for +xmlnode+. Raises RuntimeError for
    # objects that are none of Document, Section, Text or Elem.
    def make_generator(xmlnode)
      case xmlnode
      when Document
        DocGenerator.new(self, xmlnode)
      when Section
        SectionGenerator.new(self, xmlnode)
      when Text
        TextGenerator.new(self, xmlnode)
      when Elem
        if ["title", "author", "original", "abstract", "contents", "tbd", "translation_note"].include?(xmlnode.name)
          NullGenerator.new(self,xmlnode)
        elsif xmlnode.name.downcase == "h"
          # Exact match only: HeaderGenerator needs xmlnode.section, which
          # only the HeaderElement created for "h" provides. The former
          # /h/i test also caught any other name containing an "h" and
          # then failed with NoMethodError while rendering.
          HeaderGenerator.new(self,xmlnode)
        elsif xmlnode.name =~ /p/i
          ParagraphGenerator.new(self, xmlnode)
        elsif xmlnode.name =~ /cite/i
          CiteGenerator.new(self, xmlnode)
        elsif xmlnode.name =~ /reference/i
          ReferenceGenerator.new(self,xmlnode)
        elsif xmlnode.name =~ /quote/i
          ElemGenerator.new(self, xmlnode, "i")
        elsif xmlnode.name =~ /list/i
          ElemGenerator.new(self, xmlnode, "ul")
        elsif xmlnode.name =~ /item/i
          ElemGenerator.new(self, xmlnode, "li")
        else
          ElemGenerator.new(self, xmlnode)
        end
      else
        # Object#type no longer exists in current Ruby; #class is the
        # equivalent.
        raise "unknown class #{xmlnode.class}"
      end
    end

    def to_html
      @root.to_html
    end
  end
end

# Parses BIBPATH and returns a Hash mapping each <ref>'s name to its Ref.
def read_bib
  include Obaq::HtmlGen
  include FowlerXml
  ret = {}
  STDERR.print "reading #{BIBPATH}\n"
  File.open(BIBPATH) { |f|
    parser = Parser.new(f)
    parser.read
    doc = parser.parse
    doc.each(Ref) { |c|
      ret[c.name] = c
    }
  }
  ret
end

# Parses XMLDOCPATH and writes the "jp", "en" and "both" renderings.
# Citation back-references collected in the bibliography are cleared
# between renderings so each output starts from an empty list.
def main
  include Obaq::HtmlGen
  include FowlerXml
  bib = read_bib
  STDERR.print "reading #{XMLDOCPATH}\n"
  File.open(XMLDOCPATH) { |xmlfile|
    parser = Parser.new(xmlfile)
    parser.read
    doc = parser.parse
    doc.set_section_label
    STDERR.print "writing #{HTMLDOCPATH_J}\n"
    File.open(HTMLDOCPATH_J, "w") { |f|
      f.print PP { HtmlGenerator.new(doc, bib, "jp").to_html }
    }
    bib.each_value { |r| r.clear_reference }
    STDERR.print "writing #{HTMLDOCPATH}\n"
    File.open(HTMLDOCPATH, "w") { |f|
      f.print PP { HtmlGenerator.new(doc, bib, "en").to_html }
    }
    bib.each_value { |r| r.clear_reference }
    STDERR.print "writing #{HTMLDOCPATH_EJ}\n"
    File.open(HTMLDOCPATH_EJ, "w") { |f|
      f.print PP { HtmlGenerator.new(doc, bib, "both").to_html }
    }
  }
end

# Debug helper; not called by main.
# NOTE(review): html_index requires a lang argument, so the last line
# raises ArgumentError as written.
def test(doc)
  print doc.author.job.get_text," "
  print doc.author_jp.job.get_text,"\n"
  print doc.author.a_name.get_text," "
  print doc.author_jp.a_name.get_text,"\n"
  doc.each_section { |lvl, s|
    print "---" * lvl, s.section_title("jp"), "\n"
  }
  print PP { doc.html_index }
end

main