123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227 |
- require 'fileutils'
- require 'rexml/parsers/pullparser'
module DocBook
  # Converts a DocBook XML document into an .epub archive by shelling out to
  # xmllint (entity/XInclude collapsing), xsltproc (DocBook -> XHTML/OPF) and
  # zip (packaging). Intermediate files are tracked in @to_delete and removed
  # once rendering finishes, whether it succeeded or not.
  class Epub
    CHECKER = "epubcheck"
    STYLESHEET = File.expand_path(File.join(File.dirname(__FILE__), '..', '..', "docbook.xsl"))
    CALLOUT_PATH = File.join('images', 'callouts')
    CALLOUT_FULL_PATH = File.expand_path(File.join(File.dirname(__FILE__), '..', '..', '..', CALLOUT_PATH))
    CALLOUT_LIMIT = 15
    CALLOUT_EXT = ".png"
    XSLT_PROCESSOR = "xsltproc"
    OUTPUT_DIR = ".epubtmp#{Time.now.to_f.to_s}"
    MIMETYPE = "application/epub+zip"
    META_DIR = "META-INF"
    OEBPS_DIR = "OEBPS"
    ZIPPER = "zip"

    attr_reader :output_dir

    # docbook_file::        path to the DocBook XML source; must exist.
    # output_dir::          working directory the EPUB tree is generated into.
    # css_file::            optional stylesheet copied into the OEBPS directory.
    # customization_layer:: optional XSL file used instead of STYLESHEET.
    # embedded_fonts::      paths of font files to copy into the OEBPS directory.
    #
    # Raises ArgumentError when docbook_file does not exist.
    def initialize(docbook_file, output_dir=OUTPUT_DIR, css_file=nil, customization_layer=nil, embedded_fonts=[])
      @docbook_file = docbook_file
      @output_dir = output_dir
      @meta_dir = File.join(@output_dir, META_DIR)
      @oebps_dir = File.join(@output_dir, OEBPS_DIR)
      @css_file = css_file ? File.expand_path(css_file) : css_file
      @embedded_fonts = embedded_fonts
      @to_delete = []
      @stylesheet = customization_layer ? File.expand_path(customization_layer) : STYLESHEET

      unless File.exist?(@docbook_file)
        raise ArgumentError, "File #{@docbook_file} does not exist"
      end
    end

    # Renders the document and bundles it into output_file.
    # Intermediate files are removed even when rendering or bundling raises,
    # so a failed run does not leave the collapsed copy next to the source.
    # Returns the list of paths handed to cleanup.
    def render_to_file(output_file, verbose=false)
      cleaned = nil
      begin
        render_to_epub(output_file, verbose)
        bundle_epub(output_file, verbose)
      ensure
        cleaned = cleanup_files(@to_delete)
      end
      cleaned
    end

    # Runs CHECKER against file. Returns false when the checker exits
    # successfully, otherwise the checker's combined stdout/stderr output.
    def self.invalid?(file)
      # Obnoxiously, we can't just check for a non-zero output...
      cmd = %Q(#{CHECKER} "#{file}")
      output = `#{cmd} 2>&1`
      return false if $?.success?

      STDERR.puts output if $DEBUG
      output
    end

    private

    # Collapses the source document and runs the XSLT processor inside
    # @output_dir. Raises RuntimeError when the processor fails.
    def render_to_epub(output_file, verbose)
      @collapsed_docbook_file = collapse_docbook

      # The command below starts with `cd @output_dir`, so make sure it exists.
      FileUtils.mkdir_p(@output_dir)

      font = nil
      unless @embedded_fonts.empty?
        font_names = @embedded_fonts.map { |f| File.basename(f) }.join(',')
        font = %Q(--stringparam epub.embedded.fonts "#{font_names}")
      end
      # Quoted like the font list so a file name containing spaces survives the shell.
      html_stylesheet = %Q(--stringparam html.stylesheet "#{File.basename(@css_file)}") if @css_file

      options = [
        "--stringparam chunk.quietly " + (verbose ? '0' : '1'),
        "--stringparam callout.graphics.path #{CALLOUT_PATH}/",
        "--stringparam callout.graphics.number.limit #{CALLOUT_LIMIT}",
        "--stringparam callout.graphics.extension #{CALLOUT_EXT}",
        "--stringparam base.dir #{OEBPS_DIR}/",
        font,
        "--stringparam epub.metainf.dir #{META_DIR}/",
        "--stringparam epub.oebps.dir #{OEBPS_DIR}/",
        html_stylesheet
      ].compact.join(" ")

      # Double-quote stylesheet & file to help Windows cmd.exe
      db2epub_cmd = %Q(cd "#{@output_dir}" && #{XSLT_PROCESSOR} #{options} "#{@stylesheet}" "#{@collapsed_docbook_file}")
      STDERR.puts db2epub_cmd if $DEBUG
      success = system(db2epub_cmd)
      raise "Could not render as .epub to #{output_file} (#{db2epub_cmd})" unless success

      @to_delete.concat(Dir["#{@meta_dir}/*"])
      @to_delete.concat(Dir["#{@oebps_dir}/*"])
    end

    # Copies all auxiliary assets into the generated tree and zips it into
    # output_file. Raises RuntimeError when the zip command fails.
    def bundle_epub(output_file, verbose)
      quiet = verbose ? "" : "-q"
      mimetype_filename = write_mimetype
      meta = File.basename(@meta_dir)
      oebps = File.basename(@oebps_dir)
      copy_images
      copy_csses
      copy_fonts
      copy_callouts

      # zip -X -r ../book.epub mimetype META-INF OEBPS
      # Double-quote stylesheet & file to help Windows cmd.exe
      zip_cmd = %Q(cd "#{@output_dir}" && #{ZIPPER} #{quiet} -X -r "#{File.expand_path(output_file)}" "#{mimetype_filename}" "#{meta}" "#{oebps}")
      puts zip_cmd if $DEBUG
      success = system(zip_cmd)
      raise "Could not bundle into .epub file to #{output_file}" unless success
    end

    # Input must be collapsed because REXML couldn't find figures in files that
    # were XIncluded or added by ENTITY
    # http://sourceforge.net/tracker/?func=detail&aid=2750442&group_id=21935&atid=373747
    #
    # Writes ".collapsed.<basename>" next to the source document and returns
    # its absolute path. Raises RuntimeError when either xmllint pass fails.
    def collapse_docbook
      collapsed_file = File.join(File.expand_path(File.dirname(@docbook_file)),
                                 '.collapsed.' + File.basename(@docbook_file))
      # Register before running anything: the first pass may have written the
      # file even if the second pass then fails.
      @to_delete << collapsed_file

      # Double-quote stylesheet & file to help Windows cmd.exe
      entity_collapse_command = %Q(xmllint --loaddtd --noent -o "#{collapsed_file}" "#{@docbook_file}")
      entity_success = system(entity_collapse_command)
      raise "Could not collapse named entites in #{@docbook_file}" unless entity_success

      xinclude_collapse_command = %Q(xmllint --xinclude -o "#{collapsed_file}" "#{collapsed_file}")
      xinclude_success = system(xinclude_collapse_command)
      raise "Could not collapse XIncludes in #{@docbook_file}" unless xinclude_success

      collapsed_file
    end

    # Copies the stock callout graphics into the OEBPS tree when the document
    # uses callouts. Returns the source paths of the copied graphics.
    def copy_callouts
      return [] unless has_callouts?

      Dir.glob("#{CALLOUT_FULL_PATH}/*#{CALLOUT_EXT}").each do |img|
        img_new_filename = File.join(@oebps_dir, CALLOUT_PATH, File.basename(img))
        # TODO: What to rescue for these two?
        FileUtils.mkdir_p(File.dirname(img_new_filename))
        FileUtils.cp(img, img_new_filename)
        @to_delete << img_new_filename
      end
    end

    # Copies every embedded font into the OEBPS directory and registers the
    # copies for cleanup. Returns the source font paths.
    def copy_fonts
      @embedded_fonts.each do |font_file|
        font_new_filename = File.join(@oebps_dir, File.basename(font_file))
        FileUtils.cp(font_file, font_new_filename)
        @to_delete << font_new_filename
      end
      @embedded_fonts.dup
    end

    # Copies the CSS file (if one was given) into the OEBPS directory and
    # registers the copy for cleanup. Returns the source paths copied.
    def copy_csses
      return [] unless @css_file

      css_new_filename = File.join(@oebps_dir, File.basename(@css_file))
      FileUtils.cp(@css_file, css_new_filename)
      @to_delete << css_new_filename
      [@css_file]
    end

    # Copies every referenced image with a recognised extension into the OEBPS
    # tree, preserving its relative path. Returns the absolute source paths.
    def copy_images
      source_dir = File.expand_path(File.dirname(@docbook_file))
      new_images = []
      get_image_refs.each do |img|
        # TODO: It'd be cooler if we had a filetype lookup rather than just
        # extension
        next unless img =~ /\.(svg|png|gif|jpe?g|xml)/i

        img_new_filename = File.join(@oebps_dir, img)
        img_full = File.join(source_dir, img)
        # TODO: What to rescue for these two?
        FileUtils.mkdir_p(File.dirname(img_new_filename))
        puts(img_full + ": " + img_new_filename) if $DEBUG
        FileUtils.cp(img_full, img_new_filename)
        @to_delete << img_new_filename
        new_images << img_full
      end
      new_images
    end

    # Writes the "mimetype" file into @output_dir (no trailing newline) and
    # returns its basename for use on the zip command line.
    def write_mimetype
      mimetype_filename = File.join(@output_dir, "mimetype")
      File.open(mimetype_filename, "w") { |f| f.print MIMETYPE }
      @to_delete << mimetype_filename
      File.basename(mimetype_filename)
    end

    # Recursively removes every path in file_list (nested arrays allowed),
    # ignoring paths that no longer exist.
    def cleanup_files(file_list)
      file_list.flatten.each do |f|
        # Yikes
        FileUtils.rm_r(f, :force => true)
      end
    end

    # Yields (name, attributes) for each start tag in the collapsed document.
    # The file is opened in block form so the handle is always closed, even
    # when the caller returns early from inside the block.
    def each_start_element
      File.open(@collapsed_docbook_file) do |io|
        parser = REXML::Parsers::PullParser.new(io)
        while parser.has_next?
          el = parser.pull
          yield el[0], el[1] if el.start_element?
        end
      end
    end

    # Returns an Array of all of the (image) @filerefs in a document.
    # Elements without a fileref attribute are skipped; duplicates are removed.
    def get_image_refs
      image_refs = []
      each_start_element do |name, attrs|
        image_refs << attrs['fileref'] if name == "imagedata" || name == "graphic"
      end
      image_refs.compact.uniq
    end

    # Returns true if the document has code callouts
    def has_callouts?
      each_start_element do |name, _attrs|
        return true if name == "calloutlist" || name == "co"
      end
      false
    end
  end
end
|