docbook.rb 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227
  1. require 'fileutils'
  2. require 'rexml/parsers/pullparser'
  3. module DocBook
  # Converts a DocBook XML document into an .epub archive.
  #
  # The heavy lifting is delegated to external command-line tools: xmllint
  # (entity / XInclude collapsing), XSLT_PROCESSOR (DocBook -> XHTML chunks and
  # EPUB metadata) and ZIPPER (packaging). Images, CSS, fonts and callout
  # graphics referenced by the document are staged into the output directory
  # before zipping, and every staged or generated file is recorded in
  # @to_delete so it can be removed afterwards.
  class Epub
    CHECKER = "epubcheck"
    STYLESHEET = File.expand_path(File.join(File.dirname(__FILE__), '..', '..', "docbook.xsl"))
    CALLOUT_PATH = File.join('images', 'callouts')
    CALLOUT_FULL_PATH = File.expand_path(File.join(File.dirname(__FILE__), '..', '..', '..', CALLOUT_PATH))
    CALLOUT_LIMIT = 15
    CALLOUT_EXT = ".png"
    XSLT_PROCESSOR = "xsltproc"
    # NOTE: evaluated once, when this file is loaded, so every instance created
    # in the same process shares this default directory name.
    OUTPUT_DIR = ".epubtmp#{Time.now.to_f.to_s}"
    MIMETYPE = "application/epub+zip"
    META_DIR = "META-INF"
    OEBPS_DIR = "OEBPS"
    ZIPPER = "zip"

    attr_reader :output_dir

    # docbook_file::        path to the DocBook source; must exist.
    # output_dir::          working directory the EPUB tree is staged in.
    # css_file::            optional stylesheet copied into the OEBPS directory.
    # customization_layer:: optional XSLT used instead of STYLESHEET.
    # embedded_fonts::      font file path(s) to embed; nil or a single path
    #                       are accepted and normalised to an Array.
    #
    # Raises ArgumentError when docbook_file does not exist.
    def initialize(docbook_file, output_dir=OUTPUT_DIR, css_file=nil, customization_layer=nil, embedded_fonts=[])
      @docbook_file = docbook_file
      @output_dir = output_dir
      @meta_dir = File.join(@output_dir, META_DIR)
      @oebps_dir = File.join(@output_dir, OEBPS_DIR)
      @css_file = css_file ? File.expand_path(css_file) : css_file
      @embedded_fonts = Array(embedded_fonts)
      @to_delete = []
      @stylesheet = customization_layer ? File.expand_path(customization_layer) : STYLESHEET

      unless File.exist?(@docbook_file)
        raise ArgumentError, "File #{@docbook_file} does not exist"
      end
    end

    # Renders the document and bundles it into output_file.
    #
    # Temporary files are removed even when rendering or bundling raises, so a
    # failed run does not leave the collapsed document or staged assets behind.
    # Returns the list of paths that were cleaned up.
    def render_to_file(output_file, verbose=false)
      removed = nil
      begin
        render_to_epub(output_file, verbose)
        bundle_epub(output_file, verbose)
      ensure
        removed = cleanup_files(@to_delete)
      end
      removed
    end

    # Runs CHECKER against file.
    #
    # Returns false when the checker exits successfully; otherwise returns the
    # checker's combined stdout/stderr text (truthy). When the checker cannot
    # be started at all, the system error message is returned instead.
    def self.invalid?(file)
      # Obnoxiously, we can't just check for a non-zero output...
      begin
        # Argument-vector form: no shell is involved, so quotes or other
        # metacharacters in the file name cannot alter the command.
        output = IO.popen([CHECKER, file], :err => [:child, :out]) { |io| io.read }
      rescue SystemCallError => e
        output = e.message
        STDERR.puts output if $DEBUG
        return output
      end
      return false if $?.success?

      STDERR.puts output if $DEBUG
      output
    end

    private

    # Collapses the source document and runs the XSLT processor inside
    # @output_dir, producing the META-INF and OEBPS trees. Everything the
    # processor generated is registered for cleanup.
    #
    # Raises RuntimeError when the processor fails.
    def render_to_epub(output_file, verbose)
      @collapsed_docbook_file = collapse_docbook
      # The processor is started inside @output_dir, so it has to exist first.
      FileUtils.mkdir_p(@output_dir)

      params = [
        ['chunk.quietly', verbose ? '0' : '1'],
        ['callout.graphics.path', "#{CALLOUT_PATH}/"],
        ['callout.graphics.number.limit', CALLOUT_LIMIT.to_s],
        ['callout.graphics.extension', CALLOUT_EXT],
        ['base.dir', "#{OEBPS_DIR}/"],
      ]
      unless @embedded_fonts.empty?
        font_names = @embedded_fonts.map { |f| File.basename(f) }.join(',')
        params << ['epub.embedded.fonts', font_names]
      end
      params << ['epub.metainf.dir', "#{META_DIR}/"]
      params << ['epub.oebps.dir', "#{OEBPS_DIR}/"]
      params << ['html.stylesheet', File.basename(@css_file)] if @css_file

      argv = [XSLT_PROCESSOR]
      params.each { |name, value| argv.push('--stringparam', name, value) }
      argv.push(@stylesheet, @collapsed_docbook_file)

      db2epub_cmd = describe_command(argv, @output_dir)
      STDERR.puts db2epub_cmd if $DEBUG
      success = run_tool(argv, @output_dir)

      # Register whatever was generated before reporting a failure so partial
      # output is cleaned up as well.
      @to_delete.concat(Dir["#{@meta_dir}/*"])
      @to_delete.concat(Dir["#{@oebps_dir}/*"])
      raise "Could not render as .epub to #{output_file} (#{db2epub_cmd})" unless success
    end

    # Stages the mimetype file and all referenced assets, then zips the
    # staged tree into output_file.
    #
    # Raises RuntimeError when the zip tool fails.
    def bundle_epub(output_file, verbose)
      mimetype_filename = write_mimetype
      meta = File.basename(@meta_dir)
      oebps = File.basename(@oebps_dir)
      copy_images
      copy_csses
      copy_fonts
      copy_callouts

      # zip -X -r ../book.epub mimetype META-INF OEBPS
      argv = [ZIPPER]
      argv << '-q' unless verbose
      argv.push('-X', '-r', File.expand_path(output_file), mimetype_filename, meta, oebps)

      zip_cmd = describe_command(argv, @output_dir)
      puts zip_cmd if $DEBUG
      success = run_tool(argv, @output_dir)
      raise "Could not bundle into .epub file to #{output_file}" unless success
    end

    # Input must be collapsed because REXML couldn't find figures in files that
    # were XIncluded or added by ENTITY
    # http://sourceforge.net/tracker/?func=detail&aid=2750442&group_id=21935&atid=373747
    #
    # Writes ".collapsed.<name>" next to the source document and returns its
    # absolute path. Raises RuntimeError when either xmllint pass fails.
    def collapse_docbook
      source_dir = File.expand_path(File.dirname(@docbook_file))
      collapsed_file = File.join(source_dir, '.collapsed.' + File.basename(@docbook_file))
      # Registered up front so a partially written file is removed on failure.
      @to_delete << collapsed_file

      entity_success = run_tool(['xmllint', '--loaddtd', '--noent', '-o', collapsed_file, @docbook_file])
      raise "Could not collapse named entites in #{@docbook_file}" unless entity_success

      xinclude_success = run_tool(['xmllint', '--xinclude', '-o', collapsed_file, collapsed_file])
      raise "Could not collapse XIncludes in #{@docbook_file}" unless xinclude_success

      collapsed_file
    end

    # Copies the bundled callout graphics into the OEBPS tree when the
    # document uses callouts. Returns the source paths that were copied.
    def copy_callouts
      return [] unless has_callouts?

      Dir.glob("#{CALLOUT_FULL_PATH}/*#{CALLOUT_EXT}").each do |img|
        img_new_filename = File.join(@oebps_dir, CALLOUT_PATH, File.basename(img))
        # TODO: What to rescue for these two?
        FileUtils.mkdir_p(File.dirname(img_new_filename))
        FileUtils.cp(img, img_new_filename)
        @to_delete << img_new_filename
      end
    end

    # Copies each embedded font into the OEBPS directory and registers the
    # copy for cleanup. Returns the source font paths.
    def copy_fonts
      @embedded_fonts.map do |font_file|
        font_new_filename = File.join(@oebps_dir, File.basename(font_file))
        FileUtils.mkdir_p(@oebps_dir)
        FileUtils.cp(font_file, font_new_filename)
        @to_delete << font_new_filename
        font_file
      end
    end

    # Copies the CSS file (if one was given) into the OEBPS directory and
    # registers the copy for cleanup. Returns the source paths copied.
    def copy_csses
      return [] unless @css_file

      css_new_filename = File.join(@oebps_dir, File.basename(@css_file))
      FileUtils.mkdir_p(@oebps_dir)
      FileUtils.cp(@css_file, css_new_filename)
      @to_delete << css_new_filename
      [@css_file]
    end

    # Copies every referenced image with a recognised extension into the
    # OEBPS tree, keeping its path relative to the source document.
    # Returns the absolute source paths that were copied.
    def copy_images
      new_images = []
      source_dir = File.expand_path(File.dirname(@docbook_file))
      get_image_refs.each do |img|
        # TODO: It'd be cooler if we had a filetype lookup rather than just
        # extension
        next unless img =~ /\.(svg|png|gif|jpe?g|xml)/i

        img_new_filename = File.join(@oebps_dir, img)
        img_full = File.join(source_dir, img)
        # TODO: What to rescue for these two?
        FileUtils.mkdir_p(File.dirname(img_new_filename))
        puts(img_full + ": " + img_new_filename) if $DEBUG
        FileUtils.cp(img_full, img_new_filename)
        @to_delete << img_new_filename
        new_images << img_full
      end
      new_images
    end

    # Writes the "mimetype" file at the root of the output directory and
    # returns its base name (the name used inside the archive).
    def write_mimetype
      mimetype_filename = File.join(@output_dir, "mimetype")
      File.open(mimetype_filename, "w") { |f| f.print MIMETYPE }
      @to_delete << mimetype_filename
      File.basename(mimetype_filename)
    end

    # Recursively removes every path in file_list (nested arrays allowed).
    # Missing paths are ignored. Returns the flattened list.
    def cleanup_files(file_list)
      file_list.flatten.each do |f|
        # Yikes
        FileUtils.rm_r(f, :force => true)
      end
    end

    # Returns an Array of all of the (image) @filerefs in a document
    # Elements without a fileref attribute are skipped.
    def get_image_refs
      image_refs = []
      each_start_element do |name, attributes|
        image_refs << attributes['fileref'] if name == "imagedata" || name == "graphic"
      end
      image_refs.compact.uniq
    end

    # Returns true if the document has code callouts
    def has_callouts?
      each_start_element do |name, _attributes|
        return true if name == "calloutlist" || name == "co"
      end
      false
    end

    # Yields the name and attribute Hash of every start tag in the collapsed
    # document. The file is opened in block form so the handle is always
    # closed, including when the caller returns early from the block.
    def each_start_element
      File.open(@collapsed_docbook_file) do |io|
        parser = REXML::Parsers::PullParser.new(io)
        while parser.has_next?
          event = parser.pull
          yield event[0], event[1] if event.start_element?
        end
      end
    end

    # Runs an external program directly (no shell), optionally inside
    # working_dir. argv is [program, arg, ...]. Returns true on a zero exit
    # status, false on a non-zero one and nil when the program could not be run.
    def run_tool(argv, working_dir=nil)
      # The [name, argv0] pair forces the no-shell form even for a bare command.
      program = [argv.first, argv.first]
      arguments = argv[1..-1]
      if working_dir
        system(program, *arguments, :chdir => working_dir)
      else
        system(program, *arguments)
      end
    end

    # Human-readable rendering of a command for debug output and error
    # messages; it is never executed.
    def describe_command(argv, working_dir)
      %Q(cd "#{working_dir}" && #{argv.join(' ')})
    end
  end
  201. end