wordml2normalise.xsl 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445
  1. <xsl:stylesheet version="1.0"
  2. xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
  3. xmlns:w="http://schemas.microsoft.com/office/word/2003/wordml"
  4. xmlns:v="urn:schemas-microsoft-com:vml"
  5. xmlns:w10="urn:schemas-microsoft-com:office:word"
  6. xmlns:sl="http://schemas.microsoft.com/schemaLibrary/2003/core"
  7. xmlns:aml="http://schemas.microsoft.com/aml/2001/core"
  8. xmlns:wx="http://schemas.microsoft.com/office/word/2003/auxHint"
  9. xmlns:o="urn:schemas-microsoft-com:office:office"
  10. xmlns:dt="uuid:C2F41010-65B3-11d1-A29F-00AA00C14882"
  11. xmlns:dbk='http://docbook.org/ns/docbook'
  12. xmlns:rnd='http://docbook.org/ns/docbook/roundtrip'
  13. xmlns:xlink='http://www.w3.org/1999/xlink'
  14. xmlns:exsl='http://exslt.org/common'
  15. exclude-result-prefixes='w v w10 sl aml wx o dt'
  16. extension-element-prefixes='exsl'>
  17. <xsl:import href='normalise-common.xsl'/>
  18. <xsl:output method='xml' indent="yes"/>
  19. <!-- ********************************************************************
  20. $Id: wordml2normalise.xsl 8105 2008-08-15 01:29:11Z balls $
  21. ********************************************************************
  22. This file is part of the XSL DocBook Stylesheet distribution.
  23. See ../README or http://nwalsh.com/docbook/xsl/ for copyright
  24. and other information.
  25. ******************************************************************** -->
  <!-- Whitespace-only text nodes are stripped from every source element,
       except inside w:t where whitespace is preserved. -->
  <xsl:strip-space elements="*"/>
  <xsl:preserve-space elements="w:t"/>
  <!-- Lookup table of w:style elements, indexed by their w:styleId. -->
  <xsl:key name="style" match="w:style" use="@w:styleId"/>
  <!-- Document element: the result is a dbk:article built from the
       w:body child only; other children are not processed here. -->
  <xsl:template match='w:wordDocument'>
    <dbk:article>
      <xsl:apply-templates select="w:body"/>
    </dbk:article>
  </xsl:template>
  <!-- These auxiliary-hint elements produce no output. -->
  <xsl:template match="wx:margin-left | wx:borders"/>
  <!-- Paragraph.
       Emits a dbk:para whose rnd:style attribute holds the style name
       returned by rnd:map-paragraph-style. Two kinds of paragraph are
       dropped entirely (see the xsl:when branches below). -->
  <xsl:template match="w:p">
    <!-- Style name exactly as it appears in the paragraph properties -->
    <xsl:variable name="source-style" select="w:pPr/w:pStyle/@w:val"/>
    <xsl:variable name="mapped-style">
      <xsl:call-template name="rnd:map-paragraph-style">
        <xsl:with-param name="style" select="$source-style"/>
      </xsl:call-template>
    </xsl:variable>

    <xsl:choose>
      <!-- The only children are aml:annotation and w:pPr elements, at
           least one annotation is a Word.Deletion and none is of any
           other type: nothing to output. -->
      <xsl:when test='count(*) = count(aml:annotation | w:pPr) and
                      aml:annotation[@w:type = "Word.Deletion"] and
                      not(aml:annotation[@w:type != "Word.Deletion"])'/>
      <!-- Eliminate paragraphs that have no content.
           These are section or page breaks. -->
      <xsl:when test="w:pPr/w:sectPr and not(w:r | w:hlink | w:tbl)"/>
      <xsl:otherwise>
        <dbk:para rnd:style="{$mapped-style}">
          <!-- Record the source style name when the mapping changed it -->
          <xsl:if test="$source-style and $mapped-style != $source-style">
            <xsl:attribute name="rnd:original-style">
              <xsl:value-of select="$source-style"/>
            </xsl:attribute>
          </xsl:if>
          <!-- An "attributes" run followed by a "CommentReference" run:
               the attribute-name runs nested in the second run are
               turned into attributes of this dbk:para. -->
          <xsl:if test='w:r[1][w:rPr/w:rStyle/@w:val = "attributes"] and
                        w:r[2][w:rPr/w:rStyle/@w:val = "CommentReference"]'>
            <xsl:apply-templates mode="rnd:attributes"
                                 select='w:r[2]//w:r[w:rPr/w:rStyle/@w:val = "attribute-name"]'/>
          </xsl:if>
          <xsl:apply-templates/>
        </dbk:para>
      </xsl:otherwise>
    </xsl:choose>
  </xsl:template>
  <!-- Builds one attribute from an attribute-name run: the attribute
       name is the run's w:t text, the value comes from the nearest
       following sibling run styled "attribute-value", processed in
       mode rnd:attribute-value. -->
  <xsl:template match="*" mode="rnd:attributes">
    <xsl:variable name="value-run"
                  select='following-sibling::w:r[w:rPr/w:rStyle/@w:val = "attribute-value"][1]'/>
    <xsl:attribute name="{w:t}">
      <xsl:apply-templates select="$value-run" mode="rnd:attribute-value"/>
    </xsl:attribute>
  </xsl:template>
  <!-- Text run.
       Parameter do-vert-align: when true (the default) a run whose
       vertical alignment is subscript or superscript is wrapped in
       dbk:subscript / dbk:superscript and then processed again with
       the parameter set to false, so that the remaining branches
       (emphasis, footnotes, plain content) still apply inside the
       wrapper.
       Runs styled "attributes" or "CommentReference" produce no
       output here. -->
  <xsl:template match='w:r'>
    <xsl:param name='do-vert-align' select='true()'/>

    <!-- Emphasis role derived from direct formatting -->
    <xsl:variable name='role'>
      <xsl:choose>
        <xsl:when test='w:rPr/w:b and
                        w:rPr/w:i'>
          <xsl:text>bold-italic</xsl:text>
        </xsl:when>
        <xsl:when test='w:rPr/w:b'>
          <xsl:text>bold</xsl:text>
        </xsl:when>
        <xsl:when test='w:rPr/w:i'>
          <xsl:text>italic</xsl:text>
        </xsl:when>
        <xsl:when test='w:rPr/w:u'>
          <xsl:text>underline</xsl:text>
        </xsl:when>
        <!-- TODO: add support for other styles -->
      </xsl:choose>
    </xsl:variable>

    <!-- Character style name; empty when the run has none -->
    <xsl:variable name='style'>
      <xsl:value-of select='w:rPr/w:rStyle/@w:val'/>
    </xsl:variable>

    <xsl:choose>
      <xsl:when test='w:rPr/w:rStyle/@w:val = "attributes"'/>
      <xsl:when test='w:rPr/w:rStyle/@w:val = "CommentReference"'/>

      <xsl:when test='w:pict'>
        <!-- "filename" is where the image data gets extracted to -->
        <xsl:variable name='filename'>
          <xsl:call-template name='rnd:image-filename'/>
        </xsl:variable>
        <!-- "target" is the URL that will be the target of the imagedata hyperlink.
             This may or may not be related to the physical filename.
        -->
        <xsl:variable name='target'>
          <xsl:call-template name='rnd:image-target'>
            <xsl:with-param name='filename' select='$filename'/>
          </xsl:call-template>
        </xsl:variable>

        <xsl:call-template name='rnd:handle-image-data'>
          <xsl:with-param name='filename' select='$filename'/>
          <xsl:with-param name='data' select='w:pict/w:binData'/>
        </xsl:call-template>

        <!-- The shape style is a semicolon-separated list of
             "property:value" declarations. Spaces are removed and the
             list is wrapped in semicolons so that every declaration,
             including the first and the last, is delimited on both
             sides. Searching for ";width:" / ";height:" then picks out
             exactly that declaration, wherever it sits in the list and
             whatever follows it. -->
        <xsl:variable name='shape-style'
                      select='concat(";", translate(w:pict/v:shape/@style, " ", ""), ";")'/>
        <xsl:variable name='width'
                      select='substring-before(substring-after($shape-style, ";width:"), ";")'/>
        <xsl:variable name='height'
                      select='substring-before(substring-after($shape-style, ";height:"), ";")'/>

        <dbk:inlinemediaobject>
          <dbk:imageobject>
            <dbk:imagedata fileref='{$target}'>
              <!-- Only emit a dimension that was actually found -->
              <xsl:if test='$width != ""'>
                <xsl:attribute name='width'>
                  <xsl:value-of select='$width'/>
                </xsl:attribute>
              </xsl:if>
              <xsl:if test='$height != ""'>
                <xsl:attribute name='depth'>
                  <xsl:value-of select='$height'/>
                </xsl:attribute>
              </xsl:if>
            </dbk:imagedata>
          </dbk:imageobject>
        </dbk:inlinemediaobject>
      </xsl:when>

      <xsl:when test='$do-vert-align and
                      w:rPr/w:vertAlign/@w:val = "subscript"'>
        <dbk:subscript>
          <xsl:apply-templates select='.'>
            <xsl:with-param name='do-vert-align' select='false()'/>
          </xsl:apply-templates>
        </dbk:subscript>
      </xsl:when>
      <xsl:when test='$do-vert-align and
                      w:rPr/w:vertAlign/@w:val = "superscript"'>
        <dbk:superscript>
          <xsl:apply-templates select='.'>
            <xsl:with-param name='do-vert-align' select='false()'/>
          </xsl:apply-templates>
        </dbk:superscript>
      </xsl:when>

      <!-- A run inside an endnote paragraph that holds nothing but the
           endnote reference mark (and run properties) is dropped. -->
      <xsl:when test='w:endnoteRef and
                      parent::w:p/parent::w:endnote and
                      count(w:rPr|w:endnoteRef) = count(*)'/>
      <xsl:when test='w:footnoteRef'/> <!-- is a label supplied? -->

      <xsl:when test='w:footnote|w:endnote'>
        <dbk:footnote>
          <xsl:apply-templates select='w:footnote|w:endnote'/>
        </dbk:footnote>
      </xsl:when>

      <!-- Direct formatting and/or a character style: dbk:emphasis -->
      <xsl:when test='$role != "" or $style != ""'>
        <dbk:emphasis>
          <xsl:if test='$role != ""'>
            <xsl:attribute name='role'>
              <xsl:value-of select='$role'/>
            </xsl:attribute>
          </xsl:if>
          <xsl:if test='$style != ""'>
            <xsl:attribute name='rnd:style'>
              <xsl:call-template name='rnd:map-character-style'>
                <xsl:with-param name='style' select='$style'/>
              </xsl:call-template>
            </xsl:attribute>
          </xsl:if>
          <xsl:apply-templates/>
        </dbk:emphasis>
      </xsl:when>

      <xsl:otherwise>
        <xsl:apply-templates/>
      </xsl:otherwise>
    </xsl:choose>
  </xsl:template>
  187. <!-- An application may wish to override these templates -->
  <!-- rnd:image-filename returns the name of the physical file that the
       image data should be written to.
       Parameter pict: the picture element(s); defaults to the w:pict
       children of the context node.
       When the binary data name contains "wordml://" the text after
       that marker is used; otherwise a name of the form imageN.jpg is
       generated, N being one more than the number of w:pict elements
       that precede this one in the document.
  -->
  <xsl:template name="rnd:image-filename">
    <xsl:param name="pict" select="w:pict"/>
    <xsl:variable name="data-name" select="$pict/w:binData/@w:name"/>
    <xsl:choose>
      <xsl:when test='contains($data-name, "wordml://")'>
        <xsl:value-of select='substring-after($data-name, "wordml://")'/>
      </xsl:when>
      <xsl:otherwise>
        <xsl:value-of select='concat("image", count($pict/preceding::w:pict) + 1, ".jpg")'/>
      </xsl:otherwise>
    </xsl:choose>
  </xsl:template>
  <!-- rnd:image-target returns the URL used to refer to the image.
       This default implementation simply returns the filename it was
       given; the pict parameter is accepted but not used here.
  -->
  <xsl:template name="rnd:image-target">
    <xsl:param name="filename"/>
    <xsl:param name="pict" select="w:pict"/>
    <xsl:value-of select="string($filename)"/>
  </xsl:template>
  <!-- rnd:handle-image-data receives the base64-encoded data and a filename
       for the physical file to which the data should be written.
       Since XSLT cannot natively handle binary data, this implementation
       just writes the undecoded data to the nominated file, with ".b64"
       appended to the name.
       A real application would decode the data into a binary representation.

       Parameters:
         filename - name of the file, without the ".b64" suffix
         data     - the encoded data to write; defaults to the
                    w:pict/w:binData of the context node, which is what
                    was written before this parameter was honoured.

       Nothing is written when the processor does not support
       exsl:document.
  -->
  <xsl:template name='rnd:handle-image-data'>
    <xsl:param name='filename'/>
    <xsl:param name='data' select='w:pict/w:binData'/>
    <xsl:if test='element-available("exsl:document")'>
      <exsl:document href='{$filename}.b64' method='text'>
        <!-- Write what the caller supplied rather than re-reading the
             context node, so the parameter is not silently ignored. -->
        <xsl:value-of select='$data'/>
      </exsl:document>
    </xsl:if>
  </xsl:template>
  <!-- Hyperlink: becomes a dbk:link whose xlink:href is the value of
       the w:dest attribute. -->
  <xsl:template match="w:hlink">
    <dbk:link>
      <xsl:attribute name="xlink:href">
        <xsl:value-of select="@w:dest"/>
      </xsl:attribute>
      <xsl:apply-templates/>
    </dbk:link>
  </xsl:template>
  <!-- Soft returns have no real DocBook equivalent outside literal
       line environments, so a single line feed character is emitted
       in their place.
  -->
  <xsl:template match="w:br">
    <xsl:text>&#10;</xsl:text>
  </xsl:template>
  <!-- Table: emits a dbk:informaltable containing one dbk:tgroup.

       The frame attribute is derived from four 0/1 flags, one per
       outer edge. For each edge the decision is, in order:
         1. a border on that edge in the table properties (of the
            table itself or of its referenced table style) whose value
            is neither "nil" nor "none"  => 1
         2. such a border explicitly set to "nil" or "none"  => 0
         3. any cell of the relevant row carrying a cell border on
            that edge that is neither "nil" nor "none"  => 1
         4. otherwise  => 0
       The relevant row for the bottom edge is the last row; for the
       other edges it is the first row.

       When the table style marks its first row as a header, the first
       row goes into dbk:thead and the rest into dbk:tbody. -->
  <xsl:template match='w:tbl'>
    <!-- The table itself together with the w:style it references -->
    <xsl:variable name='tbl.style'
                  select='key("style", w:tblPr/w:tblStyle/@w:val) | .'/>

    <xsl:variable name='border.top'>
      <xsl:choose>
        <xsl:when test='$tbl.style/w:tblPr/w:tblBorders/w:top[not(@w:val = "nil" or @w:val = "none")]'>1</xsl:when>
        <xsl:when test='$tbl.style/w:tblPr/w:tblBorders/w:top[@w:val = "nil" or @w:val = "none"]'>0</xsl:when>
        <xsl:when test='w:tr[1]/w:tc[w:tcPr/w:tcBorders/w:top[not(@w:val = "nil" or @w:val = "none")]]'>1</xsl:when>
        <xsl:otherwise>0</xsl:otherwise>
      </xsl:choose>
    </xsl:variable>
    <xsl:variable name='border.bottom'>
      <xsl:choose>
        <xsl:when test='$tbl.style/w:tblPr/w:tblBorders/w:bottom[not(@w:val = "nil" or @w:val = "none")]'>1</xsl:when>
        <xsl:when test='$tbl.style/w:tblPr/w:tblBorders/w:bottom[@w:val = "nil" or @w:val = "none"]'>0</xsl:when>
        <!-- The bottom edge of the frame belongs to the last row -->
        <xsl:when test='w:tr[last()]/w:tc[w:tcPr/w:tcBorders/w:bottom[not(@w:val = "nil" or @w:val = "none")]]'>1</xsl:when>
        <xsl:otherwise>0</xsl:otherwise>
      </xsl:choose>
    </xsl:variable>
    <xsl:variable name='border.left'>
      <xsl:choose>
        <xsl:when test='$tbl.style/w:tblPr/w:tblBorders/w:left[not(@w:val = "nil" or @w:val = "none")]'>1</xsl:when>
        <xsl:when test='$tbl.style/w:tblPr/w:tblBorders/w:left[@w:val = "nil" or @w:val = "none"]'>0</xsl:when>
        <xsl:when test='w:tr[1]/w:tc[w:tcPr/w:tcBorders/w:left[not(@w:val = "nil" or @w:val = "none")]]'>1</xsl:when>
        <xsl:otherwise>0</xsl:otherwise>
      </xsl:choose>
    </xsl:variable>
    <xsl:variable name='border.right'>
      <xsl:choose>
        <xsl:when test='$tbl.style/w:tblPr/w:tblBorders/w:right[not(@w:val = "nil" or @w:val = "none")]'>1</xsl:when>
        <xsl:when test='$tbl.style/w:tblPr/w:tblBorders/w:right[@w:val = "nil" or @w:val = "none"]'>0</xsl:when>
        <xsl:when test='w:tr[1]/w:tc[w:tcPr/w:tcBorders/w:right[not(@w:val = "nil" or @w:val = "none")]]'>1</xsl:when>
        <xsl:otherwise>0</xsl:otherwise>
      </xsl:choose>
    </xsl:variable>

    <dbk:informaltable>
      <!-- No frame attribute is written for combinations not listed -->
      <xsl:choose>
        <xsl:when test='$border.top = "1" and $border.bottom = "1" and
                        $border.left = "1" and $border.right = "1"'>
          <xsl:attribute name='frame'>all</xsl:attribute>
        </xsl:when>
        <xsl:when test='$border.top = "1" and $border.bottom = "1"'>
          <xsl:attribute name='frame'>topbot</xsl:attribute>
        </xsl:when>
        <xsl:when test='$border.left = "1" and $border.right = "1"'>
          <xsl:attribute name='frame'>sides</xsl:attribute>
        </xsl:when>
        <xsl:when test='$border.top = "1"'>
          <xsl:attribute name='frame'>top</xsl:attribute>
        </xsl:when>
        <xsl:when test='$border.bottom = "1"'>
          <xsl:attribute name='frame'>bottom</xsl:attribute>
        </xsl:when>
      </xsl:choose>

      <!-- TODO: analyse column widths -->

      <dbk:tgroup>
        <xsl:apply-templates select='w:tblGrid'/>
        <xsl:choose>
          <xsl:when test='$tbl.style/w:tblStylePr[@w:type = "firstRow"]/w:trPr/w:tblHeader'>
            <dbk:thead>
              <xsl:apply-templates select='w:tr[1]'/>
            </dbk:thead>
            <dbk:tbody>
              <xsl:apply-templates select='w:tr[position() != 1]'/>
            </dbk:tbody>
          </xsl:when>
          <xsl:otherwise>
            <dbk:tbody>
              <xsl:apply-templates select='w:tr'/>
            </dbk:tbody>
          </xsl:otherwise>
        </xsl:choose>
      </dbk:tgroup>
    </dbk:informaltable>
  </xsl:template>
  <!-- Table properties produce no output of their own. -->
  <xsl:template match="w:tblPr"/>
  <!-- Grid column: becomes a dbk:colspec with a proportional width
       taken from w:w and a name of the form column-N, where N is the
       1-based position among the w:gridCol siblings. -->
  <xsl:template match="w:tblGrid/w:gridCol">
    <xsl:variable name="index" select="count(preceding-sibling::w:gridCol) + 1"/>
    <dbk:colspec colwidth="{@w:w}*" colname="column-{$index}"/>
  </xsl:template>
  <!-- Table row: a dbk:row wrapping whatever the children produce. -->
  <xsl:template match="w:tr">
    <dbk:row>
      <xsl:apply-templates/>
    </dbk:row>
  </xsl:template>
  <!-- Table cell: becomes a dbk:entry.
       rowsep / colsep are set to 1 when the table (or its style) has
       an inside horizontal / vertical border, or the cell itself has
       a bottom / right border, that is neither "nil" nor "none".
       A w:gridSpan greater than 1 yields namest and nameend; a
       w:vmerge with value "restart" yields morerows, counted by
       rnd:count-rowspan starting at the next row. -->
  <xsl:template match="w:tc">
    <!-- Nearest enclosing table, plus the w:style it references -->
    <xsl:variable name="table" select="ancestor::w:tbl[1]"/>
    <xsl:variable name="tbl.style"
                  select='$table | key("style", $table/w:tblPr/w:tblStyle/@w:val)'/>

    <!-- Grid column at which this cell starts: each earlier cell
         counts once, or w:gridSpan/@w:val times when it has a span. -->
    <xsl:variable name="earlier" select="preceding-sibling::w:tc"/>
    <xsl:variable name="this.colnum"
                  select="count($earlier) + 1 +
                          sum($earlier/w:tcPr/w:gridSpan/@w:val) -
                          count($earlier/w:tcPr/w:gridSpan[@w:val])"/>

    <dbk:entry>
      <xsl:if test='$tbl.style/w:tblPr/w:tblBorders/w:insideH[not(@w:val = "nil" or @w:val = "none")] or
                    w:tcPr/w:tcBorders/w:bottom[not(@w:val = "nil" or @w:val = "none")]'>
        <xsl:attribute name="rowsep">1</xsl:attribute>
      </xsl:if>
      <xsl:if test='$tbl.style/w:tblPr/w:tblBorders/w:insideV[not(@w:val = "nil" or @w:val = "none")] or
                    w:tcPr/w:tcBorders/w:right[not(@w:val = "nil" or @w:val = "none")]'>
        <xsl:attribute name="colsep">1</xsl:attribute>
      </xsl:if>

      <xsl:if test="w:tcPr/w:gridSpan[@w:val > 1]">
        <xsl:attribute name="namest">
          <xsl:value-of select='concat("column-", $this.colnum)'/>
        </xsl:attribute>
        <xsl:attribute name="nameend">
          <xsl:value-of select='concat("column-", $this.colnum + w:tcPr/w:gridSpan/@w:val - 1)'/>
        </xsl:attribute>
      </xsl:if>

      <xsl:if test='w:tcPr/w:vmerge[@w:val = "restart"]'>
        <xsl:attribute name="morerows">
          <xsl:call-template name="rnd:count-rowspan">
            <xsl:with-param name="row" select="../following-sibling::w:tr[1]"/>
            <xsl:with-param name="colnum" select="$this.colnum"/>
          </xsl:call-template>
        </xsl:attribute>
      </xsl:if>

      <xsl:apply-templates/>
    </dbk:entry>
  </xsl:template>
  <!-- Elements that contribute nothing to the output. -->
  <xsl:template match="w:pStyle | w:rStyle | w:proofErr | w:fldData | w:instrText"/>
  <!-- rnd:count-rowspan counts how many consecutive rows, starting at
       the given row, continue a vertical merge in a given column.

       Parameters:
         row    - the row to inspect; defaults to an empty node-set
         colnum - the grid column number; defaults to 0

       The cell examined is the one in the row whose starting grid
       column (computed the same way as in the w:tc template) equals
       colnum. If that cell has a w:vmerge whose value is not
       "restart", the result is one plus the count for the next row.
       In every other case, including when no such cell exists, the
       result is 0. -->
  <xsl:template name="rnd:count-rowspan">
    <xsl:param name="row" select="/.."/>
    <xsl:param name="colnum" select="0"/>

    <xsl:variable name="cell"
                  select="$row/w:tc[count(preceding-sibling::w:tc) + 1 +
                                    sum(preceding-sibling::w:tc/w:tcPr/w:gridSpan/@w:val) -
                                    count(preceding-sibling::w:tc/w:tcPr/w:gridSpan[@w:val]) = $colnum]"/>

    <xsl:choose>
      <xsl:when test='$cell/w:tcPr/w:vmerge[not(@w:val = "restart")]'>
        <!-- Merge continues: recurse into the following row -->
        <xsl:variable name="below">
          <xsl:call-template name="rnd:count-rowspan">
            <xsl:with-param name="row" select="$row/following-sibling::w:tr[1]"/>
            <xsl:with-param name="colnum" select="$colnum"/>
          </xsl:call-template>
        </xsl:variable>
        <xsl:value-of select="$below + 1"/>
      </xsl:when>
      <!-- No matching cell, or the cell does not continue a merge -->
      <xsl:otherwise>0</xsl:otherwise>
    </xsl:choose>
  </xsl:template>
  <!-- Headers and footers produce no output. -->
  <xsl:template match="w:ftr | w:hdr"/>
  <!-- Annotation: content of a Word.Deletion annotation is discarded;
       the children of any other annotation are processed normally. -->
  <xsl:template match="aml:annotation">
    <xsl:if test='not(@w:type = "Word.Deletion")'>
      <xsl:apply-templates/>
    </xsl:if>
  </xsl:template>
  399. </xsl:stylesheet>