|
4 | 4 |
|
5 | 5 | import html |
6 | 6 | import os |
| 7 | +import re |
| 8 | +from html.entities import codepoint2name |
7 | 9 | from os import path |
8 | 10 | from pathlib import Path |
9 | 11 | from typing import TYPE_CHECKING, Any |
|
21 | 23 | from sphinx.util.template import SphinxRenderer |
22 | 24 |
|
23 | 25 | if TYPE_CHECKING: |
24 | | - from docutils.nodes import Element, Node, document |
| 26 | + from docutils.nodes import Element, Node |
25 | 27 | from sphinx.application import Sphinx |
26 | 28 | from sphinx.config import Config |
27 | 29 |
|
@@ -91,7 +93,7 @@ def chm_htmlescape(s: str, quote: bool = True) -> str: |
91 | 93 |
|
92 | 94 |
|
93 | 95 | class ToCTreeVisitor(nodes.NodeVisitor): |
94 | | - def __init__(self, document: document) -> None: |
| 96 | + def __init__(self, document: nodes.document) -> None: |
95 | 97 | super().__init__(document) |
96 | 98 | self.body: list[str] = [] |
97 | 99 | self.depth = 0 |
@@ -181,13 +183,25 @@ def update_page_context( |
181 | 183 | ) -> None: |
182 | 184 | ctx['encoding'] = self.encoding |
183 | 185 |
|
| 186 | + # escape the `body` part to 7-bit ASCII |
| 187 | + body = ctx.get("body") |
| 188 | + if body is not None: |
| 189 | + ctx["body"] = re.sub(r"[^\x00-\x7F]", self._escape, body) |
| 190 | + |
| 191 | + @staticmethod |
| 192 | + def _escape(match: re.Match[str]) -> str: |
| 193 | + codepoint = ord(match.group(0)) |
| 194 | + if codepoint in codepoint2name: |
| 195 | + return f"&{codepoint2name[codepoint]};" |
| 196 | + return f"&#{codepoint};" |
| 197 | + |
184 | 198 | def handle_finish(self) -> None: |
185 | 199 | self.copy_stopword_list() |
186 | 200 | self.build_project_file() |
187 | 201 | self.build_toc_file() |
188 | 202 | self.build_hhx(self.outdir, self.config.htmlhelp_basename) |
189 | 203 |
|
190 | | - def write_doc(self, docname: str, doctree: document) -> None: |
| 204 | + def write_doc(self, docname: str, doctree: nodes.document) -> None: |
191 | 205 | for node in doctree.findall(nodes.reference): |
192 | 206 | # add ``target=_blank`` attributes to external links |
193 | 207 | if node.get('internal') is None and 'refuri' in node: |
@@ -265,7 +279,7 @@ def build_toc_file(self) -> None: |
265 | 279 | def build_hhx(self, outdir: str | os.PathLike[str], outname: str) -> None: |
266 | 280 | logger.info(__('writing index file...')) |
267 | 281 | index = IndexEntries(self.env).create_index(self) |
268 | | - filename = path.join(outdir, outname + '.hhk') |
| 282 | + filename = Path(outdir, outname + '.hhk') |
269 | 283 | with open(filename, 'w', encoding=self.encoding, errors='xmlcharrefreplace') as f: |
270 | 284 | f.write('<UL>\n') |
271 | 285 |
|
@@ -299,6 +313,9 @@ def write_param(name: str, value: str) -> None: |
299 | 313 | for title, (refs, subitems, _category_key) in group: |
300 | 314 | write_index(title, refs, subitems) |
301 | 315 | f.write('</UL>\n') |
| 316 | + # Fixup keywords (HTML escapes in keywords file) |
| 317 | + content = filename.read_bytes().replace(b'&#x27;', b'&#39;') |
| 318 | + filename.write_bytes(content) |
302 | 319 |
|
303 | 320 |
|
304 | 321 | def default_htmlhelp_basename(config: Config) -> str: |
|
0 commit comments