Source code for doxysphinx.toc

# =====================================================================================
#  C O P Y R I G H T
# -------------------------------------------------------------------------------------
#  Copyright (c) 2023 by Robert Bosch GmbH. All rights reserved.
#  Author(s):
#  - Markus Braun, :em engineering methods AG (contracted by Robert Bosch GmbH)
# =====================================================================================
"""The toc module contains classes related to the toctree generation for doxygen htmls/rsts."""

import re
import unicodedata
from dataclasses import dataclass, field, replace
from pathlib import Path
from typing import Any, Dict, Iterable, Iterator, List, Protocol, Tuple

from doxysphinx.doxygen import read_js_data_file
from doxysphinx.utils.files import write_file
from doxysphinx.utils.iterators import apply

class TocGenerator(Protocol):
    """
    TocGenerator protocol.

    Gets the source_dir (with the html sources) during init and each file to possibly generate a toctree
    directive for in the :meth:`generate_toc_for` method. The implementer then has to choose how to
    implement the toc generation.
    """

    def __init__(self, source_dir: Path):
        """
        Initialize an instance of a TocGenerator.

        :param source_dir: The source directory where all html files reside.
        """

    def generate_toc_for(self, file: Path) -> Iterable[str]:
        """
        Generate a toctree directive for a given file.

        The protocol's default implementation produces no lines at all.

        :param file: the file to generate the toctree directive for
        :return: a string iterable representing the lines forming the toctree directive
        """
        return []
@dataclass
class _MenuEntry:
    """A node of the menu tree built from the json nodes of doxygen's menudata.js."""

    title: str
    docname: str
    url: str
    children: List["_MenuEntry"]
    # indicates whether a menu entry references a children's file as a structural dummy
    is_structural_dummy: bool = False
    # derived in __post_init__: True when the entry has no children
    is_leaf: bool = field(init=False)

    def __post_init__(self):
        self.is_leaf = not self.children

    @staticmethod
    def from_json_node(json_node: Dict[str, Any]) -> "_MenuEntry":
        """Create a _MenuEntry from a json node (in doxygen's menudata.js).

        Note that this method will build up a _MenuEntry-tree automatically/recursively.

        :param json_node: The json node to generate a _MenuEntry from
        :return: A _MenuEntry representation of the json_node and its children
        """
        title = json_node["text"]
        url = json_node["url"]
        docname = _MenuEntry._docname_from_url(url)
        # Resolving the children may set json_node["is_structural_dummy"], so the flag
        # has to be read only afterwards.
        children = _MenuEntry._get_sphinx_toc_compatible_children(json_node)
        is_structural_dummy = json_node.get("is_structural_dummy", False)
        return _MenuEntry(title, docname, url, children, is_structural_dummy)

    @staticmethod
    def _docname_from_url(url: str) -> str:
        """Return the url without its anchor part (everything from "#" on) and without ".html"."""
        return url.split("#")[0].replace(".html", "")

    @staticmethod
    def _get_sphinx_toc_compatible_children(json_node: Dict[str, Any]) -> List["_MenuEntry"]:
        """Get a "sphinx compatible" view of the children.

        We need a special handling for index anchors: with doxygen we sometimes have urls in menu
        entries like::

            # - title: url
            - a: globals_enum.html#index_a
            - c: globals_enum.html#index_c
            - e: globals_enum.html#index_e
            - f: globals_enum.html#index_f
            ...

        The problem here is that the sphinx toctree simply cannot handle anchors... so we cannot add
        these links for entries in the parent's toctree.

        We therefore need to

        - eliminate all leaf children with the same name/file down to one child
        - then check if the parent has the same name/file and in that case get rid of the child
          completely

        As a side effect ``json_node["is_structural_dummy"]`` is written whenever a child shares
        the file of ``json_node`` itself.

        :param json_node: The json node whose children should be resolved
        :return: The (possibly reduced) list of child entries
        """
        raw_children = json_node.get("children")
        if not raw_children:
            return []
        children = [_MenuEntry.from_json_node(c) for c in raw_children]

        current_docname = _MenuEntry._docname_from_url(json_node["url"])

        # keep every non-leaf child, but only the first leaf child per docname
        unique_children: List[_MenuEntry] = []
        seen_docnames = set()
        for child in children:
            if child.is_leaf and child.docname in seen_docnames:
                continue
            if child.docname == current_docname:
                json_node["is_structural_dummy"] = True
            unique_children.append(child)
            seen_docnames.add(child.docname)

        # if there is only one child item left and if that's the same as the current item - get rid of it
        if len(unique_children) == 1:
            only_child = unique_children[0]
            if only_child.is_leaf and only_child.docname == current_docname:
                json_node["is_structural_dummy"] = False
                return []

        return unique_children
class DoxygenTocGenerator:
    """
    A TocGenerator for doxygen.

    Will read the menudata.js to check whether a toctree
    directive needs to be generated or not.
    """

    def __init__(self, source_dir: Path):
        """
        Initialize an instance of a TocGenerator.

        Besides loading the menu this also writes one rst file into source_dir for each menu entry
        that is flagged as structural dummy.

        :param source_dir: The source directory where the doxygen html files reside.
        """
        self._source_dir = source_dir
        self._menu: "_MenuEntry" = self._load_menu_tree(source_dir / "menudata.js")
        self._doxy_html_template: Tuple[str, str] = self._parse_template()

        # create rst files for those structural dummies doxygen is using...
        # All dummies are renamed first and written second, so the toctree entries written in the
        # second pass already see the renamed docnames (assumes `apply` runs eagerly - TODO confirm).
        structural_dummies = [e for e in self._flatten_tree(self._menu) if e.is_structural_dummy]
        apply(structural_dummies, self._prepare_structural_dummy)
        apply(structural_dummies, self._create_toc_file_for_structural_dummy)

        self._menu_lookup: Dict[str, "_MenuEntry"] = {
            e.docname: e for e in self._flatten_tree(self._menu) if not e.is_leaf
        }

    def _parse_template(self) -> Tuple[str, str]:
        """Parse a "doxygen html template shell" out of the index.html file.

        :return: A Tuple containing the doxygen html before the content area and the content after
                 the content area. The page title in the first part is replaced by the marker
                 ``@@@-TITLE-@@@``.
        :raises Exception: if index.html does not contain exactly one content area.
        """
        # load html file as string and remove the newline chars
        # (read explicitly as utf-8, so the result does not depend on the platform's locale)
        blueprint = self._source_dir / "index.html"
        complete_html = blueprint.read_text(encoding="utf-8")
        linearized_html = complete_html.replace("\n", "").replace("\r", "")

        # split the html string on the content element
        # (so that we can use the 2 parts and inject our content in the middle)
        start_marker = '<!--header--><div class="contents">'
        end_marker = "</div><!-- contents -->"
        content_regex = re.compile(f"{re.escape(start_marker)}.*?{re.escape(end_marker)}")
        parts = content_regex.split(linearized_html)
        if len(parts) != 2:
            raise Exception(
                "couldn't parse html template for toc dummies from index.html. "
                "Maybe the format of doxygen has changed? Or do you have a custom template? In that case: "
                "we search for the following regex to find anything except the content: "
                '<!--header--><div class="contents">.*</div><!-- contents -->'
            )
        prefix, suffix = parts

        # replace the original index title with a marker that we can easily replace afterwards
        title_regex = re.compile('(?<=<div class="title">).*?(?=</div>)')
        prefix_with_marker = title_regex.sub("@@@-TITLE-@@@", prefix)

        return prefix_with_marker + start_marker, end_marker + suffix

    def _sanitize_filename(self, value: str) -> str:
        """Sanitize value to make it usable as a filename.

        - Try to replace unicode characters with ascii fallbacks
        - drop any remaining non-ascii characters
        - converts to lower case
        - replace whitespace and slashes with underscores
        - keeps only alphanumerics, dash and underscore

        :param value: The text to sanitize
        :return: The sanitized text
        """
        value = unicodedata.normalize("NFKD", value)
        value = value.encode("ascii", "ignore").decode("ascii")
        value = re.sub(r"[\s/]", "_", value.lower())
        return re.sub(r"[^\w\-_]", "", value)

    def _prepare_structural_dummy(self, structural_dummy: "_MenuEntry"):
        """Rename the dummy's docname in place to ``<docname>_<sanitized title>``."""
        clean_title = self._sanitize_filename(structural_dummy.title)
        structural_dummy.docname = f"{structural_dummy.docname}_{clean_title}"

    def _create_toc_file_for_structural_dummy(self, structural_dummy: "_MenuEntry"):
        """Write ``<docname>.rst`` for the dummy into the source directory.

        The file wraps a toctree over the dummy's children into the html shell parsed from index.html.
        """
        prefix, suffix = self._doxy_html_template
        title = structural_dummy.title

        # Indentation matters here: everything inside the container is indented by 3 spaces and the
        # content/options of the nested directives by 6, otherwise they would not belong to them.
        content = [
            f".. title:: {title}",
            "",
            f"{title}",
            f"{'-' * len(title)}",
            "",
            ".. container:: doxygen-content",
            "",
            "   .. raw:: html",
            "",
            "      " + prefix.replace("@@@-TITLE-@@@", title),
            "",
            "   .. toctree::",
            "      :maxdepth: 4",
            "",
            *[f"      {item.title} <{item.docname}>" for item in structural_dummy.children],
            "",
            "   .. raw:: html",
            "",
            "      " + suffix,
            "",
        ]

        file = self._source_dir / f"{structural_dummy.docname}.rst"
        write_file(file, content)

    def _load_menu_tree(self, menu_data_js_path: Path) -> "_MenuEntry":
        """Load the menu tree from the given menudata.js file.

        The first top level menu entry becomes the root; all other top level entries become its
        children (replacing whatever children the first entry had itself).

        :param menu_data_js_path: The path to doxygen's menudata.js
        :return: The root menu entry
        :raises ValueError: if the menu has no top level entries
        """
        menu = read_js_data_file(menu_data_js_path)
        entries = [_MenuEntry.from_json_node(c) for c in menu["children"]]
        if not entries:
            raise ValueError(f"no menu entries found in {menu_data_js_path}")

        root, *children_without_root = entries
        return replace(root, children=children_without_root)

    def _flatten_tree(self, *entries: "_MenuEntry") -> Iterator["_MenuEntry"]:
        """Yield the given entries and all their descendants (each parent before its children)."""
        for entry in entries:
            yield entry
            if not entry.is_leaf:
                yield from self._flatten_tree(*entry.children)

    def generate_toc_for(self, file: Path) -> Iterator[str]:
        """
        Generate a toctree directive for a given file.

        Note that the toctree will only be generated when the file is part of a menu structure.

        :param file: the file to generate the toctree directive for
        :return: a string iterator representing the lines forming the toctree directive
        """
        matching_menu_entry = self._menu_lookup.get(file.stem)
        if matching_menu_entry is None or not matching_menu_entry.children:
            # unknown file or empty children list: no tocs need to be generated.
            return

        yield ".. toctree::"
        yield f" :caption: {matching_menu_entry.title}"
        yield " :maxdepth: 2"
        yield " :hidden:"
        yield ""
        yield from (f" {item.title} <{item.docname}>" for item in matching_menu_entry.children)
        yield ""