# Source code for doxysphinx.process

# =====================================================================================
#  C O P Y R I G H T
# -------------------------------------------------------------------------------------
#  Copyright (c) 2023 by Robert Bosch GmbH. All rights reserved.
#
#  Author(s):
#  - Markus Braun, :em engineering methods AG (contracted by Robert Bosch GmbH)
#  - Aniket Salve, Robert Bosch GmbH
# =====================================================================================

"""
The process module contains the :class:`Builder` and :class:`Cleaner` classes.

These represent the main functionality of doxysphinx.
"""
import logging
from pathlib import Path
from typing import Iterable, List, Optional, Tuple, Type

from mpire import WorkerPool

from doxysphinx.html_parser import DoxygenHtmlParser, HtmlParser
from doxysphinx.resources import DoxygenResourceProvider, ResourceProvider
from doxysphinx.sphinx import DirectoryMapper, SphinxHtmlBuilderDirectoryMapper
from doxysphinx.utils.files import hash_blake2b
from doxysphinx.writer import RstWriter, Writer


class Builder:
    """
    The Builder builds target docs-as-code files out of an existing html documentation.

    For each and every html file a rst file is created that imports the html content with raw directives.
    The html resources (stylesheets, images etc.) are also processed and copied to the correct place in the
    sphinx output directory. When sphinx then (later - not part of doxysphinx) processes the rst files they
    will resemble the original filenames in the sphinx output directory, thereby keeping any internal links
    intact.
    """

    _logger = logging.getLogger(__name__)

    def __init__(
        self,
        sphinx_source_dir: Path,
        sphinx_output_dir: Path,
        dir_mapper_type: Type[DirectoryMapper] = SphinxHtmlBuilderDirectoryMapper,
        resource_provider_type: Type[ResourceProvider] = DoxygenResourceProvider,
        parser_type: Type[HtmlParser] = DoxygenHtmlParser,
        writer_type: Type[Writer] = RstWriter,
        force_recreation: bool = False,
        parallel: bool = True,
    ):
        """
        Create a Builder that builds rsts for doxygen html files.

        :param sphinx_source_dir: The sphinx source directory where the rst files are located
                                  (most of the time something like "docs")
        :param sphinx_output_dir: The sphinx output directory where the final documentation is located.
        :param dir_mapper_type: The type of directory mapper to use.
        :param resource_provider_type: The resource provider to use.
        :param parser_type: The html parser to use.
        :param writer_type: The writer type to use.
        :param force_recreation: whether to force the recreation of rst files
        :param parallel: whether to process the html files in a worker pool (True) or sequentially
                         in the current process (False)
        """
        self._dir_mapper = dir_mapper_type(sphinx_source_dir, sphinx_output_dir)
        self._resource_provider = resource_provider_type(self._dir_mapper)

        # these will be used later lazily
        self._parser_type = parser_type
        self._writer_type = writer_type
        self._force_recreation = force_recreation
        self._parallel = parallel

    def build(self, doxygen_html_dir: Path):
        """
        Generate a rst file for each doxygen html file.

        Also copies necessary resources.

        :param doxygen_html_dir: The html output directory of doxygen where the generated documentation is.
        """
        copied_resources = self._resource_provider.provide_resources(doxygen_html_dir)
        self._logger.info(
            f"copied {len(copied_resources)} resource-files " f"to {self._dir_mapper.map(doxygen_html_dir)}"
        )

        created_rsts = self._build(doxygen_html_dir)
        self._logger.info(f"created {len(created_rsts)} rst-files in {doxygen_html_dir}")

    def _build(self, doxygen_html_dir: Path) -> List[Path]:
        """
        Parse every html file that needs (re)processing and write its rst file.

        :param doxygen_html_dir: The html output directory of doxygen.
        :return: The results of the writer for all processed html files.
        """
        parser = self._parser_type(doxygen_html_dir)
        writer = self._writer_type(doxygen_html_dir)
        task_args: Tuple[HtmlParser, Writer] = (parser, writer)

        # materialize the generator once so the work list can be handed to the pool
        files_with_hashes = list(self._get_doxy_htmls_to_process_with_hashes(doxygen_html_dir))

        if not self._parallel:
            return [self._run(task_args, html_file, html_hash) for html_file, html_hash in files_with_hashes]

        with WorkerPool() as pool:
            # the shared objects are handed to self._run as its first argument,
            # each (html_file, html_hash) tuple supplies the remaining arguments
            pool.set_shared_objects(task_args)
            return pool.map(self._run, files_with_hashes)

    def _get_doxy_htmls_to_process_with_hashes(self, doxygen_html_dir: Path) -> Iterable[Tuple[Path, str]]:
        """Get all doxygen html files to process with their hashes (blake2b).

        The hashes are used to implement incremental behavior. So only files which aren't the same
        are processed.

        :param doxygen_html_dir: The html output directory of doxygen.
        :return: Tuples of html file and html hash for each html file whose rst file is missing or
                 carries a different hash.
        """
        for html_file in doxygen_html_dir.glob("*.html"):
            rst_file = html_file.with_suffix(".rst")
            hash_from_html = hash_blake2b(html_file)

            # a missing rst file yields None here, which never equals the html hash
            hash_from_rst = self._get_html_hash_from_rst(rst_file)
            if hash_from_rst == hash_from_html:
                self._logger.debug(f"skipping {html_file} as the rst was created before.")
                continue

            yield html_file, hash_from_html

    def _get_html_hash_from_rst(self, rst_file: Path) -> Optional[str]:
        """
        Read the html hash that is stored in the first line of a rst file.

        :param rst_file: The rst file to inspect.
        :return: The text after the last ":" of the first line, or None if the file does not exist,
                 is empty or its first line does not start with ".. meta::".
        """
        if not rst_file.exists():
            return None

        with rst_file.open(encoding="utf-8") as file:
            # readline() returns "" for an empty file. Using next(file) here would raise
            # StopIteration, which turns into a RuntimeError inside the calling generator.
            first_line = file.readline()

        if not first_line.startswith(".. meta::"):
            return None

        # take everything from the last ":" onwards and return it (=the hash)
        return first_line.rsplit(":", 1)[-1].rstrip()

    def _run(self, task_args: Tuple[HtmlParser, Writer], html_file: Path, html_hash: str) -> Path:
        """
        Process a single html file: parse it and write the corresponding rst file.

        :param task_args: The parser and writer to use.
        :param html_file: The doxygen html file to process.
        :param html_hash: The hash of the html file, handed over to the writer.
        :return: The result of the writer.
        """
        parser, writer = task_args

        # parse the doxygen html file
        parse_result = parser.parse(html_file)

        # write the corresponding rst file
        rst_file = html_file.with_suffix(".rst")
        return writer.write(parse_result, rst_file, html_hash)
class Cleaner:
    """The cleaner cleans files created and copied by the builder."""

    _logger = logging.getLogger(__name__)

    def __init__(
        self,
        sphinx_source_dir: Path,
        sphinx_output_dir: Path,
        dir_mapper_type: Type[DirectoryMapper] = SphinxHtmlBuilderDirectoryMapper,
        resource_provider_type: Type[ResourceProvider] = DoxygenResourceProvider,
        parallel: bool = True,
    ):
        """
        Create a Cleaner that will cleanup things that the :class:`Builder` created.

        :param sphinx_source_dir: The sphinx source directory where the rst files are located
                                  (most of the time something like "docs")
        :param sphinx_output_dir: The sphinx output directory where the final documentation is located.
        :param dir_mapper_type: The type of directory mapper to use.
        :param resource_provider_type: The resource provider to use.
        :param parallel: whether to delete the rst files in a worker pool (True) or sequentially
                         in the current process (False)
        """
        self._dir_mapper = dir_mapper_type(sphinx_source_dir, sphinx_output_dir)
        self._resource_provider = resource_provider_type(self._dir_mapper)
        self._parallel = parallel

    def cleanup(self, doxygen_html_dir: Path):
        """
        Clean up files that were generated by the build method.

        :param doxygen_html_dir: The html output directory of doxygen where the generated documentation is.
        """
        resource_target_dir = self._dir_mapper.map(doxygen_html_dir)
        deleted_resources = self._resource_provider.cleanup_resources(resource_target_dir)
        self._logger.info(f"deleted {len(deleted_resources)} resource-files from {resource_target_dir}")

        deleted_rsts = self._cleanup(doxygen_html_dir)
        self._logger.info(f"deleted {len(deleted_rsts)} rst-files from {doxygen_html_dir}")

    def _cleanup(self, doxygen_html_dir: Path) -> List[Path]:
        """
        Delete the rst file next to each html file in the given directory.

        :param doxygen_html_dir: The html output directory of doxygen.
        :return: The paths of the rst files that were actually deleted.
        """
        files = list(doxygen_html_dir.glob("*.html"))

        if self._parallel:
            with WorkerPool() as pool:
                # the shared logger is handed to the worker function as its first argument
                pool.set_shared_objects(self._logger)
                results = pool.map(self._delete_corresponding_file, files)
        else:
            results = [self._delete_corresponding_file(self._logger, file) for file in files]

        # html files without a rst file yield None - drop those in both modes so that
        # the return value only holds deleted paths and the reported count is correct
        return [deleted for deleted in results if deleted is not None]

    @staticmethod
    def _delete_corresponding_file(logger: logging.Logger, html_file: Path) -> Optional[Path]:
        """
        Delete the rst file that has the same name as the given html file.

        :param logger: The logger used to report the deletion.
        :param html_file: The html file whose sibling rst file should be deleted.
        :return: The path of the deleted rst file or None if there was no such file.
        """
        target_rst_path = html_file.with_suffix(".rst")

        # try the deletion directly instead of checking for existence first,
        # so a file vanishing in between is treated like a file that never existed
        try:
            target_rst_path.unlink()
        except FileNotFoundError:
            return None

        logger.debug(f"deleted {target_rst_path}")
        return target_rst_path