From e7a9b447a8e6d6fbd4cd5b81ffa116016be31960 Mon Sep 17 00:00:00 2001 From: iabhi4 Date: Sat, 7 Jun 2025 14:51:25 -0700 Subject: [PATCH] REF: Replace os.path with pathlib.Path in pandas_web.py --- web/pandas_web.py | 70 ++++++++++++++++++++--------------------------- 1 file changed, 30 insertions(+), 40 deletions(-) diff --git a/web/pandas_web.py b/web/pandas_web.py index 34ac3743148ba..eca53178426dc 100755 --- a/web/pandas_web.py +++ b/web/pandas_web.py @@ -100,20 +100,15 @@ def blog_add_posts(context): posts = [] # posts from the file system if context["blog"]["posts_path"]: - posts_path = os.path.join( - context["source_path"], *context["blog"]["posts_path"].split("/") - ) - for fname in os.listdir(posts_path): - if fname.startswith("index."): + posts_path = context["source_path"] / context["blog"]["posts_path"] + for fname in posts_path.iterdir(): + if fname.name.startswith("index."): continue - link = ( - f"/{context['blog']['posts_path']}" - f"/{os.path.splitext(fname)[0]}.html" - ) + link = f"/{context['blog']['posts_path']}/{fname.stem}.html" md = markdown.Markdown( extensions=context["main"]["markdown_extensions"] ) - with open(os.path.join(posts_path, fname), encoding="utf-8") as f: + with fname.open(encoding="utf-8") as f: html = md.convert(f.read()) title = md.Meta["title"][0] summary = re.sub(tag_expr, "", html) @@ -386,15 +381,15 @@ def get_callable(obj_as_str: str) -> object: return obj -def get_context(config_fname: str, **kwargs): +def get_context(config_fname: pathlib.Path, **kwargs): """ Load the config yaml as the base context, and enrich it with the information added by the context preprocessors defined in the file. 
""" - with open(config_fname, encoding="utf-8") as f: + with config_fname.open(encoding="utf-8") as f: context = yaml.safe_load(f) - context["source_path"] = os.path.dirname(config_fname) + context["source_path"] = config_fname.parent context.update(kwargs) preprocessors = ( @@ -409,14 +404,13 @@ def get_context(config_fname: str, **kwargs): return context -def get_source_files(source_path: str) -> typing.Generator[str, None, None]: +def get_source_files(source_path: pathlib.Path) -> typing.Generator[str, None, None]: """ Generate the list of files present in the source directory. """ - for root, dirs, fnames in os.walk(source_path): - root_rel_path = os.path.relpath(root, source_path) - for fname in fnames: - yield os.path.join(root_rel_path, fname) + for path in source_path.rglob("*"): + if path.is_file(): + yield path.relative_to(source_path) def extend_base_template(content: str, base_template: str) -> str: @@ -432,8 +426,8 @@ def extend_base_template(content: str, base_template: str) -> str: def main( - source_path: str, - target_path: str, + source_path: pathlib.Path, + target_path: pathlib.Path, ) -> int: """ Copy every file in the source directory to the target directory. @@ -441,9 +435,10 @@ def main( For ``.md`` and ``.html`` files, render them with the context before copying them. ``.md`` files are transformed to HTML. """ + # Sanity check: validate that versions.json is valid JSON - versions_path = os.path.join(source_path, "versions.json") - with open(versions_path, encoding="utf-8") as f: + versions_path = source_path / "versions.json" + with versions_path.open(encoding="utf-8") as f: try: json.load(f) except json.JSONDecodeError as e: @@ -451,7 +446,7 @@ def main( f"Invalid versions.json: {e}. Ensure it is valid JSON." 
) from e - config_fname = os.path.join(source_path, "config.yml") + config_fname = source_path / "config.yml" shutil.rmtree(target_path, ignore_errors=True) os.makedirs(target_path, exist_ok=True) @@ -460,23 +455,22 @@ def main( context = get_context(config_fname, target_path=target_path) sys.stderr.write("Context generated\n") - templates_path = os.path.join(source_path, context["main"]["templates_path"]) + templates_path = source_path / context["main"]["templates_path"] jinja_env = jinja2.Environment(loader=jinja2.FileSystemLoader(templates_path)) for fname in get_source_files(source_path): - if os.path.normpath(fname) in context["main"]["ignore"]: + if fname.as_posix() in context["main"]["ignore"]: continue - sys.stderr.write(f"Processing {fname}\n") - dirname = os.path.dirname(fname) - os.makedirs(os.path.join(target_path, dirname), exist_ok=True) + dirname = fname.parent + (target_path / dirname).mkdir(parents=True, exist_ok=True) - extension = os.path.splitext(fname)[-1] + extension = fname.suffix if extension in (".html", ".md"): - with open(os.path.join(source_path, fname), encoding="utf-8") as f: + with (source_path / fname).open(encoding="utf-8") as f: content = f.read() if extension == ".md": - if "pdeps/" in fname: + if len(fname.parts) > 1 and fname.parts[1] == "pdeps": from markdown.extensions.toc import TocExtension body = markdown.markdown( @@ -503,17 +497,13 @@ def main( # Python-Markdown doesn't let us config table attributes by hand body = body.replace("<table>", '<table class="table table-bordered">')
content = extend_base_template(body, context["main"]["base_template"]) - context["base_url"] = "".join(["../"] * os.path.normpath(fname).count("/")) + context["base_url"] = "../" * (len(fname.parents) - 1) content = jinja_env.from_string(content).render(**context) - fname_html = os.path.splitext(fname)[0] + ".html" - with open( - os.path.join(target_path, fname_html), "w", encoding="utf-8" - ) as f: + fname_html = fname.with_suffix(".html").name + with (target_path / dirname / fname_html).open("w", encoding="utf-8") as f: f.write(content) else: - shutil.copy( - os.path.join(source_path, fname), os.path.join(target_path, dirname) - ) + shutil.copy(source_path / fname, target_path / fname) if __name__ == "__main__": @@ -525,4 +515,4 @@ def main( "--target-path", default="build", help="directory where to write the output" ) args = parser.parse_args() - sys.exit(main(args.source_path, args.target_path)) + sys.exit(main(pathlib.Path(args.source_path), pathlib.Path(args.target_path)))