Skip to content

Commit 3f359e0

Browse files
authored
Merge pull request #269 from openzim/python_3.14
Small maintenance : Python 3.14, yt-dlp[default] and beartype 0.22
2 parents 48da0c6 + 2808b22 commit 3f359e0

File tree

9 files changed

+57
-40
lines changed

9 files changed

+57
-40
lines changed

.pre-commit-config.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,20 +3,20 @@
33
exclude: ^tests/files # these are raw test files, no need to mess with them
44
repos:
55
- repo: https://github.com/pre-commit/pre-commit-hooks
6-
rev: v5.0.0
6+
rev: v6.0.0
77
hooks:
88
- id: trailing-whitespace
99
- id: end-of-file-fixer
1010
- repo: https://github.com/psf/black
11-
rev: '25.9.0'
11+
rev: '25.11.0'
1212
hooks:
1313
- id: black
1414
- repo: https://github.com/astral-sh/ruff-pre-commit
15-
rev: v0.13.1
15+
rev: v0.14.5
1616
hooks:
1717
- id: ruff
1818
- repo: https://github.com/RobertCraigie/pyright-python
19-
rev: v1.1.405
19+
rev: v1.1.407
2020
hooks:
2121
- id: pyright
2222
name: pyright (system)

.readthedocs.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ version: 2
66
build:
77
os: ubuntu-24.04
88
tools:
9-
python: '3.13'
9+
python: '3.14'
1010

1111
# custom commands to run mkdocs build within hatch, as suggested by maintainer in
1212
# https://github.com/readthedocs/readthedocs.org/issues/10706

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
## [Unreleased]
99

10+
### Changed
11+
12+
- Upgrade to support only Python 3.14 (#266)
13+
- Upgrade dependencies (#269)
14+
1015
## [5.2.0] - 2025-10-02
1116

1217
### Added

README.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,13 @@ zimscraperlib>=1.1,<1.2
2424

2525
See documentation at [Read the Docs](https://python-scraperlib.readthedocs.io/) for details.
2626

27+
> [!WARNING]
28+
> While this library brings support for downloading videos with yt-dlp, recent changes in YouTube have forced the yt-dlp team
29+
> to require new dependencies for YouTube videos (see https://github.com/yt-dlp/yt-dlp/issues/15012). These dependencies
30+
> are significantly large and not needed for any of the other backends supported by yt-dlp (only YouTube needs them). These dependencies
31+
> are hence not included in this library's dependencies (yet, see https://github.com/openzim/python-scraperlib/issues/268),
32+
> so you have to install them on your own if you intend to download videos from YouTube.
33+
2734
# Dependencies
2835

2936
- libmagic

pyproject.toml

Lines changed: 21 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
[build-system]
22
# jinja2 is required to generate JS and Python rules at build time
33
# PyYAML is used to parse fuzzy rules and generate Python/JS code
4-
requires = ["hatchling", "hatch-openzim>=0.2", "jinja2==3.1.6", "PyYAML==6.0.2"]
4+
requires = ["hatchling", "hatch-openzim>=0.2", "jinja2==3.1.6", "PyYAML==6.0.3"]
55
build-backend = "hatchling.build"
66

77
[project]
88
name = "zimscraperlib"
9-
requires-python = ">=3.13,<3.14"
9+
requires-python = ">=3.14,<3.15"
1010
description = "Collection of python tools to re-use common code across scrapers"
1111
readme = "README.md"
1212
dependencies = [
@@ -16,7 +16,7 @@ dependencies = [
1616
"python-resize-image>=1.1.19,<1.2",
1717
"Babel>=2.9,<3.0",
1818
"python-magic>=0.4.3,<0.5",
19-
"libzim>=3.4.0,<4.0",
19+
"libzim>=3.8.0,<4.0",
2020
"beautifulsoup4>=4.9.3,<5.0",
2121
"lxml>=4.6.3,<7.0",
2222
"optimize-images>=1.3.6,<2.0",
@@ -26,10 +26,10 @@ dependencies = [
2626
"regex>=2020.7.14",
2727
"pymupdf>=1.24.0,<2.0",
2828
"CairoSVG>=2.2.0,<3.0",
29-
"beartype>=0.19,<0.22",
29+
"beartype>=0.19,<0.23",
3030
# yt-dlp should be updated as frequently as possible
3131
"yt-dlp",
32-
"pillow>=7.0.0,<12.0",
32+
"pillow>=7.0.0,<13.0",
3333
"urllib3>=1.26.5,<2.6.0",
3434
"piexif==1.1.3", # this dep is a nightmare in terms of release management, better pinned just like in optimize-images anyway
3535
"idna>=2.5,<4.0",
@@ -49,39 +49,39 @@ additional-classifiers = [
4949

5050
[project.optional-dependencies]
5151
scripts = [
52-
"invoke==2.2.0",
52+
"invoke==2.2.1",
5353
# jinja2 is required to generate JS and Python rules at build time
5454
# PyYAML is used to parse fuzzy rules and generate Python/JS code
5555
# also update version in build-system above
5656
"jinja2==3.1.6",
57-
"PyYAML==6.0.2",
57+
"PyYAML==6.0.3",
5858

5959
]
6060
lint = [
61-
"black==25.9.0",
62-
"ruff==0.13.1",
61+
"black==25.11.0",
62+
"ruff==0.14.5",
6363
]
6464
check = [
65-
"pyright==1.1.405",
66-
"pytest==8.4.2",
65+
"pyright==1.1.407",
66+
"pytest==9.0.1",
6767
]
6868
test = [
69-
"pytest==8.4.2",
69+
"pytest==9.0.1",
7070
"pytest-mock==3.15.1",
71-
"coverage==7.10.7",
71+
"coverage==7.11.3",
7272
]
7373
docs = [
7474
"mkdocs==1.6.1",
75-
"mkdocs-include-markdown-plugin==7.1.7",
76-
"mkdocs-material==9.6.20",
75+
"mkdocs-include-markdown-plugin==7.2.0",
76+
"mkdocs-material==9.7.0",
7777
"mkdocstrings[python]==0.30.1",
78-
"pymdown-extensions==10.16.1",
78+
"pymdown-extensions==10.17.1",
7979
"mkdocs-gen-files==0.5.0",
8080
"mkdocs-literate-nav==0.6.2",
8181
]
8282
dev = [
83-
"ipython==9.5.0",
84-
"pre-commit==4.3.0",
83+
"ipython==9.7.0",
84+
"pre-commit==4.4.0",
8585
"zimscraperlib[scripts]",
8686
"zimscraperlib[lint]",
8787
"zimscraperlib[test]",
@@ -157,10 +157,10 @@ build = "inv docs-build --args '{args}'"
157157

158158
[tool.black]
159159
line-length = 88
160-
target-version = ['py313']
160+
target-version = ['py314']
161161

162162
[tool.ruff]
163-
target-version = "py313"
163+
target-version = "py314"
164164
line-length = 88
165165
src = ["src", "contrib"]
166166

@@ -293,12 +293,6 @@ exclude_lines = [
293293
include = ["contrib", "src", "tests", "tasks.py"]
294294
exclude = [".env/**", ".venv/**"]
295295
extraPaths = ["src"]
296-
pythonVersion = "3.13"
296+
pythonVersion = "3.14"
297297
typeCheckingMode="strict"
298298
disableBytesTypePromotions = true
299-
300-
[[tool.pyright.overrides.files]]
301-
files = [
302-
"src/zimscraperlib/rewriting**/*.py",
303-
"tests/rewriting/**/*.py"
304-
]

rules/rules.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@
66
# ones) but just rewriting to proper path.
77
#
88
# This file is in sync with content at commit
9-
# https://github.com/webrecorder/wabac.js/commit/1c3acfce39e0dc127acf455b04237e9a82062730
10-
# from October 17, 2024
9+
# https://github.com/webrecorder/wabac.js/commit/f62756661d06e721bc57ff25199c73ce51227916
10+
# from October 29, 2025
1111
#
1212
# This file should be updated at every release of scraperlib
1313
#

src/zimscraperlib/rewriting/js.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@
1313
ZIM at `_zim_static/__wb_module_decl.js`
1414
1515
This code is based on https://github.com/webrecorder/wabac.js/blob/main/src/rewrite/jsrewriter.ts
16-
Last backport of upstream changes is from Sept 13, 2025
17-
Commit 6dd2d9ae664cfcd2ea8637d7d6c7ed7a0ca332a0
16+
Last backport of upstream changes is from Oct 12, 2025
17+
Commit 1849552c3dbcbc065c05afac2dd80061db37b64d
1818
"""
1919

2020
import re

src/zimscraperlib/zim/indexing.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -101,10 +101,15 @@ def get_pdf_index_data(
101101
if parts: # pragma: no branch (always metadata in test PDFs)
102102
title = " - ".join(parts)
103103

104-
content = "\n".join(
105-
page.get_text() # pyright: ignore[reportUnknownArgumentType, reportUnknownMemberType, reportAttributeAccessIssue]
106-
for page in doc
107-
)
104+
def get_pdf_content(page: pymupdf.Page) -> str:
105+
text = ( # pyright: ignore[reportUnknownVariableType]
106+
page.get_text() # pyright: ignore[reportUnknownMemberType]
107+
)
108+
if not isinstance(text, str):
109+
raise Exception("Unexpected text content")
110+
return text
111+
112+
content = "\n".join(get_pdf_content(page) for page in doc)
108113

109114
# build list of messages and filter messages which are known to not be relevant
110115
# in our use-case

tests/rewriting/test_js_rewriting.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -449,6 +449,12 @@ def test_import_rewrite(rewrite_import_content: ImportTestContent):
449449
`;
450450
}
451451
452+
""",
453+
""""use strict";(function() {
454+
const text = `
455+
export { a };
456+
`;
457+
})
452458
""",
453459
"let a = 7; var b = 5; const foo = 4;\n\n",
454460
]

0 commit comments

Comments
 (0)