From 969e4086f06ff1c64b75ce79afeb5ead7cffd6c4 Mon Sep 17 00:00:00 2001 From: Galen Wright-Watson Date: Sun, 20 Oct 2024 12:54:08 -0700 Subject: [PATCH 1/3] add feature: include line number in toc parse error message --- pdftocio/tocparser.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pdftocio/tocparser.py b/pdftocio/tocparser.py index 3dfaec6..d46df5e 100644 --- a/pdftocio/tocparser.py +++ b/pdftocio/tocparser.py @@ -5,10 +5,10 @@ from typing import IO, List from fitzutils import ToCEntry -from itertools import takewhile +from itertools import count, takewhile -def parse_entry(entry: List) -> ToCEntry: +def parse_entry(entry: List, nLine: int) -> ToCEntry: """parse a row in csv to a toc entry""" # a somewhat weird hack, csv reader would read spaces as an empty '', so we @@ -24,7 +24,7 @@ def parse_entry(entry: List) -> ToCEntry: ) return toc_entry except IndexError as e: - print(f"Unable to parse toc entry {entry};", + print(f"Unable to parse toc entry {entry} from line {nLine};", f"Need at least {indent + 2} parts but only have {len(entry)}.", "Make sure the page number is present.", file=sys.stderr) @@ -35,4 +35,4 @@ def parse_toc(file: IO) -> List[ToCEntry]: """Parse a toc file to a list of toc entries""" reader = csv.reader(file, lineterminator='\n', delimiter=' ', quoting=csv.QUOTE_NONNUMERIC) - return list(map(parse_entry, reader)) + return list(map(parse_entry, reader, count(1))) From 2e0a8c6171d48f38f8cc6af1b9deaa5d5d7a057e Mon Sep 17 00:00:00 2001 From: Galen Wright-Watson Date: Sun, 10 Nov 2024 14:31:49 -0800 Subject: [PATCH 2/3] fix for #38: update main loops so command line utilities can be debugged from the command line --- pdftocgen/app.py | 4 +++- pdftocio/app.py | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/pdftocgen/app.py b/pdftocgen/app.py index 2859d7b..459a540 100644 --- a/pdftocgen/app.py +++ b/pdftocgen/app.py @@ -90,7 +90,7 @@ def main(): print(usage_s, file=sys.stderr) sys.exit(2) - recipe_file: TextIO = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8', errors='ignore') + recipe_file: TextIO = None readable: bool = False vpos: bool = False out: TextIO = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', errors='ignore') @@ -129,6 +129,8 @@ def main(): print(usage_s, file=sys.stderr) sys.exit(1) + if not recipe_file: + recipe_file = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8', errors='ignore') path_in: str = args[0] # done parsing arguments diff --git a/pdftocio/app.py b/pdftocio/app.py index 484b44f..6fcc22f 100644 --- a/pdftocio/app.py +++ b/pdftocio/app.py @@ -94,7 +94,7 @@ def main(): print(usage_s, file=sys.stderr) sys.exit(2) - toc_file: TextIO = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8', errors='ignore') + toc_file: TextIO = None print_toc: bool = False readable: bool = False out: Optional[str] = None @@ -131,6 +131,8 @@ def main(): print(usage_s, file=sys.stderr) sys.exit(1) + if not toc_file: + toc_file = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8', errors='ignore') path_in: str = args[0] # done parsing arguments From b467cfdcbd07fa28ec88bf78ce08e1330ecc4b67 Mon Sep 17 00:00:00 2001 From: Galen Wright-Watson Date: Sun, 10 Nov 2024 16:00:14 -0800 Subject: [PATCH 3/3] fix for #37: print more informative error message when page number can't be converted to an int --- pdftocio/tocparser.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pdftocio/tocparser.py b/pdftocio/tocparser.py index d46df5e..93693f6 100644 --- a/pdftocio/tocparser.py +++ b/pdftocio/tocparser.py @@ -23,6 +23,11 @@ def parse_entry(entry: List, nLine: int) -> ToCEntry: *entry[indent + 2:] # vpos ) return toc_entry + except ValueError as e: + print(f"Unable to parse toc entry {entry} from line {nLine};", + f"Couldn't convert '{entry[indent + 1]}' to a page number.", + file=sys.stderr) + raise e except IndexError as e: print(f"Unable to parse toc entry {entry} from line {nLine};", f"Need at least {indent + 2} parts but only have {len(entry)}.",