Skip to content

Commit b678de2

Browse files
committed
Print better error messages if article data is incomplete
1 parent: c08877b; commit: b678de2

File tree

1 file changed

+17 -4 lines changed

1 file changed

+17
-4
lines changed

pubarchiver/__main__.py

Lines changed: 17 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -474,18 +474,31 @@ def _save_articles(self, dest_dir, article_list, dest_service, zip_articles):
474474
# error description if there is an error.
475475
saved_files = []
476476
for article in article_list:
477+
# Prep a message in case we must report a problem.
478+
if article.doi:
479+
identifier = ' ' + article.doi
480+
elif article.date:
481+
identifier = ' from ' + article.date
482+
if article.title:
483+
identifier += ' with title "' + article.title + '"'
484+
elif article.title:
485+
identifier = ' with title "' + article.title + '"'
486+
else:
487+
identifier = ''
488+
skipping_preface = 'Skipping article' + identifier + ' because of '
489+
477490
# Start by testing that we have all the data we will need.
478491
if not article.doi:
479-
warn('Skipping article with missing DOI: ' + article.title)
492+
warn(skipping_preface + 'missing DOI')
480493
article.status = 'missing-doi'
481494
continue
482495
if not article.pdf:
483-
warn('Skipping article with missing PDF URL: ' + article.doi)
496+
warn(skipping_preface + 'missing PDF URL')
484497
article.status = 'missing-pdf'
485498
continue
486499
if self.journal.uses_jats and not article.jats:
487500
# We need JATS for PMC.
488-
warn('Skipping article with missing JATS URL: ' + article.doi)
501+
warn(skipping_preface + 'missing JATS URL')
489502
article.status = 'missing-jats'
490503
continue
491504
xmldict = self.journal.article_metadata(article)
@@ -543,7 +556,7 @@ def _save_article_portico(self, dest_dir, article, xmldict):
543556
if not download_file(article.jats, jats_file):
544557
warn(f'Could not download JATS file for {article.doi}')
545558
article.status = 'failed-jats-download'
546-
if self.do_validate:
559+
if self.do_validate and article.status != 'failed-jats-download':
547560
if not valid_xml(jats_file, self._dtd):
548561
warn(f'Failed to validate JATS for article {article.doi}')
549562
article.status = 'failed-jats-validation'

0 commit comments

Comments
 (0)