forked from Nandaka/PixivUtil2
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathPixivImageHandler.py
326 lines (280 loc) · 16.7 KB
/
PixivImageHandler.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
# -*- coding: utf-8 -*-
import datetime
import gc
import os
import re
import sys
import traceback
import urllib
from colorama import Fore, Style
import datetime_z
import PixivBrowserFactory
import PixivConstant
from PixivException import PixivException
import PixivHelper
import PixivDownloadHandler
def _dump_medium_page(image_id, parse_medium_page):
    """Pass the fetched medium page to PixivHelper.dump_html for post-mortem debugging.

    Returns True when there was a page to dump, False when parse_medium_page is None.
    """
    if parse_medium_page is None:
        return False
    dump_filename = f'Error medium page for image {image_id}.html'
    PixivHelper.dump_html(dump_filename, parse_medium_page)
    PixivHelper.print_and_log('error', f'Dumping html to: {dump_filename}')
    return True


def process_image(caller,
                  config,
                  artist=None,
                  image_id=None,
                  user_dir='',
                  bookmark=False,
                  search_tags='',
                  title_prefix="",
                  bookmark_count=-1,
                  image_response_count=-1,
                  notifier=None,
                  job_option=None):
    """Fetch the page for a single image id, apply the skip filters, download it and record it in the DB.

    Args:
        caller: Object (module or instance) that supplies shared state. This function
            reads ``__dbManager__``, ``__blacklistMembers``, ``__blacklistTags``,
            ``__blacklistTitles``, ``__suppressTags``, ``__re_manga_page``, ``__br__``
            and ``DEBUG_SKIP_DOWNLOAD_IMAGE``, appends to ``__errorList``, sets
            ``ERROR_CODE`` and calls ``set_console_title``. Because this is a
            module-level function, the double-underscore names are NOT name-mangled.
        config: Settings object (dateDiff, overwrite, blacklist switches, ...).
        artist: Passed as ``parent`` to ``getImagePage``; when None the member
            details of the fetched image are logged.
        image_id: Id of the image to process.
        user_dir: Target directory; when ``''`` ``format_src.rootDirectory`` is used.
        bookmark: Passed as ``from_bookmark`` to ``getImagePage`` and to ``make_filename``.
        search_tags: Passed to ``make_filename`` as ``searchTags``.
        title_prefix: When non-empty, used as the prefix of the console title.
        bookmark_count: Passed to ``getImagePage``.
        image_response_count: Not used by this function; kept for interface compatibility.
        notifier: Callable invoked as ``notifier(type=..., message=...)``; defaults to
            ``PixivHelper.dummy_notifier``.
        job_option: When not None, replaces ``config`` as the source of the filename
            formats and the root directory.

    Returns:
        ``PixivConstant.PIXIVUTIL_SKIP_DUPLICATE_NO_WAIT`` when the image is already in
        the DB and neither ``alwaysCheckFileSize`` nor ``overwrite`` is set;
        ``PIXIVUTIL_NOT_OK`` when the medium page cannot be fetched;
        ``PIXIVUTIL_SKIP_OLDER`` / ``PIXIVUTIL_SKIP_BLACKLIST`` when a filter matched;
        ``PIXIVUTIL_CHECK_DOWNLOAD`` when a DB record existed but
        ``db.cleanupFileExists`` returned a falsy value; ``0`` after the DB has been
        updated; otherwise the status of the last download attempt (``None`` when
        ``caller.DEBUG_SKIP_DOWNLOAD_IMAGE`` is set).

    Raises:
        KeyboardInterrupt: When a download reports ``PIXIVUTIL_ABORTED``.
        Exception: Any other error outside the medium-page fetch is logged and re-raised.
    """
    # caller function/method
    # TODO: ideally to be removed or passed as argument
    db = caller.__dbManager__

    if notifier is None:
        notifier = PixivHelper.dummy_notifier

    # job_option, when given, overrides config as the source of filename formats
    format_src = config if job_option is None else job_option

    parse_medium_page = None
    image = None
    result = None
    referer = f'https://www.pixiv.net/artworks/{image_id}'
    filename = f'no-filename-{image_id}.tmp'

    try:
        msg = Fore.YELLOW + Style.NORMAL + f'Processing Image Id: {image_id}' + Style.RESET_ALL
        PixivHelper.print_and_log(None, msg)
        notifier(type="IMAGE", message=msg)

        # check if already downloaded. images won't be downloaded twice - needed in process_image to catch any download
        r = db.selectImageByImageId(image_id, cols='save_name')
        exists = False
        in_db = False
        if r is not None:
            exists = db.cleanupFileExists(r[0])
            in_db = True

        # skip if already recorded in db and alwaysCheckFileSize is disabled and overwrite is disabled.
        if in_db and not config.alwaysCheckFileSize and not config.overwrite:
            PixivHelper.print_and_log(None, f'Already downloaded in DB: {image_id}')
            gc.collect()
            return PixivConstant.PIXIVUTIL_SKIP_DUPLICATE_NO_WAIT

        # get the medium page
        try:
            (image, parse_medium_page) = PixivBrowserFactory.getBrowser().getImagePage(image_id=image_id,
                                                                                       parent=artist,
                                                                                       from_bookmark=bookmark,
                                                                                       bookmark_count=bookmark_count)
            # Truthiness instead of len() so a None prefix falls back to the
            # member title rather than raising TypeError.
            if title_prefix:
                caller.set_console_title(f"{title_prefix} ImageId: {image.imageId}")
            else:
                caller.set_console_title(f"MemberId: {image.artist.artistId} ImageId: {image.imageId}")

        except PixivException as ex:
            caller.ERROR_CODE = ex.errorCode
            caller.__errorList.append(dict(type="Image", id=str(image_id), message=ex.message, exception=ex))
            if ex.errorCode == PixivException.UNKNOWN_IMAGE_ERROR:
                PixivHelper.print_and_log('error', ex.message)
            elif ex.errorCode == PixivException.SERVER_ERROR:
                PixivHelper.print_and_log('error', f'Giving up image_id (medium): {image_id}')
            elif ex.errorCode > 2000:
                PixivHelper.print_and_log('error', f'Image Error for {image_id}: {ex.message}')
            # Only log the bare exception when there is no page to dump.
            if not _dump_medium_page(image_id, parse_medium_page):
                PixivHelper.print_and_log('error', f'Image ID ({image_id}): {ex}')
            PixivHelper.print_and_log('error', f'Stack Trace: {sys.exc_info()}')
            return PixivConstant.PIXIVUTIL_NOT_OK
        except Exception as ex:
            PixivHelper.print_and_log('error', f'Image ID ({image_id}): {ex}')
            _dump_medium_page(image_id, parse_medium_page)
            PixivHelper.print_and_log('error', f'Stack Trace: {sys.exc_info()}')
            traceback.print_exc()
            return PixivConstant.PIXIVUTIL_NOT_OK

        download_image_flag = True

        # date validation and blacklist tag validation
        if config.dateDiff > 0:
            # 0001-01-01 is exempt from the age check.
            min_date = datetime.datetime.fromordinal(1).replace(tzinfo=datetime_z.utc)
            if image.worksDateDateTime != min_date:
                # NOTE(review): today() is naive local time that is then relabelled with
                # datetime_z.utc, so the cutoff is shifted by the local UTC offset - confirm
                # whether that is intended.
                cutoff = (datetime.datetime.today() - datetime.timedelta(config.dateDiff)).replace(tzinfo=datetime_z.utc)
                if image.worksDateDateTime < cutoff:
                    PixivHelper.print_and_log('info', f'Skipping image_id: {image_id} – it\'s older than: {config.dateDiff} day(s).')
                    download_image_flag = False
                    result = PixivConstant.PIXIVUTIL_SKIP_OLDER

        if config.useBlacklistMembers and download_image_flag:
            if str(image.originalArtist.artistId) in caller.__blacklistMembers:
                PixivHelper.print_and_log('info', f'Skipping image_id: {image_id} – blacklisted member id: {image.originalArtist.artistId}')
                download_image_flag = False
                result = PixivConstant.PIXIVUTIL_SKIP_BLACKLIST

        if config.useBlacklistTags and download_image_flag:
            for item in caller.__blacklistTags:
                if item in image.imageTags:
                    PixivHelper.print_and_log('info', f'Skipping image_id: {image_id} – blacklisted tag: {item}')
                    download_image_flag = False
                    result = PixivConstant.PIXIVUTIL_SKIP_BLACKLIST
                    break

        if config.useBlacklistTitles and download_image_flag:
            if config.useBlacklistTitlesRegex:
                # each blacklist entry is used as a regular expression
                for item in caller.__blacklistTitles:
                    if re.search(rf"{item}", image.imageTitle):
                        PixivHelper.print_and_log('info', f'Skipping image_id: {image_id} – Title matched: {item}')
                        download_image_flag = False
                        result = PixivConstant.PIXIVUTIL_SKIP_BLACKLIST
                        break
            else:
                # each blacklist entry is used as a plain substring
                for item in caller.__blacklistTitles:
                    if item in image.imageTitle:
                        PixivHelper.print_and_log('info', f'Skipping image_id: {image_id} – Title contained: {item}')
                        download_image_flag = False
                        result = PixivConstant.PIXIVUTIL_SKIP_BLACKLIST
                        break

        if download_image_flag and not caller.DEBUG_SKIP_DOWNLOAD_IMAGE:
            if artist is None:
                PixivHelper.print_and_log(None, f'Member Name : {image.artist.artistName}')
                PixivHelper.print_and_log(None, f'Member Avatar: {image.artist.artistAvatar}')
                PixivHelper.print_and_log(None, f'Member Token : {image.artist.artistToken}')
                PixivHelper.print_and_log(None, f'Member Background : {image.artist.artistBackground}')
            PixivHelper.print_and_log(None, f"Title: {image.imageTitle}")
            tags_str = ', '.join(image.imageTags)
            PixivHelper.print_and_log(None, f"Tags : {tags_str}")
            PixivHelper.print_and_log(None, f"Date : {image.worksDateDateTime}")
            PixivHelper.print_and_log(None, f"Mode : {image.imageMode}")

            # get bookmark count, only when the filename format needs it and it is still unset (-1)
            if ("%bookmark_count%" in format_src.filenameFormat or "%image_response_count%" in format_src.filenameFormat) and image.bookmark_count == -1:
                PixivHelper.print_and_log(None, "Parsing bookmark page", end=' ')
                bookmark_url = f'https://www.pixiv.net/bookmark_detail.php?illust_id={image_id}'
                parse_bookmark_page = PixivBrowserFactory.getBrowser().getPixivPage(bookmark_url)
                image.ParseBookmarkDetails(parse_bookmark_page)
                parse_bookmark_page.decompose()
                del parse_bookmark_page
                PixivHelper.print_and_log(None, f"Bookmark Count : {image.bookmark_count}")
                caller.__br__.back()

            if config.useSuppressTags:
                for item in caller.__suppressTags:
                    if item in image.imageTags:
                        image.imageTags.remove(item)

            # get manga page
            if image.imageMode == 'manga':
                PixivHelper.print_and_log(None, f"Page Count : {image.imageCount}")

            # an explicit user_dir wins over the configured root directory
            if user_dir == '':
                target_dir = format_src.rootDirectory
            else:
                target_dir = user_dir

            result = PixivConstant.PIXIVUTIL_OK
            manga_files = []
            page = 0

            # Issue #639
            source_urls = image.imageUrls
            if config.downloadResized:
                source_urls = image.imageResizedUrls

            for img in source_urls:
                PixivHelper.print_and_log(None, f'Image URL : {img}')
                url = os.path.basename(img)
                split_url = url.split('.')
                # only handle URLs whose file name starts with this image id
                if split_url[0].startswith(str(image_id)):
                    filename_format = format_src.filenameFormat
                    if image.imageMode == 'manga':
                        filename_format = format_src.filenameMangaFormat

                    filename = PixivHelper.make_filename(filename_format,
                                                         image,
                                                         tagsSeparator=config.tagsSeparator,
                                                         tagsLimit=config.tagsLimit,
                                                         fileUrl=url,
                                                         bookmark=bookmark,
                                                         searchTags=search_tags,
                                                         useTranslatedTag=config.useTranslatedTag,
                                                         tagTranslationLocale=config.tagTranslationLocale)
                    filename = PixivHelper.sanitize_filename(filename, target_dir)

                    if image.imageMode == 'manga' and config.createMangaDir:
                        manga_page = caller.__re_manga_page.findall(filename)
                        if manga_page:
                            # turn "<prefix><id>_p<N><suffix>" into "<prefix><id>/_p<N><suffix>"
                            splitted_filename = filename.split(manga_page[0][0], 1)
                            splitted_manga_page = manga_page[0][0].split("_p", 1)
                            filename = f"{splitted_filename[0]}{splitted_manga_page[0]}{os.sep}_p{splitted_manga_page[1]}{splitted_filename[1]}"

                    PixivHelper.print_and_log('info', f'Filename : {filename}')

                    # pessimistic default in case download_image() raises URLError
                    result = PixivConstant.PIXIVUTIL_NOT_OK
                    try:
                        (result, filename) = PixivDownloadHandler.download_image(caller,
                                                                                 img,
                                                                                 filename,
                                                                                 referer,
                                                                                 config.overwrite,
                                                                                 config.retry,
                                                                                 config.backupOldFile,
                                                                                 image,
                                                                                 page,
                                                                                 notifier)

                        if result == PixivConstant.PIXIVUTIL_NOT_OK:
                            PixivHelper.print_and_log('error', f'Image url not found/failed to download: {image.imageId}')
                        elif result == PixivConstant.PIXIVUTIL_ABORTED:
                            raise KeyboardInterrupt()

                        manga_files.append((image_id, page, filename))
                        page += 1

                    except urllib.error.URLError:
                        PixivHelper.print_and_log('error', f'Error when download_image(), giving up url: {img}')
                    PixivHelper.print_and_log(None, '')

            if config.writeImageInfo or config.writeImageJSON:
                filename_info_format = format_src.filenameInfoFormat or format_src.filenameFormat
                # Issue #575
                if image.imageMode == 'manga':
                    filename_info_format = format_src.filenameMangaInfoFormat or format_src.filenameMangaFormat or filename_info_format
                # NOTE(review): `url` is only bound inside the download loop above; if
                # source_urls is empty this line raises UnboundLocalError, which the outer
                # handler logs and re-raises.
                info_filename = PixivHelper.make_filename(filename_info_format,
                                                          image,
                                                          tagsSeparator=config.tagsSeparator,
                                                          tagsLimit=config.tagsLimit,
                                                          fileUrl=url,
                                                          appendExtension=False,
                                                          bookmark=bookmark,
                                                          searchTags=search_tags,
                                                          useTranslatedTag=config.useTranslatedTag,
                                                          tagTranslationLocale=config.tagTranslationLocale)
                info_filename = PixivHelper.sanitize_filename(info_filename, target_dir)
                # trim _pXXX
                info_filename = re.sub(r'_p?\d+$', '', info_filename)
                if config.writeImageInfo:
                    image.WriteInfo(info_filename + ".txt")
                if config.writeImageJSON:
                    image.WriteJSON(info_filename + ".json")

            if image.imageMode == 'ugoira_view':
                if config.writeUgoiraInfo:
                    image.WriteUgoiraData(filename + ".js")
                # Handle #451
                if config.createUgoira and (result in (PixivConstant.PIXIVUTIL_OK, PixivConstant.PIXIVUTIL_SKIP_DUPLICATE)):
                    PixivDownloadHandler.handle_ugoira(image, filename, config, notifier)

            if config.writeUrlInDescription:
                PixivHelper.write_url_in_description(image, config.urlBlacklistRegex, config.urlDumpFilename)

        if in_db and not exists:
            result = PixivConstant.PIXIVUTIL_CHECK_DOWNLOAD  # There was something in the database which had not been downloaded

        # Only save to db if all images is downloaded completely
        if result in (PixivConstant.PIXIVUTIL_OK,
                      PixivConstant.PIXIVUTIL_SKIP_DUPLICATE,
                      PixivConstant.PIXIVUTIL_SKIP_LOCAL_LARGER):
            try:
                db.insertImage(image.artist.artistId, image.imageId, image.imageMode)
            except Exception:
                # Best effort: a failed insert is logged and processing continues.
                # Exception (not BaseException) so Ctrl-C / SystemExit still propagate.
                PixivHelper.print_and_log('error', f'Failed to insert image id:{image.imageId} to DB')

            db.updateImage(image.imageId, image.imageTitle, filename, image.imageMode)

            if manga_files:
                db.insertMangaImages(manga_files)

            # map back to PIXIVUTIL_OK (because of ugoira file check)
            result = 0

        # drop the local references before forcing a collection
        if image is not None:
            del image
        if parse_medium_page is not None:
            del parse_medium_page
        gc.collect()
        PixivHelper.print_and_log(None, '\n')

        return result
    except Exception as ex:
        # KeyboardInterrupt is a BaseException, so it is never caught here and
        # propagates to the caller untouched.
        caller.ERROR_CODE = getattr(ex, 'errorCode', -1)
        traceback.print_exc()
        PixivHelper.print_and_log('error', f'Error at process_image(): {image_id}')
        PixivHelper.print_and_log('error', f'Exception: {sys.exc_info()}')

        _dump_medium_page(image_id, parse_medium_page)

        raise