-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathbing_scraper.py
executable file
·80 lines (66 loc) · 2.62 KB
/
bing_scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
#!/bin/env python
"""Scrapes images from Bing images using a search term"""
import argparse
import requests
import json
import shutil
import os
class Image():
    """Image metadata extracted from one Bing image-search result.

    Every attribute is read from the result mapping with ``dict.get``, so a
    field that is absent from the result is stored as ``None``.
    """

    def __init__(self, result):
        """Copy the fields of interest out of ``result``.

        Args:
            result: Mapping describing a single search result.
        """
        # The thumbnail dimensions live in a nested mapping. Fall back to an
        # empty dict so a missing or null 'thumbnail' entry yields None
        # dimensions instead of raising AttributeError on None.
        thumbnail = result.get('thumbnail') or {}

        self.content_url = result.get('contentUrl')
        self.name = result.get('name')
        self.content_size = result.get('contentSize')
        self.thumbnail_url = result.get('thumbnailUrl')
        self.thumbnail_height = thumbnail.get('height')
        self.thumbnail_width = thumbnail.get('width')
        self.image_height = result.get('height')
        self.image_width = result.get('width')
        self.encoding_format = result.get('encodingFormat')

    def __str__(self):
        """Return the image name and content URL separated by a space."""
        # Format instead of concatenating so that a missing name or URL
        # (stored as None) does not raise TypeError.
        return '{} {}'.format(self.name, self.content_url)
def get_imgs(term, offset, key, count=50, timeout=30):
    """Query the Bing image-search endpoint and return the results.

    Args:
        term: Search query, sent as the ``q`` parameter.
        offset: Number of results to skip, sent as ``offset``. Callers step
            this to fetch successive pages.
        key: Subscription key, sent in the ``Ocp-Apim-Subscription-Key``
            header.
        count: Number of results to request per call. Defaults to 50, which
            the original code marked as the maximum.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the script forever.

    Returns:
        A list of ``Image`` objects, one per entry in the response's
        ``"value"`` list; empty when the response carries no such list.

    Raises:
        requests.HTTPError: If the server answers with an error status
            (for example a rejected subscription key).
        requests.RequestException: On connection problems or timeouts.
    """
    base = 'https://api.cognitive.microsoft.com/bing/v5.0/images/search'
    payload = {
        'q': term,
        'count': count,
        'offset': offset,  # use this for multipass
    }
    header = {'Ocp-Apim-Subscription-Key': key}

    resp = requests.get(base, params=payload, headers=header, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of tripping over a missing "value" key
    # in the error payload further down.
    resp.raise_for_status()

    results = resp.json().get('value') or []
    return [Image(entry) for entry in results]
def write_imgs(imgs, dest):
    """Download the thumbnail of every image into the ``dest`` directory.

    Each file is named ``<index>.<encoding_format>``, where the index is the
    image's position in ``imgs``. Downloading is best-effort: a failure for
    one image is reported on stdout and the loop moves on to the next one.

    Args:
        imgs: Iterable of objects exposing ``thumbnail_url`` and
            ``encoding_format`` attributes.
        dest: Destination directory; created, including any missing parent
            directories, when it does not exist yet.
    """
    # makedirs (rather than mkdir) so a nested destination such as "out/cats"
    # works even when "out" does not exist yet.
    if not os.path.isdir(dest):
        os.makedirs(dest)

    for i, img in enumerate(imgs):
        try:
            # Build the path first: an image without an encoding format
            # fails here, before any network traffic is spent on it.
            path = os.path.join(dest, str(i) + '.' + img.encoding_format)
            r = requests.get(img.thumbnail_url, stream=True, timeout=30)
            try:
                if r.status_code != 200:
                    print('Skipping ' + path + ': HTTP ' + str(r.status_code))
                    continue
                print('Writing: ' + path)
                # r.raw is the undecoded byte stream; without this flag a
                # gzip/deflate-encoded body would be written to disk still
                # compressed.
                r.raw.decode_content = True
                # Copy to file in chunks
                with open(path, 'wb') as f:
                    shutil.copyfileobj(r.raw, f)
            finally:
                # Release the streamed connection even on failure.
                r.close()
        except Exception as err:
            # Deliberately broad: reading r.raw can raise low-level transport
            # errors that are not wrapped in requests' exception hierarchy,
            # and one bad image should not abort the whole batch.
            print("Could not write " + str(err))
def main():
    """Fetch image metadata for a search term and download the thumbnails.

    Command-line arguments:
        keyfile: File whose first line is the API key.
        term: Search term.
        n: Number of images to pull.
        --d: Destination directory (defaults to the current directory).
    """
    parser = argparse.ArgumentParser(description='Scrape images from Bing Image Search.')
    parser.add_argument('keyfile', help='File containing Azure API key.', type=argparse.FileType('r'))
    parser.add_argument('term', help='Search term for parsing Bing images')
    parser.add_argument('n', help='Number of images to pull.', type=int)
    # Without a default, omitting --d passes None to write_imgs, which fails
    # only after all the API calls have already been made.
    parser.add_argument('--d', default='.',
                        help='Destination dir for writing images (default: current directory).')
    args = parser.parse_args()

    # Only the first line of the key file is used; close it even if the
    # read fails.
    try:
        key = args.keyfile.readline().strip()
    finally:
        args.keyfile.close()

    # get_imgs returns up to 50 results per call, so page through the
    # results in steps of that size.
    page_size = 50
    imgs = []
    for offset in range(0, args.n, page_size):
        page = get_imgs(args.term, offset, key)
        if not page:
            # No more results available; further requests would be wasted.
            break
        imgs += page

    # Pages come back in multiples of page_size; trim to the n images the
    # user actually asked for.
    write_imgs(imgs[:args.n], args.d)
# Run the scraper only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()