Skip to content

Commit f43ea2b

Browse files
authored
Merge pull request #306 from tencentyun/feature_libertyzhu_92829d5f
Feature libertyzhu 92829d5f
2 parents 2820f60 + 563a011 commit f43ea2b

File tree

5 files changed

+639
-94
lines changed

5 files changed

+639
-94
lines changed

qcloud_cos/cos_client.py

Lines changed: 29 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
import xml.etree.ElementTree
1515
from requests import Request, Session, ConnectionError, Timeout
1616
from datetime import datetime
17-
from six.moves.urllib.parse import quote, unquote, urlencode
17+
from six.moves.urllib.parse import quote, unquote, urlencode, urlparse
1818
from six import text_type, binary_type
1919
from hashlib import md5
2020
from .streambody import StreamBody
@@ -241,7 +241,7 @@ class CosS3Client(object):
241241
__built_in_sessions = None # 内置的静态连接池,多个Client间共享使用
242242
__built_in_pid = 0
243243

244-
def __init__(self, conf, retry=1, session=None):
244+
def __init__(self, conf, retry=3, session=None):
245245
"""初始化client对象
246246
247247
:param conf(CosConfig): 用户的配置.
@@ -250,6 +250,7 @@ def __init__(self, conf, retry=1, session=None):
250250
"""
251251
self._conf = conf
252252
self._retry = retry # 重试的次数,分片上传时可适当增大
253+
self._retry_exe_times = 0 # 重试已执行次数
253254

254255
if session is None:
255256
if not CosS3Client.__built_in_sessions:
@@ -299,6 +300,14 @@ def handle_built_in_connection_pool_by_pid(self):
299300
def get_conf(self):
300301
"""Return the configuration object (self._conf) held by this client."""
301302
return self._conf
303+
304+
def get_retry_exe_times(self):
305+
"""Return the number of retries this client has executed so far."""
306+
return self._retry_exe_times
307+
308+
def inc_retry_exe_times(self):
309+
"""Increment the executed-retry counter by one."""
310+
self._retry_exe_times += 1
302311

303312
def get_auth(self, Method, Bucket, Key, Expired=300, Headers={}, Params={}, SignHost=None, UseCiEndPoint=False):
304313
"""获取签名
@@ -342,11 +351,11 @@ def get_auth(self, Method, Bucket, Key, Expired=300, Headers={}, Params={}, Sign
342351
auth = CosS3Auth(self._conf, Key, Params, Expired, SignHost)
343352
return auth(r).headers['Authorization']
344353

345-
# Decide whether a request should be re-sent with a switched hostname.
# Returns True only when all of the following hold, otherwise False:
#   * `headers` contains no 'x-cos-request-id' entry (the visible call sites
#     pass the response headers, or nothing after a network exception),
#   * self._conf._auto_switch_domain_on_retry is truthy,
#   * the hostname parsed from `url` matches the
#     {bucket}-{appid}.cos.{region}.myqcloud.com pattern.
# This span is a rendered diff: lines preceded by an 'NNN-' marker are the
# removed version (the `domain_switched` flag and the `self._conf._ip is None`
# check); lines preceded by an 'NNN+' marker are the added version (the
# hostname pattern check).
# NOTE(review): urlparse(url).hostname is None when `url` has no network
# location, and re.match() would then raise TypeError -- confirm callers
# always pass an absolute URL.
# NOTE(review): headers={} is a shared mutable default; it is only read here.
def should_switch_domain(self, domain_switched, headers={}):
354+
def should_switch_domain(self, url, headers={}):
355+
host = urlparse(url).hostname
346356
if not 'x-cos-request-id' in headers and \
347-
not domain_switched and \
348357
self._conf._auto_switch_domain_on_retry and \
349-
self._conf._ip is None:
358+
re.match(r'^([a-z0-9-]+-[0-9]+\.)(cos\.[a-z]+-[a-z]+(-[a-z]+)?(-1)?)\.(myqcloud\.com)$', host):
350359
return True
351360
return False
352361

@@ -375,7 +384,6 @@ def send_request(self, method, url, bucket=None, timeout=30, cos_request=True, c
375384
kwargs['headers'] = format_values(kwargs['headers'])
376385

377386
file_position = None
378-
domain_switched = False # 重试时如果要切换域名, 只切换一次
379387
if 'data' in kwargs:
380388
body = kwargs['data']
381389
if hasattr(body, 'tell') and hasattr(body, 'seek') and hasattr(body, 'read'):
@@ -402,9 +410,11 @@ def send_request(self, method, url, bucket=None, timeout=30, cos_request=True, c
402410
if j != 0:
403411
if client_can_retry(file_position, **kwargs):
404412
kwargs['headers']['x-cos-sdk-retry'] = 'true' # SDK重试标记
413+
self.inc_retry_exe_times()
405414
time.sleep(j)
406415
else:
407416
break
417+
logger.debug("send request: url: {}, headers: {}".format(url, kwargs['headers']))
408418
if method == 'POST':
409419
res = self._session.post(url, timeout=timeout, proxies=self._conf._proxies, **kwargs)
410420
elif method == 'GET':
@@ -415,34 +425,27 @@ def send_request(self, method, url, bucket=None, timeout=30, cos_request=True, c
415425
res = self._session.delete(url, timeout=timeout, proxies=self._conf._proxies, **kwargs)
416426
elif method == 'HEAD':
417427
res = self._session.head(url, timeout=timeout, proxies=self._conf._proxies, **kwargs)
428+
logger.debug("recv response: status_code: {}, headers: {}".format(res.status_code, res.headers))
418429
if res.status_code < 400: # 2xx和3xx都认为是成功的
419430
if res.status_code == 301 or res.status_code == 302 or res.status_code == 307:
420-
if j < self._retry and self.should_switch_domain(domain_switched, res.headers):
431+
if j < self._retry and self.should_switch_domain(url, res.headers):
421432
url = switch_hostname_for_url(url)
422-
domain_switched = True
423433
continue
424434
return res
425435
elif res.status_code < 500: # 4xx 不重试
426-
if j < self._retry and self.should_switch_domain(domain_switched, res.headers):
427-
url = switch_hostname_for_url(url)
428-
domain_switched = True
429-
continue
430436
break
431437
else:
432-
if j < self._retry and self.should_switch_domain(domain_switched, res.headers):
438+
if j == (self._retry - 1) and self.should_switch_domain(url, res.headers):
433439
url = switch_hostname_for_url(url)
434-
domain_switched = True
435-
continue
436-
else:
437-
break
440+
continue
438441
except Exception as e: # 捕获requests抛出的如timeout等客户端错误,转化为客户端错误
442+
logger.debug("recv exception: {}".format(e))
439443
# 记录每次请求的exception
440444
exception_log = 'url:%s, retry_time:%d exception:%s' % (url, j, str(e))
441445
exception_logbuf.append(exception_log)
442446
if j < self._retry and (isinstance(e, ConnectionError) or isinstance(e, Timeout)): # 只重试网络错误
443-
if self.should_switch_domain(domain_switched):
447+
if j == (self._retry - 1) and self.should_switch_domain(url):
444448
url = switch_hostname_for_url(url)
445-
domain_switched = True
446449
continue
447450
logger.exception(exception_logbuf) # 最终重试失败, 输出前几次重试失败的exception
448451
raise CosClientError(str(e))
@@ -524,6 +527,7 @@ def get_object(self, Bucket, Key, KeySimplifyCheck=True, **kwargs):
524527
525528
:param Bucket(string): 存储桶名称.
526529
:param Key(string): COS路径.
530+
:param KeySimplifyCheck(bool): 是否对Key进行posix路径语义归并检查
527531
:param kwargs(dict): 设置下载的headers.
528532
:return(dict): 下载成功返回的结果,包含Body对应的StreamBody,可以获取文件流或下载文件到本地.
529533
@@ -4033,7 +4037,7 @@ def _check_all_upload_parts(self, bucket, key, uploadid, local_path, parts_num,
40334037
already_exist_parts[part_num] = part['ETag']
40344038
return True
40354039

4036-
def download_file(self, Bucket, Key, DestFilePath, PartSize=20, MAXThread=5, EnableCRC=False, progress_callback=None, DumpRecordDir=None, KeySimplifyCheck=True, **Kwargs):
4040+
def download_file(self, Bucket, Key, DestFilePath, PartSize=20, MAXThread=5, EnableCRC=False, progress_callback=None, DumpRecordDir=None, KeySimplifyCheck=True, DisableTempDestFilePath=False, **Kwargs):
40374041
"""小于等于20MB的文件简单下载,大于20MB的文件使用续传下载
40384042
40394043
:param Bucket(string): 存储桶名称.
@@ -4042,6 +4046,9 @@ def download_file(self, Bucket, Key, DestFilePath, PartSize=20, MAXThread=5, Ena
40424046
:param PartSize(int): 分块下载的大小设置,单位为MB.
40434047
:param MAXThread(int): 并发下载的最大线程数.
40444048
:param EnableCRC(bool): 校验下载文件与源文件是否一致
4049+
:param DumpRecordDir(string): 指定保存断点信息的文件路径
4050+
:param KeySimplifyCheck(bool): 是否对Key进行posix路径语义归并检查
4051+
:param DisableTempDestFilePath(bool): 简单下载写入目标文件时,不使用临时文件
40454052
:param kwargs(dict): 设置请求headers.
40464053
"""
40474054
logger.debug("Start to download file, bucket: {0}, key: {1}, dest_filename: {2}, part_size: {3}MB,\
@@ -4058,9 +4065,9 @@ def download_file(self, Bucket, Key, DestFilePath, PartSize=20, MAXThread=5, Ena
40584065
head_headers['VersionId'] = Kwargs['VersionId']
40594066
object_info = self.head_object(Bucket, Key, **head_headers)
40604067
file_size = int(object_info['Content-Length'])
4061-
if file_size <= 1024 * 1024 * 20:
4068+
if file_size <= 1024 * 1024 * PartSize:
40624069
response = self.get_object(Bucket, Key, KeySimplifyCheck, **Kwargs)
4063-
response['Body'].get_stream_to_file(DestFilePath)
4070+
response['Body'].get_stream_to_file(DestFilePath, DisableTempDestFilePath)
40644071
return
40654072

40664073
# 支持回调查看进度

qcloud_cos/cos_comm.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -249,8 +249,8 @@ def switch_hostname(host):
249249
if not host:
250250
raise CosClientError("Host is required not empty!")
251251

252-
# *.cos.*-*.myqcloud.com
253-
if re.match(r'^.*\.cos\..*\-.*\.myqcloud\.com$', host):
252+
# {bucket}-{appid}.cos.{region}.myqcloud.com
253+
if re.match(r'^([a-z0-9-]+-[0-9]+\.)(cos\.[a-z]+-[a-z]+(-[a-z]+)?(-1)?)\.(myqcloud\.com)$', host):
254254
host = host[:-len(".myqcloud.com")] + ".tencentcos.cn"
255255

256256
return host

qcloud_cos/resumable_downloader.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ def __init__(self, cos_client, bucket, key, dest_filename, object_info, part_siz
5757
logger.debug('resumale downloader init finish, bucket: {0}, key: {1}'.format(bucket, key))
5858

5959
def start(self):
60-
logger.debug('start resumable downloade, bucket: {0}, key: {1}'.format(self.__bucket, self.__key))
60+
logger.debug('start resumable download, bucket: {0}, key: {1}'.format(self.__bucket, self.__key))
6161
self.__load_record() # 从record文件中恢复读取上下文
6262

6363
assert self.__tmp_file
@@ -80,7 +80,7 @@ def start(self):
8080
pool.wait_completion()
8181
result = pool.get_result()
8282
if not result['success_all']:
83-
raise CosClientError('some download_part fail after max_retry, please downloade_file again')
83+
raise CosClientError('some download_part fail after max_retry, please download_file again')
8484

8585
if os.path.exists(self.__dest_file_path):
8686
os.remove(self.__dest_file_path)

qcloud_cos/streambody.py

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -46,13 +46,16 @@ def read(self, chunk_size=1024, auto_decompress=False):
4646
return ''
4747
return chunk
4848

49-
def get_stream_to_file(self, file_name, auto_decompress=False):
49+
def get_stream_to_file(self, file_name, disable_tmp_file=False, auto_decompress=False):
5050
"""保存流到本地文件"""
5151
self._read_len = 0
5252
tmp_file_name = "{file_name}_{uuid}".format(file_name=file_name, uuid=uuid.uuid4().hex)
53+
if disable_tmp_file:
54+
tmp_file_name = file_name
55+
chunk_size = 1024 * 1024
5356
with open(tmp_file_name, 'wb') as fp:
54-
while 1:
55-
chunk = self.read(1024, auto_decompress)
57+
while True:
58+
chunk = self.read(chunk_size, auto_decompress)
5659
if not chunk:
5760
break
5861
self._read_len += len(chunk)
@@ -63,16 +66,17 @@ def get_stream_to_file(self, file_name, auto_decompress=False):
6366
if os.path.exists(tmp_file_name):
6467
os.remove(tmp_file_name)
6568
raise IOError("download failed with incomplete file")
66-
if os.path.exists(file_name):
67-
os.remove(file_name)
68-
os.rename(tmp_file_name, file_name)
69+
if file_name != tmp_file_name:
70+
if os.path.exists(file_name):
71+
os.remove(file_name)
72+
os.rename(tmp_file_name, file_name)
6973

7074
def pget_stream_to_file(self, fdst, offset, expected_len, auto_decompress=False):
7175
"""保存流到本地文件的offset偏移"""
7276
self._read_len = 0
7377
fdst.seek(offset, 0)
7478
chunk_size = 1024 * 1024
75-
while 1:
79+
while True:
7680
chunk = self.read(chunk_size, auto_decompress)
7781
if not chunk:
7882
break

0 commit comments

Comments
 (0)