Skip to content

Commit b44ed11

Browse files
committed
opt stats
1 parent 1cc234b commit b44ed11

File tree

2 files changed: 23 additions (+23) and 55 deletions (-55).

2 files changed: 23 additions (+23) and 55 deletions (-55).

dtable_events/dtable_io/__init__.py

Lines changed: 10 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,7 @@
3636
from dtable_events.dtable_io.task_manager import task_manager
3737
from dtable_events.statistics.db import save_email_sending_records, batch_save_email_sending_records
3838
from dtable_events.data_sync.data_sync_utils import run_sync_emails
39-
from dtable_events.utils import get_inner_dtable_server_url, uuid_str_to_36_chars
39+
from dtable_events.utils import get_inner_dtable_server_url, uuid_str_to_32_chars, uuid_str_to_36_chars
4040
from dtable_events.utils.dtable_server_api import DTableServerAPI
4141

4242
dtable_io_logger = setup_logger('dtable_events_io.log')
@@ -1095,7 +1095,6 @@ def calc_dtable_asset_stats(repo_id_dtable_uuids_dict, config):
10951095
try:
10961096
db_session = init_db_session_class(config)()
10971097
except Exception as e:
1098-
db_session = None
10991098
dtable_io_logger.error('create db session failed. ERROR: {}'.format(e))
11001099
raise Exception('create db session failed. ERROR: {}'.format(e))
11011100
dtable_uuid_sizes = []
@@ -1108,12 +1107,17 @@ def calc_dtable_asset_stats(repo_id_dtable_uuids_dict, config):
11081107
asset_path = f'/asset/{uuid_str_to_36_chars(dtable_uuid)}'
11091108
asset_dir_id = seafile_api.get_dir_id_by_path(repo_id, asset_path)
11101109
if not asset_dir_id:
1111-
dtable_uuid_sizes.append([dtable_uuid, 0])
1110+
dtable_uuid_sizes.append([uuid_str_to_32_chars(dtable_uuid), 0])
11121111
continue
11131112
size = seafile_api.get_file_count_info_by_path(repo_id, asset_path).size
1114-
dtable_uuid_sizes.append([dtable_uuid, size])
1113+
dtable_uuid_sizes.append([uuid_str_to_32_chars(dtable_uuid), size])
11151114
except Exception as e:
11161115
dtable_io_logger.exception(e)
11171116
dtable_io_logger.error('repo_id: %s dtable_uuids: %s stats asset error: %s', repo_id, dtable_uuids, e)
1118-
update_dtable_asset_sizes(dtable_uuid_sizes, db_session)
1119-
db_session.close()
1117+
try:
1118+
update_dtable_asset_sizes(dtable_uuid_sizes, db_session)
1119+
except Exception as e:
1120+
dtable_io_logger.exception(e)
1121+
dtable_io_logger.error('update dtable asset sizes error: %s', e)
1122+
finally:
1123+
db_session.close()

dtable_events/tasks/dtable_asset_stats_worker.py

Lines changed: 13 additions & 49 deletions
Original file line number | Diff line number | Diff line change
@@ -31,24 +31,23 @@ def update_dtable_asset_sizes(dtable_uuid_sizes, db_session):
3131
step = 1000
3232
updated_at = datetime.utcnow()
3333
for i in range(0, len(dtable_uuid_sizes), step):
34-
updates = ', '.join(["('%s', %s, '%s')" % tuple(dtable_uuid_size + [updated_at]) for dtable_uuid_size in dtable_uuid_sizes[i: i+step]])
34+
updates = ', '.join(["('%s', %s, '%s')" % (
35+
uuid_str_to_32_chars(dtable_uuid_size[0]), dtable_uuid_size[1], updated_at
36+
) for dtable_uuid_size in dtable_uuid_sizes[i: i+step]])
3537
sql = '''
3638
INSERT INTO dtable_asset_stats(dtable_uuid, size, updated_at) VALUES %s
3739
ON DUPLICATE KEY UPDATE size=VALUES(size), updated_at=VALUES(updated_at)
3840
''' % updates
39-
try:
40-
db_session.execute(sql)
41-
db_session.commit()
42-
except Exception as e:
43-
logger.error('update dtable asset assets error: %s', e)
41+
db_session.execute(sql)
42+
db_session.commit()
4443

4544

4645
class DTableAssetStatsWorker(Thread):
4746
def __init__(self, config):
4847
Thread.__init__(self)
4948
self._finished = Event()
5049
self._db_session_class = init_db_session_class(config)
51-
self.interval = 5 * 60 # listen to seafile event for 5 mins and then calc dtable asset storage
50+
self.interval = 5 * 60 # listen to seafile event for some time and then calc dtable asset storage
5251
self.last_stats_time = time.time()
5352
self._redis_client = RedisClient(config)
5453

@@ -70,6 +69,8 @@ def run(self):
7069
content = msg.get('content')
7170
if not isinstance(content, str) or '\t' not in content:
7271
continue
72+
if not content.startswith('repo-update'):
73+
continue
7374
ctime = msg.get('ctime')
7475
if not isinstance(ctime, int) or ctime < time.time() - 30 * 60: # ignore messages half hour ago
7576
continue
@@ -81,15 +82,16 @@ def run(self):
8182
repo_id_ctime_dict[repo_id] = ctime
8283

8384
def stats_dtable_asset_storage(self, repo_id_ctime_dict):
85+
logger.info('Starting stats repo dtable asset storage...')
8486
dtable_uuid_sizes = []
8587
for repo_id, ctime in repo_id_ctime_dict.items():
8688
logger.debug('start stats repo: %s ctime: %s', repo_id, ctime)
8789
try:
8890
repo = seafile_api.get_repo(repo_id)
8991
if not repo:
9092
continue
91-
asset_dir_id = seafile_api.get_dir_id_by_path(repo_id, '/asset')
92-
if not asset_dir_id:
93+
asset_dirent = seafile_api.get_dirent_by_path(repo_id, '/asset')
94+
if not asset_dirent or asset_dirent.mtime < ctime:
9395
continue
9496
dirents = seafile_api.list_dir_by_path(repo_id, '/asset', offset=-1, limit=-1)
9597
for dirent in dirents:
@@ -98,7 +100,7 @@ def stats_dtable_asset_storage(self, repo_id_ctime_dict):
98100
if not is_valid_uuid(dirent.obj_name):
99101
continue
100102
logger.debug('start stats repo: %s dirent: %s', repo_id, dirent.obj_name)
101-
if dirent.mtime > ctime - 5:
103+
if dirent.mtime >= ctime:
102104
dtable_uuid = dirent.obj_name
103105
size = seafile_api.get_file_count_info_by_path(repo_id, f'/asset/{dtable_uuid}').size
104106
logger.debug('start stats repo: %s dirent: %s size: %s', repo_id, dirent.obj_name, size)
@@ -111,47 +113,9 @@ def stats_dtable_asset_storage(self, repo_id_ctime_dict):
111113
logger.debug('totally need to update dtable: %s', len(dtable_uuid_sizes))
112114
db_session = self._db_session_class()
113115
try:
114-
update_dtable_asset_sizes(dtable_uuid_sizes)
116+
update_dtable_asset_sizes(dtable_uuid_sizes, db_session)
115117
except Exception as e:
116118
logger.exception(e)
117119
logger.error('update dtable asset sizes error: %s', e)
118120
finally:
119121
db_session.close()
120-
121-
def listen_redis_and_update(self):
122-
logger.info('Starting handle table rows count...')
123-
subscriber = self._redis_client.get_subscriber('stat-asset')
124-
while not self._finished.is_set():
125-
try:
126-
message = subscriber.get_message()
127-
if message is not None:
128-
dtable_uuid_repo_ids = json.loads(message['data'])
129-
session = self._db_session_class()
130-
try:
131-
self.stats_dtable_uuids(dtable_uuid_repo_ids, session)
132-
except Exception as e:
133-
logger.error('Handle table rows count: %s' % e)
134-
finally:
135-
session.close()
136-
else:
137-
time.sleep(0.5)
138-
except Exception as e:
139-
logger.error('Failed get message from redis: %s' % e)
140-
subscriber = self._redis_client.get_subscriber('count-rows')
141-
142-
def stats_dtable_uuids(self, dtable_uuid_repo_ids, db_session):
143-
dtable_uuid_sizes = []
144-
for dtable_uuid, repo_id in dtable_uuid_repo_ids:
145-
try:
146-
asset_path = f'/asset/{uuid_str_to_36_chars(dtable_uuid)}'
147-
asset_dir_id = seafile_api.get_dir_id_by_path(repo_id, asset_path)
148-
if not asset_dir_id:
149-
dtable_uuid_sizes.append([uuid_str_to_32_chars(dtable_uuid), 0])
150-
size = seafile_api.get_file_count_info_by_path(repo_id, asset_path).size
151-
dtable_uuid_sizes.append([uuid_str_to_32_chars(dtable_uuid), size])
152-
logger.debug('redis repo: %s dtable_uuid: %s size: %s', repo_id, dtable_uuid, size)
153-
except Exception as e:
154-
logger.exception(e)
155-
logger.error('check repo: %s dtable: %s asset size error: %s', repo_id, dtable_uuid, e)
156-
logger.debug('redis totally need to update dtable: %s', len(dtable_uuid_sizes))
157-
update_dtable_asset_sizes(dtable_uuid_sizes, db_session)

0 commit comments

Comments
 (0)