Skip to content

Линтер, тесты, сабмодули и многое другое #54

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 12 additions & 13 deletions commits_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,23 +2,19 @@
import pytz
from time import sleep
from github import Github, Repository, GithubException, PullRequest

EMPTY_FIELD = 'Empty field'
TIMEDELTA = 0.05
TIMEZONE = 'Europe/Moscow'
FIELDNAMES = ('repository name', 'author name', 'author login', 'author email', 'date and time', 'changed files', 'commit id', 'branch')
from constants import EMPTY_FIELD, TIMEDELTA, TIMEZONE, FORKED_REPO, ORIG_REPO_COMMITS, COMMIT_FIELDNAMES

def log_commit_to_csv(info, csv_name):
    """Append one commit record to the CSV file.

    :param info: dict keyed by COMMIT_FIELDNAMES describing a single commit.
    :param csv_name: path to the CSV file; the header row is written once,
        elsewhere (see log_commits), so this opens in append mode.
    """
    # Diff residue removed: the dead first `writer =` assignment referenced
    # the deleted FIELDNAMES constant; only the COMMIT_FIELDNAMES writer is kept.
    with open(csv_name, 'a', newline='') as file:
        writer = csv.DictWriter(file, fieldnames=COMMIT_FIELDNAMES)
        writer.writerow(info)


def log_commit_to_stdout(info):
    """Echo one commit record to the console for progress feedback."""
    print(info)


def log_repository_commits(repository: Repository, csv_name, start, finish, branch):
def log_repository_commits(repository: Repository, csv_name, start, finish, branch, fork_flag):
branches = []
match branch:
case 'all':
Expand All @@ -37,11 +33,13 @@ def log_repository_commits(repository: Repository, csv_name, start, finish, bran
pytz.timezone(TIMEZONE)) < start or commit.commit.author.date.astimezone(
pytz.timezone(TIMEZONE)) > finish:
continue
if commit.commit is not None:
if commit.commit is not None and commit.commit.sha not in ORIG_REPO_COMMITS:
nvl = lambda val: val or EMPTY_FIELD
commit_data = [repository.full_name, commit.commit.author.name, nvl(commit.author.login), nvl(commit.commit.author.email),
commit.commit.author.date, '; '.join([file.filename for file in commit.files]), commit.commit.sha, branch]
info = dict(zip(FIELDNAMES, commit_data))
commit.commit.author.date, '; '.join([file.filename for file in commit.files]), commit.commit.sha, branch, commit.stats.additions, commit.stats.deletions]
info = dict(zip(COMMIT_FIELDNAMES, commit_data))
if fork_flag:
ORIG_REPO_COMMITS.append(info['commit id'])
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

капсом == константа, а мы в нее что-то добавляем, а потом чистим?)

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

получения списка репо я планировал переделать, в git_logger'е сделаю такую функцию, чтобы возвращала словарь с ключами в виде репо, а значениями в виде списка форков

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

А, это не то, в общем, исправлю)


log_commit_to_csv(info, csv_name)
log_commit_to_stdout(info)
Expand All @@ -51,18 +49,19 @@ def log_repository_commits(repository: Repository, csv_name, start, finish, bran
def log_commits(client: Github, working_repos, csv_name, start, finish, branch, fork_flag):
    """Log commit history of every repository (and, optionally, its forks) to CSV.

    :param client: authenticated Github client (unused directly here; kept for
        signature compatibility with the other parsers).
    :param working_repos: iterable of Repository objects to process.
    :param csv_name: output CSV path; header is written once, rows appended later.
    :param start, finish: datetime bounds passed through to log_repository_commits.
    :param branch: branch selector passed through ('all' or a branch name).
    :param fork_flag: when truthy, also walk each repository's forks and filter
        out commits already seen in the original repository.
    """
    # Diff residue removed: the page showed both the old (FIELDNAMES,
    # 6-argument call) and new (COMMIT_FIELDNAMES, 7-argument call) lines;
    # only the updated versions are kept.
    with open(csv_name, 'w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(COMMIT_FIELDNAMES)

    for repo in working_repos:
        try:
            print('=' * 20, repo.full_name, '=' * 20)
            # With fork_flag set, this call records the original repo's commit
            # ids into ORIG_REPO_COMMITS so the fork pass below can skip them.
            log_repository_commits(repo, csv_name, start, finish, branch, fork_flag)
            if fork_flag:
                for forked_repo in repo.get_forks():
                    print('=' * 20, "FORKED:", forked_repo.full_name, '=' * 20)
                    # FORKED_REPO (False): forks must not re-record commit ids.
                    log_repository_commits(forked_repo, csv_name, start, finish, branch, FORKED_REPO)
                    sleep(TIMEDELTA)  # throttle GitHub API usage between forks
            sleep(TIMEDELTA)  # throttle between repositories
            # Reset the shared id list so repo A's commits don't filter repo B's forks.
            ORIG_REPO_COMMITS.clear()
        except Exception as e:
            # Best-effort: report the failure and continue with the next repository.
            print(e)
76 changes: 76 additions & 0 deletions constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# Constants shared by every parser module.
EMPTY_FIELD = 'Empty field'  # placeholder written to CSV when a value is missing
TIMEDELTA = 0.05             # pause (seconds) between GitHub API calls
TIMEZONE = 'Europe/Moscow'   # timezone used to localize commit timestamps

# CSV field (column) names, shared between the per-report tuples below.
REPO_NAME = 'repository name'
AUTHOR_NAME = 'author name'
AUTHOR_LOGIN = 'author login'
AUTHOR_EMAIL = 'author email'
DATE_AND_TIME = 'date and time'
CHANGED_FILES = 'changed files'
COMMIT_ID = 'commit id'
BRANCH = 'branch'
ADDED_LINES = 'added lines'
DELETED_LINES = 'deleted lines'
TITLE = 'title'
ID = 'id'
STATE = 'state'
COMMIT_INTO = 'commit into'
COMMIT_FROM = 'commit from'
CREATED_AT = 'created at'
CREATOR_NAME = 'creator name'
CREATOR_LOGIN = 'creator login'
CREATOR_EMAIL = 'creator email'
COMMENT_BODY = 'comment body'
COMMENT_CREATED_AT = 'comment created at'
COMMENT_AUTHOR_NAME = 'comment author name'
COMMENT_AUTHOR_LOGIN = 'comment author login'
COMMENT_AUTHOR_EMAIL = 'comment author email'
MERGER_NAME = 'merger name'
MERGER_LOGIN = 'merger login'
MERGER_EMAIL = 'merger email'
SOURCE_BRANCH = 'source branch'
TARGET_BRANCH = 'target branch'
ASSIGNEE_STORY = 'assignee story'
RELATED_ISSUES = 'related issues'
LABELS = 'labels'
MILESTONE = 'milestone'
NUMBER = 'number'
TASK = 'task'
CLOSER_NAME = 'closer name'
CLOSER_LOGIN = 'closer login'
CLOSER_EMAIL = 'closer email'
CLOSED_AT = 'closed at'
CONNECTED_PULL_REQUESTS = 'connected pull requests'
INVITED_LOGIN = 'invited login'
INVITE_CREATION_DATE = 'invite creation date'
INVITATION_URL = 'invitation url'
PAGE = 'page'
ACTION = 'action'
REVISION_ID = 'revision id'

# For commits
FORKED_REPO = False  # passed as fork_flag when logging a fork, so fork commits are not recorded
# NOTE(review): despite the UPPER_SNAKE_CASE name this list is mutated at runtime
# (appended to and cleared by commits_parser) — it is shared state, not a constant;
# consider refactoring it out of this module.
ORIG_REPO_COMMITS = []
COMMIT_FIELDNAMES = (REPO_NAME, AUTHOR_NAME, AUTHOR_LOGIN, AUTHOR_EMAIL, DATE_AND_TIME, CHANGED_FILES, COMMIT_ID, BRANCH, ADDED_LINES, DELETED_LINES)

# For pull requests
PULL_REQUEST_FIELDNAMES = (REPO_NAME, TITLE, ID, STATE, COMMIT_INTO, COMMIT_FROM, CREATED_AT, CREATOR_NAME, CREATOR_LOGIN, CREATOR_EMAIL,
                           CHANGED_FILES, COMMENT_BODY, COMMENT_CREATED_AT, COMMENT_AUTHOR_NAME, COMMENT_AUTHOR_LOGIN, COMMENT_AUTHOR_EMAIL,
                           MERGER_NAME, MERGER_LOGIN, MERGER_EMAIL, SOURCE_BRANCH, TARGET_BRANCH, ASSIGNEE_STORY, RELATED_ISSUES, LABELS, MILESTONE)

# For issues
ISSUE_FIELDNAMES = (REPO_NAME, NUMBER, TITLE, STATE, TASK, CREATED_AT, CREATOR_NAME, CREATOR_LOGIN, CREATOR_EMAIL, CLOSER_NAME, CLOSER_LOGIN,
                    CLOSER_EMAIL, CLOSED_AT, COMMENT_BODY, COMMENT_CREATED_AT, COMMENT_AUTHOR_NAME, COMMENT_AUTHOR_LOGIN, COMMENT_AUTHOR_EMAIL,
                    ASSIGNEE_STORY, CONNECTED_PULL_REQUESTS, LABELS, MILESTONE)

# For invites
INVITE_FIELDNAMES = (REPO_NAME, INVITED_LOGIN, INVITE_CREATION_DATE, INVITATION_URL)

# For wikis
# Hash of git's empty tree, used to diff the very first commit against nothing.
# Approach found here: https://stackoverflow.com/questions/33916648/get-the-diff-details-of-first-commit-in-gitpython
EMPTY_TREE_SHA = "4b825dc642cb6eb9a060e54bf8d69288fbee4904"
# Russian labels for wiki page change types (runtime output strings — do not translate).
ACTIVITY = {"A": "Страница добавлена", "M": "Страница изменена", "D": "Страница удалена", "R": "Страница переименована"}
# English counterparts of the same change-type labels.
ENG_ACTIVITY = {"A" : "Page added", "M" : "Page modified", "D" : "Page deleted", "R": "Page renamed"}
WIKI_FIELDNAMES = (REPO_NAME, AUTHOR_NAME, AUTHOR_LOGIN, DATE_AND_TIME, PAGE, ACTION, REVISION_ID, ADDED_LINES, DELETED_LINES)
4 changes: 1 addition & 3 deletions git_logger.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
from github import Github, GithubException, PullRequest
from time import sleep

TIMEDELTA = 0.05
TIMEZONE = 'Europe/Moscow'
from constants import TIMEDELTA, TIMEZONE

def login(token):
client = Github(login_or_token=token)
Expand Down
5 changes: 2 additions & 3 deletions invites_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,7 @@
import json
from time import sleep
from github import Github, Repository, GithubException, PullRequest

FIELDNAMES = ('repository name', 'invited login', 'invite creation date', 'invitation url')
from constants import INVITE_FIELDNAMES

def log_inviter(repo, invite):
invite_info = [repo.full_name, invite.invitee.login, invite.created_at.strftime("%d/%m/%Y, %H:%M:%S"), invite.html_url]
Expand All @@ -16,7 +15,7 @@ def log_inviter(repo, invite):
def log_invitations(client: Github, working_repos, csv_name):
with open(csv_name, 'w', newline='') as file:
writer = csv.writer(file)
writer.writerow(FIELDNAMES)
writer.writerow(INVITE_FIELDNAMES)
for repo in working_repos:
print('=' * 20, repo.full_name, '=' * 20)
invitations = repo.get_pending_invitations()
Expand Down
50 changes: 14 additions & 36 deletions issues_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,11 @@
from time import sleep
from git_logger import get_assignee_story
from github import Github, Repository, GithubException, PullRequest

EMPTY_FIELD = 'Empty field'
TIMEDELTA = 0.05
TIMEZONE = 'Europe/Moscow'
FIELDNAMES = ('repository name', 'number', 'title', 'state', 'task', 'created at', 'creator name', 'creator login',
'creator email', 'closer name', 'closer login', 'closer email', 'closed at', 'comment body',
'comment created at', 'comment author name', 'comment author login', 'comment author email',
'assignee story', 'connected pull requests', 'labels', 'milestone')
from constants import EMPTY_FIELD, TIMEDELTA, TIMEZONE, ISSUE_FIELDNAMES, COMMENT_BODY, COMMENT_CREATED_AT, COMMENT_AUTHOR_NAME, COMMENT_AUTHOR_LOGIN, COMMENT_AUTHOR_EMAIL

def log_issue_to_csv(info, csv_name):
    """Append one issue record to the CSV file.

    :param info: dict keyed by ISSUE_FIELDNAMES describing a single issue
        (or one issue comment, when comments are expanded by the caller).
    :param csv_name: path to the CSV file; header is written elsewhere,
        so this opens in append mode.
    """
    # Diff residue removed: the dead first `writer =` assignment referenced
    # the deleted FIELDNAMES constant; only the ISSUE_FIELDNAMES writer is kept.
    with open(csv_name, 'a', newline='') as file:
        writer = csv.DictWriter(file, fieldnames=ISSUE_FIELDNAMES)
        writer.writerow(info)


Expand Down Expand Up @@ -93,36 +86,21 @@ def log_repository_issues(repository: Repository, csv_name, token, start, finish
continue
nvl = lambda val: val or EMPTY_FIELD
get_info = lambda obj, attr: EMPTY_FIELD if obj is None else getattr(obj, attr)
info_tmp = {
'repository name': repository.full_name, 'number': issue.number, 'title': issue.title,
'state': issue.state, 'task': issue.body,
'created at': issue.created_at,
'creator name': get_info(issue.user, 'name'),
'creator login': get_info(issue.user, 'login'),
'creator email': get_info(issue.user, 'email'),
'closed at': nvl(issue.closed_at),
'closer name': get_info(issue.closed_by, 'name'),
'closer login': get_info(issue.closed_by, 'login'),
'closer email': get_info(issue.closed_by, 'email'),
'comment body': EMPTY_FIELD,
'comment created at': EMPTY_FIELD,
'comment author name': EMPTY_FIELD,
'comment author login': EMPTY_FIELD,
'comment author email': EMPTY_FIELD,
'assignee story': get_assignee_story(issue),
'connected pull requests': EMPTY_FIELD if issue.number is None else get_connected_pulls(issue.number, repository.owner, repository.name, token),
'labels': EMPTY_FIELD if issue.labels is None else ';'.join([label.name for label in issue.labels]),
'milestone': get_info(issue.milestone, 'title')
}
issue_data = [repository.full_name, issue.number, issue.title, issue.state, issue.body, issue.created_at, get_info(issue.user, 'name'),
get_info(issue.user, 'login'), get_info(issue.user, 'email'), nvl(issue.closed_at), get_info(issue.closed_by, 'name'),
get_info(issue.closed_by, 'login'), get_info(issue.closed_by, 'email'), EMPTY_FIELD, EMPTY_FIELD, EMPTY_FIELD, EMPTY_FIELD,
EMPTY_FIELD, get_assignee_story(issue), EMPTY_FIELD if issue.number is None else get_connected_pulls(issue.number, repository.owner, repository.name, token),
EMPTY_FIELD if issue.labels is None else ';'.join([label.name for label in issue.labels]), get_info(issue.milestone, 'title')]
info_tmp = dict(zip(ISSUE_FIELDNAMES, issue_data))
Comment on lines +90 to +95
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

подумать надо бы

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

давайте попробуем посмотреть в сторону датаклассов
https://stackoverflow.com/questions/72604922/how-to-convert-python-dataclass-to-dictionary-of-string-literal
сам не пробовал, но кажется этот подход должен помочь

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

from dataclasses import dataclass, asdict

@dataclass(kw_only=True, frozen=True)
class A:
    field1: str
    field2: str

a = A(field1='qwe', field2='asd')
print(a, asdict(a))
-->
A(field1='qwe', field2='asd') {'field1': 'qwe', 'field2': 'asd'}

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Понял, сделаю так


if issue.get_comments().totalCount > 0:
for comment in issue.get_comments():
info = info_tmp
info['comment body'] = comment.body
info['comment created at'] = comment.created_at
info['comment author name'] = comment.user.name
info['comment author login'] = comment.user.login
info['comment author email'] = comment.user.email
info[COMMENT_BODY] = comment.body
info[COMMENT_CREATED_AT] = comment.created_at
info[COMMENT_AUTHOR_NAME] = comment.user.name
info[COMMENT_AUTHOR_LOGIN] = comment.user.login
info[COMMENT_AUTHOR_EMAIL] = nvl(comment.user.email)
log_issue_to_csv(info, csv_name)
log_issue_to_stdout(info)
else:
Expand All @@ -134,7 +112,7 @@ def log_repository_issues(repository: Repository, csv_name, token, start, finish
def log_issues(client: Github, working_repo, csv_name, token, start, finish, fork_flag):
with open(csv_name, 'w', newline='') as file:
writer = csv.writer(file)
writer.writerow(FIELDNAMES)
writer.writerow(ISSUE_FIELDNAMES)

for repo in working_repo:
try:
Expand Down
58 changes: 15 additions & 43 deletions pull_requests_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,23 +5,15 @@
from time import sleep
from git_logger import get_assignee_story
from github import Github, Repository, GithubException, PullRequest

EMPTY_FIELD = 'Empty field'
TIMEDELTA = 0.05
TIMEZONE = 'Europe/Moscow'
FIELDNAMES = ('repository name', 'title', 'id', 'state', 'commit into', 'commit from', 'created at', 'creator name',
'creator login', 'creator email', 'changed files', 'comment body',
'comment created at', 'comment author name', 'comment author login',
'comment author email', 'merger name', 'merger login', 'merger email', 'source branch',
'target branch', 'assignee story', 'related issues', 'labels', 'milestone')
from constants import EMPTY_FIELD, TIMEDELTA, TIMEZONE, PULL_REQUEST_FIELDNAMES, COMMENT_BODY, COMMENT_CREATED_AT, COMMENT_AUTHOR_NAME, COMMENT_AUTHOR_LOGIN, COMMENT_AUTHOR_EMAIL

def log_pr_to_stdout(info):
    """Echo one pull-request record to the console for progress feedback."""
    print(info)


def log_pr_to_csv(info, csv_name):
    """Append one pull-request record to the CSV file.

    :param info: dict keyed by PULL_REQUEST_FIELDNAMES describing a single
        pull request (or one PR comment, when comments are expanded by the caller).
    :param csv_name: path to the CSV file; header is written elsewhere,
        so this opens in append mode.
    """
    # Diff residue removed: the dead first `writer =` assignment referenced
    # the deleted FIELDNAMES constant; only the PULL_REQUEST_FIELDNAMES writer is kept.
    with open(csv_name, 'a', newline='') as file:
        writer = csv.DictWriter(file, fieldnames=PULL_REQUEST_FIELDNAMES)
        writer.writerow(info)


Expand Down Expand Up @@ -78,42 +70,22 @@ def log_repositories_pr(repository: Repository, csv_name, token, start, finish):
continue
nvl = lambda val: val or EMPTY_FIELD
get_info = lambda obj, attr: EMPTY_FIELD if obj is None else getattr(obj, attr)
info_tmp = {
'repository name': repository.full_name,
'title': pull.title,
'id': pull.number,
'state': pull.state,
'commit into': pull.base.label,
'commit from': pull.head.label,
'created at': pull.created_at,
'creator name': nvl(pull.user.name),
'creator login': pull.user.login,
'creator email': pull.user.email,
'changed files': '; '.join([file.filename for file in pull.get_files()]),
'comment body': EMPTY_FIELD,
'comment created at': EMPTY_FIELD,
'comment author name': EMPTY_FIELD,
'comment author login': EMPTY_FIELD,
'comment author email': EMPTY_FIELD,
'merger name': get_info(pull.merged_by, 'name'),
'merger login': get_info(pull.merged_by, 'login'),
'merger email': get_info(pull.merged_by, 'email'),
'source branch': pull.head.ref,
'target branch': pull.base.ref,
'assignee story': get_assignee_story(pull),
'related issues': EMPTY_FIELD if pull.issue_url is None else get_related_issues(pull.number, repository.owner, repository.name, token),
'labels': EMPTY_FIELD if pull.labels is None else ';'.join([label.name for label in pull.labels]),
'milestone': get_info(pull.milestone, 'title')
}
pr_data = [repository.full_name, pull.title, pull.number, pull.state, pull.base.label, pull.head.label, pull.created_at,
nvl(pull.user.name), pull.user.login, pull.user.email, '; '.join([file.filename for file in pull.get_files()]),
EMPTY_FIELD, EMPTY_FIELD, EMPTY_FIELD, EMPTY_FIELD, EMPTY_FIELD, get_info(pull.merged_by, 'name'),
get_info(pull.merged_by, 'login'), get_info(pull.merged_by, 'email'), pull.head.ref, pull.base.ref,
get_assignee_story(pull), EMPTY_FIELD if pull.issue_url is None else get_related_issues(pull.number, repository.owner, repository.name, token),
EMPTY_FIELD if pull.labels is None else ';'.join([label.name for label in pull.labels]), get_info(pull.milestone, 'title')]
info_tmp = dict(zip(PULL_REQUEST_FIELDNAMES, pr_data))
Comment on lines +74 to +80
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

не нравится так, ведь нужно помнить, что порядок важен...

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

переделаю, загружу коммит с датаклассами, тегну


if pull.get_comments().totalCount > 0:
for comment in pull.get_comments():
info = info_tmp
info['comment body'] = comment.body
info['comment created at'] = comment.created_at
info['comment author name'] = comment.user.name
info['comment author login'] = comment.user.login
info['comment author email'] = nvl(comment.user.email)
info[COMMENT_BODY] = comment.body
info[COMMENT_CREATED_AT] = comment.created_at
info[COMMENT_AUTHOR_NAME] = comment.user.name
info[COMMENT_AUTHOR_LOGIN] = comment.user.login
info[COMMENT_AUTHOR_EMAIL] = nvl(comment.user.email)
log_pr_to_csv(info, csv_name)
log_pr_to_stdout(info)
else:
Expand All @@ -125,7 +97,7 @@ def log_repositories_pr(repository: Repository, csv_name, token, start, finish):
def log_pull_requests(client: Github, working_repos, csv_name, token, start, finish, fork_flag):
with open(csv_name, 'w', newline='') as file:
writer = csv.writer(file)
writer.writerow(FIELDNAMES)
writer.writerow(PULL_REQUEST_FIELDNAMES)

for repo in working_repos:
try:
Expand Down
Loading