Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -91,3 +91,19 @@ repo.getIssues(false).ifPresent(issueData -> issueData.forEach(issue -> {
System.out.println(comment.user.username + ": " + comment.body));
}));
```

### Further data processing

The data extracted by this tool can be further processed, for example using the `run-issues.py` script from the tool [`codeface-extraction`](https://github.com/se-sic/codeface-extraction). This organizes and unifies the issue data into a single csv-like .list file. It also allows for synchronization with data from other data extraction tools, such as `codeface`.

### `referenced` events

`referenced` events are generated in an issue when a commit references that issue in its commit message. The intended behavior is that the event is present in the issue's event data and that the commit is also listed among the related commits of the issue. This does not work if the commit cannot be fetched. In this case, the event still exists, but it contains a link to a commit that the API cannot resolve, meaning that no data about the commit can be accessed.
Known causes of this include:

- a commit was rebased and changed/removed
- an external repository was deleted
- the commit's branch was deleted

Note that the commit might still be reachable until the automatic garbage collection has removed it from the remote repository.
Such an unresolvable commit is not problematic in itself. However, when the data is processed further using `codeface-extraction`, these `referenced` events may end up in the final data, even though they should be filtered out as part of the issue processing.
51 changes: 51 additions & 0 deletions src/de/uni_passau/fim/gitwrapper/EventData.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
/**
* Copyright (C) 2016-2018 Florian Heck
* Copyright (C) 2019 Thomas Bock
* Copyright (C) 2025 Leo Sendelbach
* Copyright (C) 2025 Shiraz Jafri
*
* This file is part of GitHubWrapper.
*
Expand Down Expand Up @@ -33,6 +35,7 @@ public abstract class EventData {
UserData user;
OffsetDateTime created_at;
String event;
Long id;

/**
* The User that created the Event.
Expand Down Expand Up @@ -193,4 +196,52 @@ public UserData getAssigner() {
return assigner;
}
}

/**
 * An Event generated by changing the state of an issue, i.e., the issue was closed or reopened.
 */
public class StateChangedEventData extends EventData {

    // Excluded from plain deserialization; filled in afterwards by the post-processor
    // registered for this class.
    @Expose(deserialize = false)
    Commit commit;
    StateReason state_reason;

    /**
     * The commit referenced by this state change.
     *
     * @return the referenced commit, or <code>null</code> if the event carries no commit
     *         or the commit could not be resolved
     */
    public Commit getCommit() {
        return this.commit;
    }

    /**
     * The reason given for the state change.
     *
     * @return the state reason
     */
    public StateReason getStateReason() {
        return this.state_reason;
    }
}

/**
* An Event generated by adding, changing, or removing the type of an issue.
* Declares no fields of its own.
*/
public class IssueTypeChangedEventData extends EventData {
}

/**
* An Event generated by adding or removing the parent issue of an issue.
* Declares no fields of its own.
*/
public class ParentIssueChangedEventData extends EventData {
}

/**
* An Event generated by adding a sub-issue to an issue or removing one from it.
* Declares no fields of its own.
*/
public class SubIssueChangedEventData extends EventData {
}

/**
* An Event of the GitHub event type "connected".
* NOTE(review): GitHub's documentation appears to describe "connected" as an issue or pull request
* being linked to another issue or pull request rather than to a repository -- confirm the wording.
* Declares no fields of its own.
*/
public class ConnectedEventData extends EventData {
}
}
93 changes: 90 additions & 3 deletions src/de/uni_passau/fim/gitwrapper/EventDataProcessor.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
/**
* Copyright (C) 2016-2018 Florian Heck
* Copyright (C) 2019 Thomas Bock
* Copyright (C) 2025 Leo Sendelbach
* Copyright (C) 2025 Shiraz Jafri
*
* This file is part of GitHubWrapper.
*
Expand Down Expand Up @@ -43,7 +45,16 @@ class EventDataProcessor implements JsonDeserializer<EventData>, JsonSerializer<
map.put("unlabeled", EventData.LabeledEventData.class);
map.put("referenced", EventData.ReferencedEventData.class);
map.put("merged", EventData.ReferencedEventData.class);
map.put("closed", EventData.ReferencedEventData.class);
map.put("closed", EventData.StateChangedEventData.class);
map.put("reopened", EventData.StateChangedEventData.class);
map.put("connected", EventData.ConnectedEventData.class);
map.put("issue_type_added", EventData.IssueTypeChangedEventData.class);
map.put("issue_type_changed", EventData.IssueTypeChangedEventData.class);
map.put("issue_type_removed", EventData.IssueTypeChangedEventData.class);
map.put("parent_issue_added", EventData.ParentIssueChangedEventData.class);
map.put("parent_issue_removed", EventData.ParentIssueChangedEventData.class);
map.put("sub_issue_added", EventData.SubIssueChangedEventData.class);
map.put("sub_issue_removed", EventData.SubIssueChangedEventData.class);
map.put("review_requested", EventData.RequestedReviewEventData.class);
map.put("review_request_removed", EventData.RequestedReviewEventData.class);
map.put("review_dismissed", EventData.DismissedReviewEventData.class);
Expand Down Expand Up @@ -86,8 +97,12 @@ public void postDeserialize(EventData.ReferencedEventData result, JsonElement sr
}

result.commit = repo.getGithubCommit(hash.getAsString()).orElseGet(() -> {
LOG.warning("Found commit unknown to GitHub and local git repo: " + hash);
return null;
LOG.warning("Found commit unknown to GitHub and local git repo: " + hash + " Retry using url...");
JsonElement url = src.getAsJsonObject().get("commit_url");
return repo.getGithubCommitUrl(hash.getAsString(), url.getAsString()).orElseGet(() -> {
LOG.warning("Could not find commit: " + hash);
return null;
});
});
}

Expand Down Expand Up @@ -158,4 +173,76 @@ public void postDeserialize(EventData.AssignedEventData result, JsonElement src,
@Override
public void postSerialize(JsonElement result, EventData.AssignedEventData src, Gson gson) { }
}

/**
 * Processor for state change events.
 *
 * After deserialization it fills in the state reason and, if the event names a commit,
 * the commit that belongs to the event.
 */
static class StateChangedEventProcessor implements PostProcessor<EventData.StateChangedEventData> {

    private final GitHubRepository repo;

    /**
     * Creates a new StateChangedEventProcessor for the given repo.
     *
     * @param repo
     *         the repo used to look up commits
     */
    StateChangedEventProcessor(GitHubRepository repo) {
        this.repo = repo;
    }

    /**
     * Sets <code>state_reason</code> and <code>commit</code> on the deserialized event.
     *
     * A missing or JSON-null "state_reason" is passed on as <code>null</code>. A missing or
     * JSON-null "commit_id" leaves the commit untouched. Otherwise the commit is looked up by
     * its hash first and, if that fails, by the "commit_url" of the event; if both lookups
     * fail, the commit is set to <code>null</code>.
     *
     * @param result
     *         the deserialized event to complete
     * @param src
     *         the JSON the event was deserialized from
     * @param gson
     *         the Gson instance in use (not used here)
     */
    @Override
    public void postDeserialize(EventData.StateChangedEventData result, JsonElement src, Gson gson) {
        JsonElement stateReasonElement = src.getAsJsonObject().get("state_reason");
        String stateReasonValue = (stateReasonElement != null && !stateReasonElement.isJsonNull())
                ? stateReasonElement.getAsString()
                : null;
        result.state_reason = StateReason.getFromString(stateReasonValue);

        // get() yields null for an absent key, so guard against that as well as against JSON null.
        JsonElement hash = src.getAsJsonObject().get("commit_id");
        if (hash == null || hash.isJsonNull()) {
            return;
        }

        result.commit = repo.getGithubCommit(hash.getAsString()).orElseGet(() -> {
            LOG.warning("Found commit unknown to GitHub and local git repo: " + hash + " Retry using url...");
            JsonElement url = src.getAsJsonObject().get("commit_url");
            // Without a usable url there is nothing left to retry with.
            if (url == null || url.isJsonNull()) {
                LOG.warning("Could not find commit: " + hash);
                return null;
            }
            return repo.getGithubCommitUrl(hash.getAsString(), url.getAsString()).orElseGet(() -> {
                LOG.warning("Could not find commit: " + hash);
                return null;
            });
        });
    }

    /**
     * Nothing to do after serialization.
     */
    @Override
    public void postSerialize(JsonElement result, EventData.StateChangedEventData src, Gson gson) { }
}

/**
* Processor for issue type change events.
* Both hooks are empty, so events of this type are left exactly as Gson produced them.
*/
static class IssueTypeChangedEventProcessor implements PostProcessor<EventData.IssueTypeChangedEventData> {

@Override
public void postDeserialize(EventData.IssueTypeChangedEventData result, JsonElement src, Gson gson) {
// Intentionally empty: nothing is filled in after deserialization.
}

@Override
public void postSerialize(JsonElement result, EventData.IssueTypeChangedEventData src, Gson gson) {
// Intentionally empty: the serialized JSON is not modified.
}
}

/**
* Processor for connected events.
* Both hooks are empty, so events of this type are left exactly as Gson produced them.
*/
static class ConnectedEventProcessor implements PostProcessor<EventData.ConnectedEventData> {

@Override
public void postDeserialize(EventData.ConnectedEventData result, JsonElement src, Gson gson) {
// Intentionally empty: nothing is filled in after deserialization.
}

@Override
public void postSerialize(JsonElement result, EventData.ConnectedEventData src, Gson gson) {
// Intentionally empty: the serialized JSON is not modified.
}
}
}
21 changes: 21 additions & 0 deletions src/de/uni_passau/fim/gitwrapper/GitHubCommit.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/**
* Copyright (C) 2019 Thomas Bock
* Copyright (C) 2025 Leo Sendelbach
*
* This file is part of GitHubWrapper.
*
Expand All @@ -26,6 +27,7 @@ public class GitHubCommit extends Commit {
private String authorUsername;
private String committerUsername;
private boolean addedToPullRequest = false;
private boolean external = false;

/**
* Constructs a new {@link GitHubCommit} with the given <code>id</code> made in the <code>repo</code>.
Expand Down Expand Up @@ -119,4 +121,23 @@ public boolean isAddedToPullRequest() {
void setAddedToPullRequest(boolean added) {
this.addedToPullRequest = added;
}

/**
 * Tells whether this commit has been flagged as an external commit.
 * The flag is <code>false</code> unless it has been changed via <code>setExternal</code>.
 *
 * @return <code>true</code> if this commit is flagged as external, <code>false</code> otherwise
 */
boolean isExternal() {
    return external;
}

/**
 * Flags this commit as external or not external.
 *
 * @param external
 *         <code>true</code> to flag this commit as an external commit, <code>false</code> to clear the flag
 */
void setExternal(boolean external) {
    this.external = external;
}

}
41 changes: 40 additions & 1 deletion src/de/uni_passau/fim/gitwrapper/GitHubRepository.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
* Copyright (C) 2016-2020 Florian Heck
* Copyright (C) 2018 Claus Hunsen
* Copyright (C) 2019-2021 Thomas Bock
* Copyright (C) 2025 Leo Sendelbach
* Copyright (C) 2025 Shiraz Jafri
*
* This file is part of GitHubWrapper.
*
Expand Down Expand Up @@ -241,6 +243,9 @@ public GitHubRepository(String url, File dir, GitWrapper git, List<String> oauth
gfb.registerPostProcessor(EventData.LabeledEventData.class, new EventDataProcessor.LabeledEventProcessor());
gfb.registerPostProcessor(EventData.DismissedReviewEventData.class, new EventDataProcessor.DismissedReviewEventProcessor());
gfb.registerPostProcessor(EventData.AssignedEventData.class, new EventDataProcessor.AssignedEventProcessor());
gfb.registerPostProcessor(EventData.StateChangedEventData.class, new EventDataProcessor.StateChangedEventProcessor(this));
gfb.registerPostProcessor(EventData.IssueTypeChangedEventData.class, new EventDataProcessor.IssueTypeChangedEventProcessor());
gfb.registerPostProcessor(EventData.ConnectedEventData.class, new EventDataProcessor.ConnectedEventProcessor());
gfb.registerPostProcessor(ReviewData.ReviewInitialCommentData.class, new ReviewDataProcessor.ReviewInitialCommentDataProcessor(this));
GsonBuilder gb = gfb.createGsonBuilder();
gb.registerTypeAdapter(Commit.class, new CommitProcessor(this, userProcessor));
Expand Down Expand Up @@ -352,6 +357,8 @@ public Optional<List<IssueData>> getIssues(boolean includePullRequests, OffsetDa
}
else timeLimit = "";
Type finalType = type;
// For debugging, you may add additional parameters to the query string. For example, '/issues?creator=username&state=all'
// will fetch the issues created by the specified user as well as all related issues and commits.
getJSONStringFromPath("/issues?state=all" + timeLimit).map(json -> {
List<IssueData> data;
try {
Expand All @@ -367,7 +374,7 @@ public Optional<List<IssueData>> getIssues(boolean includePullRequests, OffsetDa
threadPool.submit(() -> data.parallelStream().forEach(IssueData::freeze));

} catch (JsonSyntaxException e) {
LOG.warning("Encountered invalid JSON: " + json);
LOG.warning("Encountered invalid JSON: " + json + "\n\n" + e.getMessage() + "\n\n" + e);
return null;
}
return data;
Expand Down Expand Up @@ -1028,6 +1035,38 @@ Optional<GitHubCommit> getGithubCommit(String hash) {
});
}

/**
 * Gets the commit data from the given url instead of deriving the request from the hash.
 *
 * In offline mode no request is made and the dummy commit is returned. Otherwise the JSON found
 * at <code>url</code> is deserialized into a {@link GitHubCommit}. The outcome (present or empty)
 * is stored in <code>checkedHashes</code> under <code>hash</code>, and a commit that was found is
 * flagged as external.
 *
 * @param hash
 *         the hash of the commit, used as key for caching the result
 * @param url
 *         the url to request the commit data from
 * @return the commit, or an empty Optional if no data could be obtained from the url
 */
Optional<GitHubCommit> getGithubCommitUrl(String hash, String url) {
    if (offline.get()) {
        return Optional.of(getGHCommitUnchecked(DummyCommit.DUMMY_COMMIT_ID));
    }

    Optional<GitHubCommit> res;
    try {
        res = getJSONStringFromURL(url).map(commitInfo ->
                gson.fromJson(commitInfo, new TypeToken<GitHubCommit>() {}.getType()));
    } catch (JsonSyntaxException e) {
        /* For whatever reason, the JSON String is malformed, perhaps due to ill-encoded characters
         * in patches within the files element of the JSON String.
         * Due to that, get the JSON String again and remove the content of the files element of the
         * JSON String, as it is not needed for further processing.
         */
        LOG.info("Malformed JSON String when querying data for commit " + url + ". Neglect files element.");
        // Map over the Optional instead of calling get(): the second request may yield nothing,
        // and Gson may return null, both of which now result in an empty Optional.
        res = getJSONStringFromURL(url).map(commitInfo -> {
            String withoutFiles = StringUtils.substringBefore(commitInfo, "\"files\":[") + "\"files\":[]}";
            return gson.fromJson(withoutFiles, new TypeToken<GitHubCommit>() {}.getType());
        });
    }

    checkedHashes.put(hash, res);
    res.ifPresent(commit -> commit.setExternal(true));
    return res;
}

/**
* Creates a new Commit with the given data, and tries to fill in the missing data from the local Repository
*
Expand Down
23 changes: 23 additions & 0 deletions src/de/uni_passau/fim/gitwrapper/IssueData.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
/**
* Copyright (C) 2016-2018 Florian Heck
* Copyright (C) 2019 Thomas Bock
* Copyright (C) 2025 Leo Sendelbach
* Copyright (C) 2025 Shiraz Jafri
*
* This file is part of GitHubWrapper.
*
Expand Down Expand Up @@ -39,6 +41,7 @@ public class IssueData implements GitHubRepository.IssueDataCached {
UserData user;

@Expose(deserialize = false) State state;
@Expose(deserialize = false) TypeData type;
OffsetDateTime created_at;
@Nullable OffsetDateTime closed_at;

Expand All @@ -51,6 +54,7 @@ public class IssueData implements GitHubRepository.IssueDataCached {
private List<ReviewData> reviewsList;
private List<ReferencedLink<GitHubCommit>> relatedCommits;
List<ReferencedLink<Integer>> relatedIssues;
private List<Integer> subIssues;

transient GitHubRepository repo;
private transient boolean frozen;
Expand Down Expand Up @@ -100,6 +104,16 @@ void setRelatedCommits(List<ReferencedLink<GitHubCommit>> commits) {
relatedCommits = commits;
}

/**
 * Stores the given list as the sub-issues of this Issue.
 *
 * @param issues
 *         the issue numbers of the sub-issues
 */
void setSubIssues(List<Integer> issues) {
    this.subIssues = issues;
}

/**
* Sets a list of related Issues (rather their numbers) to this Issue
* from links containing just issues numbers.
Expand Down Expand Up @@ -276,6 +290,15 @@ public List<ReferencedLink<GitHubCommit>> getRelatedCommits() {
return relatedCommits;
}

/**
 * Gets the sub-issues that belong to the Issue.
 *
 * @return the list of sub-issues, each given by its issue number
 */
public List<Integer> getSubIssues() {
    return this.subIssues;
}

/**
* Gets a List of all Issues referenced in the Issue and its Comments.
*
Expand Down
Loading