src/timescale/repo-devstats.service.ts
Properties |
|
Methods |
|
constructor(pullRequestGithubEventsRepository: Repository<DbPullRequestGitHubEvents>, issuesGithubEventsRepository: Repository<DbIssuesGitHubEvents>, pushGithubEventsRepository: Repository<DbPushGitHubEvents>, watchGitHubEventsRepository: Repository<DbWatchGitHubEvents>, forkGitHubEventsRepository: Repository<DbForkGitHubEvents>)
|
||||||||||||||||||
|
Defined in src/timescale/repo-devstats.service.ts:15
|
||||||||||||||||||
|
Parameters :
|
| Async calculateContributorConfidenceByRepoName |
calculateContributorConfidenceByRepoName(repoName: string, range: number)
|
|
Defined in src/timescale/repo-devstats.service.ts:265
|
|
Returns :
Promise<number>
|
| Private Async calculateForkerConfidence |
calculateForkerConfidence(repoName: string, range: number)
|
|
Defined in src/timescale/repo-devstats.service.ts:177
|
|
Returns :
Promise<number>
|
| Async calculateRepoActivityRatio |
calculateRepoActivityRatio(repoName: string, range: number)
|
|
Defined in src/timescale/repo-devstats.service.ts:57
|
|
Returns :
Promise<number>
|
| Private Async calculateStarGazerConfidence |
calculateStarGazerConfidence(repoName: string, range: number)
|
|
Defined in src/timescale/repo-devstats.service.ts:121
|
|
Returns :
Promise<number>
|
| Async findRepoContributors | ||||||||||||
findRepoContributors(owner: string, repo: string, pageOptionsDto: PageOptionsDto)
|
||||||||||||
|
Defined in src/timescale/repo-devstats.service.ts:274
|
||||||||||||
|
Parameters :
Returns :
Promise<string[]>
|
| Async findRepoContributorStats | |||||||||||||||
findRepoContributorStats(owner: string, repo: string, contributor: string, pageOptionsDto: PageOptionsDto)
|
|||||||||||||||
|
Defined in src/timescale/repo-devstats.service.ts:314
|
|||||||||||||||
|
Parameters :
Returns :
Promise<DbRepoContributor>
|
| Private winsorizeAndNormalizeRatio | ||||||
winsorizeAndNormalizeRatio(ratio: number)
|
||||||
|
Defined in src/timescale/repo-devstats.service.ts:40
|
||||||
|
Parameters :
Returns :
number
|
| Private lowerBound |
Type : number
|
|
Defined in src/timescale/repo-devstats.service.ts:37
|
| Private upperBound |
Type : number
|
|
Defined in src/timescale/repo-devstats.service.ts:38
|
import { BadRequestException, Injectable, NotFoundException } from "@nestjs/common";
import { InjectRepository } from "@nestjs/typeorm";
import { Repository } from "typeorm";
import * as math from "mathjs";
import { PageOptionsDto } from "../common/dtos/page-options.dto";
import { avgRepoActivityRatioSample } from "../common/calculations/avg-repo-activity-ratio-sample";
import { DbRepoContributor } from "../repo/entities/repo_contributors.entity";
import { DbPullRequestGitHubEvents } from "./entities/pull_request_github_event.entity";
import { DbIssuesGitHubEvents } from "./entities/issues_github_event.entity";
import { DbPushGitHubEvents } from "./entities/push_github_events.entity";
import { DbWatchGitHubEvents } from "./entities/watch_github_events.entity";
import { DbForkGitHubEvents } from "./entities/fork_github_events.entity";
@Injectable()
export class RepoDevstatsService {
constructor(
@InjectRepository(DbPullRequestGitHubEvents, "TimescaleConnection")
private pullRequestGithubEventsRepository: Repository<DbPullRequestGitHubEvents>,
@InjectRepository(DbIssuesGitHubEvents, "TimescaleConnection")
private issuesGithubEventsRepository: Repository<DbIssuesGitHubEvents>,
@InjectRepository(DbPushGitHubEvents, "TimescaleConnection")
private pushGithubEventsRepository: Repository<DbPushGitHubEvents>,
@InjectRepository(DbWatchGitHubEvents, "TimescaleConnection")
private watchGitHubEventsRepository: Repository<DbWatchGitHubEvents>,
@InjectRepository(DbForkGitHubEvents, "TimescaleConnection")
private forkGitHubEventsRepository: Repository<DbForkGitHubEvents>
) {
// defines quantile boundaries (ignore lower 5% and upper 95% percentiles)
const lowerQ = 0.05;
const upperQ = 0.75;
// calculate quantile values using the constant sampled activity ratios
this.lowerBound = math.quantileSeq(avgRepoActivityRatioSample, lowerQ) as number;
this.upperBound = math.quantileSeq(avgRepoActivityRatioSample, upperQ) as number;
}
private lowerBound: number;
private upperBound: number;
private winsorizeAndNormalizeRatio(ratio: number): number {
/*
* winsorizing the data uses the upper and lower bounds to ignore extreme
* data samples and outliers.
* See the following for more detail: https://en.wikipedia.org/wiki/Winsorizing
*/
const winsorizedRatio = Math.max(this.lowerBound, Math.min(ratio, this.upperBound));
/*
* normalizing the data gives us a common, human readable ratio between 0 - 10
* that denotes the activity ratio.
*/
const normalized = ((winsorizedRatio - this.lowerBound) / (this.upperBound - this.lowerBound)) * 10;
return normalized;
}
async calculateRepoActivityRatio(repoName: string, range: number): Promise<number> {
// push events subquery
const pushCteBuilder = this.pushGithubEventsRepository
.createQueryBuilder("push_github_events")
.select("time_bucket('1 day', event_time)", "day")
.addSelect("repo_name")
.addSelect("COUNT(DISTINCT LOWER(actor_login))", "contributors")
.where("LOWER(repo_name) = LOWER(:repoName)", { repoName })
.andWhere("now() - :range_interval::INTERVAL <= event_time", { range_interval: `${range} days` })
.groupBy("day")
.addGroupBy("repo_name");
// issues github events CTE
const issuesCteBuilder = this.issuesGithubEventsRepository
.createQueryBuilder("issues_github_events")
.select("time_bucket('1 day', event_time)", "day")
.addSelect("repo_name")
.addSelect("SUM(issue_comments)", "daily_comments")
.where("LOWER(repo_name) = LOWER(:repoName)", { repoName })
.andWhere("now() - :range_interval::INTERVAL <= event_time", { range_interval: `${range} days` })
.groupBy("day")
.addGroupBy("repo_name");
// pull request github events subquery
const prCteBuilder = this.pullRequestGithubEventsRepository
.createQueryBuilder("pull_request_github_events")
.select("time_bucket('1 day', event_time)", "day")
.addSelect("repo_name")
.addSelect("SUM(pr_commits)", "daily_pr_commits")
.where("LOWER(repo_name) = LOWER(:repoName)", { repoName })
.andWhere("now() - :range_interval::INTERVAL <= event_time", { range_interval: `${range} days` })
.groupBy("day")
.addGroupBy("repo_name");
const queryBuilder = this.pullRequestGithubEventsRepository.manager
.createQueryBuilder()
.addCommonTableExpression(pushCteBuilder, "contributors")
.setParameters(pushCteBuilder.getParameters())
.addCommonTableExpression(issuesCteBuilder, "issues")
.setParameters(pushCteBuilder.getParameters())
.addCommonTableExpression(prCteBuilder, "prs")
.setParameters(pushCteBuilder.getParameters())
.select("AVG(calculated_activity)", "avg_calculated_activity")
.from(
`(SELECT
co.day,
co.repo_name,
COALESCE((issues.daily_comments + pr.daily_pr_commits) / NULLIF(co.contributors, 0), 0) AS calculated_activity
FROM
contributors co
LEFT JOIN issues ON co.day = issues.day
AND co.repo_name = issues.repo_name
LEFT JOIN prs pr ON co.day = pr.day
AND co.repo_name = pr.repo_name
ORDER BY co.day DESC)`,
"average_activity"
);
const result = await queryBuilder.getRawOne<{ avg_calculated_activity: number }>();
const parsedResult = parseFloat(`${result?.avg_calculated_activity ?? "0"}`);
return this.winsorizeAndNormalizeRatio(parsedResult);
}
private async calculateStarGazerConfidence(repoName: string, range: number): Promise<number> {
let result = 0;
// gets relevant star gazers for the repo
const starGazersQuery = this.watchGitHubEventsRepository.manager
.createQueryBuilder()
.select("DISTINCT LOWER(actor_login) as login")
.from("watch_github_events", "watch_github_events")
.where("LOWER(repo_name) = LOWER(:repoName)", { repoName })
.andWhere("now() - :range_interval::INTERVAL <= event_time", { range_interval: `${range} days` });
const allStarGazers = await starGazersQuery.getRawMany<{ login: string }>();
if (allStarGazers.length === 0) {
return result;
}
const starGazers = allStarGazers
.map((starGazer) => (starGazer.login ? starGazer.login.toLowerCase() : ""))
.filter((starGazer) => starGazer !== "");
if (starGazers.length === 0) {
return result;
}
const starGazersContributingReposQuery = this.pullRequestGithubEventsRepository.manager
.createQueryBuilder()
.select("count(DISTINCT repo_name) as contributing_repos")
.addSelect("pr_author_login")
.addSelect(`STRING_AGG(DISTINCT repo_name, ',')`, "repo_names")
.from("pull_request_github_events", "pull_request_github_events")
.where("LOWER(pr_author_login) IN (:...logins)", { logins: starGazers })
.andWhere("now() - :range_interval::INTERVAL <= event_time", { range_interval: `${range} days` })
.groupBy("pr_author_login");
const starGazersContributingReposCount = await starGazersContributingReposQuery.getRawMany<{
contributing_repos: number;
pr_author_login: string;
repo_names: string;
}>();
starGazersContributingReposCount.forEach((repoCount) => {
const foundContribution = repoCount.repo_names.split(",").includes(repoName);
if (foundContribution) {
// someone made a contribution within the time window they starred the repo
result += 1;
} else if (repoCount.contributing_repos > 1) {
// starring a repo is weighted less than forking
result += 0.5;
}
});
return result / starGazers.length;
}
private async calculateForkerConfidence(repoName: string, range: number): Promise<number> {
let result = 0;
// gets relevant forkers for the repo over range of days
const forkerQuery = this.forkGitHubEventsRepository.manager
.createQueryBuilder()
.select("DISTINCT LOWER(actor_login) as login")
.from("fork_github_events", "fork_github_events")
.where("LOWER(repo_name) = LOWER(:repoName)", { repoName })
.andWhere("now() - :range_interval::INTERVAL <= event_time", { range_interval: `${range} days` });
const allForkers = await forkerQuery.getRawMany<{ login: string }>();
if (allForkers.length === 0) {
return result;
}
const forkers = allForkers
.map((forker) => (forker.login ? forker.login.toLowerCase() : ""))
.filter((forker) => forker !== "");
if (forkers.length === 0) {
return result;
}
const forkerContributingReposQuery = this.pullRequestGithubEventsRepository.manager
.createQueryBuilder()
.select("count(DISTINCT repo_name) as contributing_repos")
.addSelect("pr_author_login")
.addSelect(`STRING_AGG(DISTINCT repo_name, ',')`, "repo_names")
.from("pull_request_github_events", "pull_request_github_events")
.where("LOWER(pr_author_login) IN (:...logins)", { logins: forkers })
.andWhere("now() - :range_interval::INTERVAL <= event_time", { range_interval: `${range} days` })
.groupBy("pr_author_login");
const forkerContributingReposCount = await forkerContributingReposQuery.getRawMany<{
contributing_repos: number;
pr_author_login: string;
repo_names: string;
}>();
forkerContributingReposCount.forEach((repoCount) => {
const foundContribution = repoCount.repo_names.split(",").includes(repoName);
if (foundContribution) {
// someone made a contribution within the time window they forked the repo
result += 1;
} else if (repoCount.contributing_repos > 1) {
// forking is weighted slightly higher than simply watching a repo
result += 0.75;
}
});
return result / forkers.length;
}
/*
* this is a proof of concept metric called the "Contributor Confidence".
*
* the confidence score is a percentage metric that determines if certain activity on a
* repository (starring, forking, etc.) may result in a meaningful contribution. This can be
* used to determine how likely "fly by" contributors who've contributed meaningfully elsewhere
* within a given time range, are to contribute meaningfully back to the project in question
*
* it is assumed that ranges past 90 days are generally "low confidence". I.e., someone who forked
* a repo half a year ago likely isn't a good indicator of them making a meaningful
* contribution today.
*
* Currently the algorithm exists as:
* --------------------------------------------------------------------------
* Truncate range down to max 90 days
*
* For all stargazers over the time range:
* If they made a contribution to the repo in question:
* Add 1 to score
* Else check if that user has more than one contribution to 2 or more projects:
* Add 0.5 to score
*
* For all forkers over the time range:
* If they made a contribution to the repo in question:
* Add 1 to score
* Else check if that user has more than one contribution to 2 or more projects:
* Add 0.75 to score (forks weight slightly higher)
*
* Finally, calculate:
* score / (# stargazers + # forkers) within time range
* = confidence score as a percentage
*/
async calculateContributorConfidenceByRepoName(repoName: string, range: number): Promise<number> {
range = range > 90 ? 90 : range;
const forkerConfidence = await this.calculateForkerConfidence(repoName, range);
const starGazerConfidence = await this.calculateStarGazerConfidence(repoName, range);
return (forkerConfidence + starGazerConfidence) / 2;
}
async findRepoContributors(owner: string, repo: string, pageOptionsDto: PageOptionsDto): Promise<string[]> {
if (!owner) {
throw new BadRequestException("owner must be a valid github owner");
}
if (!repo) {
throw new BadRequestException("repo must be a valid github repo");
}
const repoName = `${owner}/${repo}`.toLowerCase();
const range = pageOptionsDto.range!;
const usersQuery = this.pullRequestGithubEventsRepository.manager
.createQueryBuilder()
.select("DISTINCT LOWER(actor_login) as login")
.from("pull_request_github_events", "pull_request_github_events")
.where("LOWER(repo_name) = LOWER(:repoName)", { repoName })
.andWhere("now() - :range_interval::INTERVAL <= event_time", { range_interval: `${range} days` })
.groupBy("login");
const allUsers = await usersQuery.getRawMany<{ login: string }>();
if (allUsers.length === 0) {
return new Array<string>();
}
const users = allUsers.map((user) => (user.login ? user.login.toLowerCase() : "")).filter((user) => user !== "");
if (users.length === 0) {
return new Array<string>();
}
return users;
}
/*
* this method is very similar to finding user list contributor stats but instead
* filters on owner/repo for a given github repo name. It also derives the
* users from distinct pull request contributors to that given repo in the given date range
*/
async findRepoContributorStats(
owner: string,
repo: string,
contributor: string,
pageOptionsDto: PageOptionsDto
): Promise<DbRepoContributor> {
if (!owner) {
throw new BadRequestException("owner must be a valid github owner");
}
if (!repo) {
throw new BadRequestException("repo must be a valid github repo");
}
const repoName = `${owner}/${repo}`.toLowerCase();
const range = pageOptionsDto.range!;
const userCte = this.pullRequestGithubEventsRepository.manager.connection
.createQueryBuilder()
.select("LOWER(:contributor) as login")
.from("pull_request_github_events", "pull_request_github_events")
.limit(1)
.setParameter("contributor", contributor);
const commitsCte = this.pullRequestGithubEventsRepository.manager
.createQueryBuilder()
.select("LOWER(actor_login)", "actor_login")
.addSelect("COALESCE(sum(push_num_commits), 0) AS commits")
.from("push_github_events", "push_github_events")
.where("LOWER(actor_login) = :contributor", { contributor })
.andWhere("push_ref IN ('refs/heads/main', 'refs/heads/master')")
.andWhere("now() - :range_interval::INTERVAL <= event_time", { range_interval: `${range} days` })
.andWhere("LOWER(repo_name) = LOWER(:repoName)", { repoName })
.groupBy("LOWER(actor_login)");
const prsCreatedCte = this.pullRequestGithubEventsRepository.manager
.createQueryBuilder()
.select("LOWER(actor_login)", "actor_login")
.addSelect("COALESCE(COUNT(*), 0) AS prs_created")
.from("pull_request_github_events", "pull_request_github_events")
.where("LOWER(actor_login) = :contributor", { contributor })
.andWhere("pr_action = 'opened'")
.andWhere("now() - :range_interval::INTERVAL <= event_time", { range_interval: `${range} days` })
.andWhere("LOWER(repo_name) = LOWER(:repoName)", { repoName })
.groupBy("LOWER(actor_login)");
const prsReviewedCte = this.pullRequestGithubEventsRepository.manager
.createQueryBuilder()
.select("LOWER(actor_login)", "actor_login")
.addSelect("COALESCE(COUNT(*), 0) AS prs_reviewed")
.from("pull_request_review_github_events", "pull_request_review_github_events")
.where("LOWER(actor_login) = :contributor", { contributor })
.andWhere("pr_review_action = 'created'")
.andWhere("now() - :range_interval::INTERVAL <= event_time", { range_interval: `${range} days` })
.andWhere("LOWER(repo_name) = LOWER(:repoName)", { repoName })
.groupBy("LOWER(actor_login)");
const issuesCreatedCte = this.pullRequestGithubEventsRepository.manager
.createQueryBuilder()
.select("LOWER(actor_login)", "actor_login")
.addSelect("COALESCE(COUNT(*), 0) AS issues_created")
.from("issues_github_events", "issues_github_events")
.where("LOWER(actor_login) = :contributor", { contributor })
.andWhere("issue_action = 'opened'")
.andWhere("now() - :range_interval::INTERVAL <= event_time", { range_interval: `${range} days` })
.andWhere("LOWER(repo_name) = LOWER(:repoName)", { repoName })
.groupBy("LOWER(actor_login)");
const commitCommentsCte = this.pullRequestGithubEventsRepository.manager
.createQueryBuilder()
.select("LOWER(actor_login)", "actor_login")
.addSelect("COALESCE(COUNT(*), 0) AS commit_comments")
.from("commit_comment_github_events", "commit_comment_github_events")
.where("LOWER(actor_login) = :contributor", { contributor })
.andWhere("now() - :range_interval::INTERVAL <= event_time", { range_interval: `${range} days` })
.andWhere("LOWER(repo_name) = LOWER(:repoName)", { repoName })
.groupBy("LOWER(actor_login)");
const issueCommentsCte = this.pullRequestGithubEventsRepository.manager
.createQueryBuilder()
.select("LOWER(actor_login)", "actor_login")
.addSelect("COALESCE(COUNT(*), 0) AS issue_comments")
.from("issue_comment_github_events", "issue_comment_github_events")
.where("LOWER(actor_login) = :contributor", { contributor })
.andWhere("now() - :range_interval::INTERVAL <= event_time", { range_interval: `${range} days` })
.andWhere("LOWER(repo_name) = LOWER(:repoName)", { repoName })
.groupBy("LOWER(actor_login)");
const prReviewCommentsCte = this.pullRequestGithubEventsRepository.manager
.createQueryBuilder()
.select("LOWER(actor_login)", "actor_login")
.addSelect("COALESCE(COUNT(*), 0) AS pr_review_comments")
.from("pull_request_review_comment_github_events", "pull_request_review_comment_github_events")
.where("LOWER(actor_login) = :contributor", { contributor })
.andWhere("now() - :range_interval::INTERVAL <= event_time", { range_interval: `${range} days` })
.andWhere("LOWER(repo_name) = LOWER(:repoName)", { repoName })
.groupBy("LOWER(actor_login)");
const entityQb = this.pullRequestGithubEventsRepository.manager
.createQueryBuilder()
.addCommonTableExpression(userCte, "user_stub")
.setParameters(userCte.getParameters())
.addCommonTableExpression(commitsCte, "commits_agg")
.setParameters(commitsCte.getParameters())
.addCommonTableExpression(prsCreatedCte, "prs_created_agg")
.setParameters(prsCreatedCte.getParameters())
.addCommonTableExpression(prsReviewedCte, "prs_reviewed_agg")
.setParameters(prsReviewedCte.getParameters())
.addCommonTableExpression(issuesCreatedCte, "issues_created_agg")
.setParameters(issuesCreatedCte.getParameters())
.addCommonTableExpression(commitCommentsCte, "commit_comments_agg")
.setParameters(commitCommentsCte.getParameters())
.addCommonTableExpression(issueCommentsCte, "issue_comments_agg")
.setParameters(issueCommentsCte.getParameters())
.addCommonTableExpression(prReviewCommentsCte, "pr_review_comments_agg")
.setParameters(prReviewCommentsCte.getParameters())
.addSelect("COALESCE(commits_agg.commits, 0)::INTEGER AS commits")
.addSelect("COALESCE(prs_created_agg.prs_created, 0)::INTEGER AS prs_created")
.addSelect("COALESCE(prs_reviewed_agg.prs_reviewed, 0)::INTEGER AS prs_reviewed")
.addSelect("COALESCE(issues_created_agg.issues_created, 0)::INTEGER AS issues_created")
.addSelect("COALESCE(commit_comments_agg.commit_comments, 0)::INTEGER AS commit_comments")
.addSelect(
`
COALESCE(commits, 0)::INTEGER +
COALESCE(prs_created, 0)::INTEGER +
COALESCE(prs_reviewed, 0)::INTEGER +
COALESCE(issues_created, 0)::INTEGER +
COALESCE(commit_comments, 0)::INTEGER AS total_contributions
`
)
.from("user_stub", "user_stub")
.leftJoin("commits_agg", "commits_agg", "user_stub.login = commits_agg.actor_login")
.leftJoin("prs_created_agg", "prs_created_agg", "user_stub.login = prs_created_agg.actor_login")
.leftJoin("prs_reviewed_agg", "prs_reviewed_agg", "user_stub.login = prs_reviewed_agg.actor_login")
.leftJoin("issues_created_agg", "issues_created_agg", "user_stub.login = issues_created_agg.actor_login")
.leftJoin("commit_comments_agg", "commit_comments_agg", "user_stub.login = commit_comments_agg.actor_login")
.leftJoin("issue_comments_agg", "issue_comments_agg", "commits_agg.actor_login = issue_comments_agg.actor_login")
.leftJoin(
"pr_review_comments_agg",
"pr_review_comments_agg",
"user_stub.login = pr_review_comments_agg.actor_login"
);
const entity: DbRepoContributor | undefined = await entityQb.getRawOne();
if (!entity) {
throw new NotFoundException("could not of derive repo contributor devstats");
}
return entity;
}
}