src/timescale/contrib-stats.service.ts
Methods |
|
constructor(pullRequestGithubEventsRepository: Repository<DbPullRequestGitHubEvents>, watchGitHubEventsRepository: Repository<DbWatchGitHubEvents>, forkGitHubEventsRepository: Repository<DbForkGitHubEvents>, pushGithubEventsService: PushGithubEventsService, pullRequestGithubEventsService: PullRequestGithubEventsService, pullRequestReviewGithubEventsService: PullRequestReviewGithubEventsService, pullRequestReviewCommentGithubEventsService: PullRequestReviewCommentGithubEventsService, issuesGithubEventsService: IssuesGithubEventsService, commitCommentsGithubEventsService: CommitCommentGithubEventsService, issueCommentsGithubEventsService: IssueCommentGithubEventsService)
|
|||||||||||||||||||||||||||||||||
Defined in src/timescale/contrib-stats.service.ts:29
|
|||||||||||||||||||||||||||||||||
Parameters :
|
baseQueryBuilder |
baseQueryBuilder()
|
Defined in src/timescale/contrib-stats.service.ts:48
|
Async calculateContributorConfidenceByUsername |
calculateContributorConfidenceByUsername(username: string, range: number)
|
Defined in src/timescale/contrib-stats.service.ts:738
|
Returns :
Promise<number>
|
Private Async calculateContributorQuality | ||||||
calculateContributorQuality(username: string)
|
||||||
Defined in src/timescale/contrib-stats.service.ts:633
|
||||||
Parameters :
Returns :
Promise<number>
|
Async calculateContributorSwayByUsername | ||||||
calculateContributorSwayByUsername(username: string)
|
||||||
Defined in src/timescale/contrib-stats.service.ts:789
|
||||||
Parameters :
Returns :
Promise<number>
|
Private Async calculateForkerConfidence |
calculateForkerConfidence(username: string, range: number)
|
Defined in src/timescale/contrib-stats.service.ts:469
|
Returns :
Promise<number>
|
Async calculateOpenSourceContributorRating | ||||||
calculateOpenSourceContributorRating(username: string)
|
||||||
Defined in src/timescale/contrib-stats.service.ts:883
|
||||||
Parameters :
Returns :
Promise<number>
|
Private Async calculateStarGazerConfidence |
calculateStarGazerConfidence(username: string, range: number)
|
Defined in src/timescale/contrib-stats.service.ts:538
|
Returns :
Promise<number>
|
Async findAllContributionsByProject | |||||||||
findAllContributionsByProject(pageOptionsDto: ContributionsByProjectDto, users: string[])
|
|||||||||
Defined in src/timescale/contrib-stats.service.ts:168
|
|||||||||
Parameters :
Returns :
Promise<DbContributionsProjects[]>
|
Async findAllContributionsByTimeframe | |||||||||
findAllContributionsByTimeframe(pageOptionsDto: ContributionsTimeframeDto, users: string[])
|
|||||||||
Defined in src/timescale/contrib-stats.service.ts:317
|
|||||||||
Parameters :
Returns :
Promise<DbContributionStatTimeframe[]>
|
Async findAllContributorStats | |||||||||
findAllContributorStats(pageOptionsDto: MostActiveContributorsDto, users: string[])
|
|||||||||
Defined in src/timescale/contrib-stats.service.ts:54
|
|||||||||
Parameters :
Returns :
Promise<DbContributorStat[]>
|
Private Async findContributorStats | |||||||||
findContributorStats(pageOptionsDto: MostActiveContributorsDto, user: string)
|
|||||||||
Defined in src/timescale/contrib-stats.service.ts:76
|
|||||||||
Parameters :
Returns :
Promise<DbContributorStat>
|
import { cpus } from "os";
import { Inject, Injectable, forwardRef } from "@nestjs/common";
import { Repository, SelectQueryBuilder } from "typeorm";
import { InjectRepository } from "@nestjs/typeorm";
import { PromisePool } from "@supercharge/promise-pool";
import { OrderDirectionEnum } from "../common/constants/order-direction.constant";
import {
DbContributionStatTimeframe,
contributionTypeEnum,
} from "../user-lists/entities/contributions-timeframe.entity";
import { ContributionsTimeframeDto } from "../user-lists/dtos/contributions-timeframe.dto";
import { ContributionsByProjectDto } from "../user-lists/dtos/contributions-by-project.dto";
import { DbContributionsProjects } from "../user-lists/entities/contributions-projects.entity";
import { DbPullRequestGitHubEvents } from "./entities/pull_request_github_event.entity";
import { ContributorStatsTypeEnum, MostActiveContributorsDto } from "./dtos/most-active-contrib.dto";
import { DbContributorStat } from "./entities/contributor_devstat.entity";
import { PushGithubEventsService } from "./push_github_events.service";
import { PullRequestGithubEventsService } from "./pull_request_github_events.service";
import { PullRequestReviewGithubEventsService } from "./pull_request_review_github_events.service";
import { IssuesGithubEventsService } from "./issues_github_events.service";
import { CommitCommentGithubEventsService } from "./commit_comment_github_events.service";
import { IssueCommentGithubEventsService } from "./issue_comment_github_events.service";
import { PullRequestReviewCommentGithubEventsService } from "./pull_request_review_comment_github_events.service";
import { DbWatchGitHubEvents } from "./entities/watch_github_events.entity";
import { DbForkGitHubEvents } from "./entities/fork_github_events.entity";
@Injectable()
export class ContributorDevstatsService {
/**
 * Receives the repositories and event services this class combines into
 * contributor statistics.
 *
 * The three repositories are injected for the connection named
 * "TimescaleConnection"; the remaining parameters are the per-event-type
 * services whose lookups are aggregated by the methods below.
 */
constructor(
@InjectRepository(DbPullRequestGitHubEvents, "TimescaleConnection")
private pullRequestGithubEventsRepository: Repository<DbPullRequestGitHubEvents>,
@InjectRepository(DbWatchGitHubEvents, "TimescaleConnection")
private watchGitHubEventsRepository: Repository<DbWatchGitHubEvents>,
@InjectRepository(DbForkGitHubEvents, "TimescaleConnection")
private forkGitHubEventsRepository: Repository<DbForkGitHubEvents>,
private pushGithubEventsService: PushGithubEventsService,
// injected lazily through forwardRef — presumably because the two services reference each other; confirm
@Inject(forwardRef(() => PullRequestGithubEventsService))
private pullRequestGithubEventsService: PullRequestGithubEventsService,
private pullRequestReviewGithubEventsService: PullRequestReviewGithubEventsService,
private pullRequestReviewCommentGithubEventsService: PullRequestReviewCommentGithubEventsService,
// injected lazily through forwardRef — presumably because the two services reference each other; confirm
@Inject(forwardRef(() => IssuesGithubEventsService))
private issuesGithubEventsService: IssuesGithubEventsService,
private commitCommentsGithubEventsService: CommitCommentGithubEventsService,
private issueCommentsGithubEventsService: IssueCommentGithubEventsService
) {}
/*
 * hands back a new query builder created from the pull request events repository.
 */
baseQueryBuilder(): SelectQueryBuilder<DbPullRequestGitHubEvents> {
  return this.pullRequestGithubEventsRepository.createQueryBuilder();
}
/*
 * looks up the contributor stats of every given user through a promise pool
 * sized to the number of CPUs (never fewer than 2 workers).
 *
 * errors collected by the pool are written to the console; the pool's results
 * are returned as they are.
 */
async findAllContributorStats(
  pageOptionsDto: MostActiveContributorsDto,
  users: string[]
): Promise<DbContributorStat[]> {
  const workerCount = Math.max(2, cpus().length);
  const userPool = PromisePool.withConcurrency(workerCount).for(users);

  const outcome = await userPool.process(async (login) => this.findContributorStats(pageOptionsDto, login));

  if (outcome.errors.length) {
    console.error("Errors occurred:", outcome.errors);
  }

  return outcome.results;
}
/*
 * this function aggregates the contribution counts of a single user from
 * multiple events services in parallel.
 *
 * commits, opened PRs, PR reviews and opened issues are added to
 * "total_contributions"; commit comments, issue comments and PR review comments
 * are added to "comments" only.
 *
 * a lookup that fails is logged and leaves its counter at 0, so one failing
 * events service does not discard the rest of the user's stats.
 *
 * warning! It is assumed that the "user" string input is already valid.
 * make all best efforts to validate and filter invalid user strings before calling this
 */
private async findContributorStats(
  pageOptionsDto: MostActiveContributorsDto,
  user: string
): Promise<DbContributorStat> {
  const contribType = pageOptionsDto.contributorType ?? ContributorStatsTypeEnum.all;
  const range = pageOptionsDto.range!;
  const repos = pageOptionsDto.repos ? pageOptionsDto.repos.toLowerCase().split(",") : undefined;

  // the order of this list defines the order of the destructured results below
  const statsFunctions = [
    async () => this.pushGithubEventsService.getPushCountForLogin(user, contribType, range, repos),
    async () => this.pullRequestGithubEventsService.getOpenedPrsCountForAuthor(user, contribType, range, repos),
    async () =>
      this.pullRequestReviewGithubEventsService.getPrReviewCountForReviewer(user, contribType, range, repos),
    async () => this.issuesGithubEventsService.getIssueCountForAuthor(user, contribType, range, repos),
    async () =>
      this.commitCommentsGithubEventsService.getCommitCommentCountForAuthor(user, contribType, range, repos),
    async () => this.issueCommentsGithubEventsService.getIssueCommentCountForAuthor(user, contribType, range, repos),
    async () =>
      this.pullRequestReviewCommentGithubEventsService.getPrReviewCommentCountForAuthor(
        user,
        contribType,
        range,
        repos
      ),
  ];

  const { results, errors } = await PromisePool.withConcurrency(Math.max(2, cpus().length))
    .for(statsFunctions)
    .useCorrespondingResults()
    .process(async (func) => func());

  // surface failed lookups the same way the other aggregation methods in this class do
  if (errors.length !== 0) {
    console.error("Errors occurred:", errors);
  }

  const [commits, prs_created, prs_reviewed, issues_created, commit_comments, issue_comments, pr_review_comments] =
    results;

  // with corresponding results, a slot without a value holds a symbol instead of a count
  const countOrZero = (value: number | symbol): number => (typeof value === "symbol" ? 0 : value);

  const contribStat = new DbContributorStat({
    login: user,
    commits: 0,
    prs_created: 0,
    prs_reviewed: 0,
    issues_created: 0,
    commit_comments: 0,
    issue_comments: 0,
    pr_review_comments: 0,
    comments: 0,
    total_contributions: 0,
  });

  contribStat.commits = countOrZero(commits);
  contribStat.prs_created = countOrZero(prs_created);
  contribStat.prs_reviewed = countOrZero(prs_reviewed);
  contribStat.issues_created = countOrZero(issues_created);
  contribStat.commit_comments = countOrZero(commit_comments);
  contribStat.issue_comments = countOrZero(issue_comments);
  contribStat.pr_review_comments = countOrZero(pr_review_comments);

  contribStat.total_contributions +=
    contribStat.commits + contribStat.prs_created + contribStat.prs_reviewed + contribStat.issues_created;
  contribStat.comments += contribStat.commit_comments + contribStat.issue_comments + contribStat.pr_review_comments;

  return contribStat;
}
/*
 * this function aggregates the contributions of the given users per repository,
 * pulling the raw events from multiple events services in parallel.
 *
 * every repository gets one record whose counters all start at 0, so a repo that
 * is first seen through one event type (e.g. a commit) can safely be incremented
 * for any other event type later on.
 *
 * commits, opened PRs, PR reviews and opened issues count towards
 * "total_contributions"; the three comment types count towards "comments".
 *
 * warning! It is assumed that the "users" string input is already valid.
 * make all best efforts to validate and filter invalid user strings before calling this
 */
async findAllContributionsByProject(
  pageOptionsDto: ContributionsByProjectDto,
  users: string[]
): Promise<DbContributionsProjects[]> {
  type ActivityField =
    | "commits"
    | "prs_created"
    | "prs_reviewed"
    | "issues_created"
    | "commit_comments"
    | "issue_comments"
    | "pr_review_comments";
  type RollupField = "total_contributions" | "comments";

  // a Map (rather than a plain object) so repo names can never collide with Object.prototype keys
  const statsByRepo = new Map<string, DbContributionsProjects>();
  const range = pageOptionsDto.range!;
  const repos = pageOptionsDto.repos ? pageOptionsDto.repos.toLowerCase().split(",") : undefined;

  // adds one event to the repo's record, creating a fully zeroed record on first sight
  const recordEvent = (repoName: string, field: ActivityField, rollup: RollupField): void => {
    let stats = statsByRepo.get(repoName);

    if (stats === undefined) {
      stats = new DbContributionsProjects();
      stats.repo_name = repoName;
      stats.commits = 0;
      stats.prs_created = 0;
      stats.prs_reviewed = 0;
      stats.issues_created = 0;
      stats.commit_comments = 0;
      stats.issue_comments = 0;
      stats.pr_review_comments = 0;
      stats.comments = 0;
      stats.total_contributions = 0;
      statsByRepo.set(repoName, stats);
    }

    stats[field]++;
    stats[rollup]++;
  };

  /*
   * only process 1 users info at a time since there may be race conditions with
   * building the per-repo records.
   */
  const { errors: userErrors } = await PromisePool.withConcurrency(1)
    .for(users)
    .process(async (user) => {
      // the order of this list defines the order of the destructured results below
      const statsFunctions = [
        async () => this.pushGithubEventsService.getPushEventsAllForLogin(user, range, repos),
        async () => this.pullRequestGithubEventsService.getOpenedPullReqEventsForLogin(user, range, repos),
        async () =>
          this.pullRequestReviewGithubEventsService.getCreatedPullReqReviewEventsForLogin(user, range, repos),
        async () => this.issuesGithubEventsService.getCreatedIssueEventsForLogin(user, range, repos),
        async () => this.commitCommentsGithubEventsService.getCommitCommentEventsForLogin(user, range, repos),
        async () => this.issueCommentsGithubEventsService.getIssueCommentEventsForLogin(user, range, repos),
        async () =>
          this.pullRequestReviewCommentGithubEventsService.getPullReqReviewCommentEventsForLogin(user, range, repos),
      ];

      const { results, errors } = await PromisePool.withConcurrency(Math.max(2, cpus().length))
        .for(statsFunctions)
        .useCorrespondingResults()
        .process(async (func) => func());

      if (errors.length !== 0) {
        console.error("Errors occurred:", errors);
      }

      const [
        commits,
        prs_created,
        prs_reviewed,
        issues_created,
        commit_comments,
        issue_comments,
        pr_review_comments,
      ] = results;

      // a symbol in a result slot means that lookup produced no value; skip it
      if (typeof commits !== "symbol") {
        commits.forEach((commit) => recordEvent(commit.repo_name, "commits", "total_contributions"));
      }

      if (typeof prs_created !== "symbol") {
        prs_created.forEach((pr) => recordEvent(pr.repo_name, "prs_created", "total_contributions"));
      }

      if (typeof prs_reviewed !== "symbol") {
        prs_reviewed.forEach((review) => recordEvent(review.repo_name, "prs_reviewed", "total_contributions"));
      }

      if (typeof issues_created !== "symbol") {
        issues_created.forEach((issue) => recordEvent(issue.repo_name, "issues_created", "total_contributions"));
      }

      if (typeof commit_comments !== "symbol") {
        commit_comments.forEach((comment) => recordEvent(comment.repo_name, "commit_comments", "comments"));
      }

      if (typeof issue_comments !== "symbol") {
        issue_comments.forEach((comment) => recordEvent(comment.repo_name, "issue_comments", "comments"));
      }

      if (typeof pr_review_comments !== "symbol") {
        pr_review_comments.forEach((comment) => recordEvent(comment.repo_name, "pr_review_comments", "comments"));
      }
    });

  // failures while tallying a user would otherwise vanish inside the outer pool
  if (userErrors.length !== 0) {
    console.error("Errors occurred:", userErrors);
  }

  return Array.from(statsByRepo.values());
}
/*
 * this function aggregates the contributions of the given users into one record
 * per UTC calendar day, pulling the raw events from multiple events services
 * in parallel.
 *
 * the bucket key is the ISO timestamp of midnight UTC of the event's day. It is
 * built with Date.UTC so the key does not shift with the time zone of the host.
 *
 * commits, opened PRs, PR reviews and opened issues count towards
 * "total_contributions"; the three comment types count towards "comments".
 *
 * warning! It is assumed that the "users" string input is already valid.
 * make all best efforts to validate and filter invalid user strings before calling this
 */
async findAllContributionsByTimeframe(
  pageOptionsDto: ContributionsTimeframeDto,
  users: string[]
): Promise<DbContributionStatTimeframe[]> {
  const aggregatedStats: Record<string, DbContributionStatTimeframe> = {};
  const range = pageOptionsDto.range!;
  const repos = pageOptionsDto.repos ? pageOptionsDto.repos.toLowerCase().split(",") : undefined;

  // adds one event to the record of its UTC day, creating a zeroed record on first sight
  const addToBucket = (eventTime: Date, contributionType: contributionTypeEnum): void => {
    const bucket = new Date(
      Date.UTC(eventTime.getUTCFullYear(), eventTime.getUTCMonth(), eventTime.getUTCDate())
    ).toISOString();

    if (!(bucket in aggregatedStats)) {
      aggregatedStats[bucket] = new DbContributionStatTimeframe();
      aggregatedStats[bucket].bucket = bucket;
      aggregatedStats[bucket].commits = 0;
      aggregatedStats[bucket].prs_created = 0;
      aggregatedStats[bucket].prs_reviewed = 0;
      aggregatedStats[bucket].issues_created = 0;
      aggregatedStats[bucket].commit_comments = 0;
      aggregatedStats[bucket].issue_comments = 0;
      aggregatedStats[bucket].pr_review_comments = 0;
      aggregatedStats[bucket].comments = 0;
      aggregatedStats[bucket].total_contributions = 0;
    }

    const stats = aggregatedStats[bucket];

    switch (contributionType) {
      case contributionTypeEnum.commit:
        stats.commits++;
        stats.total_contributions++;
        break;

      case contributionTypeEnum.pr:
        stats.prs_created++;
        stats.total_contributions++;
        break;

      case contributionTypeEnum.pr_review:
        stats.prs_reviewed++;
        stats.total_contributions++;
        break;

      case contributionTypeEnum.issue:
        stats.issues_created++;
        stats.total_contributions++;
        break;

      case contributionTypeEnum.commit_comment:
        stats.commit_comments++;
        stats.comments++;
        break;

      case contributionTypeEnum.issue_comment:
        stats.issue_comments++;
        stats.comments++;
        break;

      case contributionTypeEnum.pr_review_comment:
        stats.pr_review_comments++;
        stats.comments++;
        break;

      default:
        console.error("got unhandled contributor timeframe enum", contributionType);
        break;
    }
  };

  /*
   * only process 1 users info at a time since there may be race conditions with
   * building the Record.
   */
  const { errors: userErrors } = await PromisePool.withConcurrency(1)
    .for(users)
    .process(async (user) => {
      // the order of this list defines the order of the destructured results below
      const statsFunctions = [
        async () => this.pushGithubEventsService.getPushEventsAllForLogin(user, range, repos),
        async () => this.pullRequestGithubEventsService.getOpenedPullReqEventsForLogin(user, range, repos),
        async () =>
          this.pullRequestReviewGithubEventsService.getCreatedPullReqReviewEventsForLogin(user, range, repos),
        async () => this.issuesGithubEventsService.getCreatedIssueEventsForLogin(user, range, repos),
        async () => this.commitCommentsGithubEventsService.getCommitCommentEventsForLogin(user, range, repos),
        async () => this.issueCommentsGithubEventsService.getIssueCommentEventsForLogin(user, range, repos),
        async () =>
          this.pullRequestReviewCommentGithubEventsService.getPullReqReviewCommentEventsForLogin(user, range, repos),
      ];

      const { results, errors } = await PromisePool.withConcurrency(Math.max(2, cpus().length))
        .for(statsFunctions)
        .useCorrespondingResults()
        .process(async (func) => func());

      if (errors.length !== 0) {
        console.error("Errors occurred:", errors);
      }

      const [
        commits,
        prs_created,
        prs_reviewed,
        issues_created,
        commit_comments,
        issue_comments,
        pr_review_comments,
      ] = results;

      // a symbol in a result slot means that lookup produced no value; skip it
      if (typeof commits !== "symbol") {
        commits.forEach((commit) => addToBucket(commit.event_time, contributionTypeEnum.commit));
      }

      if (typeof prs_created !== "symbol") {
        prs_created.forEach((pr) => addToBucket(pr.event_time, contributionTypeEnum.pr));
      }

      if (typeof prs_reviewed !== "symbol") {
        prs_reviewed.forEach((pr_review) => addToBucket(pr_review.event_time, contributionTypeEnum.pr_review));
      }

      if (typeof issues_created !== "symbol") {
        issues_created.forEach((issue) => addToBucket(issue.event_time, contributionTypeEnum.issue));
      }

      if (typeof commit_comments !== "symbol") {
        commit_comments.forEach((commit_comment) =>
          addToBucket(commit_comment.event_time, contributionTypeEnum.commit_comment)
        );
      }

      if (typeof issue_comments !== "symbol") {
        issue_comments.forEach((issue_comment) =>
          addToBucket(issue_comment.event_time, contributionTypeEnum.issue_comment)
        );
      }

      if (typeof pr_review_comments !== "symbol") {
        pr_review_comments.forEach((pr_review_comment) =>
          addToBucket(pr_review_comment.event_time, contributionTypeEnum.pr_review_comment)
        );
      }
    });

  // failures while tallying a user would otherwise vanish inside the outer pool
  if (userErrors.length !== 0) {
    console.error("Errors occurred:", userErrors);
  }

  return Object.values(aggregatedStats);
}
/*
 * used to calculate the contributor confidence for the given user
 * by looking at the repos the user forked and if they contributed back within that range.
 *
 * scoring:
 *   +1    for every repo forked in the window that the user also opened a PR against
 *   +0.75 once, if the user opened a PR against any repo they did not fork in the window
 *
 * the score is divided by the number of distinct repos forked in the window.
 * Returns 0 when the user forked nothing in the window.
 *
 * user and repo names are compared case-insensitively on both sides: the login
 * is lower-cased in both queries and repo names are lower-cased before lookup.
 */
private async calculateForkerConfidence(username: string, range: number): Promise<number> {
  const rangeInterval = `${range} days`;

  // distinct repos the user forked over the range of days
  const forkRows = await this.forkGitHubEventsRepository.manager
    .createQueryBuilder()
    .select("DISTINCT LOWER(repo_name) as repo_name")
    .from("fork_github_events", "fork_github_events")
    .where("LOWER(actor_login) = LOWER(:username)", { username })
    .andWhere("now() - :range_interval::INTERVAL <= event_time", { range_interval: rangeInterval })
    .getRawMany<{ repo_name: string }>();

  // forks set for fast lookup; rows without a repo name are ignored
  const forkedRepos = new Set<string>();

  for (const row of forkRows) {
    if (row.repo_name) {
      forkedRepos.add(row.repo_name.toLowerCase());
    }
  }

  if (forkedRepos.size === 0) {
    return 0;
  }

  // distinct repos the user opened pull requests against over the same range
  const contributionRows = await this.pullRequestGithubEventsRepository.manager
    .createQueryBuilder()
    .select("DISTINCT LOWER(repo_name)", "repo_name")
    .from("pull_request_github_events", "pull_request_github_events")
    .where("LOWER(pr_author_login) = LOWER(:username)", { username })
    .andWhere("now() - :range_interval::INTERVAL <= event_time", { range_interval: rangeInterval })
    .getRawMany<{ repo_name: string }>();

  const contributedRepos = new Set<string>();

  for (const row of contributionRows) {
    if (row.repo_name) {
      contributedRepos.add(row.repo_name.toLowerCase());
    }
  }

  let result = 0;
  let madeContributionElsewhere = false;

  for (const repoName of contributedRepos) {
    if (forkedRepos.has(repoName)) {
      // someone made a contribution within the time window they forked the repo
      result += 1;
    } else {
      // someone made a contribution in a repo that wasn't forked in the given time range
      madeContributionElsewhere = true;
    }
  }

  if (madeContributionElsewhere) {
    result += 0.75;
  }

  return result / forkedRepos.size;
}
/*
 * used to calculate the contributor confidence for the given user
 * by looking at the repos the user starred and if they contributed back within that range.
 *
 * scoring:
 *   +1   for every repo starred in the window that the user also opened a PR against
 *   +0.5 once, if the user opened a PR against any repo they did not star in the window
 *
 * the score is divided by the number of distinct repos starred in the window.
 * Returns 0 when the user starred nothing in the window.
 *
 * user and repo names are compared case-insensitively on both sides: the login
 * is lower-cased in both queries and repo names are lower-cased before lookup.
 */
private async calculateStarGazerConfidence(username: string, range: number): Promise<number> {
  const rangeInterval = `${range} days`;

  // distinct repos the user starred over the range of days
  const starRows = await this.watchGitHubEventsRepository.manager
    .createQueryBuilder()
    .select("DISTINCT LOWER(repo_name) as repo_name")
    .from("watch_github_events", "watch_github_events")
    .where("LOWER(actor_login) = LOWER(:username)", { username })
    .andWhere("now() - :range_interval::INTERVAL <= event_time", { range_interval: rangeInterval })
    .getRawMany<{ repo_name: string }>();

  // star gazed set for fast lookup; rows without a repo name are ignored
  const starGazedRepos = new Set<string>();

  for (const row of starRows) {
    if (row.repo_name) {
      starGazedRepos.add(row.repo_name.toLowerCase());
    }
  }

  if (starGazedRepos.size === 0) {
    return 0;
  }

  /*
   * distinct repos the user opened pull requests against over the same range.
   * the column alias must be "repo_name" because that is the key read from the raw rows below.
   */
  const contributionRows = await this.pullRequestGithubEventsRepository.manager
    .createQueryBuilder()
    .select("DISTINCT LOWER(repo_name)", "repo_name")
    .from("pull_request_github_events", "pull_request_github_events")
    .where("LOWER(pr_author_login) = LOWER(:username)", { username })
    .andWhere("now() - :range_interval::INTERVAL <= event_time", { range_interval: rangeInterval })
    .getRawMany<{ repo_name: string }>();

  const contributedRepos = new Set<string>();

  for (const row of contributionRows) {
    if (row.repo_name) {
      contributedRepos.add(row.repo_name.toLowerCase());
    }
  }

  let result = 0;
  let madeContributionElsewhere = false;

  for (const repoName of contributedRepos) {
    if (starGazedRepos.has(repoName)) {
      // someone made a contribution within the time window they star gazed the repo
      result += 1;
    } else {
      // someone made a contribution in a repo that wasn't starred in the given time range
      madeContributionElsewhere = true;
    }
  }

  if (madeContributionElsewhere) {
    result += 0.5;
  }

  return result / starGazedRepos.size;
}
/*
 * the following is an experimental, proof of concept contributor metric called "Contributor Quality"
 *
 * This is a calculation of how a user's open source contributions are accepted
 * (or rejected) in open source. The more that are accepted, the higher this score goes.
 * This looks over a static period of 90 days so there is less of a chance to
 * manipulate or game the system.
 *
 * Much like a credit score, this is a sliding window. So, eventually, after the 90 day period,
 * a user's score should change slightly where PRs marked as spam or otherwise
 * "hurtful" to the given score drop off and someone can begin "rebuilding" their score.
 *
 * The algorithm is as follows (the first matching rule wins for each PR event):
 * ----------------------------
 * for the individual contributor:
 *   For each PR event in the last 90 days (at most 1000 are considered):
 *     If "pr_active_lock_reason" is "spam": -99 to score
 *     If closed without merge within 7 days of creation: -1 to score
 *     If merged: +3 to score
 *     If opened: +1 to score
 *   return score as a percentage
 *
 *
 * The following can be used as an estimate / "opinion" on what the various
 * raw scores here mean:
 *
 *   -1      : Special case - Likely a spam account. Use extreme caution!
 *   0 - 10  : Of unknown, questionable, or subpar quality
 *   10 - 20 : Good!
 *   20 - 40 : High quality.
 *   40 - 60 : Superb quality, the highest possible.
 *
 * Scores are returned as a fraction of the total max score (raw score / 60, capped at 1),
 * or -1 when the raw score is negative: this may be used with other metrics to weight
 * an overall score.
 */
private async calculateContributorQuality(username: string): Promise<number> {
  const maxScore = 60;
  let result = 0;

  const prs = await this.pullRequestGithubEventsService.findAllByPrAuthor(username, {
    limit: 1000,
    skip: 0,
    range: 90,
    orderDirection: OrderDirectionEnum.DESC,
  });

  for (const pr of prs.data) {
    // found spam PR
    if (pr.pr_active_lock_reason === "spam") {
      result -= 99;
      continue;
    }

    // pr was closed within 7 days without merge
    if (pr.pr_action === "closed" && !pr.pr_is_merged && pr.pr_closed_at && pr.pr_created_at) {
      /*
       * work on a copy: Date#setDate mutates its receiver, and the PR's own
       * creation date must stay untouched. Both dates are checked above, so a PR
       * without a creation date no longer throws here.
       */
      const createdAtPlusSevenDays = new Date(pr.pr_created_at.getTime());

      createdAtPlusSevenDays.setDate(createdAtPlusSevenDays.getDate() + 7);

      if (createdAtPlusSevenDays > pr.pr_closed_at) {
        result -= 1;
        continue;
      }
    }

    // pr was merged successfully
    if (pr.pr_is_merged) {
      result += 3;
      continue;
    }

    // pr is opened
    if (pr.pr_action === "opened") {
      result += 1;
    }
  }

  /*
   * short circuit to just return the special case -1 if overall score is below zero
   * likely indicates user has created spam PRs
   */
  if (result < 0) {
    return -1;
  }

  // anything beyond the highest range of the quality score is capped at the max score
  if (result > maxScore) {
    result = maxScore;
  }

  // return the score as a fraction of the max score
  return result / maxScore;
}
/*
 * proof of concept metric: the "Contributor Confidence" of an individual user.
 *
 * the confidence score expresses how often a user's "fly by" activity on a
 * repository (starring, forking) is accompanied by a pull request within the
 * given time range.
 *
 * ranges past 90 days are treated as "low confidence" (a fork from half a year
 * ago says little about a contribution today), so any larger range is reduced
 * to 90 days before the lookups run.
 *
 * compared to the confidence that can be applied on a given repo, far fewer
 * points are assigned for contributing elsewhere: stars and forks are sometimes
 * just bookmarks, while individuals who contribute a lot across open source
 * ecosystems should still surface.
 *
 * The algorithm:
 * --------------------------------------------------------------------------
 * Truncate range down to max 90 days
 *
 * Star gazer confidence:
 *   +1 for each repo starred in the range that the user contributed to
 *   +0.5 once if the user contributed to any other repo
 *   divided by the number of starred repos
 *
 * Forker confidence:
 *   +1 for each repo forked in the range that the user contributed to
 *   +0.75 once if the user contributed to any other repo
 *   divided by the number of forked repos
 *
 * Result:
 *   ( forker confidence + star gazer confidence ) / 2
 */
async calculateContributorConfidenceByUsername(username: string, range: number): Promise<number> {
  let effectiveRange = range;

  if (range > 90) {
    effectiveRange = 90;
  }

  const fromForks = await this.calculateForkerConfidence(username, effectiveRange);
  const fromStars = await this.calculateStarGazerConfidence(username, effectiveRange);
  const combined = fromForks + fromStars;

  return combined / 2;
}
/*
 * proof of concept metric: the "Contributor Sway" of an individual user.
 * Sway is how influential someone is in the open source ecosystem. It is built
 * from five histograms for the contributor: issues, pull requests, issue
 * comments, pull request reviews and pull request review comments.
 *
 * The window is fixed at 90 days to make gaming the system harder and to let
 * "hurtful" incidents fall off eventually, much like a credit score.
 *
 * Points per histogram bucket:
 * --------------------------------------------------------------------------
 * Issues:
 *   + opened issues
 *   + comments on those issues
 *   + reactions * 1.5 (slightly heavier weight)
 *   - 9999 if the bucket contains spam issues
 *
 * Pull requests:
 *   + active prs
 *   + comments on those prs
 *   + review comments on those prs
 *   - 9999 if the bucket contains spam prs
 *
 * Issue comments (which includes PRs):
 *   + all comments
 *   + issue comment reactions * 1.5 (slightly heavier weight)
 *   + pr comment reactions * 1.5 (slightly heavier weight)
 *
 * Pull request reviews:
 *   + all reviews
 *
 * Pull request review comments:
 *   + all review comments
 *
 * A negative total (spam was found) returns the special value -1. Otherwise the
 * total is capped at the max score of 1200 and returned as a fraction of it.
 * 1200 is a very rough estimate of what a rather active contributor might reach:
 * e.g. someone making 10 comments/issues/pull reqs every day, with every other
 * one of those getting a reaction.
 */
async calculateContributorSwayByUsername(username: string): Promise<number> {
  const maxScore = 1200;
  const histogramOptions = { range: 90, contributor: username };

  let points = 0;

  const issueBuckets = await this.issuesGithubEventsService.genIssueHistogram({ ...histogramOptions });

  issueBuckets.forEach((issueBucket) => {
    if (issueBucket.spam_issues !== 0) {
      points -= 9999;
    }

    points += issueBucket.opened_issues;
    points += issueBucket.comments_on_issues;
    points += issueBucket.reactions_count * 1.5;
  });

  const prBuckets = await this.pullRequestGithubEventsService.genPrHistogram({ ...histogramOptions });

  prBuckets.forEach((prBucket) => {
    if (prBucket.spam_prs !== 0) {
      points -= 9999;
    }

    points += prBucket.active_prs;
    points += prBucket.comments_on_prs;
    points += prBucket.review_comments_on_prs;
  });

  const issueCommentBuckets = await this.issueCommentsGithubEventsService.genIssueCommentHistogram({
    ...histogramOptions,
  });

  issueCommentBuckets.forEach((commentBucket) => {
    points += commentBucket.all_comments;
    points += commentBucket.issue_comment_reactions_count * 1.5;
    points += commentBucket.pr_comment_reactions_count * 1.5;
  });

  const reviewBuckets = await this.pullRequestReviewGithubEventsService.genPrReviewHistogram({
    ...histogramOptions,
  });

  reviewBuckets.forEach((reviewBucket) => {
    points += reviewBucket.all_reviews;
  });

  const reviewCommentBuckets =
    await this.pullRequestReviewCommentGithubEventsService.genPullRequestReviewCommentHistogram({
      ...histogramOptions,
    });

  reviewCommentBuckets.forEach((reviewCommentBucket) => {
    points += reviewCommentBucket.all_review_comments;
  });

  // negative points mean spam issues / prs were found: report the special case
  if (points < 0) {
    return -1;
  }

  // anything above the max score counts as the max score; the result is a fraction of it
  const capped = points > maxScore ? maxScore : points;

  return capped / maxScore;
}
/*
 * this is a proof of concept metric called the "Open Source Contributor Rating" or "OSCR" for short.
 *
 * It is a sort of sliding scale "credit score" rating which is mostly weighted
 * on the "quality" and "sway" scores that measure a user's contributions
 * to open source projects.
 *
 * The OSCR algorithm is as follows:
 * --------------------------------------------------------------------------
 * for the given user:
 *   calculate their quality score
 *     if it is -1: short circuit to an OSCR of 0
 *   calculate their sway score
 *     if it is -1: short circuit to an OSCR of 0
 *   calculate their confidence score (over 90 days)
 *
 *   calculate OSCR by taking 40% quality score, 40% sway score, and 20% confidence score
 *   and scaling the weighted sum by 300
 *
 * The spam checks run before the remaining scores are calculated so that no
 * further lookups are made for a user whose rating is already known to be 0.
 *
 * NOTE(review): quality and sway are capped at 1, but the confidence helpers add
 * a bonus on top of the per-repo matches before dividing, so confidence can
 * exceed 1 and the rating can end up slightly above 300 — confirm whether
 * callers expect a hard cap.
 */
async calculateOpenSourceContributorRating(username: string): Promise<number> {
  const maxOscr = 300;

  /*
   * -1 is a special case indicating some type of spam or problem was detected
   * for the user. Automatically return a 0 for the user's score.
   */
  const qualityPercentage = await this.calculateContributorQuality(username);

  if (qualityPercentage === -1) {
    return 0;
  }

  const swayPercentage = await this.calculateContributorSwayByUsername(username);

  if (swayPercentage === -1) {
    return 0;
  }

  const confidencePercentage = await this.calculateContributorConfidenceByUsername(username, 90);

  /*
   * apply weighting:
   *   - confidence has a weight of 20%
   *   - quality has a weight of 40%
   *   - sway has a weight of 40%
   */
  const weightedConfidence = confidencePercentage * 0.2;
  const weightedQuality = qualityPercentage * 0.4;
  const weightedSway = swayPercentage * 0.4;

  const weightedOscr = weightedConfidence + weightedQuality + weightedSway;

  return weightedOscr * maxOscr;
}
}