import { BadRequestException, Inject, Injectable, NotFoundException, forwardRef } from "@nestjs/common";
import { InjectRepository } from "@nestjs/typeorm";
import { Repository, SelectQueryBuilder } from "typeorm";
import { UserPrsDto } from "../user/dtos/user-prs.dto";
import { ContributorHistogramDto } from "../histogram/dtos/contributor.dto";
import { DbContributorHistogram } from "../histogram/entities/contributors.entity";
import { PullRequestHistogramDto } from "../histogram/dtos/pull_request.dto";
import { FilterListContributorsDto } from "../user-lists/dtos/filter-contributors.dto";
import { RepoService } from "../repo/repo.service";
import { PullRequestPageOptionsDto } from "../pull-requests/dtos/pull-request-page-options.dto";
import { RepoSearchOptionsDto } from "../repo/dtos/repo-search-options.dto";
import { PageOptionsDto } from "../common/dtos/page-options.dto";
import { PageMetaDto } from "../common/dtos/page-meta.dto";
import { PageDto } from "../common/dtos/page.dto";
import { GetPrevDateISOString } from "../common/util/datetimes";
import { UserListService } from "../user-lists/user-list.service";
import { PullRequestContributorOptionsDto } from "../pull-requests/dtos/pull-request-contributor-options.dto";
import { DbPullRequestContributor } from "../pull-requests/dtos/pull-request-contributor.dto";
import { PullRequestContributorInsightsDto } from "../pull-requests/dtos/pull-request-contributor-insights.dto";
import { OrderDirectionEnum } from "../common/constants/order-direction.constant";
import { DbPullRequestGitHubEvents } from "./entities/pull_request_github_event.entity";
import { DbPullRequestGitHubEventsHistogram } from "./entities/pull_request_github_events_histogram.entity";
import { DbRossContributorsHistogram, DbRossIndexHistogram } from "./entities/ross_index_histogram.entity";
import { sanitizeRepos } from "./common/repos";
import { DbContributorCounts } from "./entities/contributor_counts.entity";
import { applyContribTypeEnumFilters } from "./common/counts";
import { ContributorStatsTypeEnum } from "./dtos/most-active-contrib.dto";
/*
* pull request events, named "PullRequestEvent" in the GitHub API, are when
* a GitHub actor opens/modifies/closes a pull request.
*
* IMPORTANT NOTE: issue events in this context are for only repo issues.
* This may be confusing because "issues" in the context of the GitHub API refer to BOTH pull
* requests and actual issues. But, pull requests in this service are for only prs on GitHub repos.
* Not repo issues. For creation / edits of GitHub issues, see IssuesGithubEventsService.
*
* for further details, refer to: https://docs.github.com/en/rest/using-the-rest-api/github-event-types?apiVersion=2022-11-28
*/
@Injectable()
export class PullRequestGithubEventsService {
constructor(
@InjectRepository(DbPullRequestGitHubEvents, "TimescaleConnection")
private pullRequestGithubEventsRepository: Repository<DbPullRequestGitHubEvents>,
@Inject(forwardRef(() => RepoService))
private readonly repoService: RepoService,
@Inject(forwardRef(() => UserListService))
private userListService: UserListService
) {}
baseQueryBuilder() {
const builder = this.pullRequestGithubEventsRepository.createQueryBuilder("pull_request_github_events");
return builder;
}
/*
* this CTE gets all pull requests for a given author in a given time window.
* the prs are partitioned by the most recent event (since there may be multiple
* events for any given pr): this way, the most up to date pr events can be used with "row_num = 1"
*/
basePrAuthorCteBuilder(author: string, range: number, prevDays: number) {
const startDate = GetPrevDateISOString(prevDays);
const cteBuilder = this.pullRequestGithubEventsRepository
.createQueryBuilder("pull_request_github_events")
.select("*")
.addSelect(`ROW_NUMBER() OVER (PARTITION BY pr_number, repo_name ORDER BY event_time DESC) AS row_num`)
.where(`LOWER("pull_request_github_events"."pr_author_login") = LOWER(:author)`, { author: author.toLowerCase() })
.andWhere(`:start_date::TIMESTAMP >= "pull_request_github_events"."event_time"`, { start_date: startDate })
.andWhere(`:start_date::TIMESTAMP - :range_interval::INTERVAL <= "pull_request_github_events"."event_time"`, {
start_date: startDate,
range_interval: `${range} days`,
});
return cteBuilder;
}
/*
* this CTE gets all pull requests for a given repo in a given time window.
* the prs are partitioned by the most recent event (since there may be multiple
* events for any given pr): this way, the most up to date pr events can be used with "row_num = 1"
*/
baseRepoCteBuilder(repo: string, range: number, prevDays: number) {
const startDate = GetPrevDateISOString(prevDays);
const cteBuilder = this.pullRequestGithubEventsRepository
.createQueryBuilder("pull_request_github_events")
.select("*")
.addSelect(`ROW_NUMBER() OVER (PARTITION BY pr_number, repo_name ORDER BY event_time DESC) AS row_num`)
.where(`LOWER("pull_request_github_events"."repo_name") = LOWER(:repo_name)`, { repo_name: repo.toLowerCase() })
.andWhere(`:start_date::TIMESTAMP >= "pull_request_github_events"."event_time"`)
.andWhere(`:start_date::TIMESTAMP - :range_interval::INTERVAL <= "pull_request_github_events"."event_time"`, {
start_date: startDate,
range_interval: `${range} days`,
});
return cteBuilder;
}
/*
* this function takes a cte builder and gets the common rows for pull_request_github_events
* off of it. It also builds a cte counter to ensure metadata is built correctly
* for the timescale query.
*/
async execCommonTableExpression(
pageOptionsDto: PageOptionsDto,
cteBuilder: SelectQueryBuilder<DbPullRequestGitHubEvents>
) {
const queryBuilder = this.pullRequestGithubEventsRepository.manager
.createQueryBuilder()
.addCommonTableExpression(cteBuilder, "CTE")
.setParameters(cteBuilder.getParameters())
.select(
`event_id,
pr_number,
pr_state,
pr_is_draft,
pr_is_merged,
pr_mergeable_state,
pr_is_rebaseable,
pr_title,
pr_head_label,
pr_base_label,
pr_head_ref,
pr_base_ref,
pr_author_login,
pr_created_at,
pr_closed_at,
pr_merged_at,
pr_updated_at,
pr_comments,
pr_additions,
pr_deletions,
pr_changed_files,
repo_name,
pr_commits`
)
.from("CTE", "CTE")
.where("row_num = 1")
.offset(pageOptionsDto.skip)
.limit(pageOptionsDto.limit);
const cteCounter = this.pullRequestGithubEventsRepository.manager
.createQueryBuilder()
.addCommonTableExpression(cteBuilder, "CTE")
.setParameters(cteBuilder.getParameters())
.select(`COUNT(*) as count`)
.from("CTE", "CTE")
.where("row_num = 1");
const cteCounterResult = await cteCounter.getRawOne<{ count: number }>();
const itemCount = parseInt(`${cteCounterResult?.count ?? "0"}`, 10);
const entities = await queryBuilder.getRawMany<DbPullRequestGitHubEvents>();
const pageMetaDto = new PageMetaDto({ itemCount, pageOptionsDto });
return new PageDto(entities, pageMetaDto);
}
async execVelocityCommonTableExpression(cteBuilder: SelectQueryBuilder<DbPullRequestGitHubEvents>): Promise<number> {
/*
* use a CTE aggregator to get most recent merged PR events
* and aggregate the velocity over a period of time by:
* average(merged date - opened date)
*/
const queryBuilder = this.pullRequestGithubEventsRepository.manager
.createQueryBuilder()
.addCommonTableExpression(cteBuilder, "CTE")
.setParameters(cteBuilder.getParameters())
.select(`COALESCE(AVG(pr_merged_at::DATE - pr_created_at::DATE), 0)::INTEGER AS velocity`)
.from("CTE", "CTE")
.where("row_num = 1")
.andWhere(`pr_is_merged = true`);
const counterResult = await queryBuilder.getRawOne<{ velocity: number }>();
const avgVelocity = parseInt(`${counterResult?.velocity ?? "0"}`, 10);
return avgVelocity;
}
async findVelocityByPrAuthor(author: string, range: number, prevDaysStartDate: number): Promise<number> {
const cteBuilder = this.basePrAuthorCteBuilder(author, range, prevDaysStartDate);
return this.execVelocityCommonTableExpression(cteBuilder);
}
async findVelocityByRepoName(repo: string, range: number, prevDaysStartDate: number): Promise<number> {
const cteBuilder = this.baseRepoCteBuilder(repo, range, prevDaysStartDate);
return this.execVelocityCommonTableExpression(cteBuilder);
}
counterBaseQueryBuilder(range: number, prevDay: number) {
const startDate = GetPrevDateISOString(prevDay);
const queryBuilder = this.pullRequestGithubEventsRepository
.createQueryBuilder("pull_request_github_events")
.select("COUNT(DISTINCT pr_number)", "count")
.where(`:start_date::TIMESTAMP >= "pull_request_github_events"."event_time"`, { start_date: startDate })
.andWhere(`:start_date::TIMESTAMP - :range_interval::INTERVAL <= "pull_request_github_events"."event_time"`, {
start_date: startDate,
range_interval: `${range} days`,
});
return queryBuilder;
}
async getOpenedPrsCountForAuthor(
username: string,
contribType: ContributorStatsTypeEnum,
range: number,
repos?: string[]
): Promise<number> {
const queryBuilder = this.pullRequestGithubEventsRepository.manager
.createQueryBuilder()
.select("COALESCE(COUNT(*), 0) AS prs_created")
.from("pull_request_github_events", "pull_request_github_events")
.where("LOWER(actor_login) = :username", { username })
.andWhere("pr_action = 'opened'")
.groupBy("LOWER(actor_login)");
if (repos && repos.length > 0) {
queryBuilder.andWhere(`LOWER(repo_name) IN (:...repos)`, { repos });
}
applyContribTypeEnumFilters(contribType, queryBuilder, range);
const result = await queryBuilder.getRawOne<{ prs_created: number }>();
const parsedResult = parseFloat(`${result?.prs_created ?? "0"}`);
return parsedResult;
}
async findCountByPrAuthor(author: string, range: number, prevDaysStartDate: number): Promise<number> {
/*
* because PR events may be "opened" or "closed" many times, this inner CTE query gets similar PRs rows
* based on pr_number for a sole pr author.
* This essentially gives a full picture of all PRs in a timeframe for a given the author
* across all repos they've contributed to regardless of their state.
* since getting the whole count over a time period doesn't need to get the most "up to date" event
* we can just do a count over a whole timerange instead of the cte workflow.
*/
const queryBuilder = this.counterBaseQueryBuilder(range, prevDaysStartDate);
queryBuilder.andWhere(`LOWER("pull_request_github_events"."pr_author_login") = LOWER(:author)`, {
author: author.toLowerCase(),
});
const counterResult = await queryBuilder.getRawOne<{ count: number }>();
const itemCount = parseInt(`${counterResult?.count ?? "0"}`, 10);
return itemCount;
}
async findAllPrAuthorCounts({
range,
prevDaysStartDate,
repoNames,
noBots = false,
}: {
range: number;
prevDaysStartDate: number;
repoNames: string[];
noBots?: boolean;
}): Promise<DbContributorCounts[]> {
const startDate = GetPrevDateISOString(prevDaysStartDate);
const queryBuilder = this.pullRequestGithubEventsRepository
.createQueryBuilder("pull_request_github_events")
.select("pr_author_login", "contributor")
.addSelect("count(*)", "count")
.where(`:start_date::TIMESTAMP >= "pull_request_github_events"."event_time"`, { start_date: startDate })
.andWhere(`:start_date::TIMESTAMP - :range_interval::INTERVAL <= "pull_request_github_events"."event_time"`, {
start_date: startDate,
range_interval: `${range} days`,
})
.andWhere(`"pull_request_github_events"."pr_action" = 'opened'`)
.andWhere(`LOWER("pull_request_github_events"."repo_name") IN (:...repoNames)`, {
repoNames,
})
.groupBy("pr_author_login")
.orderBy("count", "DESC");
if (noBots) {
queryBuilder.andWhere("LOWER(pr_author_login) NOT LIKE '%[bot]%'");
}
return queryBuilder.getRawMany<DbContributorCounts>();
}
async isMaintainer(merger: string, repoIds?: string[]): Promise<boolean> {
const queryBuilder = this.baseQueryBuilder();
queryBuilder
.select("COUNT(*)")
.where(
`"pull_request_github_events"."pr_is_merged" = true AND "pull_request_github_events"."pr_action" = 'closed'`
)
.andWhere(`LOWER("pull_request_github_events"."actor_login") = LOWER(:merger)`, {
merger: merger.toLowerCase(),
});
if (repoIds && repoIds.length > 0) {
const repoIdReqs = repoIds.map(async (repoId) =>
this.repoService
.tryFindRepoOrMakeStub({ repoId: Number(repoId) })
.then((repoInfo) => repoInfo.full_name)
.catch(() => undefined)
);
const repoNames = (await Promise.all(repoIdReqs)).filter((name) => name !== undefined);
if (repoNames.length > 0) {
queryBuilder.andWhere(`LOWER("pull_request_github_events"."repo_name") IN (:...repos)`, { repos: repoNames });
}
}
const countResult = await queryBuilder.getRawOne<{ count: number }>();
if (!countResult) {
return false;
}
return countResult.count !== 0;
}
async getOpenedPullReqEventsForLogin(
username: string,
range: number,
repos?: string[]
): Promise<DbPullRequestGitHubEvents[]> {
const queryBuilder = this.baseQueryBuilder()
.where("LOWER(actor_login) = :username", { username })
.andWhere("pr_action = 'opened'")
.andWhere("event_time > NOW() - :range_interval::INTERVAL", { range_interval: `${range} days` });
if (repos && repos.length > 0) {
queryBuilder.andWhere(`LOWER(repo_name) IN (:...repos)`, { repos });
}
return queryBuilder.getMany();
}
async findAllByPrAuthor(author: string, pageOptionsDto: UserPrsDto): Promise<PageDto<DbPullRequestGitHubEvents>> {
const startDate = GetPrevDateISOString(pageOptionsDto.prev_days_start_date);
const range = pageOptionsDto.range!;
const order = pageOptionsDto.orderDirection!;
const repos = pageOptionsDto.repos ? pageOptionsDto.repos.toLowerCase().split(",") : undefined;
const repoIds = pageOptionsDto.repoIds ? pageOptionsDto.repoIds.toLowerCase().split(",") : undefined;
/*
* because PR events may be "opened" or "closed" many times, this inner CTE query gets similar PRs rows
* based on pr_number and repo_name. This essentially gives a full picture of opened/closed PRs
* and their current state
*/
const cteBuilder = this.pullRequestGithubEventsRepository
.createQueryBuilder("pull_request_github_events")
.select("*")
.addSelect(`ROW_NUMBER() OVER (PARTITION BY pr_number, repo_name ORDER BY event_time ${order}) AS row_num`)
.where(`LOWER("pull_request_github_events"."pr_author_login") = LOWER(:author)`, { author: author.toLowerCase() })
.andWhere(`:start_date::TIMESTAMP >= "pull_request_github_events"."event_time"`, { start_date: startDate })
.andWhere(`:start_date::TIMESTAMP - :range_interval::INTERVAL <= "pull_request_github_events"."event_time"`, {
start_date: startDate,
range_interval: `${range} days`,
})
.orderBy("event_time", order);
if (repos && repos.length > 0) {
cteBuilder.andWhere(`LOWER("pull_request_github_events"."repo_name") IN (:...repos)`, { repos });
}
if (repoIds && repoIds.length > 0) {
cteBuilder.andWhere(`"pull_request_github_events"."repo_id" IN (:...repoIds)`, { repoIds });
}
return this.execCommonTableExpression(pageOptionsDto, cteBuilder);
}
async findAllMergedByRefRepo(repo: string, range: number, ref: string): Promise<DbPullRequestGitHubEvents[]> {
const startDate = GetPrevDateISOString(0);
const queryBuilder = this.baseQueryBuilder()
.where(`LOWER(repo_name) = LOWER(:repo_name)`, { repo_name: repo.toLowerCase() })
.andWhere(`:start_date::TIMESTAMP >= event_time`)
.andWhere(`:start_date::TIMESTAMP - :range_interval::INTERVAL <= event_time`, {
start_date: startDate,
range_interval: `${range} days`,
})
.andWhere("pr_is_merged = TRUE")
.andWhere("pr_base_ref = :ref", { ref });
return queryBuilder.getMany();
}
async findAllWithFilters(pageOptionsDto: PullRequestPageOptionsDto): Promise<PageDto<DbPullRequestGitHubEvents>> {
const startDate = GetPrevDateISOString(pageOptionsDto.prev_days_start_date);
const range = pageOptionsDto.range!;
const order = pageOptionsDto.orderDirection!;
const cteBuilder = this.pullRequestGithubEventsRepository
.createQueryBuilder("pull_request_github_events")
.select("*");
if (pageOptionsDto.distinctAuthors) {
const distinctAuthors = pageOptionsDto.distinctAuthors === "true" || pageOptionsDto.distinctAuthors === "1";
if (distinctAuthors) {
cteBuilder.addSelect(
`ROW_NUMBER() OVER (PARTITION BY pr_author_login, repo_name ORDER BY event_time ${order}) AS row_num`
);
} else {
cteBuilder.addSelect(
`ROW_NUMBER() OVER (PARTITION BY pr_number, repo_name ORDER BY event_time ${order}) AS row_num`
);
}
}
cteBuilder
.orderBy("event_time", order)
.where(`:start_date::TIMESTAMP >= "pull_request_github_events"."event_time"`, { start_date: startDate })
.andWhere(`:start_date::TIMESTAMP - :range_interval::INTERVAL <= "pull_request_github_events"."event_time"`, {
start_date: startDate,
range_interval: `${range} days`,
});
/* filter on PR author / contributor */
if (pageOptionsDto.contributor) {
cteBuilder.andWhere(`LOWER("pull_request_github_events"."pr_author_login") = LOWER(:author)`, {
author: pageOptionsDto.contributor,
});
}
/*
* apply repo specific filters (topics, top 100, etc.) - this captures a few
* pre-defined filters provided by the PullRequestPageOptionsDto.
* This will call out to the API connection to get metadata on the repos.
*/
if (pageOptionsDto.filter || pageOptionsDto.topic) {
const filtersDto: RepoSearchOptionsDto = {
filter: pageOptionsDto.filter,
topic: pageOptionsDto.topic,
limit: 50,
skip: 0,
range,
};
const repos = await this.repoService.findAllWithFilters(filtersDto);
const repoNames = repos.data.map((repo) => repo.full_name.toLowerCase());
cteBuilder.andWhere(`LOWER("pull_request_github_events"."repo_name") IN (:...repoNames)`, {
repoNames,
});
}
/* apply user provided repo name filters */
if (pageOptionsDto.repo) {
cteBuilder.andWhere(`LOWER("pull_request_github_events"."repo_name") IN (:...repoNames)`, {
repoNames: pageOptionsDto.repo.toLowerCase().split(","),
});
}
/* apply filters for consumer provided repo ids */
if (pageOptionsDto.repoIds) {
cteBuilder.andWhere(`"pull_request_github_events"."repo_id" IN (:...repoIds)`, {
repoIds: pageOptionsDto.repoIds.split(","),
});
}
/*
* filter on a given list ID: this uses the API connection to find the usernames
* to use for filtering on the timescale data.
*/
if (pageOptionsDto.listId) {
const filtersDto: FilterListContributorsDto = {
skip: 0,
};
const users = await this.userListService.findContributorsByListId(filtersDto, pageOptionsDto.listId);
const userNames = users.data.map((user) => user.username?.toLowerCase());
cteBuilder.andWhere(`LOWER("pull_request_github_events"."pr_author_login") IN (:...userNames)`, {
userNames,
});
}
/* filter on provided status */
if (pageOptionsDto.status) {
cteBuilder.andWhere(`"pull_request_github_events"."pr_state" = LOWER(:status)`, {
status: pageOptionsDto.status,
});
}
return this.execCommonTableExpression(pageOptionsDto, cteBuilder);
}
async findPrStatsByRepo(
repo: string,
range: number,
prevDaysStartDate: number
): Promise<DbPullRequestGitHubEventsHistogram> {
const startDate = GetPrevDateISOString(prevDaysStartDate);
const queryBuilder = this.pullRequestGithubEventsRepository.manager
.createQueryBuilder()
.addSelect("count(*)", "prs_count")
.addSelect("count(CASE WHEN LOWER(pr_action) = 'closed' AND pr_is_merged = true THEN 1 END)", "accepted_prs")
.addSelect("count(CASE WHEN LOWER(pr_action) = 'opened' AND pr_is_draft = false THEN 1 END)", "open_prs")
.addSelect("count(CASE WHEN LOWER(pr_action) = 'closed' AND pr_is_merged = false THEN 1 END)", "closed_prs")
.addSelect("count(CASE WHEN LOWER(pr_action) = 'opened' AND pr_is_draft = true THEN 1 END)", "draft_prs")
.addSelect("count(CASE WHEN LOWER(pr_action) = 'opened' THEN 1 END)", "active_prs")
.addSelect("count(CASE WHEN pr_active_lock_reason = 'spam' THEN 1 END)", "spam_prs")
.addSelect(
`COALESCE(AVG(CASE WHEN pr_is_merged = true THEN pr_merged_at::DATE - pr_created_at::DATE END), 0)::INTEGER AS pr_velocity`
)
.from("pull_request_github_events", "pull_request_github_events")
.where(`LOWER("pull_request_github_events"."repo_name") = LOWER(:repo_name)`, { repo_name: repo.toLowerCase() })
.andWhere(`:start_date::TIMESTAMP >= "pull_request_github_events"."event_time"`, { start_date: startDate })
.andWhere(`:start_date::TIMESTAMP - :range_interval::INTERVAL <= "pull_request_github_events"."event_time"`, {
start_date: startDate,
range_interval: `${range} days`,
});
const result: DbPullRequestGitHubEventsHistogram | undefined = await queryBuilder.getRawOne();
if (!result) {
throw new NotFoundException();
}
return result;
}
async findRossIndexByRepos(repos: string[], range: number): Promise<DbRossIndexHistogram[]> {
const sanitizedRepos = sanitizeRepos(repos);
const outsideContribsCte = this.pullRequestGithubEventsRepository.manager
.createQueryBuilder()
.select(`time_bucket('7 day', event_time)`, "bucket")
.addSelect("COALESCE(COUNT(DISTINCT pr_author_login), 0) AS contributors")
.from("pull_request_github_events", "pull_request_github_events")
.where("pr_author_association = 'NONE'")
.andWhere("pr_is_merged = TRUE")
.andWhere("LOWER(repo_name) IN (:...sanitizedRepos)", { sanitizedRepos })
.andWhere("now() - :range_interval::INTERVAL <= event_time", { range_interval: `${range} days` })
.groupBy("bucket");
const totalContribPrsCte = this.pullRequestGithubEventsRepository.manager
.createQueryBuilder()
.select(`time_bucket('7 day', event_time)`, "bucket")
.addSelect("COALESCE(COUNT(DISTINCT pr_number), 0) AS weekly_prs")
.from("pull_request_github_events", "pull_request_github_events")
.andWhere("LOWER(repo_name) IN (:...sanitizedRepos)", { sanitizedRepos })
.andWhere("now() - :range_interval::INTERVAL <= event_time", { range_interval: `${range} days` })
.groupBy("bucket");
const entityQb = this.pullRequestGithubEventsRepository.manager
.createQueryBuilder()
.addCommonTableExpression(outsideContribsCte, "outside_contributors")
.setParameters(outsideContribsCte.getParameters())
.addCommonTableExpression(totalContribPrsCte, "total_contributor_prs")
.setParameters(totalContribPrsCte.getParameters())
.select("oc.bucket", "bucket")
.addSelect("COALESCE(oc.contributors::FLOAT / NULLIF(prs.weekly_prs, 0), 0)", "index")
.from("outside_contributors", "oc")
.leftJoin("total_contributor_prs", "prs", "oc.bucket = prs.bucket")
.orderBy("oc.bucket", "DESC");
return entityQb.getRawMany<DbRossIndexHistogram>();
}
async findRossContributorsByRepos(repos: string[], range: number): Promise<DbRossContributorsHistogram[]> {
const sanitizedRepos = sanitizeRepos(repos);
const outsideContribsCte = this.pullRequestGithubEventsRepository.manager
.createQueryBuilder()
.select(`time_bucket('7 day', event_time)`, "bucket")
.addSelect("COALESCE(COUNT(DISTINCT pr_author_login), 0) AS contributors")
.from("pull_request_github_events", "pull_request_github_events")
.where("pr_author_association = 'NONE'")
.andWhere("pr_is_merged = TRUE")
.andWhere("LOWER(repo_name) IN (:...sanitizedRepos)", { sanitizedRepos })
.andWhere("now() - :range_interval::INTERVAL <= event_time", { range_interval: `${range} days` })
.groupBy("bucket");
const returningContribsCte = this.pullRequestGithubEventsRepository.manager
.createQueryBuilder()
.select(`time_bucket('7 day', event_time)`, "bucket")
.addSelect("COALESCE(COUNT(DISTINCT pr_author_login), 0) AS contributors")
.from("pull_request_github_events", "pull_request_github_events")
.where("pr_author_association = 'CONTRIBUTOR'")
.andWhere("pr_is_merged = TRUE")
.andWhere("LOWER(repo_name) IN (:...sanitizedRepos)", { sanitizedRepos })
.andWhere("now() - :range_interval::INTERVAL <= event_time", { range_interval: `${range} days` })
.groupBy("bucket");
const orgContribsCte = this.pullRequestGithubEventsRepository.manager
.createQueryBuilder()
.select(`time_bucket('7 day', event_time)`, "bucket")
.addSelect("COALESCE(COUNT(DISTINCT pr_author_login), 0) AS contributors")
.from("pull_request_github_events", "pull_request_github_events")
.where("pr_author_association = 'MEMBER'")
.andWhere("pr_is_merged = TRUE")
.andWhere("LOWER(repo_name) IN (:...sanitizedRepos)", { sanitizedRepos })
.andWhere("now() - :range_interval::INTERVAL <= event_time", { range_interval: `${range} days` })
.groupBy("bucket");
const entityQb = this.pullRequestGithubEventsRepository.manager
.createQueryBuilder()
.addCommonTableExpression(outsideContribsCte, "outside_contributors")
.setParameters(outsideContribsCte.getParameters())
.addCommonTableExpression(returningContribsCte, "returning_contributors")
.setParameters(returningContribsCte.getParameters())
.addCommonTableExpression(orgContribsCte, "org_contributors")
.setParameters(orgContribsCte.getParameters())
.select("outside_contributors.bucket", "bucket")
.addSelect("outside_contributors.contributors", "new")
.addSelect("returning_contributors.contributors", "recurring")
.addSelect("org_contributors.contributors", "internal")
.from("outside_contributors", "outside_contributors")
.leftJoin(
"returning_contributors",
"returning_contributors",
"outside_contributors.bucket = returning_contributors.bucket"
)
.leftJoin("org_contributors", "org_contributors", "outside_contributors.bucket = org_contributors.bucket")
.orderBy("outside_contributors.bucket", "DESC");
return entityQb.getRawMany<DbRossContributorsHistogram>();
}
async genPrHistogram(options: PullRequestHistogramDto): Promise<DbPullRequestGitHubEventsHistogram[]> {
if (!options.contributor && !options.repo && !options.topic && !options.filter && !options.repoIds) {
throw new BadRequestException("must provide contributor, repo, topic, filter, or repoIds");
}
const { range } = options;
const order = options.orderDirection ?? OrderDirectionEnum.DESC;
const startDate = GetPrevDateISOString(options.prev_days_start_date ?? 0);
const width = options.width ?? 1;
const cteBuilder = this.pullRequestGithubEventsRepository
.createQueryBuilder("pull_request_github_events")
.select("*")
.addSelect(`ROW_NUMBER() OVER (PARTITION BY pr_number, repo_name ORDER BY event_time ${order}) AS row_num`)
.orderBy("event_time", order)
.where(`:start_date::TIMESTAMP >= "pull_request_github_events"."event_time"`, { start_date: startDate })
.andWhere(`:start_date::TIMESTAMP - :range_interval::INTERVAL <= "pull_request_github_events"."event_time"`, {
start_date: startDate,
range_interval: `${range} days`,
});
/* filter on PR author / contributor */
if (options.contributor) {
cteBuilder.andWhere(`LOWER("pull_request_github_events"."pr_author_login") = LOWER(:author)`, {
author: options.contributor,
});
}
/*
* apply repo specific filters (topics, top 100, etc.) - this captures a few
* pre-defined filters provided by the PullRequestPageOptionsDto.
* This will call out to the API connection to get metadata on the repos.
*/
if (options.filter || options.topic) {
const filtersDto: RepoSearchOptionsDto = {
filter: options.filter,
topic: options.topic,
limit: 50,
skip: 0,
range,
};
const repos = await this.repoService.findAllWithFilters(filtersDto);
const repoNames = repos.data.map((repo) => repo.full_name.toLowerCase());
cteBuilder.andWhere(`LOWER("pull_request_github_events"."repo_name") IN (:...repoNames)`, {
repoNames,
});
}
/* apply user provided repo name filters */
if (options.repo) {
cteBuilder.andWhere(`LOWER("pull_request_github_events"."repo_name") IN (:...repoNames)`, {
repoNames: options.repo.toLowerCase().split(","),
});
}
/* apply filters for consumer provided repo ids */
if (options.repoIds) {
cteBuilder.andWhere(`"pull_request_github_events"."repo_id" IN (:...repoIds)`, {
repoIds: options.repoIds.split(","),
});
}
const queryBuilder = this.pullRequestGithubEventsRepository.manager
.createQueryBuilder()
.addCommonTableExpression(cteBuilder, "CTE")
.setParameters(cteBuilder.getParameters())
.select(`time_bucket(:width_interval::INTERVAL, event_time)`, "bucket")
.addSelect("count(*)", "prs_count")
.addSelect(
"count(CASE WHEN LOWER(pr_author_association) = 'collaborator' THEN 1 END)",
"collaborator_associated_prs"
)
.addSelect(
"count(CASE WHEN LOWER(pr_author_association) = 'contributor' THEN 1 END)",
"contributor_associated_prs"
)
.addSelect("count(CASE WHEN LOWER(pr_author_association) = 'member' THEN 1 END)", "member_associated_prs")
.addSelect("count(CASE WHEN LOWER(pr_author_association) = 'none' THEN 1 END)", "non_associated_prs")
.addSelect("count(CASE WHEN LOWER(pr_author_association) = 'owner' THEN 1 END)", "owner_associated_prs")
.addSelect("count(CASE WHEN LOWER(pr_action) = 'closed' AND pr_is_merged = true THEN 1 END)", "accepted_prs")
.addSelect("count(CASE WHEN LOWER(pr_action) = 'closed' AND pr_is_merged = false THEN 1 END)", "closed_prs")
.addSelect("count(CASE WHEN LOWER(pr_action) = 'opened' AND pr_is_draft = true THEN 1 END)", "draft_prs")
.addSelect("count(CASE WHEN LOWER(pr_action) = 'opened' THEN 1 END)", "active_prs")
.addSelect("count(CASE WHEN pr_active_lock_reason = 'spam' THEN 1 END)", "spam_prs")
.addSelect(
"COALESCE(AVG(CASE WHEN pr_is_merged = true THEN pr_merged_at::DATE - pr_created_at::DATE END), 0)::INTEGER AS pr_velocity"
)
.addSelect("count(pr_comments)", "comments_on_prs")
.addSelect("count(pr_review_comments)", "review_comments_on_prs")
.from("CTE", "CTE")
.where("row_num = 1")
.groupBy("bucket")
.orderBy("bucket", order)
.setParameter("width_interval", `${width} days`);
return queryBuilder.getRawMany<DbPullRequestGitHubEventsHistogram>();
}
async genContributorsHistogram(options: ContributorHistogramDto): Promise<DbContributorHistogram[]> {
if (!options.repo && !options.repoIds) {
throw new BadRequestException("must provide repo or repoIds");
}
const { range } = options;
const order = options.orderDirection ?? OrderDirectionEnum.DESC;
const startDate = GetPrevDateISOString(options.prev_days_start_date ?? 0);
const width = options.width ?? 1;
const queryBuilder = this.pullRequestGithubEventsRepository
.createQueryBuilder("pull_request_github_events")
.select("time_bucket(:width_interval::INTERVAL, event_time)", "bucket")
.addSelect(`count(distinct "pull_request_github_events"."pr_author_login")`, "contributor_count")
.where(`:start_date::TIMESTAMP >= "pull_request_github_events"."event_time"`, { start_date: startDate })
.andWhere(`:start_date::TIMESTAMP - :range_interval::INTERVAL <= "pull_request_github_events"."event_time"`, {
start_date: startDate,
range_interval: `${range} days`,
})
.andWhere(`"pull_request_github_events"."pr_action" = 'opened'`)
.groupBy("bucket")
.orderBy("bucket", order)
.setParameter("width_interval", `${width} days`);
/* apply user provided repo name filters */
if (options.repo) {
queryBuilder.andWhere(`LOWER("pull_request_github_events"."repo_name") IN (:...repoNames)`, {
repoNames: options.repo.toLowerCase().split(","),
});
}
/* apply filters for consumer provided repo ids */
if (options.repoIds) {
queryBuilder.andWhere(`"pull_request_github_events"."repo_id" IN (:...repoIds)`, {
repoIds: options.repoIds.split(","),
});
}
return queryBuilder.getRawMany<DbContributorHistogram>();
}
async searchAuthors(pageOptionsDto: PullRequestContributorOptionsDto): Promise<PageDto<DbPullRequestContributor>> {
if (!pageOptionsDto.repos && !pageOptionsDto.repoIds && !pageOptionsDto.topic && !pageOptionsDto.filter) {
throw new BadRequestException("must provide repo, repoIds, topic, filter");
}
const startDate = GetPrevDateISOString(pageOptionsDto.prev_days_start_date);
const range = pageOptionsDto.range!;
const order = pageOptionsDto.orderDirection!;
/*
* partitions by pr_author_login only and orders by time to get the latest PRs
* for the given subset of prs for a repo
*/
const cteBuilder = this.pullRequestGithubEventsRepository
.createQueryBuilder("pull_request_github_events")
.select(
`
pull_request_github_events.pr_author_login,
pull_request_github_events.pr_author_id,
pull_request_github_events.event_time`
)
.addSelect(
`ROW_NUMBER() OVER (PARTITION BY pull_request_github_events.pr_author_login ORDER BY pull_request_github_events.event_time ${order}) AS row_num`
)
.orderBy("event_time", order)
.where(`:start_date::TIMESTAMP >= "pull_request_github_events"."event_time"`, { start_date: startDate })
.andWhere(`:start_date::TIMESTAMP - :range_interval::INTERVAL <= "pull_request_github_events"."event_time"`, {
start_date: startDate,
range_interval: `${range} days`,
});
/*
* apply repo specific filters (topics, top 100, etc.) - this captures a few
* pre-defined filters provided by the PullRequestPageOptionsDto.
* This will call out to the API connection to get metadata on the repos.
*/
if (pageOptionsDto.filter || pageOptionsDto.topic) {
const filtersDto: RepoSearchOptionsDto = {
filter: pageOptionsDto.filter,
topic: pageOptionsDto.topic,
limit: 50,
skip: 0,
range,
};
const repos = await this.repoService.findAllWithFilters(filtersDto);
const repoNames = repos.data.map((repo) => repo.full_name.toLowerCase());
cteBuilder.andWhere(`LOWER("pull_request_github_events"."repo_name") IN (:...repoNames)`, {
repoNames,
});
}
/* apply user provided repo name filters */
if (pageOptionsDto.repos) {
cteBuilder.andWhere(`LOWER("pull_request_github_events"."repo_name") IN (:...repoNames)`, {
repoNames: pageOptionsDto.repos.toLowerCase().split(","),
});
}
/* apply filters for consumer provided repo ids */
if (pageOptionsDto.repoIds) {
cteBuilder.andWhere(`"pull_request_github_events"."repo_id" IN (:...repoIds)`, {
repoIds: pageOptionsDto.repoIds.split(","),
});
}
const queryBuilder = this.pullRequestGithubEventsRepository.manager
.createQueryBuilder()
.addCommonTableExpression(cteBuilder, "CTE")
.setParameters(cteBuilder.getParameters())
.select(
`
pr_author_login AS author_login,
pr_author_id AS user_id,
event_time AS updated_at
`
)
.from("CTE", "CTE")
.where("row_num = 1")
.offset(pageOptionsDto.skip)
.limit(pageOptionsDto.limit);
const cteCounter = this.pullRequestGithubEventsRepository.manager
.createQueryBuilder()
.addCommonTableExpression(cteBuilder, "CTE")
.setParameters(cteBuilder.getParameters())
.select(`COUNT(*) as count`)
.from("CTE", "CTE")
.where("row_num = 1");
const cteCounterResult = await cteCounter.getRawOne<{ count: number }>();
const itemCount = parseInt(`${cteCounterResult?.count ?? "0"}`, 10);
const entities = await queryBuilder.getRawMany<DbPullRequestContributor>();
const pageMetaDto = new PageMetaDto({ itemCount, pageOptionsDto });
return new PageDto(entities, pageMetaDto);
}
async findAuthorsWithFilters(
pageOptionsDto: PullRequestContributorInsightsDto,
contribType = "all"
): Promise<PageDto<DbPullRequestContributor>> {
const startDate = GetPrevDateISOString(pageOptionsDto.prev_days_start_date);
const range = pageOptionsDto.range!;
const order = pageOptionsDto.orderDirection!;
const repos = pageOptionsDto.repos.toLowerCase();
/*
* partitions by pr_author_login only and orders by time to get the latest PRs
* for the given subset of prs for a repo
*/
const cteBuilder = this.pullRequestGithubEventsRepository
.createQueryBuilder("pull_request_github_events")
.select(
`
pull_request_github_events.pr_author_login,
pull_request_github_events.pr_author_id,
pull_request_github_events.event_time`
)
.addSelect(
`ROW_NUMBER() OVER (PARTITION BY pull_request_github_events.pr_author_login ORDER BY pull_request_github_events.event_time ${order}) AS row_num`
)
.orderBy("event_time", order);
switch (contribType) {
case "active":
/* capture pr authors in current range window */
cteBuilder
.where(`:start_date::TIMESTAMP >= "pull_request_github_events"."event_time"`, { start_date: startDate })
.andWhere(`:start_date::TIMESTAMP - :range_interval::INTERVAL <= "pull_request_github_events"."event_time"`, {
start_date: startDate,
range_interval: `${range} days`,
});
this.applyActiveContributorsFilter(cteBuilder, repos, startDate, range);
break;
case "new":
/* capture pr authors in current range window */
cteBuilder
.where(`:start_date::TIMESTAMP >= "pull_request_github_events"."event_time"`, { start_date: startDate })
.andWhere(`:start_date::TIMESTAMP - :range_interval::INTERVAL <= "pull_request_github_events"."event_time"`, {
start_date: startDate,
range_interval: `${range} days`,
});
this.applyNewContributorsFilter(cteBuilder, repos, startDate, range);
break;
case "alumni": {
/* capture pr authors in previous range window */
cteBuilder
.where(`:start_date::TIMESTAMP >= "pull_request_github_events"."event_time"`, { start_date: startDate })
.andWhere(
`:start_date::TIMESTAMP - :double_range_interval::INTERVAL <= "pull_request_github_events"."event_time"`,
{
start_date: startDate,
double_range_interval: `${range + range} days`,
}
);
this.applyAlumniContributorsFilter(cteBuilder, repos, startDate, range);
break;
}
default:
/* capture pr authors in current range window */
cteBuilder
.where(`:start_date::TIMESTAMP >= "pull_request_github_events"."event_time"`, {
start_date: startDate,
})
.andWhere(`:start_date::TIMESTAMP - :range_interval::INTERVAL <= "pull_request_github_events"."event_time"`, {
start_date: startDate,
range_interval: `${range} days`,
});
break;
}
/* apply user provided repo name filters */
if (pageOptionsDto.repos) {
cteBuilder.andWhere(`LOWER("pull_request_github_events"."repo_name") IN (:...repoNames)`, {
repoNames: pageOptionsDto.repos.toLowerCase().split(","),
});
}
const queryBuilder = this.pullRequestGithubEventsRepository.manager
.createQueryBuilder()
.addCommonTableExpression(cteBuilder, "CTE")
.setParameters(cteBuilder.getParameters())
.select(
`
pr_author_login AS author_login,
pr_author_id AS user_id,
event_time AS updated_at
`
)
.from("CTE", "CTE")
.where("row_num = 1")
.offset(pageOptionsDto.skip)
.limit(pageOptionsDto.limit);
const cteCounter = this.pullRequestGithubEventsRepository.manager
.createQueryBuilder()
.addCommonTableExpression(cteBuilder, "CTE")
.setParameters(cteBuilder.getParameters())
.select(`COUNT(*) as count`)
.from("CTE", "CTE")
.where("row_num = 1");
const cteCounterResult = await cteCounter.getRawOne<{ count: number }>();
const itemCount = parseInt(`${cteCounterResult?.count ?? "0"}`, 10);
const entities = await queryBuilder.getRawMany<DbPullRequestContributor>();
const pageMetaDto = new PageMetaDto({ itemCount, pageOptionsDto });
return new PageDto(entities, pageMetaDto);
}
/*
* the following "apply filter" functions can be used on a table named "pull_request_github_events"
* with a "pr_author_login" column (likely just the raw pull_request_github_events table in Timescale).
* these can be used to filter for users who are:
*
* - active: have made PRs in the previous time range
* and the time range block before (i.e., the last 30 days and the 30 days before that)
*
* - new: have made a PR in the last time range, but not the previous one:
* (i.e., made a PR in the last 30 days, but not he previous 30 days before that)
*
* - alumni: have not made a PR in the last time range block
* but did in the block before that (i.e., did not contribute in the last 30 days,
* but did before that in the previous 30 day block)
*
*/
private leftJoinCurrentMonthPrs(
queryBuilder: SelectQueryBuilder<DbPullRequestGitHubEvents>,
repos = "",
startDate: string,
range = 30
): SelectQueryBuilder<DbPullRequestGitHubEvents> {
queryBuilder.leftJoin(
(qb: SelectQueryBuilder<DbPullRequestGitHubEvents>) => {
qb.select("DISTINCT LOWER(pr_author_login)", "pr_author_login")
.from("pull_request_github_events", "pull_request_github_events")
.where("event_time BETWEEN :start_date::TIMESTAMP - :range_interval::INTERVAL AND :start_date::TIMESTAMP", {
start_date: startDate,
range_interval: `${range} days`,
});
if (repos && repos.length > 0) {
qb.andWhere("LOWER(repo_name) IN (:...repoNames)", {
repoNames: repos.toLowerCase().split(","),
});
}
return qb;
},
"current_month_prs",
"pull_request_github_events.pr_author_login = current_month_prs.pr_author_login"
);
return queryBuilder;
}
private leftJoinPreviousMonthPrs(
queryBuilder: SelectQueryBuilder<DbPullRequestGitHubEvents>,
repos = "",
startDate: string,
range = 30
): SelectQueryBuilder<DbPullRequestGitHubEvents> {
queryBuilder.leftJoin(
(qb: SelectQueryBuilder<DbPullRequestGitHubEvents>) => {
qb.select("DISTINCT LOWER(pr_author_login)", "pr_author_login")
.from("pull_request_github_events", "pull_request_github_events")
.where(
"event_time BETWEEN :start_date::TIMESTAMP - :double_range_interval::INTERVAL AND :start_date::TIMESTAMP - :range_interval::INTERVAL",
{
start_date: startDate,
range_interval: `${range} days`,
double_range_interval: `${range + range} days`,
}
);
if (repos && repos.length > 0) {
qb.andWhere("LOWER(repo_name) IN (:...repoNames)", {
repoNames: repos.toLowerCase().split(","),
});
}
return qb;
},
"previous_month_prs",
"pull_request_github_events.pr_author_login = previous_month_prs.pr_author_login"
);
return queryBuilder;
}
private applyActiveContributorsFilter(
queryBuilder: SelectQueryBuilder<DbPullRequestGitHubEvents>,
repos = "",
startDate: string,
range = 30
): SelectQueryBuilder<DbPullRequestGitHubEvents> {
this.leftJoinCurrentMonthPrs(queryBuilder, repos, startDate, range);
this.leftJoinPreviousMonthPrs(queryBuilder, repos, startDate, range);
queryBuilder
.andWhere(`"previous_month_prs"."pr_author_login" IS NOT NULL`)
.andWhere(`"current_month_prs"."pr_author_login" IS NOT NULL`);
return queryBuilder;
}
private applyNewContributorsFilter(
queryBuilder: SelectQueryBuilder<DbPullRequestGitHubEvents>,
repos = "",
startDate: string,
range = 30
): SelectQueryBuilder<DbPullRequestGitHubEvents> {
this.leftJoinCurrentMonthPrs(queryBuilder, repos, startDate, range);
this.leftJoinPreviousMonthPrs(queryBuilder, repos, startDate, range);
queryBuilder
.andWhere(`"previous_month_prs"."pr_author_login" IS NULL`)
.andWhere(`"current_month_prs"."pr_author_login" IS NOT NULL`);
return queryBuilder;
}
private applyAlumniContributorsFilter(
queryBuilder: SelectQueryBuilder<DbPullRequestGitHubEvents>,
repos = "",
startDate: string,
range = 30
): SelectQueryBuilder<DbPullRequestGitHubEvents> {
this.leftJoinCurrentMonthPrs(queryBuilder, repos, startDate, range);
this.leftJoinPreviousMonthPrs(queryBuilder, repos, startDate, range);
queryBuilder
.andWhere(`"previous_month_prs"."pr_author_login" IS NOT NULL`)
.andWhere(`"current_month_prs"."pr_author_login" IS NULL`);
return queryBuilder;
}
}