import { Injectable } from "@nestjs/common";
import { Repository, SelectQueryBuilder } from "typeorm";
import { InjectRepository } from "@nestjs/typeorm";
import { orderDbContributorStats } from "../timescale/common/most-active-contributors";
import { PageDto } from "../common/dtos/page.dto";
import { DbPullRequestGitHubEvents } from "../timescale/entities/pull_request_github_event.entity";
import { DbContributorStat } from "../timescale/entities/contributor_devstat.entity";
import { ContributionsPageDto } from "../timescale/dtos/contrib-page.dto";
import { ContributionPageMetaDto } from "../timescale/dtos/contrib-page-meta.dto";
import { ContributorDevstatsService } from "../timescale/contrib-stats.service";
import { ContributorStatsTypeEnum, MostActiveContributorsDto } from "../timescale/dtos/most-active-contrib.dto";
import { DbUserListContributor } from "./entities/user-list-contributor.entity";
import { ContributionsTimeframeDto } from "./dtos/contributions-timeframe.dto";
import { DbContributionStatTimeframe } from "./entities/contributions-timeframe.entity";
import { ContributionsByProjectDto } from "./dtos/contributions-by-project.dto";
import { DbContributionsProjects } from "./entities/contributions-projects.entity";
import { TopProjectsDto } from "./dtos/top-projects.dto";
import { DbContributorCategoryTimeframe } from "./entities/contributors-timeframe.entity";
@Injectable()
export class UserListEventsStatsService {
constructor(
@InjectRepository(DbPullRequestGitHubEvents, "TimescaleConnection")
private pullRequestGithubEventsRepository: Repository<DbPullRequestGitHubEvents>,
@InjectRepository(DbUserListContributor, "ApiConnection")
private userListContributorRepository: Repository<DbUserListContributor>,
private contributorDevstatsService: ContributorDevstatsService
) {}
baseQueryBuilder(): SelectQueryBuilder<DbPullRequestGitHubEvents> {
const builder = this.pullRequestGithubEventsRepository.createQueryBuilder();
return builder;
}
private userListUsersQueryBuilder(): SelectQueryBuilder<DbUserListContributor> {
const builder = this.userListContributorRepository.createQueryBuilder("user_list_contributors");
return builder;
}
async findContributorsByType(
listId: string,
range: number,
type: ContributorStatsTypeEnum = ContributorStatsTypeEnum.all,
repos?: string[]
): Promise<string[]> {
const now = new Date().toISOString();
const userListUsersBuilder = this.userListUsersQueryBuilder();
userListUsersBuilder
.leftJoin("users", "users", "user_list_contributors.user_id=users.id")
.where("user_list_contributors.list_id = :listId", { listId });
const allUsers = await userListUsersBuilder.getMany();
if (allUsers.length === 0) {
return [];
}
const users = allUsers
.map((user) => (user.username ? user.username.toLowerCase() : ""))
.filter((user) => user !== "");
if (users.length === 0) {
return [];
}
const userListQueryBuilder =
this.pullRequestGithubEventsRepository.manager.createQueryBuilder() as SelectQueryBuilder<DbPullRequestGitHubEvents>;
userListQueryBuilder.select("DISTINCT users.login", "login");
userListQueryBuilder.from((qb: SelectQueryBuilder<DbPullRequestGitHubEvents>) => {
qb.select("LOWER(actor_login)", "login")
.distinct()
.from("pull_request_github_events", "pull_request_github_events")
.where("LOWER(actor_login) IN (:...users)", { users });
if (repos && repos.length > 0) {
qb.andWhere("LOWER(repo_name) IN (:...repos)", { repos });
}
return qb;
}, "users");
switch (type) {
case ContributorStatsTypeEnum.all:
break;
case ContributorStatsTypeEnum.active:
this.applyActiveContributorsFilter({ queryBuilder: userListQueryBuilder, users, repos, startDate: now, range });
break;
case ContributorStatsTypeEnum.new:
this.applyNewContributorsFilter({ queryBuilder: userListQueryBuilder, users, repos, startDate: now, range });
break;
case ContributorStatsTypeEnum.alumni: {
this.applyAlumniContributorsFilter({ queryBuilder: userListQueryBuilder, users, repos, startDate: now, range });
break;
}
default:
break;
}
const entityQb = this.pullRequestGithubEventsRepository.manager
.createQueryBuilder()
.addCommonTableExpression(userListQueryBuilder, "CTE")
.setParameters(userListQueryBuilder.getParameters())
.select("login")
.from("CTE", "CTE");
const entities = await entityQb.getRawMany<{ login: string }>();
return entities.map((result) => result.login);
}
async findAllListContributorStats(
pageOptionsDto: MostActiveContributorsDto,
listId: string
): Promise<PageDto<DbContributorStat>> {
const userListUsersBuilder = this.userListUsersQueryBuilder();
userListUsersBuilder
.leftJoin("users", "users", "user_list_contributors.user_id=users.id")
.where("user_list_contributors.list_id = :listId", { listId });
const users = await userListUsersBuilder.getMany();
if (users.length === 0) {
return new ContributionsPageDto(
new Array<DbContributorStat>(),
new ContributionPageMetaDto({ itemCount: 0, pageOptionsDto }, 0)
);
}
/*
* ignores 2 usernames that cause problems when crunching this data:
*
* 1. Usernames that somehow are an empty string. This shouldn't happen
* since a username is more or less a required field in the users table.
* but we have seen this from time to time which can cause problems trying
* to crunch timescale data on all an empty username
*
* 2. Ignores bot accounts: many bot accounts make an astronomical number of
* commits / comments / reviews etc. etc. And attempting to crunch all that data
* for the bot accounts won't work and would require massive resources.
*/
const filteredUsers = users
.map((user) => (user.username ? user.username.toLowerCase() : ""))
.filter((user) => user !== "" && !user.endsWith("[bot]"));
if (filteredUsers.length === 0) {
return new ContributionsPageDto(
new Array<DbContributorStat>(),
new ContributionPageMetaDto({ itemCount: 0, pageOptionsDto }, 0)
);
}
const userStats = await this.contributorDevstatsService.findAllContributorStats(pageOptionsDto, filteredUsers);
orderDbContributorStats(pageOptionsDto, userStats);
const { skip } = pageOptionsDto;
const limit = pageOptionsDto.limit!;
const slicedUserStats = userStats.slice(skip, skip + limit);
let totalCount = 0;
userStats.forEach((entity) => {
totalCount += entity.total_contributions;
});
const pageMetaDto = new ContributionPageMetaDto({ itemCount: userStats.length, pageOptionsDto }, totalCount);
return new ContributionsPageDto(slicedUserStats, pageMetaDto);
}
async findContributionsInTimeFrame(
options: ContributionsTimeframeDto,
listId: string
): Promise<DbContributionStatTimeframe[]> {
const range = options.range!;
const contribType = options.contributorType;
const repos = options.repos ? options.repos.toLowerCase().split(",") : undefined;
const allUsers = await this.findContributorsByType(listId, range, contribType, repos);
if (allUsers.length === 0) {
return [];
}
const stats = await this.contributorDevstatsService.findAllContributionsByTimeframe(options, allUsers);
return stats.sort((a, b) => new Date(b.bucket).getTime() - new Date(a.bucket).getTime());
}
async findContributionsByProject(
options: ContributionsByProjectDto,
listId: string
): Promise<DbContributionsProjects[]> {
const userListUsersBuilder = this.userListUsersQueryBuilder();
userListUsersBuilder
.leftJoin("users", "users", "user_list_contributors.user_id=users.id")
.where("user_list_contributors.list_id = :listId", { listId });
const users = await userListUsersBuilder.getMany();
if (users.length === 0) {
return [];
}
/*
* ignore both users who have a missing username for some reason
* and bot users. This helps prevent extremely long running queries in the
* database
*/
const filteredUsers = users
.map((user) => (user.username ? user.username.toLowerCase() : ""))
.filter((user) => user !== "" && !user.endsWith("[bot]"));
if (filteredUsers.length === 0) {
return [];
}
return this.contributorDevstatsService.findAllContributionsByProject(options, filteredUsers);
}
async findTopContributorsByProject(options: TopProjectsDto, listId: string): Promise<DbContributorStat[]> {
const range = options.range!;
const repos = options.repos ? options.repos.toLowerCase().split(",") : undefined;
const allUsers = await this.findContributorsByType(listId, range, undefined, repos);
if (allUsers.length === 0) {
return [];
}
return this.contributorDevstatsService.findAllContributorStats(
{
...options,
skip: 0,
},
allUsers
);
}
async findContributorCategoriesByTimeframe(
options: ContributionsTimeframeDto,
listId: string
): Promise<DbContributorCategoryTimeframe[]> {
const range = options.range!;
const repos = options.repos ? options.repos.toLowerCase().split(",") : undefined;
const aggregatedStats: Record<string, DbContributorCategoryTimeframe> = {};
const allUsers = await this.findContributorsByType(listId, range, ContributorStatsTypeEnum.all, repos);
if (allUsers.length === 0) {
return [];
}
const activeUsers = await this.findContributorsByType(listId, range, ContributorStatsTypeEnum.active, repos);
const newUsers = await this.findContributorsByType(listId, range, ContributorStatsTypeEnum.new, repos);
const alumniUsers = await this.findContributorsByType(listId, range, ContributorStatsTypeEnum.alumni, repos);
/*
* for each category of contributor, we aggregate their contributions into buckets
* using the "findAllContributionsByTimeframe" meta function. This returns a "bucket"
* member that can be used to generate the record of all/active/alum/new contributors.
*/
await Promise.all(
allUsers.map(async (user) => {
const allUsersStats = await this.contributorDevstatsService.findAllContributionsByTimeframe(options, [user]);
allUsersStats.forEach((stat) => {
const eventTime = new Date(stat.bucket);
const bucket = new Date(
eventTime.getUTCFullYear(),
eventTime.getUTCMonth(),
eventTime.getUTCDate()
).toISOString();
if (!(bucket in aggregatedStats)) {
aggregatedStats[bucket] = new DbContributorCategoryTimeframe();
aggregatedStats[bucket].bucket = bucket;
aggregatedStats[bucket].all = 0;
aggregatedStats[bucket].active = 0;
aggregatedStats[bucket].new = 0;
aggregatedStats[bucket].alumni = 0;
}
aggregatedStats[bucket].all += 1;
});
})
);
await Promise.all(
activeUsers.map(async (user) => {
const activeUsersStats = await this.contributorDevstatsService.findAllContributionsByTimeframe(options, [user]);
activeUsersStats.forEach((stat) => {
const eventTime = new Date(stat.bucket);
const bucket = new Date(
eventTime.getUTCFullYear(),
eventTime.getUTCMonth(),
eventTime.getUTCDate()
).toISOString();
if (!(bucket in aggregatedStats)) {
aggregatedStats[bucket] = new DbContributorCategoryTimeframe();
aggregatedStats[bucket].bucket = bucket;
aggregatedStats[bucket].all = 0;
aggregatedStats[bucket].active = 0;
aggregatedStats[bucket].new = 0;
aggregatedStats[bucket].alumni = 0;
}
aggregatedStats[bucket].active += 1;
});
})
);
await Promise.all(
newUsers.map(async (user) => {
const newUsersStats = await this.contributorDevstatsService.findAllContributionsByTimeframe(options, [user]);
newUsersStats.forEach((stat) => {
const eventTime = new Date(stat.bucket);
const bucket = new Date(
eventTime.getUTCFullYear(),
eventTime.getUTCMonth(),
eventTime.getUTCDate()
).toISOString();
if (!(bucket in aggregatedStats)) {
aggregatedStats[bucket] = new DbContributorCategoryTimeframe();
aggregatedStats[bucket].bucket = bucket;
aggregatedStats[bucket].all = 0;
aggregatedStats[bucket].active = 0;
aggregatedStats[bucket].new = 0;
aggregatedStats[bucket].alumni = 0;
}
aggregatedStats[bucket].new += 1;
});
})
);
await Promise.all(
alumniUsers.map(async (user) => {
const alumniUsersStats = await this.contributorDevstatsService.findAllContributionsByTimeframe(options, [user]);
alumniUsersStats.forEach((stat) => {
const eventTime = new Date(stat.bucket);
const bucket = new Date(
eventTime.getUTCFullYear(),
eventTime.getUTCMonth(),
eventTime.getUTCDate()
).toISOString();
if (!(bucket in aggregatedStats)) {
aggregatedStats[bucket] = new DbContributorCategoryTimeframe();
aggregatedStats[bucket].bucket = bucket;
aggregatedStats[bucket].all = 0;
aggregatedStats[bucket].active = 0;
aggregatedStats[bucket].new = 0;
aggregatedStats[bucket].alumni = 0;
}
aggregatedStats[bucket].alumni += 1;
});
})
);
const results = Object.values(aggregatedStats);
return results.sort((a, b) => new Date(b.bucket).getTime() - new Date(a.bucket).getTime());
}
/*
* the following "apply filter" functions can be used on a table named "users"
* with a "login" column (likely derived from the pull_request_github_events table).
* these can be used to filter for users who are:
*
* - active: have made contributions in the previous time range
* and the time range block before (i.e., the last 30 days and the 30 days before that)
*
* - new: have made a contribution in the last time range, but not the previous one:
* (i.e., made a contribution in the last 30 days, but not he previous 30 days before that)
*
* - alumni: have not made a contribution in the last time range block
* but did in the block before that (i.e., did not contribute in the last 30 days,
* but did before that in the previous 30 day block)
*
*/
private leftJoinCurrentMonthPrs({
queryBuilder,
users,
repos,
startDate,
range = 30,
}: {
queryBuilder: SelectQueryBuilder<DbPullRequestGitHubEvents>;
users: string[];
repos?: string[];
startDate: string;
range: number;
}): SelectQueryBuilder<DbPullRequestGitHubEvents> {
queryBuilder.leftJoin(
(qb: SelectQueryBuilder<DbPullRequestGitHubEvents>) => {
qb.select("DISTINCT LOWER(actor_login)", "actor_login")
.from("pull_request_github_events", "pull_request_github_events")
.where("LOWER(actor_login) IN (:...users)", { users })
.andWhere(
"event_time BETWEEN :start_date::TIMESTAMP - :range_interval::INTERVAL AND :start_date::TIMESTAMP",
{
start_date: startDate,
range_interval: `${range} days`,
}
);
if (repos && repos.length > 0) {
qb.andWhere("LOWER(repo_name) IN (:...repos)", { repos });
}
return qb;
},
"current_month_prs",
"users.login = current_month_prs.actor_login"
);
return queryBuilder;
}
private leftJoinPreviousMonthPrs({
queryBuilder,
users,
repos,
startDate,
range = 30,
}: {
queryBuilder: SelectQueryBuilder<DbPullRequestGitHubEvents>;
users: string[];
repos?: string[];
startDate: string;
range: number;
}): SelectQueryBuilder<DbPullRequestGitHubEvents> {
queryBuilder.leftJoin(
(qb: SelectQueryBuilder<DbPullRequestGitHubEvents>) => {
qb.select("DISTINCT LOWER(actor_login)", "actor_login")
.from("pull_request_github_events", "pull_request_github_events")
.where("LOWER(actor_login) IN (:...users)", { users })
.andWhere(
"event_time BETWEEN :start_date::TIMESTAMP - :double_range_interval::INTERVAL AND :start_date::TIMESTAMP - :range_interval::INTERVAL",
{
start_date: startDate,
range_interval: `${range} days`,
double_range_interval: `${range + range} days`,
}
);
if (repos && repos.length > 0) {
qb.andWhere("LOWER(repo_name) IN (:...repos)", { repos });
}
return qb;
},
"previous_month_prs",
"users.login = previous_month_prs.actor_login"
);
return queryBuilder;
}
private applyActiveContributorsFilter({
queryBuilder,
users,
repos,
startDate,
range,
}: {
queryBuilder: SelectQueryBuilder<DbPullRequestGitHubEvents>;
users: string[];
repos?: string[];
startDate: string;
range: number;
}): SelectQueryBuilder<DbPullRequestGitHubEvents> {
this.leftJoinCurrentMonthPrs({ queryBuilder, users, repos, startDate, range });
this.leftJoinPreviousMonthPrs({ queryBuilder, users, repos, startDate, range });
queryBuilder
.andWhere(`"previous_month_prs"."actor_login" IS NOT NULL`)
.andWhere(`"current_month_prs"."actor_login" IS NOT NULL`);
return queryBuilder;
}
private applyNewContributorsFilter({
queryBuilder,
users,
repos,
startDate,
range,
}: {
queryBuilder: SelectQueryBuilder<DbPullRequestGitHubEvents>;
users: string[];
repos?: string[];
startDate: string;
range: number;
}): SelectQueryBuilder<DbPullRequestGitHubEvents> {
this.leftJoinCurrentMonthPrs({ queryBuilder, users, repos, startDate, range });
this.leftJoinPreviousMonthPrs({ queryBuilder, users, repos, startDate, range });
queryBuilder
.andWhere(`"previous_month_prs"."actor_login" IS NULL`)
.andWhere(`"current_month_prs"."actor_login" IS NOT NULL`);
return queryBuilder;
}
private applyAlumniContributorsFilter({
queryBuilder,
users,
repos,
startDate,
range,
}: {
queryBuilder: SelectQueryBuilder<DbPullRequestGitHubEvents>;
users: string[];
repos?: string[];
startDate: string;
range: number;
}): SelectQueryBuilder<DbPullRequestGitHubEvents> {
this.leftJoinCurrentMonthPrs({ queryBuilder, users, repos, startDate, range });
this.leftJoinPreviousMonthPrs({ queryBuilder, users, repos, startDate, range });
queryBuilder
.andWhere(`"previous_month_prs"."actor_login" IS NOT NULL`)
.andWhere(`"current_month_prs"."actor_login" IS NULL`);
return queryBuilder;
}
}