File

src/user-lists/user-list-events-stats.service.ts

Index

Methods

Constructor

constructor(pullRequestGithubEventsRepository: Repository<DbPullRequestGitHubEvents>, userListContributorRepository: Repository<DbUserListContributor>, contributorDevstatsService: ContributorDevstatsService)
Parameters :
Name Type Optional
pullRequestGithubEventsRepository Repository<DbPullRequestGitHubEvents> No
userListContributorRepository Repository<DbUserListContributor> No
contributorDevstatsService ContributorDevstatsService No

Methods

Private applyActiveContributorsFilter
applyActiveContributorsFilter(undefined: literal type)
Parameters :
Name Type Optional
literal type No
Private applyAlumniContributorsFilter
applyAlumniContributorsFilter(undefined: literal type)
Parameters :
Name Type Optional
literal type No
Private applyNewContributorsFilter
applyNewContributorsFilter(undefined: literal type)
Parameters :
Name Type Optional
literal type No
baseQueryBuilder
baseQueryBuilder()
Async findAllListContributorStats
findAllListContributorStats(pageOptionsDto: MostActiveContributorsDto, listId: string)
Parameters :
Name Type Optional
pageOptionsDto MostActiveContributorsDto No
listId string No
Async findContributionsByProject
findContributionsByProject(options: ContributionsByProjectDto, listId: string)
Parameters :
Name Type Optional
options ContributionsByProjectDto No
listId string No
Async findContributionsInTimeFrame
findContributionsInTimeFrame(options: ContributionsTimeframeDto, listId: string)
Parameters :
Name Type Optional
options ContributionsTimeframeDto No
listId string No
Async findContributorCategoriesByTimeframe
findContributorCategoriesByTimeframe(options: ContributionsTimeframeDto, listId: string)
Parameters :
Name Type Optional
options ContributionsTimeframeDto No
listId string No
Async findContributorsByType
findContributorsByType(listId: string, range: number, type: ContributorStatsTypeEnum, repos?: string[])
Parameters :
Name Type Optional Default value
listId string No
range number No
type ContributorStatsTypeEnum No ContributorStatsTypeEnum.all
repos string[] Yes
Returns : Promise<string[]>
Async findTopContributorsByProject
findTopContributorsByProject(options: TopProjectsDto, listId: string)
Parameters :
Name Type Optional
options TopProjectsDto No
listId string No
Private leftJoinCurrentMonthPrs
leftJoinCurrentMonthPrs(undefined: literal type)
Parameters :
Name Type Optional
literal type No
Private leftJoinPreviousMonthPrs
leftJoinPreviousMonthPrs(undefined: literal type)
Parameters :
Name Type Optional
literal type No
Private userListUsersQueryBuilder
userListUsersQueryBuilder()
import { Injectable } from "@nestjs/common";
import { Repository, SelectQueryBuilder } from "typeorm";
import { InjectRepository } from "@nestjs/typeorm";

import { orderDbContributorStats } from "../timescale/common/most-active-contributors";
import { PageDto } from "../common/dtos/page.dto";
import { DbPullRequestGitHubEvents } from "../timescale/entities/pull_request_github_event.entity";
import { DbContributorStat } from "../timescale/entities/contributor_devstat.entity";
import { ContributionsPageDto } from "../timescale/dtos/contrib-page.dto";
import { ContributionPageMetaDto } from "../timescale/dtos/contrib-page-meta.dto";
import { ContributorDevstatsService } from "../timescale/contrib-stats.service";
import { ContributorStatsTypeEnum, MostActiveContributorsDto } from "../timescale/dtos/most-active-contrib.dto";
import { DbUserListContributor } from "./entities/user-list-contributor.entity";
import { ContributionsTimeframeDto } from "./dtos/contributions-timeframe.dto";
import { DbContributionStatTimeframe } from "./entities/contributions-timeframe.entity";
import { ContributionsByProjectDto } from "./dtos/contributions-by-project.dto";
import { DbContributionsProjects } from "./entities/contributions-projects.entity";
import { TopProjectsDto } from "./dtos/top-projects.dto";
import { DbContributorCategoryTimeframe } from "./entities/contributors-timeframe.entity";

@Injectable()
export class UserListEventsStatsService {
  constructor(
    @InjectRepository(DbPullRequestGitHubEvents, "TimescaleConnection")
    private pullRequestGithubEventsRepository: Repository<DbPullRequestGitHubEvents>,
    @InjectRepository(DbUserListContributor, "ApiConnection")
    private userListContributorRepository: Repository<DbUserListContributor>,
    private contributorDevstatsService: ContributorDevstatsService
  ) {}

  baseQueryBuilder(): SelectQueryBuilder<DbPullRequestGitHubEvents> {
    const builder = this.pullRequestGithubEventsRepository.createQueryBuilder();

    return builder;
  }

  private userListUsersQueryBuilder(): SelectQueryBuilder<DbUserListContributor> {
    const builder = this.userListContributorRepository.createQueryBuilder("user_list_contributors");

    return builder;
  }

  async findContributorsByType(
    listId: string,
    range: number,
    type: ContributorStatsTypeEnum = ContributorStatsTypeEnum.all,
    repos?: string[]
  ): Promise<string[]> {
    const now = new Date().toISOString();

    const userListUsersBuilder = this.userListUsersQueryBuilder();

    userListUsersBuilder
      .leftJoin("users", "users", "user_list_contributors.user_id=users.id")
      .where("user_list_contributors.list_id = :listId", { listId });

    const allUsers = await userListUsersBuilder.getMany();

    if (allUsers.length === 0) {
      return [];
    }

    const users = allUsers
      .map((user) => (user.username ? user.username.toLowerCase() : ""))
      .filter((user) => user !== "");

    if (users.length === 0) {
      return [];
    }

    const userListQueryBuilder =
      this.pullRequestGithubEventsRepository.manager.createQueryBuilder() as SelectQueryBuilder<DbPullRequestGitHubEvents>;

    userListQueryBuilder.select("DISTINCT users.login", "login");

    userListQueryBuilder.from((qb: SelectQueryBuilder<DbPullRequestGitHubEvents>) => {
      qb.select("LOWER(actor_login)", "login")
        .distinct()
        .from("pull_request_github_events", "pull_request_github_events")
        .where("LOWER(actor_login) IN (:...users)", { users });

      if (repos && repos.length > 0) {
        qb.andWhere("LOWER(repo_name) IN (:...repos)", { repos });
      }

      return qb;
    }, "users");

    switch (type) {
      case ContributorStatsTypeEnum.all:
        break;

      case ContributorStatsTypeEnum.active:
        this.applyActiveContributorsFilter({ queryBuilder: userListQueryBuilder, users, repos, startDate: now, range });
        break;

      case ContributorStatsTypeEnum.new:
        this.applyNewContributorsFilter({ queryBuilder: userListQueryBuilder, users, repos, startDate: now, range });
        break;

      case ContributorStatsTypeEnum.alumni: {
        this.applyAlumniContributorsFilter({ queryBuilder: userListQueryBuilder, users, repos, startDate: now, range });
        break;
      }

      default:
        break;
    }

    const entityQb = this.pullRequestGithubEventsRepository.manager
      .createQueryBuilder()
      .addCommonTableExpression(userListQueryBuilder, "CTE")
      .setParameters(userListQueryBuilder.getParameters())
      .select("login")
      .from("CTE", "CTE");

    const entities = await entityQb.getRawMany<{ login: string }>();

    return entities.map((result) => result.login);
  }

  async findAllListContributorStats(
    pageOptionsDto: MostActiveContributorsDto,
    listId: string
  ): Promise<PageDto<DbContributorStat>> {
    const userListUsersBuilder = this.userListUsersQueryBuilder();

    userListUsersBuilder
      .leftJoin("users", "users", "user_list_contributors.user_id=users.id")
      .where("user_list_contributors.list_id = :listId", { listId });

    const users = await userListUsersBuilder.getMany();

    if (users.length === 0) {
      return new ContributionsPageDto(
        new Array<DbContributorStat>(),
        new ContributionPageMetaDto({ itemCount: 0, pageOptionsDto }, 0)
      );
    }

    /*
     * ignores 2 usernames that cause problems when crunching this data:
     *
     * 1. Usernames that somehow are an empty string. This shouldn't happen
     *    since a username is more or less a required field in the users table.
     *    but we have seen this from time to time which can cause problems trying
     *    to crunch timescale data on all an empty username
     *
     * 2. Ignores bot accounts: many bot accounts make an astronomical number of
     *    commits / comments / reviews etc. etc. And attempting to crunch all that data
     *    for the bot accounts won't work and would require massive resources.
     */
    const filteredUsers = users
      .map((user) => (user.username ? user.username.toLowerCase() : ""))
      .filter((user) => user !== "" && !user.endsWith("[bot]"));

    if (filteredUsers.length === 0) {
      return new ContributionsPageDto(
        new Array<DbContributorStat>(),
        new ContributionPageMetaDto({ itemCount: 0, pageOptionsDto }, 0)
      );
    }

    const userStats = await this.contributorDevstatsService.findAllContributorStats(pageOptionsDto, filteredUsers);

    orderDbContributorStats(pageOptionsDto, userStats);

    const { skip } = pageOptionsDto;
    const limit = pageOptionsDto.limit!;
    const slicedUserStats = userStats.slice(skip, skip + limit);

    let totalCount = 0;

    userStats.forEach((entity) => {
      totalCount += entity.total_contributions;
    });

    const pageMetaDto = new ContributionPageMetaDto({ itemCount: userStats.length, pageOptionsDto }, totalCount);

    return new ContributionsPageDto(slicedUserStats, pageMetaDto);
  }

  async findContributionsInTimeFrame(
    options: ContributionsTimeframeDto,
    listId: string
  ): Promise<DbContributionStatTimeframe[]> {
    const range = options.range!;
    const contribType = options.contributorType;
    const repos = options.repos ? options.repos.toLowerCase().split(",") : undefined;

    const allUsers = await this.findContributorsByType(listId, range, contribType, repos);

    if (allUsers.length === 0) {
      return [];
    }

    const stats = await this.contributorDevstatsService.findAllContributionsByTimeframe(options, allUsers);

    return stats.sort((a, b) => new Date(b.bucket).getTime() - new Date(a.bucket).getTime());
  }

  async findContributionsByProject(
    options: ContributionsByProjectDto,
    listId: string
  ): Promise<DbContributionsProjects[]> {
    const userListUsersBuilder = this.userListUsersQueryBuilder();

    userListUsersBuilder
      .leftJoin("users", "users", "user_list_contributors.user_id=users.id")
      .where("user_list_contributors.list_id = :listId", { listId });

    const users = await userListUsersBuilder.getMany();

    if (users.length === 0) {
      return [];
    }

    /*
     * ignore both users who have a missing username for some reason
     * and bot users. This helps prevent extremely long running queries in the
     * database
     */
    const filteredUsers = users
      .map((user) => (user.username ? user.username.toLowerCase() : ""))
      .filter((user) => user !== "" && !user.endsWith("[bot]"));

    if (filteredUsers.length === 0) {
      return [];
    }

    return this.contributorDevstatsService.findAllContributionsByProject(options, filteredUsers);
  }

  async findTopContributorsByProject(options: TopProjectsDto, listId: string): Promise<DbContributorStat[]> {
    const range = options.range!;
    const repos = options.repos ? options.repos.toLowerCase().split(",") : undefined;

    const allUsers = await this.findContributorsByType(listId, range, undefined, repos);

    if (allUsers.length === 0) {
      return [];
    }

    return this.contributorDevstatsService.findAllContributorStats(
      {
        ...options,
        skip: 0,
      },
      allUsers
    );
  }

  async findContributorCategoriesByTimeframe(
    options: ContributionsTimeframeDto,
    listId: string
  ): Promise<DbContributorCategoryTimeframe[]> {
    const range = options.range!;
    const repos = options.repos ? options.repos.toLowerCase().split(",") : undefined;

    const aggregatedStats: Record<string, DbContributorCategoryTimeframe> = {};

    const allUsers = await this.findContributorsByType(listId, range, ContributorStatsTypeEnum.all, repos);

    if (allUsers.length === 0) {
      return [];
    }

    const activeUsers = await this.findContributorsByType(listId, range, ContributorStatsTypeEnum.active, repos);
    const newUsers = await this.findContributorsByType(listId, range, ContributorStatsTypeEnum.new, repos);
    const alumniUsers = await this.findContributorsByType(listId, range, ContributorStatsTypeEnum.alumni, repos);

    /*
     * for each category of contributor, we aggregate their contributions into buckets
     * using the "findAllContributionsByTimeframe" meta function. This returns a "bucket"
     * member that can be used to generate the record of all/active/alum/new contributors.
     */

    await Promise.all(
      allUsers.map(async (user) => {
        const allUsersStats = await this.contributorDevstatsService.findAllContributionsByTimeframe(options, [user]);

        allUsersStats.forEach((stat) => {
          const eventTime = new Date(stat.bucket);
          const bucket = new Date(
            eventTime.getUTCFullYear(),
            eventTime.getUTCMonth(),
            eventTime.getUTCDate()
          ).toISOString();

          if (!(bucket in aggregatedStats)) {
            aggregatedStats[bucket] = new DbContributorCategoryTimeframe();
            aggregatedStats[bucket].bucket = bucket;
            aggregatedStats[bucket].all = 0;
            aggregatedStats[bucket].active = 0;
            aggregatedStats[bucket].new = 0;
            aggregatedStats[bucket].alumni = 0;
          }

          aggregatedStats[bucket].all += 1;
        });
      })
    );

    await Promise.all(
      activeUsers.map(async (user) => {
        const activeUsersStats = await this.contributorDevstatsService.findAllContributionsByTimeframe(options, [user]);

        activeUsersStats.forEach((stat) => {
          const eventTime = new Date(stat.bucket);
          const bucket = new Date(
            eventTime.getUTCFullYear(),
            eventTime.getUTCMonth(),
            eventTime.getUTCDate()
          ).toISOString();

          if (!(bucket in aggregatedStats)) {
            aggregatedStats[bucket] = new DbContributorCategoryTimeframe();
            aggregatedStats[bucket].bucket = bucket;
            aggregatedStats[bucket].all = 0;
            aggregatedStats[bucket].active = 0;
            aggregatedStats[bucket].new = 0;
            aggregatedStats[bucket].alumni = 0;
          }

          aggregatedStats[bucket].active += 1;
        });
      })
    );

    await Promise.all(
      newUsers.map(async (user) => {
        const newUsersStats = await this.contributorDevstatsService.findAllContributionsByTimeframe(options, [user]);

        newUsersStats.forEach((stat) => {
          const eventTime = new Date(stat.bucket);
          const bucket = new Date(
            eventTime.getUTCFullYear(),
            eventTime.getUTCMonth(),
            eventTime.getUTCDate()
          ).toISOString();

          if (!(bucket in aggregatedStats)) {
            aggregatedStats[bucket] = new DbContributorCategoryTimeframe();
            aggregatedStats[bucket].bucket = bucket;
            aggregatedStats[bucket].all = 0;
            aggregatedStats[bucket].active = 0;
            aggregatedStats[bucket].new = 0;
            aggregatedStats[bucket].alumni = 0;
          }

          aggregatedStats[bucket].new += 1;
        });
      })
    );

    await Promise.all(
      alumniUsers.map(async (user) => {
        const alumniUsersStats = await this.contributorDevstatsService.findAllContributionsByTimeframe(options, [user]);

        alumniUsersStats.forEach((stat) => {
          const eventTime = new Date(stat.bucket);
          const bucket = new Date(
            eventTime.getUTCFullYear(),
            eventTime.getUTCMonth(),
            eventTime.getUTCDate()
          ).toISOString();

          if (!(bucket in aggregatedStats)) {
            aggregatedStats[bucket] = new DbContributorCategoryTimeframe();
            aggregatedStats[bucket].bucket = bucket;
            aggregatedStats[bucket].all = 0;
            aggregatedStats[bucket].active = 0;
            aggregatedStats[bucket].new = 0;
            aggregatedStats[bucket].alumni = 0;
          }

          aggregatedStats[bucket].alumni += 1;
        });
      })
    );

    const results = Object.values(aggregatedStats);

    return results.sort((a, b) => new Date(b.bucket).getTime() - new Date(a.bucket).getTime());
  }

  /*
   * the following "apply filter" functions can be used on a table named "users"
   * with a "login" column (likely derived from the pull_request_github_events table).
   * these can be used to filter for users who are:
   *
   * - active: have made contributions in the previous time range
   *   and the time range block before (i.e., the last 30 days and the 30 days before that)
   *
   * - new: have made a contribution in the last time range, but not the previous one:
   *   (i.e., made a contribution in the last 30 days, but not he previous 30 days before that)
   *
   * - alumni: have not made a contribution in the last time range block
   *   but did in the block before that (i.e., did not contribute in the last 30 days,
   *   but did before that in the previous 30 day block)
   *
   */

  private leftJoinCurrentMonthPrs({
    queryBuilder,
    users,
    repos,
    startDate,
    range = 30,
  }: {
    queryBuilder: SelectQueryBuilder<DbPullRequestGitHubEvents>;
    users: string[];
    repos?: string[];
    startDate: string;
    range: number;
  }): SelectQueryBuilder<DbPullRequestGitHubEvents> {
    queryBuilder.leftJoin(
      (qb: SelectQueryBuilder<DbPullRequestGitHubEvents>) => {
        qb.select("DISTINCT LOWER(actor_login)", "actor_login")
          .from("pull_request_github_events", "pull_request_github_events")
          .where("LOWER(actor_login) IN (:...users)", { users })
          .andWhere(
            "event_time BETWEEN :start_date::TIMESTAMP - :range_interval::INTERVAL AND :start_date::TIMESTAMP",
            {
              start_date: startDate,
              range_interval: `${range} days`,
            }
          );

        if (repos && repos.length > 0) {
          qb.andWhere("LOWER(repo_name) IN (:...repos)", { repos });
        }

        return qb;
      },
      "current_month_prs",
      "users.login = current_month_prs.actor_login"
    );

    return queryBuilder;
  }

  private leftJoinPreviousMonthPrs({
    queryBuilder,
    users,
    repos,
    startDate,
    range = 30,
  }: {
    queryBuilder: SelectQueryBuilder<DbPullRequestGitHubEvents>;
    users: string[];
    repos?: string[];
    startDate: string;
    range: number;
  }): SelectQueryBuilder<DbPullRequestGitHubEvents> {
    queryBuilder.leftJoin(
      (qb: SelectQueryBuilder<DbPullRequestGitHubEvents>) => {
        qb.select("DISTINCT LOWER(actor_login)", "actor_login")
          .from("pull_request_github_events", "pull_request_github_events")
          .where("LOWER(actor_login) IN (:...users)", { users })
          .andWhere(
            "event_time BETWEEN :start_date::TIMESTAMP - :double_range_interval::INTERVAL AND :start_date::TIMESTAMP - :range_interval::INTERVAL",
            {
              start_date: startDate,
              range_interval: `${range} days`,
              double_range_interval: `${range + range} days`,
            }
          );

        if (repos && repos.length > 0) {
          qb.andWhere("LOWER(repo_name) IN (:...repos)", { repos });
        }

        return qb;
      },
      "previous_month_prs",
      "users.login = previous_month_prs.actor_login"
    );

    return queryBuilder;
  }

  private applyActiveContributorsFilter({
    queryBuilder,
    users,
    repos,
    startDate,
    range,
  }: {
    queryBuilder: SelectQueryBuilder<DbPullRequestGitHubEvents>;
    users: string[];
    repos?: string[];
    startDate: string;
    range: number;
  }): SelectQueryBuilder<DbPullRequestGitHubEvents> {
    this.leftJoinCurrentMonthPrs({ queryBuilder, users, repos, startDate, range });
    this.leftJoinPreviousMonthPrs({ queryBuilder, users, repos, startDate, range });

    queryBuilder
      .andWhere(`"previous_month_prs"."actor_login" IS NOT NULL`)
      .andWhere(`"current_month_prs"."actor_login" IS NOT NULL`);

    return queryBuilder;
  }

  private applyNewContributorsFilter({
    queryBuilder,
    users,
    repos,
    startDate,
    range,
  }: {
    queryBuilder: SelectQueryBuilder<DbPullRequestGitHubEvents>;
    users: string[];
    repos?: string[];
    startDate: string;
    range: number;
  }): SelectQueryBuilder<DbPullRequestGitHubEvents> {
    this.leftJoinCurrentMonthPrs({ queryBuilder, users, repos, startDate, range });
    this.leftJoinPreviousMonthPrs({ queryBuilder, users, repos, startDate, range });

    queryBuilder
      .andWhere(`"previous_month_prs"."actor_login" IS NULL`)
      .andWhere(`"current_month_prs"."actor_login" IS NOT NULL`);

    return queryBuilder;
  }

  private applyAlumniContributorsFilter({
    queryBuilder,
    users,
    repos,
    startDate,
    range,
  }: {
    queryBuilder: SelectQueryBuilder<DbPullRequestGitHubEvents>;
    users: string[];
    repos?: string[];
    startDate: string;
    range: number;
  }): SelectQueryBuilder<DbPullRequestGitHubEvents> {
    this.leftJoinCurrentMonthPrs({ queryBuilder, users, repos, startDate, range });
    this.leftJoinPreviousMonthPrs({ queryBuilder, users, repos, startDate, range });

    queryBuilder
      .andWhere(`"previous_month_prs"."actor_login" IS NOT NULL`)
      .andWhere(`"current_month_prs"."actor_login" IS NULL`);

    return queryBuilder;
  }
}

results matching ""

    No results matching ""