[Buildroot] [PATCH buildroot-test 01/10] utils/daily-mail: new data: gitlab-ci jobs

Victor Huesca victor.huesca at bootlin.com
Thu Aug 29 13:23:39 UTC 2019


Buildroot has a mirror repository on gitlab.com that is used to run
continuous integration tasks. Among these tasks, a complete check of
the defconfigs and runtime tests is regularly performed.

This patch makes it possible to retrieve the tests that have failed
over a given period of time. It is based on the (very imperfect)
gitlab-ci API.

Due to multiple limitations of this API (pagination of results, crucial
data requiring a dedicated request, etc.), the implementation performs
several optimisations to be able to retrieve the needed jobs.
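
For illustration, the pagination workaround boils down to a loop like
the following sketch (the token and project id are placeholders, not
the values the script actually uses):

    import json
    import certifi
    from urllib3 import HTTPSConnectionPool

    # Placeholder credentials; the real script reads them from localconfig.
    pool = HTTPSConnectionPool('gitlab.com', port=443,
                               cert_reqs='CERT_REQUIRED',
                               ca_certs=certifi.where(),
                               headers={'PRIVATE-TOKEN': '<token>'})

    def list_pipelines(project_id, page_size=100):
        # Walk the paginated endpoint; a page shorter than page_size
        # marks the last page.
        results, page = [], 1
        while True:
            req = pool.request('GET',
                               '/api/v4/projects/{}/pipelines'.format(project_id),
                               fields={'per_page': page_size, 'page': page})
            chunk = json.loads(req.data)
            results += chunk
            if len(chunk) < page_size:
                return results
            page += 1

The list endpoint omits crucial data such as the finish date, hence
one dedicated request per pipeline, which the script parallelizes
with a multiprocessing pool.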

Signed-off-by: Victor Huesca <victor.huesca at bootlin.com>
---
 utils/daily-mail | 157 ++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 156 insertions(+), 1 deletion(-)

diff --git a/utils/daily-mail b/utils/daily-mail
index baf0e0b..2d4ed1c 100755
--- a/utils/daily-mail
+++ b/utils/daily-mail
@@ -7,7 +7,7 @@ import textwrap
 import mysmtplib as smtplib
 from email.mime.text import MIMEText
 from email.utils import formatdate
-from datetime import date, timedelta
+from datetime import date, timedelta, datetime
 import localconfig
 import csv
 from collections import defaultdict
@@ -16,6 +16,10 @@ import argparse
 import re
 import json
 from packaging import version
+import certifi
+from urllib3 import HTTPSConnectionPool
+from urllib3.exceptions import HTTPError
+from multiprocessing import Pool as ProcessPool
 
 sys.path.append(os.path.join(localconfig.brbase, "utils"))
 import getdeveloperlib  # noqa: E402
@@ -27,6 +31,7 @@ baseurl = "autobuild.buildroot.net"
 http_baseurl = "http://" + baseurl
 
 developers = getdeveloperlib.parse_developers(localconfig.brbase)
+gitlab_ci_http_pool = None
 
 
 def get_branches():
@@ -469,6 +474,156 @@ def get_outdated_pkgs(path):
     return sorted(s, key=lambda pkg: pkg['name'])
 
 
+def request_paginated_url(pool, url, page_size=100, page_name='page', **request_kwargs):
+    '''
+    Simple helper to retrieve data from a paginated url.
+    The request's result is assumed to be a json list.
+    '''
+    request_kwargs.setdefault('fields', {})
+    request_kwargs['fields']['per_page'] = page_size
+    results = []
+    page = 1
+    while True:  # stop once a page comes back shorter than page_size
+        request_kwargs['fields'][page_name] = page
+        try:
+            req = pool.request('GET', url, **request_kwargs)
+        except HTTPError:
+            print('error: there was an error with the gitlab api. '
+                  'Please make sure your identification token is right and try again.')
+            break
+        if req.status != 200:
+            print('error: there was an error with the gitlab api. '
+                  'Please make sure your identification token is right and try again.')
+            break
+        res = json.loads(req.data)
+        results += res
+        page += 1
+        if len(res) != page_size:
+            break
+    return results
+
+
+def get_pipeline_details(project_id, pipeline, **request_kwargs):
+    pool = gitlab_ci_http_pool
+    pipeline_url = '/api/v4/projects/{}/pipelines/{}'.format(project_id, pipeline)
+    try:
+        pipeline_req = pool.request('GET', pipeline_url, **request_kwargs)
+    except HTTPError:
+        print('error: there was an error while fetching the gitlab-ci pipeline\'s details. '
+              'Please make sure your identification token is right and try again.')
+        return {}
+    if pipeline_req.status != 200:
+        print('error: there was an error while fetching the gitlab-ci pipeline\'s details. '
+              'Please make sure your identification token is right and try again.')
+        return {}
+    infos = json.loads(pipeline_req.data)
+    return infos
+
+
+def get_pipeline_jobs(project_id, pipeline, kinds=None, **request_kwargs):
+    pool = gitlab_ci_http_pool
+    jobs_url = '/api/v4/projects/{}/pipelines/{}/jobs'.format(project_id, pipeline)
+    jobs = request_paginated_url(pool, jobs_url, **request_kwargs)
+    for job in jobs:
+        # Remove the unnecessary 'pipeline' field
+        del job['pipeline']
+        # Back up the actual name and set a default kind ('other')
+        job['full_name'] = job['name']
+        job['kind'] = 'other'
+    # Filter failed jobs then sort them
+    jobs = (job for job in jobs if job['status'] == 'failed')
+    if kinds:
+        jobs = (job for job in jobs if job['kind'] in kinds)
+    return sorted(jobs, key=lambda j: (j['name'], j['kind']))
+
+
+def get_gitlab_ci_pipelines(http_pool, process_pool, project_id, date, date_range=None):
+    '''
+    Returns all gitlab-ci pipelines run between `date - date_range` and `date`
+    '''
+    date_to = date
+    date_from = date - date_range if date_range else date
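+    # Use one details request per worker process, so a whole page of results is fetched in parallel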
+    per_page = process_pool._processes
+    if per_page > 100:
+        raise ValueError('gitlab-ci supports a max pagination of 100 results per page. '
+                         'The pool passed as argument must have 100 or less processes.')
+    fields = {'per_page': per_page}
+    url = '/api/v4/projects/{}/pipelines'.format(project_id)
+    page = 1
+    pipelines = []
+
+    # Fetch pipelines `per_page` at a time until `date_from` is reached
+    while True:
+        # List pipelines
+        fields['page'] = page
+        page += 1
+        try:
+            cur_pipelines = http_pool.request('GET', url, fields=fields)
+        except HTTPError:
+            print('error: there was an error while fetching the gitlab-ci pipelines. '
+                  'Please make sure your identification token is right and try again.')
+            return pipelines
+        if cur_pipelines.status != 200:
+            print('error: there was an error while fetching the gitlab-ci pipelines. '
+                  'Please make sure your identification token is right and try again.')
+            return pipelines
+        cur_pipelines = json.loads(cur_pipelines.data)
+
+        # Handle cases where the requested date is older than all pipelines
+        if len(cur_pipelines) == 0:
+            return pipelines
+
+        # Request more details about each pipeline (for instance the finish date), in parallel
+        pipelines_infos = [process_pool.apply_async(get_pipeline_details, (project_id, p['id']))
+                           for p in cur_pipelines]
+        pipelines_infos = [p.get() for p in pipelines_infos]
+
+        # Keep pipelines within the requested period; return once one older than `date_from` is seen
+        for p in pipelines_infos:
+            cur_date = datetime.strptime(p['finished_at'], '%Y-%m-%dT%H:%M:%S.%fZ').date()
+            if cur_date < date_from:
+                return pipelines
+            if cur_date > date_to:
+                continue
+            pipelines.append(p)
+
+    return pipelines
+
+
+def get_gitlab_ci(branches, date, date_range=None, kinds=None):
+    '''
+    Retrieve failed jobs from gitlab-ci, grouped by pipeline,
+    for the specified date.
+    If a `kinds` list is provided, only jobs matching one of the
+    given kinds are returned.
+    '''
+    header = {'PRIVATE-TOKEN': localconfig.gitlab_user_token}
+    project_id = localconfig.gitlab_project_id
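+    # Query parameter asking gitlab to return only failed jobs when listing a pipeline's jobs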
+    failed_filter = {'fields': {'status': 'failed'}}
+
+    # Build pools
+    global gitlab_ci_http_pool
+    gitlab_ci_http_pool = HTTPSConnectionPool('gitlab.com', port=443, cert_reqs='CERT_REQUIRED',
+                                              ca_certs=certifi.where(), timeout=30, headers=header)
+    process_pool = ProcessPool(processes=64)
+
+    # Fetch pipelines 64 at a time until `date_from` is reached
+    pipelines = get_gitlab_ci_pipelines(gitlab_ci_http_pool, process_pool, project_id, date, date_range)
+
+    # Drop unnecessary branches
+    pipelines = [p for p in pipelines if p['ref'] in branches]
+
+    # Get the jobs run by those pipelines
+    pipelines_jobs = [process_pool.apply_async(get_pipeline_jobs, (project_id, p['id'], kinds),
+                                               failed_filter) for p in pipelines]
+    for pipeline, jobs in zip(pipelines, pipelines_jobs):
+        pipeline['jobs'] = jobs.get()
+
+    # Group pipelines by branch
+    pipelines = {branch: [p for p in pipelines if p['ref'] == branch] for branch in branches}
+    return pipelines
+
+
 def calculate_notifications(results, outdated_pkgs):
     '''
     Prepare the notifications{} dict for the notifications to individual
-- 
2.21.0


