# Source code for obiwan.qa.cpu_hours

"""
Uses 'SLURM sacct' to compute the number of CPU and MPP hours
"""

import pandas as pd
import os
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

def cpu_hrs(nodes, h, m, s, mpp=False, cori=True):
    """Return the CPU (or MPP) hours used by a job.

    The elapsed wall-clock time ``h + m/60 + s/3600`` (in hours) is
    multiplied by the number of nodes, the cores per node and, when
    requested, the MPP charge factor. All arithmetic is element-wise, so
    scalars, numpy arrays and pandas Series are accepted alike.

    Args:
        nodes: number of nodes
        h,m,s: hours,min,sec of elapsed wall-clock time
        mpp: if True, scale the result by the MPP charge factor
            (2.5 when ``cori`` is True, 2.0 otherwise)
        cori: if True use 32 cores per node, otherwise 24

    Returns:
        mpp_factor * cores_per_node * nodes * elapsed hours
    """
    cores_per_node = 32 if cori else 24

    mpp_factor = 1.
    if mpp:
        mpp_factor = 2.5 if cori else 2.

    # Minutes and seconds must be folded into the elapsed time *before*
    # scaling; otherwise only the whole hours are charged per core.
    elapsed_hrs = h + m / 60. + s / 3600.
    return mpp_factor * cores_per_node * nodes * elapsed_hrs
def dobash(cmd):
    """Echo a shell command, run it, and raise if it fails.

    Args:
        cmd: command string handed to ``os.system``

    Raises:
        ValueError: if ``os.system`` returns a non-zero status
    """
    print('UNIX cmd: %s' % cmd)
    if os.system(cmd):
        raise ValueError('command failed: %s' % cmd)


if __name__ == '__main__':
    # Example of how to produce the input file:
    # sacct -A desi --user=kaylanb --format=JobID,State,NNodes,Elapsed,Start -S 11/17/17 -E 12/03/17|grep COMPLETED > my_sacct.txt
    from argparse import ArgumentParser
    parser = ArgumentParser()
    parser.add_argument('--sacct_fn', type=str, required=True,
                        help='output of sacct dumped to textfil')
    args = parser.parse_args()

    # sep=r'\s+' is the non-deprecated spelling of delim_whitespace=True
    a = pd.read_csv(args.sacct_fn, sep=r'\s+', header=None,
                    names=['slurm_id', 'status', 'num_nodes', 'time', 'start'])

    # 'time' is split on ':' into three string fields; the first one may
    # carry a leading "<days>-" prefix (e.g. 01-05), handled below.
    for i, name in zip([0, 1, 2], ['multi_hr', 'min', 'sec']):
        a[name] = a['time'].str.split(':').str[i]

    day_hr = a['multi_hr'].str.split('-')
    print(day_hr.str.len().value_counts())
    has_days = day_hr.str.len() > 1

    # Hours contributed by the "<days>-" prefix, zero when absent
    a['extra_hrs'] = np.zeros(a.shape[0])
    a.loc[has_days, 'extra_hrs'] = 24 * day_hr[has_days].str[0].astype(float)

    # Last piece is the hour field both for "05" and for "01-05"
    a['hrs'] = day_hr.str[-1].astype(float) + a['extra_hrs']

    # Direct column assignment so the columns really become float dtype
    for col in ['min', 'sec']:
        a[col] = a[col].astype(float)

    a['cpu_hrs'] = cpu_hrs(a['num_nodes'], a['hrs'], a['min'], a['sec'])
    total_cpu_hrs = a['cpu_hrs'].sum()
    print('total cpu hours (M):', total_cpu_hrs / 1e6)
    print('total MPP hours (M):', total_cpu_hrs / 1e6 * 2.5)

    ## Plots
    # One distribution per quantity on a 2x2 grid, filled row by row.
    # NOTE(review): sns.distplot is deprecated in recent seaborn releases;
    # switching to histplot/displot would change the figure, so it is kept.
    fig, ax = plt.subplots(2, 2, figsize=(6, 6))
    names = ['num_nodes', 'hrs', 'min', 'sec']
    for name, axis in zip(names, ax.ravel()):
        sns.distplot(a[name], ax=axis)
    fn = 'nodes_hrs_min_sec.png'
    plt.savefig(fn, dpi=150)
    print('Wrote %s' % fn)