# Source code for obiwan.scaling.timing

"""Script to make the obiwan scaling plots in my thesis"""

if __name__ == "__main__":
    import matplotlib
    matplotlib.use('Agg')
import matplotlib.pyplot as plt
import numpy as np
import subprocess
import os
import pickle
import json
import re
import pandas as pd

# Names of the pipeline stages whose timings are extracted from each log and
# written as columns of the measurements file, in this order.
STAGES = [
    'tims',
    'mask_junk',
    'srcs',
    'fitblobs',
    'coadds',
    'writecat',
]


def add_scatter(ax,x,y,c='b',m='o',lab='hello',s=80,drawln=False):
    """Scatter hollow markers on ``ax`` and optionally join them with a line.

    Args:
        ax: object providing ``scatter`` and ``plot`` methods (e.g. a
            matplotlib Axes).
        x, y: coordinates forwarded unchanged to ``ax.scatter``/``ax.plot``.
        c: edge color of the markers, also used as the line color.
        m: marker symbol.
        lab: legend label attached to the scatter points.
        s: marker size.
        drawln: when true, also draw a solid line through the points.
    """
    marker_style = dict(s=s, lw=2., facecolors='none', edgecolors=c,
                        marker=m, label=lab)
    ax.scatter(x, y, **marker_style)
    if not drawln:
        return
    ax.plot(x, y, c=c, ls='-')

class Plots(object):
    """Plotting helpers for the timing measurements.

    The instance keeps the object passed to the constructor as ``self.tm``.
    The two plotting helpers take their data as an explicit argument and do
    not read instance state, so they are static methods: they can be called
    either as ``Plots.plot_wall_node(d)`` or on an instance.
    """

    def __init__(self,tm):
        self.tm= tm

    @staticmethod
    def tractor_profile_plots(tm,name='tmp.png'):
        """Plot serial and parallel wall time per stage and save to ``name``.

        Args:
            tm: mapping with array-like entries ``'stage'``, ``'serial'`` and
                ``'parallel'``; the timing entries are divided by 60 before
                plotting (presumably seconds converted to minutes, per the
                axis label -- confirm against the caller).
            name: output image path handed to ``plt.savefig``.
        """
        fig, ax = plt.subplots()
        xvals = np.arange(tm['stage'].size) + 1
        print(tm['parallel'])
        add_scatter(ax, xvals, tm['serial'] / 60., c='b', m='o',
                    lab='serial', drawln=True)
        add_scatter(ax, xvals, tm['parallel'] / 60., c='g', m='o',
                    lab='parallel', drawln=True)
        plt.legend(loc='lower right', scatterpoints=1)
        ax.set_xticks(xvals)
        ax.set_xticklabels(tm['stage'], rotation=45, ha='right')
        ax.set_yscale('log')
        ax.set_ylim([1e-3, 1e2])
        ylab = ax.set_ylabel('Wall Time (min)')
        xlab = ax.set_xlabel('Tractor Stage')
        # Pass the axis labels explicitly so the tight bounding box keeps them.
        plt.savefig(name, bbox_extra_artists=[xlab, ylab],
                    bbox_inches='tight', dpi=150)
        plt.close()

    @staticmethod
    def plot_wall_node(d):
        """Plot mean wall times per configuration; saves 'wall_v_nodes.png'.

        Args:
            d: mapping with array-like entries ``'nodes'``, ``'cores'``,
                ``'tims_mean'``, ``'fit_mean'`` and ``'tot_mean'``; the three
                timing entries are divided by 60 before plotting.
        """
        name = 'wall_v_nodes.png'
        fig, ax = plt.subplots()
        n_points = d['nodes'].size
        xvals = np.arange(n_points) + 1
        add_scatter(ax, xvals, d['tims_mean'] / 60., c='b', m='o',
                    lab='tims', drawln=True)
        add_scatter(ax, xvals, d['fit_mean'] / 60., c='g', m='o',
                    lab='fit', drawln=True)
        add_scatter(ax, xvals, d['tot_mean'] / 60., c='k', m='o',
                    lab='total', drawln=True)
        plt.legend(loc='lower right', scatterpoints=1)
        ax.set_xticks(xvals)
        # One "cores/nodes" tick label per plotted point.
        names = ['%d/%d' % (d['cores'][i], d['nodes'][i])
                 for i in range(n_points)]
        ax.set_xticklabels(names, rotation=45, ha='right')
        ylab = ax.set_ylabel('Wall Time (min)')
        xlab = ax.set_xlabel('Cores/Nodes')
        plt.savefig(name, bbox_extra_artists=[xlab, ylab],
                    bbox_inches='tight', dpi=150)
        plt.close()

def params_of_run(bigstring):
    """Pull the run parameters out of the text of a log file.

    Each parameter is located with a regex of the form ``key=value,`` and the
    value is returned as a string with the trailing comma removed.

    Args:
        bigstring: full text of the log.

    Returns:
        dict with string values under the keys 'rsdir' (from ``rowstart=``),
        'nobj', 'brick' (quotes stripped) and 'cores' (from ``threads=``).

    Raises:
        AttributeError: if one of the patterns is not found in ``bigstring``.
    """
    patterns = (
        ('rsdir', r'rowstart=[0-9]+,'),
        ('nobj', r'nobj=[0-9]+,'),
        ('brick', r"brick='[0-9]{4}[mp][0-9]{3}',"),
        ('cores', r'threads=[0-9]+,'),
    )
    params = {}
    for key, pattern in patterns:
        matched_text = re.search(pattern, bigstring).group(0)
        # Keep what follows the '=' and drop the trailing comma.
        params[key] = matched_text.split('=')[1].replace(',', '')
    # The brick name is quoted in the log; store it bare.
    params['brick'] = params['brick'].replace("'", '')
    return params

def number_injected(bigstring,nobj=None):
    """Return the fraction of requested sources that were actually injected.

    The count of skipped sources is read from the first log line matching
    ``INFO:decals_sim:sources ... flagged as nearby <count>``.

    Args:
        bigstring: full text of the log.
        nobj: number of sources requested for the run (int). Required in
            practice even though the signature defaults it to None.

    Returns:
        dict with the single key 'frac_injected' =
        ``(nobj - count) / float(nobj)``.

    Raises:
        TypeError: if ``nobj`` is None.
        AttributeError: if the log line is not found in ``bigstring``.
    """
    if nobj is None:
        raise TypeError('nobj must be an integer, got None')
    # Greedy capture: a lazy ``[0-9]+?`` at the end of the pattern stops after
    # the first digit and would truncate counts of 10 or more.
    found = re.search(
        r'INFO:decals_sim:sources.*?flagged as nearby ([0-9]+)', bigstring)
    n_skip = int(found.group(1))
    return {'frac_injected': (nobj - n_skip) / float(nobj)}

def time_total(bigstring):
    """Returns dict of seconds spent in total

    The start and end times are read from the first log lines matching
    ``obiwan started at ...`` and ``obiwan finshed at ...``; the last two
    space-separated tokens of each line are parsed together as a timestamp.

    Args:
        bigstring: full text of the log.

    Returns:
        dict with the single key 'total_sec' (float seconds between the two
        timestamps).

    Raises:
        AttributeError: if either line is missing from ``bigstring``.
    """
    ts = {}
    # NOTE: 'finshed' is the literal text searched for in the log (presumably
    # the log's own spelling); do not "correct" it here.
    for text in ['started', 'finshed']:
        a = re.search(r'obiwan %s at.*?\n' % text, bigstring)
        ymd, t = a.group(0).strip().split(' ')[-2:]
        ts[text] = pd.Timestamp('%s %s' % (ymd, t))
    return dict(total_sec=(ts['finshed'] - ts['started']).total_seconds())
def time_per_stage(bigstring):
    """Returns dict of seconds spent in each stage

    For every stage in STAGES, the text from ``Resources for stage <stage>``
    up to the next ``Grand total Wall:`` line is located, and the token just
    before the trailing ``sec`` on that line is taken as the stage's wall
    time.

    Args:
        bigstring: full text of the log.

    Returns:
        dict mapping each stage name to its wall time, kept as the string
        token found in the log.

    Raises:
        AttributeError: if a stage's section is not found in ``bigstring``.
        AssertionError: if the section does not contain exactly one
            'Grand total Wall' line, or that line does not end in 'sec'.
    """
    t = {}
    for stage in STAGES:
        a = re.search(r'Resources for stage %s(.*\n)*?Grand total Wall:.*\n' % stage,
                      bigstring)
        print('stage=%s, a=' % stage, a)
        lines = a.group(0).split('\n')
        print('lines=', lines)
        lines = pd.Series(lines)
        # Keep only the summary line and split it into whitespace tokens.
        line = lines[lines.str.contains('Grand total Wall')].str.split(r'\s+')
        assert(line.size == 1)
        assert(line.str[-1].values[0] == 'sec')
        t[stage] = line.str[-2].values[0]
    return t
def write_header(savenm):
    """Create (or overwrite) ``savenm`` with the space-separated column header.

    The columns are: nobj brick rsdir frac_injected cores, one column per
    entry of STAGES, then total_sec.
    """
    columns = ['nobj brick rsdir frac_injected cores']
    columns.extend(STAGES)
    columns.append('total_sec')
    with open(savenm, 'w') as foo:
        foo.write(' '.join(columns) + '\n')
    print('Wrote header %s' % savenm)


def write_measurements(d, savenm='test.txt'):
    """Append one space-separated row of measurements to ``savenm``.

    Args:
        d: dict holding 'nobj', 'brick', 'rsdir', 'frac_injected' (written
            with three decimals), 'cores', one entry per stage in STAGES,
            and 'total_sec'.
        savenm: path of the text file to append to.
    """
    fields = ['%s %s %s %.3f %s' % (d['nobj'], d['brick'], d['rsdir'],
                                    d['frac_injected'], d['cores'])]
    fields.extend('%s' % d[stage] for stage in STAGES)
    fields.append('%s' % d['total_sec'])
    with open(savenm, 'a') as foo:
        foo.write(' '.join(fields) + '\n')
    print('Appended measurements %s' % savenm)


if __name__ == '__main__':
    from argparse import ArgumentParser
    parser = ArgumentParser(description="test")
    parser.add_argument("--logfiles",action="store",required=True,
                        help="list of logfiles for the scaling run, e.g. nobj=500,1000,1500 for many bricks and rsdirs")
    parser.add_argument("--savenm",action="store",help='text file name to write measurements to',required=True)
    args = parser.parse_args()

    # Extract
    if not os.path.exists(args.savenm):
        write_header(args.savenm)
    # ndmin=1 keeps a list file with a single entry iterable (loadtxt would
    # otherwise return a 0-d array for it).
    fns = np.loadtxt(args.logfiles, dtype=str, ndmin=1)
    for logfile in fns:
        with open(logfile, 'r') as foo:
            bigstring = foo.read()
        d = {**params_of_run(bigstring),
             **time_per_stage(bigstring),
             **time_total(bigstring),
             }
        d = {**d,
             **number_injected(bigstring, nobj=int(d['nobj']))
             }
        write_measurements(d, args.savenm)

    # Plots
    df = pd.read_csv(args.savenm, sep=' ')