#!/usr/bin/env python
#--------------------------------------------------------------------------
# File and Version Information:
# $Id: TDFileContainer.py 11634 2016-04-05 18:47:50Z dubrovin@SLAC.STANFORD.EDU $
#
# Description:
# class TDFileContainer
#
#------------------------------------------------------------------------
"""TDFileContainer - text/table data file container - load/hold/provide access to data from text file.
It is assumed that text data file contains records of the same format and occasionally record-header
beginning with character # (hash in [0] position).
Example of the file content::
# Exp Run Date Time time(sec) time(nsec) fiduc Evnum Reg Seg Row Col ...
cxif5315 169 2015-02-22 02:20:47 1424600447 486382070 104421 0 EQU 17 153 48 ...
cxif5315 169 2015-02-22 02:20:47 1424600447 494719789 104424 1 EQU 1 161 32 ...
cxif5315 169 2015-02-22 02:20:47 1424600447 494719789 104424 1 EQU 17 170 51 ...
cxif5315 169 2015-02-22 02:20:47 1424600447 503058551 104427 2 EQU 25 170 310 ...
cxif5315 169 2015-02-22 02:20:47 1424600447 503058551 104427 2 EQU 25 180 292 ...
cxif5315 169 2015-02-22 02:20:47 1424600447 511393301 104430 3 EQU 1 162 27 ...
cxif5315 169 2015-02-22 02:20:47 1424600447 536405573 104439 6 ARC 8 11 41 ...
cxif5315 169 2015-02-22 02:20:47 1424600447 536405573 104439 6 ARC 8 10 20 ...
...
Header (without #) should have the same number of literal fields, separated by spaces, as the data records.
Records in the file should be grouped by a unique group-id parameter,
for example a group of records may have the same group number or some unique index.
Originally it was designed to work with text files containing record data generated by the peak-finder.
It is adapted to work with any other object type besides peak data.
Usage::
# !!! NOTE: None is returned whenever requested information is missing.
# Import
from pyimgalgos.TDFileContainer import TDFileContainer
from pyimgalgos.TDPeakRecord import TDPeakRecord # use it by default in TDFileContainer
from pyimgalgos.TDNodeRecord import TDNodeRecord
from pyimgalgos.TDCheetahPeakRecord import TDCheetahPeakRecord
# Initialization
# for peakfinder records
fname = '/reg/neh/home1/dubrovin/LCLS/rel-mengning/work/pfv2-cxif5315-r0169-2015-09-14T14:28:04.txt'
fc = TDFileContainer(fname, indhdr='Evnum', objtype=TDPeakRecord, pbits=0)
# for index table:
fc = TDFileContainer(fname, indhdr='index', objtype=TDNodeRecord)
# for Cheetah file with peaks:
fc = TDFileContainer(fname, indhdr='frameNumber', objtype=TDCheetahPeakRecord)
gr_nums = fc.group_numbers()
ngrps = fc.number_of_groups()
grpnum = fc.current_group_number()
gr_curr = fc.group(grpnum) # returns current or specified group
gr_next = fc.next() # returns next group
gr_prev = fc.previous() # returns previous group
hdr = fc.header()
# Print
fc.print_attrs()
fc.print_content(nlines=None) # prints nlines (or all by default) lines from the file container
# ____________________________________
# Example of iterations over groups
for grnum in fc.group_num_iterator() :
group = fc.next()
group.print_attrs()
peaks = group.get_objs()
for pk in peaks :
pk.print_short()
# Information available through the TDPeakRecord object pk
# ________________________________________________________
# pk.exp, pk.run, pk.evnum, pk.reg
# pk.date, pk.time, pk.tsec, pk.tnsec, pk.fid
# pk.seg, pk.row, pk.col, pk.amax, pk.atot, pk.npix
# pk.rcent, pk.ccent, pk.rsigma, pk.csigma
# pk.rmin, pk.rmax, pk.cmin, pk.cmax
# pk.bkgd, pk.rms, pk.son
# pk.imrow, pk.imcol
# pk.x, pk.y, pk.r, pk.phi
# pk.sonc
# pk.dphi000
# pk.dphi180
# pk.line
# Example of direct access to group by its number
grpnum = 8 # but grpnum is not necessarily a consecutive number, it should be in fc.group_num_iterator() ...
group = fc.group(grpnum) # returns current or specified group
group.print_attrs()
This software was developed for the LCLS project.
If you use all or part of it, please give an appropriate acknowledgment.
@see classes
\n :py:class:`pyimgalgos.TDFileContainer` - file records container.
\n :py:class:`pyimgalgos.TDGroup` - holds a list of records associated with a single group.
\n :py:class:`pyimgalgos.TDPeakRecord` - provides access to the peak record.
\n :py:class:`pyimgalgos.TDNodeRecord` - provides access to the look-up table with crystal orientation record.
\n :py:class:`pyimgalgos.TDCheetahPeakRecord` - provides access to the Cheetah peak record.
@version $Id: TDFileContainer.py 11634 2016-04-05 18:47:50Z dubrovin@SLAC.STANFORD.EDU $
@author Mikhail S. Dubrovin
"""
#------------------------------
__version__ = "$Revision: 11634 $"
# $Source$
##-----------------------------
import os
#import sys
from time import time
from pyimgalgos.TDGroup import TDGroup
from pyimgalgos.TDPeakRecord import TDPeakRecord
##-----------------------------
##-----------------------------
class TDFileContainer :
    """Load and hold the list of records from a text file and provide access by group number.

    The file is read once in the constructor; all its lines are kept in
    self.lst_of_recs (commas are replaced by spaces). Records are then indexed
    by the integer value found in the column named by the header field indhdr;
    consecutive data records with the same value form one group.

    Parallel index lists (one entry per group, in file order):
    lst_grnum    - group number
    lst_begin    - index in lst_of_recs of the first data record of the group
    lst_end      - index in lst_of_recs just past the last data record of the group
    lst_nrecords - number of data records in the group

    All print statements are written as single-argument print(...) calls,
    which work under both Python 2 and Python 3.
    """

##-----------------------------

    def __init__(self, fname, indhdr='Evnum', objtype=TDPeakRecord, pbits=0) :
        """Constructor

        Args:
        fname   (str) - text table data file name
        indhdr  (str) - header of the field used for group indexing
        objtype (TD*Record) - object type used for data record processing/access
        pbits   (int) - print control bit-word; pbits & 256 - tracking,
                        any non-zero value enables warnings in next()/previous()

        Raises:
        IOError    - if the file fname does not exist
        ValueError - if a data record is found before the header containing indhdr
        """
        if pbits & 256 : print('c-tor of class %s' % self.__class__.__name__)

        self.indhdr  = indhdr
        self.objtype = objtype
        self.pbits   = pbits
        self.hdr     = None
        self.igrnum  = None # index of the indhdr field in the record; set when the header is parsed
        self.grnum   = -1   # group number of the last indexed record (-1 until the 1st data record)

        #self.lst_of_recs = [] # list of recs loaded from record data file
        self.lst_grnum    = [] # list of group numbers in the data file
        self.lst_begin    = [] # list of record indexes in the lst_of_recs
        self.lst_end      = [] # list of indexes just past the last data record of each group
        self.lst_nrecords = [] # list of number of records in group

        self._load_recs_from_file(fname)
        self._group_indexing()
        self._reset_indexes()

##-----------------------------

    def _reset_indexes(self) :
        """Resets indexes for iterator to the first group.

        The current group number is set to None if the file contains no data records.
        """
        self.first_iteration = True
        self.grnum_curr = self.lst_grnum[0] if self.lst_grnum else None # reset current group after indexing
        self.indlst_curr = 0 # reset current index of internal lists

##-----------------------------

    def __del__(self) :
        """d-tor
        """
        # getattr protects against a partly constructed object
        if getattr(self, 'pbits', 0) & 256 : print('d-tor of class %s' % self.__class__.__name__)

##-----------------------------

    def __call__(self) :
        """Alias to group_num_iterator(); returns list of group numbers.
        """
        return self.group_num_iterator()

##-----------------------------

    def print_content(self, nlines=None) :
        """Prints content of the file-container; by default - entire file.

        Args:
        nlines (int) - maximal number of lines to print, None - print all lines
        """
        if self.pbits & 256 : print("""default method of class %s""" % self.__class__.__name__)

        print('\n%s \n%s holds data from file:\n %s\n' % (120*'_', self.__class__.__name__, self.fname))
        for i,rec in enumerate(self.lst_of_recs) :
            if nlines is not None and i>=nlines : break
            print(rec.rstrip('\n')) # record already carries its own end-of-line
        print('etc.' if nlines is not None else 'End of file')

##-----------------------------

    def print_attrs(self) :
        """Prints attributes of the file-container object.
        """
        print('Attributes of the class %s object' % self.__class__.__name__)
        print('\n'.join((
            ' fname : %s' % self.fname,
            ' pbits : %d' % self.pbits,
            ' hdr : %s' % self.hdr,
            ' nrecs : %d' % len(self.lst_of_recs),
            # %s rather than %d: igrnum is None if the header was not found
            ' Auto-defined grnum index in the record data : %s' % self.igrnum)))

##-----------------------------

    def _load_recs_from_file(self, fname) :
        """Loads all lines of the file in self.lst_of_recs, replacing commas by spaces.

        Raises IOError if the file does not exist.
        """
        if not os.path.lexists(fname) : raise IOError('File %s is not found' % fname)
        self.fname = fname

        t0_sec = time()
        # "with" closes the file even if reading fails
        with open(fname,'r') as f :
            self.lst_of_recs = [rec.replace(',',' ') for rec in f]

        if self.pbits & 256 : print('File loading time %.3f sec' % (time()-t0_sec))

##-----------------------------

    def _load_recs_from_file_v0(self, fname) :
        """Older version of the loader: loads all lines of the file as is.

        Raises IOError if the file does not exist.
        """
        if not os.path.lexists(fname) : raise IOError('File %s is not found' % fname)
        self.fname = fname

        t0_sec = time()
        with open(fname,'r') as f :
            self.lst_of_recs = f.readlines()

        if self.pbits & 256 : print('File loading time %.3f sec' % (time()-t0_sec))

##-----------------------------

    def _is_data_rec(self, rec) :
        """Returns True if rec is neither empty (whitespace only) nor a header/comment (# in [0] position).
        """
        return bool(rec.strip()) and rec[0]!='#'

##-----------------------------

    def _part_rec_parser(self, rec) :
        """ 1. saves the 1st header containing the field self.indhdr in self.hdr, returns None for header
            2. defines index of the field self.indhdr (='Evnum') in self.igrnum
            3. returns None for empty recs (if any)
            4. returns group number (int) found in the record data

            Raises ValueError if a data record is found before the header is defined.
        """
        if not rec.strip() : return None # ignore empty (whitespace only) records

        if rec[0]=='#' : # rec is header or comment
            if self.hdr is None :
                hdr = rec.lstrip('#').rstrip('\n')
                hdr_fields = hdr.split()
                # compare with entire field names; a substring match is not enough to get the index
                if self.indhdr in hdr_fields :
                    self.hdr = hdr
                    self.igrnum = hdr_fields.index(self.indhdr)
                    if self.pbits & 256 : print('self.igrnum %d' % self.igrnum)
            return None

        if self.igrnum is None :
            raise ValueError('File %s: data record is found before the header with field "%s"'\
                             % (self.fname, self.indhdr))

        # partly split data fields and return group number
        fields = rec.split(None,self.igrnum+1)
        return int(fields[self.igrnum])

##-----------------------------

    def _group_indexing(self) :
        """Loops over list of records, makes lists for indexing.

        Fills lst_grnum, lst_begin, lst_end, lst_nrecords;
        a new group begins each time the group number of the data record changes.
        """
        if self.pbits & 256 : print('_group_indexing')
        t0_sec = time()

        self.count = 0
        for ind, rec in enumerate(self.lst_of_recs) :

            grnum = self._part_rec_parser(rec)
            if grnum is None : continue # in case of comments and empty recs

            # check if record is from the next group and add it to the list;
            # emptiness of lst_grnum (not a sign of self.grnum) marks the 1st record,
            # so negative group numbers are indexed as well
            if not self.lst_grnum or grnum != self.grnum :
                if self.lst_grnum : # close previous group, skip for 1st record
                    self.lst_nrecords.append(self.count)
                self.count = 1
                self.grnum = grnum
                self.lst_grnum.append(grnum)
                self.lst_begin.append(ind)
                self.lst_end.append(ind+1)
            else :
                self.count += 1
                self.lst_end[-1] = ind+1

        # close the last group; nothing to add if there are no data records
        if self.lst_grnum : self.lst_nrecords.append(self.count)

        if self.pbits & 256 :
            print('Last group %d contains %d records' % (self.grnum, self.count))
            print('Group indexing time %.3f sec' % (time()-t0_sec))

##-----------------------------
# This is time consuming operation
#    def list_of_groups(self) :
#        """returns list of group objects
#        """
#        self._reset_indexes()
#        return [self.next() for grnum in self.lst_grnum]
#
##-----------------------------

    def group_numbers(self) :
        """returns list of group numbers in the file
        """
        return self.lst_grnum

##-----------------------------

    def group_num_iterator(self) :
        """resets indexes to the beginning of arrays and returns list of group numbers
        """
        self._reset_indexes()
        return self.lst_grnum

##-----------------------------

    def number_of_groups(self) :
        """returns number of groups in file
        """
        return len(self.lst_grnum)

##-----------------------------

    def current_group_number(self) :
        """returns current group number
        """
        return self.grnum_curr

##-----------------------------

    def header(self) :
        """returns string header, or None if the header is not found
        """
        return self.hdr

##-----------------------------

    def _group_for_index(self) :
        """Returns TDGroup object for the group with current index self.indlst_curr.

        Also updates the current group number self.grnum_curr.
        """
        self.grnum_curr = self.lst_grnum   [self.indlst_curr]
        begin           = self.lst_begin   [self.indlst_curr]
        end             = self.lst_end     [self.indlst_curr]
        nrecords        = self.lst_nrecords[self.indlst_curr]

        if self.pbits & 256 :
            print('grnum_curr=%d indlst_curr=%d begin=%d nrecords=%d' %\
                  (self.grnum_curr, self.indlst_curr, begin, nrecords))

        evt_recs = self.lst_of_recs[begin:end]
        if len(evt_recs) != nrecords :
            # comments or empty lines are found inside the group - pass data records only
            evt_recs = [rec for rec in evt_recs if self._is_data_rec(rec)]

        return TDGroup(evt_recs, self.objtype, pbits=self.pbits)

##-----------------------------

    def group(self, grnum=None) :
        """Returns current or specified group.

        Args:
        grnum (int) - group number, None - for current group

        Returns None if the specified group number is missing in the file
        or if the file contains no data records.
        """
        if self.pbits & 256 : print('group(evnum=%s)' % str(grnum))
        if grnum is not None :
            try :
                self.indlst_curr = self.lst_grnum.index(grnum)
            except ValueError :
                return None
        if not self.lst_grnum : return None
        return self._group_for_index()

##-----------------------------

    def next(self) :
        """Returns next group, or None at the end of the list.

        The 1st call after indexes reset returns the current group without incrementing of the index.
        """
        if self.pbits & 256 : print('next group')

        if self.first_iteration and self.lst_grnum :
            self.first_iteration = False
            return self._group_for_index() # do not increment indexes on first iteration

        if self.indlst_curr < len(self.lst_grnum)-1 :
            self.indlst_curr += 1
            return self._group_for_index()

        if self.pbits : print('WARNING: %s.next() reached the end of the list, return None'%\
                              self.__class__.__name__)
        return None

##-----------------------------

    def previous(self) :
        """Returns previous group, or None at the beginning of the list.

        The 1st call after indexes reset returns the current group without decrementing of the index.
        """
        if self.pbits & 256 : print('previous group')

        if self.first_iteration and self.lst_grnum :
            self.first_iteration = False
            return self._group_for_index() # do not decrement indexes on first iteration

        if self.indlst_curr > 0 :
            self.indlst_curr -= 1
            return self._group_for_index()

        if self.pbits : print('WARNING: %s.previous() reached the beginning of the list, return None'%\
                              self.__class__.__name__)
        return None

##-----------------------------
##-----------------------------
## Aliases for deprecated names
##-----------------------------
##-----------------------------

    def event_numbers(self) :
        """Deprecated, see group_numbers()"""
        return self.group_numbers()

##-----------------------------

    def evnum_iterator(self) :
        """Deprecated, see group_num_iterator()"""
        return self.group_num_iterator()

##-----------------------------

    def number_of_events(self) :
        """Deprecated, see number_of_groups()"""
        return self.number_of_groups()

##-----------------------------

    def current_event_number(self) :
        """Deprecated, see current_group_number()"""
        return self.current_group_number()

##-----------------------------

    def event(self, evnum=None) :
        """Deprecated, see group(evnum)"""
        return self.group(grnum=evnum)
##-----------------------------
##-----------------------------
##-----------------------------
##-----------------------------
##-----------------------------
def do_work() :
    """Test: loads the peak-finder file, prints container attributes,
    accesses a group directly by its number, then iterates over all groups
    with next() and prints their records.
    """
    import sys # local import; used to print the indent without end-of-line

    fname = '/reg/neh/home1/dubrovin/LCLS/rel-mengning/work/pfv2-cxif5315-r0169-2015-09-14T14:28:04.txt'

    fc = TDFileContainer(fname, indhdr='Evnum', objtype=TDPeakRecord, pbits=0)
    fc.print_attrs()
    fc()

    # Direct access to TDGroup object
    group = fc.group(8)
    # group() returns None if group number 8 is missing in the file
    if group is not None : group.print_attrs()

    t0_sec = time()

    for grpnum in fc.group_num_iterator() :
        group = fc.next()
        print('%s\nGroup %d ' % (80*'_', grpnum))

        for record in group() :
            sys.stdout.write('  ') # indent for the line printed by print_short()
            record.print_short()

        for i, peak in enumerate(group()) :
            print(' peak#%2d bkgd=%5.1f rms=%5.1f S/N=%5.1f' % (i, peak.bkgd, peak.rms, peak.son))

    print('\nTime to iterate using next() %.3f sec' % (time()-t0_sec))

    #t0_sec = time()
    #groups = fc.list_of_groups()
    #print 'Time to generate list of group objects %.3f sec' % (time()-t0_sec)
##-----------------------------
# Script entry point: run the self-test when the module is executed directly.
if __name__ == '__main__' :
    do_work()
    print('Test is completed')
    #sys.exit('Processing is completed')
##-----------------------------