IData/include/Dataset.h

Go to the documentation of this file.
00001 #ifndef IDATA_DATASET_H
00002 #define IDATA_DATASET_H
00003 
00004 //--------------------------------------------------------------------------
00005 // File and Version Information:
00006 //      $Id: Dataset.h 10652 2015-09-09 20:12:56Z davidsch@SLAC.STANFORD.EDU $
00007 //
00008 // Description:
00009 //      Class Dataset.
00010 //
00011 //------------------------------------------------------------------------
00012 
00013 //-----------------
00014 // C/C++ Headers --
00015 //-----------------
00016 #include <string>
00017 #include <map>
00018 #include <vector>
00019 #include <utility>
00020 
00021 //----------------------
00022 // Base Class Headers --
00023 //----------------------
00024 
00025 //-------------------------------
00026 // Collaborating Class Headers --
00027 //-------------------------------
00028 
00029 //------------------------------------
00030 // Collaborating Class Declarations --
00031 //------------------------------------
00032 
00033 //              ---------------------
00034 //              -- Class Interface --
00035 //              ---------------------
00036 
00037 namespace IData {
00038 
00039 /// @addtogroup IData
00040 
00041 /**
00042  *  @ingroup IData
00043  *
00044  *  @brief C++ class representing dataset concept.
00045  *
00046  *  Dataset is defined currently as a collection of one or more runs
00047  *  from the same experiment. The minimum information that dataset
00048  *  needs to contain is experiment name/number and run numbers.
00049  *  In addition to that it provides additional options for locating
00050  *  data files or selecting specific format (HDF5 vs XTC) of data.
00051  *
00052  *  Some options (like experiment name) can be specified on an
00053  *  application-wide basis by using static methods of this class.
00054  *  Dataset instances can override those global options by providing
00055  *  their own values in a constructor.
00056  *
00057  *  Constructor of Dataset class takes either a file name or a string 
00058  *  representation of the dataset which is a list of options separated 
00059  *  by colon (:) characters:
00060  *
00061  *    @code
00062  *    option[:option[:option...]]
00063  *    @endcode
00064  *
00065  *  Each @c option can be a key-value pair separated by equal sign
00066  *  or just a key without value:
00067  *
00068  *    @code
00069  *    key1=value1:key2=value2:key3:key4
00070  *    @endcode
00071  *
00072  *  Here is the set of common key names and meaning of their corresponding
00073  *  values:
00074  *
00075  *  @li @b exp - specifies experiment, corresponding value can be either
00076  *      experiment number (exp=197), experiment name (exp=cxi59712), or
00077  *      instrument and experiment names separated by slash (exp=CXI/cxi59712)
00078  *  @li @b run - specifies set of runs belonging to dataset which is a
00079  *      comma-separated list of run ranges, range can be a single run number
00080  *      or two numbers separated by a dash (run=12,15-18,25)
00081  *  @li @b xtc - selects XTC files as input, no value for this key
00082  *  @li @b smd - selects small XTC files as input (which is default), no value for this key
00083  *  @li @b idx - selects index files as input, no value for this key
00084  *  @li @b h5 - selects HDF5 files as input, no value for this key
00085  *  @li @b live - selects live XTC files as input, no value for this key
00086  *  @li @b dir - specifies directory containing input files, by default
00087  *      files are located in the standard experiment directories under
00088  *      /reg/d/psdm directory
00089  *  @li @b one-stream - this option works with XTC or smd input only, if the option
00090  *      is given a value (number) then it specifies stream number to read,
00091  *      without value arbitrary single stream will be selected. If option
00092  *      is not specified then data from all streams are read.
00093  *  @li @b stream - specifies set of streams belonging to dataset which is a
00094  *      comma-separated list of stream ranges, range can be a single stream number
00095  *      or two numbers separated by a dash (stream=0,2-4,11)
00096  *
00097  *  If the same key appears multiple times in the input string the latter
00098  *  values for this key override earlier values.
00099  *
00100  *  If the string passed to a constructor looks like a file name then dataset 
00101  *  constructor tries to guess various pieces of information from the file name
00102  *  itself. To look like a file name the string should either:
00103  *  - contain no colon character and contain at least one dot, or
00104  *  - contain a colon character that is followed by a slash or digit (for 
00105  *    something like root://host:port/path)
00106  *
00107  *  <hr>
00108  *
00109  *  This software was developed for the LCLS project.  If you use all or 
00110  *  part of it, please give an appropriate acknowledgment.
00111  *
00112  *  @version $Id: Dataset.h 10652 2015-09-09 20:12:56Z davidsch@SLAC.STANFORD.EDU $
00113  *
00114  *  @author Andy Salnikov
00115  */
00116 
00117 class Dataset  {  // Holds per-dataset options (key/value map), run and stream ranges, and experiment identity
00118 public:
00119 
00120   typedef std::pair<unsigned, unsigned> RunRange;     // one run range; presumably (first, last) run number -- confirm in implementation
00121   typedef std::vector<RunRange> Runs;                 // list of run ranges
00122   typedef std::pair<unsigned, unsigned> StreamRange;  // one stream range; presumably (first, last) stream number -- confirm in implementation
00123   typedef std::vector<StreamRange> Streams;           // list of stream ranges
00124   typedef std::vector<std::string> NameList;          // list of names, used for file names (see files())
00125 
00126   /**
00127    *  @brief Sets application-wide experiment name.
00128    *
00129    *  Experiment name can be specified with the syntax acceptable for exp key.
00130    *  Individual datasets can override application-wide value.
00131    *
00132    *  @param[in] exp  new application-wide experiment name
00133    *
00134    *  @throw ExpNameException thrown if specified name is not found
00135    */
00136   static void setAppExpName(const std::string& exp);
00137 
00138   /**
00139    *  @brief Sets default application-wide option.
00140    *
00141    *  Sets default application-wide value for an option. Individual datasets can override
00142    *  application-wide values. The key "run" is ignored by this method, a warning
00143    *  message is printed. With key "exp" this is equivalent to calling setAppExpName(value).
00144    *
00145    *  @param[in] key   Key name
00146    *  @param[in] value New application-wide value for this key
00147    *
00148    *  @throw ExpNameException thrown if key is "exp" and specified experiment name is not found
00149    */
00150   static void setDefOption(const std::string& key, const std::string& value);
00151 
00152   /// Default constructor makes empty dataset
00153   Dataset();
00154 
00155   /**
00156    *  @brief Make dataset instance
00157    *
00158    *  Constructor takes string representation of a dataset as described in
00159    *  class-level documentation. Options specified in the string override
00160    *  default application-wide options. Note that the constructor is not
00161    *  declared explicit, so a std::string converts implicitly to a Dataset.
00162    *  @param[in] ds  String representation of dataset.
00163    *
00164    *  @throw ExpNameException thrown if specified name is not found
00165    *  @throw RunNumberException thrown if specified run list has incorrect format.
00166    */
00167   Dataset(const std::string& ds);
00168 
00169   // Destructor (non-virtual)
00170   ~Dataset();
00171 
00172   /**
00173    *  @brief Returns true if the key is defined.
00174    *
00175    *  Key may be defined by either constructor or with a default
00176    *  application-wide option.
00177    *
00178    *  @param[in] key  Key name
00179    */
00180   bool exists(const std::string& key) const;
00181 
00182   /**
00183    *  @brief Returns value for given key or empty string.
00184    *
00185    *  @param[in] key  Key name
00186    */
00187   const std::string& value(const std::string& key) const;
00188 
00189   /// Returns experiment ID or 0 if it has not been defined.
00190   unsigned expID() const;
00191 
00192   /// Returns instrument name or empty string if it has not been defined.
00193   const std::string& instrument() const;
00194 
00195   /// Returns experiment name or empty string if it has not been defined.
00196   const std::string& experiment() const;
00197 
00198   /// Returns the run ranges of this dataset (see the Runs typedef)
00199   const Runs& runs() const;
00200 
00201   /// Returns the stream ranges of this dataset (see the Streams typedef)
00202   const Streams& streams() const;
00203 
00204   /// Returns run numbers as a std::vector<unsigned> instead of as ranges
00205   std::vector<unsigned> runsAsList() const;
00206   
00207   /// Returns stream numbers as a std::vector<unsigned> instead of as ranges
00208   std::vector<unsigned> streamsAsList() const;
00209 
00210   /// Returns true if dataset was specified as a single file name
00211   bool isFile() const { return m_isFile; }
00212   
00213   /// Return the directory name for files, if "dir" option is specified 
00214   /// then it is returned, otherwise some default location for experiment 
00215   /// files is returned.
00216   std::string dirName() const;
00217 
00218   /// Return the list of file names for this dataset
00219   const NameList& files() const;
00220   
00221 protected:
00222 
00223   // Parse XTC file name; presumably extracts dataset information from the name as described in the class docs -- confirm in implementation
00224   void parseXtcFileName(std::string path);
00225   
00226   // Parse HDF file name; presumably extracts dataset information from the name as described in the class docs -- confirm in implementation
00227   void parseHdfFileName(std::string path);
00228   
00229 private:
00230 
00231   typedef std::map<std::string, std::string> Key2Val;  // option key -> option value
00232 
00233   bool m_isFile;             ///< True if dataset is a file name 
00234   Key2Val m_key2val;         ///< Mapping of keys to values
00235   Runs m_runs;               ///< Set of runs
00236   Streams m_streams;         ///< Set of streams
00237   unsigned m_expId;          ///< Experiment ID
00238   std::string m_instrName;   ///< Instrument name
00239   std::string m_expName;     ///< Experiment name
00240   mutable NameList m_files;  ///< List of file names for this dataset (mutable; presumably filled lazily by the const files() accessor -- confirm)
00241 
00242   static Key2Val s_key2val;         ///< Application-wide options
00243   static unsigned s_expId;          ///< Application-wide experiment ID
00244   static std::string s_instrName;   ///< Application-wide instrument name
00245   static std::string s_expName;     ///< Application-wide experiment name
00246 
00247 };
00248 
00249 } // namespace IData
00250 
00251 #endif // IDATA_DATASET_H

Generated on 19 Dec 2016 for PSANAclasses by  doxygen 1.4.7