#ifndef IDATA_DATASET_H
#define IDATA_DATASET_H

//--------------------------------------------------------------------------
// File and Version Information:
//  $Id: Dataset.h 10652 2015-09-09 20:12:56Z davidsch@SLAC.STANFORD.EDU $
//
// Description:
//  Class Dataset.
//
//------------------------------------------------------------------------

//-----------------
// C/C++ Headers --
//-----------------
#include <string>
#include <map>
#include <vector>
#include <utility>

//----------------------
// Base Class Headers --
//----------------------

//-------------------------------
// Collaborating Class Headers --
//-------------------------------

//------------------------------------
// Collaborating Class Declarations --
//------------------------------------

//    ---------------------
//    -- Class Interface --
//    ---------------------

namespace IData {

/// @addtogroup IData

/**
 *  @ingroup IData
 *
 *  @brief C++ class representing dataset concept.
 *
 *  Dataset is defined currently as a collection of one or more runs
 *  from the same experiment. The minimum information that a dataset
 *  needs to contain is experiment name/number and run numbers.
 *  In addition to that it provides additional options for locating
 *  data files or selecting a specific format (HDF5 vs XTC) of data.
 *
 *  Some options (like experiment name) can be specified on an
 *  application-wide basis by using static methods of this class.
 *  Dataset instances can override those global options by providing
 *  their own values in a constructor.
 *
 *  Constructor of Dataset class takes either a file name or a string
 *  representation of the dataset which is a list of options separated
 *  by colon (:) characters:
 *
 *  @code
 *  option[:option[:option...]]
 *  @endcode
 *
 *  Each @c option can be a key-value pair separated by equal sign
 *  or just a key without value:
 *
 *  @code
 *  key1=value1:key2=value2:key3:key4
 *  @endcode
 *
 *  Here is the set of common key names and meaning of their corresponding
 *  values:
 *
 *  @li @b exp - specifies experiment, corresponding value can be either
 *      experiment number (exp=197), experiment name (exp=cxi59712), or
 *      instrument and experiment names separated by slash (exp=CXI/cxi59712)
 *  @li @b run - specifies set of runs belonging to dataset which is a
 *      comma-separated list of run ranges, range can be a single run number
 *      or two numbers separated by dash (run=12,15-18,25)
 *  @li @b xtc - selects XTC files as input, no value for this key
 *  @li @b smd - selects small XTC files as input (which is default), no value for this key
 *  @li @b idx - selects index files as input, no value for this key
 *  @li @b h5 - selects HDF5 files as input, no value for this key
 *  @li @b live - selects live XTC files as input, no value for this key
 *  @li @b dir - specifies directory containing input files, by default
 *      files are located in the standard experiment directories under
 *      /reg/d/psdm directory
 *  @li @b one-stream - this option works with XTC or smd input only, if the option
 *      is given a value (number) then it specifies stream number to read,
 *      without value an arbitrary single stream will be selected. If the option
 *      is not specified then data from all streams are read.
 *  @li @b stream - specifies set of streams belonging to dataset which is a
 *      comma-separated list of stream ranges, range can be a single stream number
 *      or two numbers separated by dash (stream=0,2-4,11)
 *
 *  If the same key appears multiple times in the input string the later
 *  values for this key override earlier values.
 *
 *  If the string passed to a constructor looks like a file name then the dataset
 *  constructor tries to guess various pieces of information from the file name
 *  itself. To look like a file name the string should either:
 *   - not contain a colon character and contain at least one dot
 *   - if a colon character is in the string, it is followed by a slash or digit
 *     (for something like root://host:port/path)
 *
 *  <hr>
 *
 *  This software was developed for the LCLS project.  If you use all or
 *  part of it, please give an appropriate acknowledgment.
 *
 *  @version $Id: Dataset.h 10652 2015-09-09 20:12:56Z davidsch@SLAC.STANFORD.EDU $
 *
 *  @author Andy Salnikov
 */

class Dataset {
public:

  /// Inclusive pair of run numbers (a single run is a range of one; see "run" key above).
  typedef std::pair<unsigned, unsigned> RunRange;
  /// Collection of run ranges.
  typedef std::vector<RunRange> Runs;
  /// Pair of stream numbers describing a stream range (see "stream" key above).
  typedef std::pair<unsigned, unsigned> StreamRange;
  /// Collection of stream ranges.
  typedef std::vector<StreamRange> Streams;
  /// List of file names.
  typedef std::vector<std::string> NameList;

  /**
   *  @brief Sets application-wide experiment name.
   *
   *  Experiment name can be specified with the syntax acceptable for exp key.
   *  Individual datasets can override application-wide value.
   *
   *  @param[in] exp   new application-wide experiment name
   *
   *  @throw ExpNameException thrown if specified name is not found
   */
  static void setAppExpName(const std::string& exp);

  /**
   *  @brief Sets default application-wide option.
   *
   *  Sets default application-wide value for an option. Individual datasets can override
   *  application-wide values. The key "run" is ignored by this method, a warning
   *  message is printed. With key "exp" this is equivalent to calling setAppExpName(value).
   *
   *  @param[in] key   Key name
   *  @param[in] value New application-wide value for this key
   *
   *  @throw ExpNameException thrown if key is "exp" and specified experiment name is not found
   */
  static void setDefOption(const std::string& key, const std::string& value);

  /// Default constructor makes empty dataset
  Dataset();

  /**
   *  @brief Make dataset instance
   *
   *  Constructor takes string representation of a dataset as described in
   *  class-level documentation. Options specified in the string override
   *  default application-wide options.
   *
   *  @note This constructor is not @c explicit, so a std::string converts
   *        implicitly to a Dataset; existing callers may rely on that.
   *
   *  @param[in] ds  String representation of dataset.
   *
   *  @throw ExpNameException thrown if specified name is not found
   *  @throw RunNumberException thrown if specified run list has incorrect format.
   */
  Dataset(const std::string& ds);

  /// Destructor (non-virtual).
  ~Dataset();

  /**
   *  @brief Returns true if the key is defined.
   *
   *  Key may be defined by either constructor or with a default
   *  application-wide option.
   *
   *  @param[in] key   Key name
   *
   *  @return true if the key is defined, false otherwise
   */
  bool exists(const std::string& key) const;

  /**
   *  @brief Returns value for given key or empty string.
   *
   *  @param[in] key   Key name
   *
   *  @return Reference to the value stored for the key, or to an empty string
   */
  const std::string& value(const std::string& key) const;

  /// Returns experiment ID or 0 if it has not been defined.
  unsigned expID() const;

  /// Returns instrument name or empty string if it has not been defined.
  const std::string& instrument() const;

  /// Returns experiment name or empty string if it has not been defined.
  const std::string& experiment() const;

  /// Returns set of run numbers as a collection of run ranges
  const Runs& runs() const;

  /// Returns set of stream numbers as a collection of stream ranges
  const Streams& streams() const;

  /// Returns run numbers as a plain vector of numbers.
  /// NOTE(review): presumably the ranges from runs() expanded into individual
  /// run numbers -- confirm against the implementation.
  std::vector<unsigned> runsAsList() const;

  /// Returns stream numbers as a plain vector of numbers.
  /// NOTE(review): presumably the ranges from streams() expanded into individual
  /// stream numbers -- confirm against the implementation.
  std::vector<unsigned> streamsAsList() const;

  /// Returns true if dataset was specified as a single file name
  bool isFile() const { return m_isFile; }

  /// Return the directory name for files; if the "dir" option is specified
  /// then it is returned, otherwise some default location for experiment
  /// files is returned.
  std::string dirName() const;

  /// Return the list of file names for this dataset
  const NameList& files() const;

protected:

  /// Parse XTC file name
  void parseXtcFileName(std::string path);

  /// Parse HDF file name
  void parseHdfFileName(std::string path);

private:

  typedef std::map<std::string, std::string> Key2Val;

  bool m_isFile;            ///< True if dataset is a file name
  Key2Val m_key2val;        ///< Mapping of keys to values
  Runs m_runs;              ///< Set of runs
  Streams m_streams;        ///< Set of streams
  unsigned m_expId;         ///< Experiment ID
  std::string m_instrName;  ///< Instrument name
  std::string m_expName;    ///< Experiment name
  mutable NameList m_files; ///< List of file names for this dataset (mutable: may be modified from const methods)

  static Key2Val s_key2val;        ///< Application-wide options
  static unsigned s_expId;         ///< Application-wide experiment ID
  static std::string s_instrName;  ///< Application-wide instrument name
  static std::string s_expName;    ///< Application-wide experiment name

};

} // namespace IData

#endif // IDATA_DATASET_H