version  0.0.1
Defines the C++ API for MsPASS
ProcessingHistory.h
1 #ifndef _PROCESSING_HISTORY_H_
2 #define _PROCESSING_HISTORY_H_
3 #include <string>
4 #include <list>
5 #include <vector>
6 #include <map>
7 #include <boost/serialization/map.hpp>
8 #include <boost/serialization/serialization.hpp>
9 #include <boost/uuid/uuid.hpp>
10 #include <boost/uuid/uuid_generators.hpp>
11 #include <boost/uuid/uuid_io.hpp>
12 #include <boost/uuid/uuid_serialize.hpp>
13 #include <boost/archive/text_iarchive.hpp>
14 #include <boost/archive/text_oarchive.hpp>
15 #include "mspass/utility/ErrorLogger.h"
16 //#include "mspass/seismic/Ensemble.h"
17 namespace mspass{
18 namespace utility{
/* Tag for the processing state of a data object. Values of this type are
stored in NodeData::status and returned by ProcessingHistory::status().
NOTE(review): the meaning of the individual values is only suggested by their
names and by the is_raw/is_origin/is_volatile/is_saved queries declared later
in this header - confirm against the implementation file. */
24 enum class ProcessingStatus
25 {
26  RAW,
27  ORIGIN,
28  VOLATILE,
29  SAVED,
30  UNDEFINED
31 };
/* Tag for the kind of atomic data object a history record refers to. Values
of this type are stored in NodeData::type and passed as the `typ` argument of
the ProcessingHistory methods declared later in this header.
NOTE(review): presumably SEISMOGRAM and TIMESERIES name the two MsPASS atomic
data classes - confirm, those classes are not visible from this header. */
40 enum class AtomicType
41 {
42  SEISMOGRAM,
43  TIMESERIES,
44  UNDEFINED
45 };
/* String constant with the value "NODEDATA_AT_SAVE".
NOTE(review): the name suggests it is the key/id used for history records
written at save time; it is not referenced anywhere in this header, so confirm
its use in the implementation file. */
65 const std::string SAVED_ID_KEY("NODEDATA_AT_SAVE");
66 
67 
/* Body of class BasicProcessingHistory. The class-head line and the signature
lines of two constructors are elided from this listing. The class holds a job
name and a job id as strings and supplies getters, setters, assignment and
boost serialization for that pair. */
70 {
71 public:
/* Body of a constructor whose signature line is elided - presumably the
default constructor. Both strings are set to empty. */
73  {
74  jid=std::string();
75  jnm=std::string();
76  };
/* Virtual destructor so derived classes can be destroyed through a base
pointer. */
77  virtual ~BasicProcessingHistory(){};
/* Construct from a job name and a job id. The arguments are taken by const
reference, matching set_jobid, so each string is copied only once. */
78  BasicProcessingHistory(const std::string& jobname,const std::string& jobid)
79  {
80  jid=jobid;
81  jnm=jobname;
82  };
/* Body of a constructor whose signature line is elided - presumably the copy
constructor, since it copies jid and jnm from an object named parent. */
84  {
85  jid=parent.jid;
86  jnm=parent.jnm;
87  };
88 
/* Base implementation always returns 0; ProcessingHistory overrides it. */
96  virtual size_t number_of_stages(){return 0;};
/* Return a copy of the job id string. */
97  std::string jobid() const
98  {
99  return jid;
100  };
/* Replace the job id with newjid. */
101  void set_jobid(const std::string& newjid)
102  {
103  jid=newjid;
104  };
/* Return a copy of the job name string. */
105  std::string jobname() const
106  {
107  return jnm;
108  };
/* Replace the job name with jobname. Takes a const reference for consistency
with set_jobid and to avoid an extra copy of the argument. */
109  void set_jobname(const std::string& jobname)
110  {
111  jnm=jobname;
112  };
/* Copy assignment: copies both strings unless parent is this same object. */
113  BasicProcessingHistory& operator=(const BasicProcessingHistory& parent)
114  {
115  if(this!=(&parent))
116  {
117  jnm=parent.jnm;
118  jid=parent.jid;
119  }
120  return *this;
121  }
122 protected:
/* Job id and job name; protected so derived classes can access them. */
123  std::string jid;
124  std::string jnm;
125 private:
/* boost serialization hook: archives jid then jnm. The version argument is
not used. */
126  friend boost::serialization::access;
127  template<class Archive>
128  void serialize(Archive& ar,const unsigned int version)
129  {
130  ar & jid;
131  ar & jnm;
132  };
133 };
/* Holds properties of data used as input to the algorithm that created an
object. ProcessingHistory stores these records in its `nodes` multimap and
returns them from get_nodes(), inputs() and current_nodedata(). All data
members are public. */
142 class NodeData
143 {
144 public:
 /* Processing status tag for this record. */
146  mspass::utility::ProcessingStatus status;
 /* Identifier string for this record.
 NOTE(review): presumably the uuid of the data object it describes - confirm. */
148  std::string uuid;
 /* Atomic data type tag for this record. */
150  mspass::utility::AtomicType type;
 /* Integer stage value.
 NOTE(review): presumably a processing-stage counter - confirm in the cc file. */
152  int stage;
 /* Name of the algorithm applied at this stage. */
161  std::string algorithm;
 /* Identifier string paired with `algorithm`.
 NOTE(review): presumably distinguishes instances/parameterizations of the
 algorithm - confirm. */
171  std::string algid;
172  /* These standard elements could be defaulted, but we implement them
173  explicitly for clarity - implemented in the cc file. */
174  NodeData();
175  NodeData(const NodeData& parent);
176  NodeData& operator=(const NodeData& parent);
 /* NOTE(review): the two comparison operators are not const-qualified, so
 they cannot be called on a const NodeData. Changing that requires a matching
 edit to the definitions in the cc file. */
177  bool operator==(const NodeData& other);
178  bool operator!=(const NodeData& other);
179 private:
 /* boost serialization hook: archives the six data members in declaration
 order. The version argument is not used. */
180  friend boost::serialization::access;
181  template<class Archive>
182  void serialize(Archive& ar,const unsigned int version)
183  {
184  ar & status;
185  ar & uuid;
186  ar & type;
187  ar & stage;
188  ar & algorithm;
189  ar & algid;
190  };
191 };
/* Body of class ProcessingHistory: a lightweight class to preserve the
processing chain of atomic objects. The class-head line is elided from this
listing; serialize() below archives a BasicProcessingHistory base subobject and
number_of_stages() is marked override. Except for the small inline getters,
the members are implemented in ProcessingHistory.cc. */
258 {
259 public:
 /* Error log carried with the history; it is archived by serialize(). */
260  ErrorLogger elog;
 /* Construct from a job name and a job id. */
268  ProcessingHistory(const std::string jobnm,const std::string jid);
 /* Copy constructor. */
270  ProcessingHistory(const ProcessingHistory& parent);
 /* Status queries.
 NOTE(review): presumably each tests the current status against the
 ProcessingStatus value of the same name - confirm in the cc file. */
278  bool is_empty() const;
280  bool is_raw()const;
282  bool is_origin() const;
284  bool is_volatile() const;
286  bool is_saved() const;
 /* Return number of processing stages that have been applied to this object. */
303  size_t number_of_stages() override;
 /* NOTE(review): presumably marks this object as the top of a history chain
 with the given algorithm, id and uuid; define_as_raw defaults to false -
 confirm semantics in the cc file. */
350  void set_as_origin(const std::string alg,const std::string algid,
351  const std::string uuid,const AtomicType typ, bool define_as_raw=false);
 /* Returns a string; takes the parent histories as a vector of pointers.
 NOTE(review): presumably the returned string is the (new) uuid - confirm. */
399  std::string new_ensemble_process(const std::string alg,const std::string algid,
400  const AtomicType typ,const std::vector<ProcessingHistory*> parents,
401  const bool create_newid=true);
 /* Add one datum as an input for current data. */
418  void add_one_input(const ProcessingHistory& data_to_add);
 /* Define several data objects as inputs. */
427  void add_many_inputs(const std::vector<ProcessingHistory*>& d);
428 
 /* Merge the history nodes from another. */
434  void merge(const ProcessingHistory& data_to_add);
 /* Method to use with a spark reduce algorithm. */
453  void accumulate(const std::string alg,const std::string algid,
454  const AtomicType typ,const ProcessingHistory& newinput);
 /* Clean up inconsistent uuids that can be produced by reduce. */
480  std::string clean_accumulate_uuids();
 /* Define this algorithm as a one-to-one map of same type data. newstatus
 defaults to ProcessingStatus::VOLATILE. */
504  std::string new_map(const std::string alg,const std::string algid,
505  const AtomicType typ,
506  const ProcessingStatus newstatus=ProcessingStatus::VOLATILE);
 /* Define this algorithm as a one-to-one map. This overload additionally
 takes a history object, data_to_clone. */
535  std::string new_map(const std::string alg,const std::string algid,
536  const AtomicType typ,
537  const ProcessingHistory& data_to_clone,
538  const ProcessingStatus newstatus=ProcessingStatus::VOLATILE);
539 
 /* Prepare the current data for saving. */
575  std::string map_as_saved(const std::string alg,const std::string algid,
576  const AtomicType typ);
 /* NOTE(review): presumably resets the history to an empty state - confirm. */
578  void clear();
 /* Returns a multimap of NodeData by value.
 NOTE(review): presumably a copy of the protected `nodes` member - confirm. */
585  std::multimap<std::string,mspass::utility::NodeData> get_nodes() const;
586 
 /* Return the current stage value. */
595  int stage() const
596  {
597  return current_stage;
598  };
 /* Return the current status value. */
600  ProcessingStatus status() const
601  {
602  return current_status;
603  };
 /* Return a copy of the current id string. */
609  std::string id() const
610  {
611  return current_id;
612  };
 /* Return the (algorithm, algid) pair stored in this object. */
614  std::pair<std::string,std::string> created_by() const
615  {
616  std::pair<std::string,std::string> result(algorithm,algid);
617  return result;
618  }
 /* NOTE(review): presumably packs the current_* members into a NodeData -
 confirm in the cc file. */
625  NodeData current_nodedata() const;
 /* NOTE(review): presumably generates and installs a new uuid and returns
 it - confirm in the cc file. */
632  std::string newid();
 /* Input counts: the first overload takes no argument, the second takes a
 uuid string. */
638  int number_inputs()const;
647  int number_inputs(const std::string uuidstr)const;
648 
 /* NOTE(review): presumably replaces current_id with newid - confirm. */
657  void set_id(const std::string newid);
658 
 /* Return a list of data that define the inputs to a given uuid. */
671  std::list<mspass::utility::NodeData> inputs(const std::string id_to_find) const;
672 
675 /* We make this protected to simplify expected extensions. In particular,
676 the process of reconstructing history is a complicated process we don't
677 want to add as baggage to regular data. Hence, tools to reconstruct history
678 (provenance) are expected to extend this class. */
679 protected:
680  /* This map defines connections of each data object to others. Key is the
681  uuid of a given object and the values (second) associated with
682  that key are the inputs used to create the data defined by the key uuid */
683  std::multimap<std::string,mspass::utility::NodeData> nodes;
684 private:
685  /* This set of private variables are the values of attributes for
686  the same concepts in the NodeData struct/class. We break them out as
687  single variables because they are not always set lumped together. Hence
688  there are also separate getters and setters for each. */
689  ProcessingStatus current_status;
690  /* uuid of current data object */
691  std::string current_id;
692  int current_stage;
693  AtomicType mytype;
694  std::string algorithm;
695  std::string algid;
696 
697 
 /* boost serialization hook: archives the BasicProcessingHistory base
 subobject first, then nodes, the current_* members, mytype, algorithm, algid
 and finally elog. The version argument is not used. */
698  friend boost::serialization::access;
699  template<class Archive>
700  void serialize(Archive& ar,const unsigned int version)
701  {
702  ar & boost::serialization::base_object<BasicProcessingHistory>(*this);
703  ar & nodes;
704  ar & current_status;
705  ar & current_id;
706  ar & current_stage;
707  ar & mytype;
708  ar & algorithm;
709  ar & algid;
710  ar & elog;
711  };
712 };
713 /* function prototypes of helpers */
714 
/* Add the history of datum d as an input of his, but only when d.live()
returns true.

Tdata must provide a live() method. The history is reached by a dynamic_cast
of &d to const ProcessingHistory*. That cast yields a null pointer when the
dynamic type of d does not derive from ProcessingHistory; such a datum has no
history to add, so it is skipped instead of dereferencing the null pointer
(which would be undefined behaviour).

d   - datum whose history is to be registered as an input.
his - history object that receives the input via add_one_input. */
730 template <typename Tdata>
731  void append_input(const Tdata& d, ProcessingHistory& his)
732 {
733  if(d.live())
734  {
735  const ProcessingHistory *ptr=dynamic_cast<const ProcessingHistory*>(&d);
736  if(ptr!=nullptr) his.add_one_input(*ptr);
737  }
738 };
739 /* this pair of functions did things that were methods in an earlier
740 prototype. What they do is still useful but making them functions
741 reduced the baggage in the ProcessingHistory class. */
742 
/* Free function taking a history object and returning a list of
(int, string, string) tuples.
NOTE(review): presumably each tuple is (stage, algorithm, algid) for one
processing step recorded in h - confirm in the implementation file.
NOTE(review): std::tuple is used here but <tuple> is not among the includes
visible at the top of this header; it is only available transitively. */
756 std::list<std::tuple<int,std::string,std::string>>
757  algorithm_history(const ProcessingHistory& h);
/* Free function taking a history object plus an algorithm name and algorithm
id, and returning a list of strings.
NOTE(review): presumably the returned strings are the uuids of data in h that
were produced by the algorithm identified by (alg, algid) - confirm in the
implementation file. */
777 std::list<std::string> algorithm_outputs(const ProcessingHistory& h,
778  const std::string alg, const std::string algid);
779 } // end utility namespace
780 } // End mspass namespace
781 #endif
Definition: ProcessingHistory.h:70
virtual size_t number_of_stages()
Definition: ProcessingHistory.h:96
Container to hold error logs for a data object.
Definition: ErrorLogger.h:61
Holds properties of data used as input to algorithm that created this object.
Definition: ProcessingHistory.h:143
std::string algorithm
Name of the algorithm applied at this stage.
Definition: ProcessingHistory.h:161
std::string uuid
Definition: ProcessingHistory.h:148
mspass::utility::ProcessingStatus status
Definition: ProcessingHistory.h:146
int stage
Definition: ProcessingHistory.h:152
std::string algid
Definition: ProcessingHistory.h:171
mspass::utility::AtomicType type
Definition: ProcessingHistory.h:150
Lightweight class to preserve the processing chain of atomic objects.
Definition: ProcessingHistory.h:258
size_t number_of_stages() override
Return number of processing stages that have been applied to this object.
Definition: ProcessingHistory.cc:148
void set_as_origin(const std::string alg, const std::string algid, const std::string uuid, const AtomicType typ, bool define_as_raw=false)
Definition: ProcessingHistory.cc:161
NodeData current_nodedata() const
Definition: ProcessingHistory.cc:712
ProcessingStatus status() const
Definition: ProcessingHistory.h:600
ProcessingHistory(const std::string jobnm, const std::string jid)
bool is_saved() const
Definition: ProcessingHistory.cc:141
void clear()
Definition: ProcessingHistory.cc:677
bool is_raw() const
Definition: ProcessingHistory.cc:120
bool is_origin() const
Definition: ProcessingHistory.cc:127
void merge(const ProcessingHistory &data_to_add)
Merge the history nodes from another.
Definition: ProcessingHistory.cc:463
std::pair< std::string, std::string > created_by() const
Definition: ProcessingHistory.h:614
std::multimap< std::string, mspass::utility::NodeData > get_nodes() const
Definition: ProcessingHistory.cc:657
void add_many_inputs(const std::vector< ProcessingHistory * > &d)
Define several data objects as inputs.
Definition: ProcessingHistory.cc:334
std::string new_map(const std::string alg, const std::string algid, const AtomicType typ, const ProcessingStatus newstatus=ProcessingStatus::VOLATILE)
Define this algorithm as a one-to-one map of same type data.
ProcessingHistory()
Definition: ProcessingHistory.cc:86
int number_inputs() const
Definition: ProcessingHistory.cc:696
bool is_volatile() const
Definition: ProcessingHistory.cc:134
bool is_empty() const
Definition: ProcessingHistory.cc:114
int number_inputs(const std::string uuidstr) const
std::string new_ensemble_process(const std::string alg, const std::string algid, const AtomicType typ, const std::vector< ProcessingHistory * > parents, const bool create_newid=true)
Definition: ProcessingHistory.cc:187
std::string newid()
Definition: ProcessingHistory.cc:700
std::string map_as_saved(const std::string alg, const std::string algid, const AtomicType typ)
Prepare the current data for saving.
Definition: ProcessingHistory.cc:420
ProcessingHistory & operator=(const ProcessingHistory &parent)
Definition: ProcessingHistory.cc:741
void add_one_input(const ProcessingHistory &data_to_add)
Add one datum as an input for current data.
Definition: ProcessingHistory.cc:285
std::string id() const
Definition: ProcessingHistory.h:609
std::string new_map(const std::string alg, const std::string algid, const AtomicType typ, const ProcessingHistory &data_to_clone, const ProcessingStatus newstatus=ProcessingStatus::VOLATILE)
Define this algorithm as a one-to-one map.
void set_id(const std::string newid)
Definition: ProcessingHistory.cc:708
std::list< mspass::utility::NodeData > inputs(const std::string id_to_find) const
Return a list of data that define the inputs to a given uuid.
Definition: ProcessingHistory.cc:724
std::string clean_accumulate_uuids()
Clean up inconsistent uuids that can be produced by reduce.
Definition: ProcessingHistory.cc:584
void accumulate(const std::string alg, const std::string algid, const AtomicType typ, const ProcessingHistory &newinput)
Method to use with a spark reduce algorithm.
Definition: ProcessingHistory.cc:509
int stage() const
Definition: ProcessingHistory.h:595