version  0.0.1
Defines the C++ API for MsPASS
Loading...
Searching...
No Matches
ProcessingHistory.h
1#ifndef _PROCESSING_HISTORY_H_
2#define _PROCESSING_HISTORY_H_
3#include "mspass/utility/ErrorLogger.h"
4#include <boost/archive/text_iarchive.hpp>
5#include <boost/archive/text_oarchive.hpp>
6#include <boost/serialization/map.hpp>
7#include <boost/serialization/serialization.hpp>
8#include <boost/uuid/uuid.hpp>
9#include <boost/uuid/uuid_generators.hpp>
10#include <boost/uuid/uuid_io.hpp>
11#include <boost/uuid/uuid_serialize.hpp>
12#include <list>
13#include <map>
14#include <string>
15#include <vector>
16// #include "mspass/seismic/Ensemble.h"
17namespace mspass {
18namespace utility {
24enum class ProcessingStatus { RAW, ORIGIN, VOLATILE, SAVED, UNDEFINED };
33enum class AtomicType { SEISMOGRAM, TIMESERIES, UNDEFINED };
53const std::string SAVED_ID_KEY("NODEDATA_AT_SAVE");
54
57public:
59 jid = std::string();
60 jnm = std::string();
61 };
62 virtual ~BasicProcessingHistory() {};
63 BasicProcessingHistory(const std::string jobname, const std::string jobid) {
64 jid = jobid;
65 jnm = jobname;
66 };
68 jid = parent.jid;
69 jnm = parent.jnm;
70 };
71
79 virtual size_t number_of_stages() { return 0; };
80 std::string jobid() const { return jid; };
81 void set_jobid(const std::string &newjid) { jid = newjid; };
82 std::string jobname() const { return jnm; };
83 void set_jobname(const std::string jobname) { jnm = jobname; };
84 BasicProcessingHistory &operator=(const BasicProcessingHistory &parent) {
85 if (this != (&parent)) {
86 jnm = parent.jnm;
87 jid = parent.jid;
88 }
89 return *this;
90 }
91
92protected:
93 std::string jid;
94 std::string jnm;
95
96private:
97 friend boost::serialization::access;
98 template <class Archive>
99 void serialize(Archive &ar, const unsigned int version) {
100 ar & jid;
101 ar & jnm;
102 };
103};
113class NodeData {
114public:
116 mspass::utility::ProcessingStatus status;
118 std::string uuid;
120 mspass::utility::AtomicType type;
123 int stage;
132 std::string algorithm;
142 std::string algid;
143 /* These standard elements could be defaulted, but we implement them
144 explicitly for clarity - implemented in the cc file. */
145 NodeData();
146 NodeData(const NodeData &parent);
147 NodeData &operator=(const NodeData &parent);
148 bool operator==(const NodeData &other);
149 bool operator!=(const NodeData &other);
150
151private:
152 friend boost::serialization::access;
153 template <class Archive>
154 void serialize(Archive &ar, const unsigned int version) {
155 ar & status;
156 ar & uuid;
157 ar & type;
158 ar & stage;
159 ar & algorithm;
160 ar & algid;
161 };
162};
227public:
228 ErrorLogger elog;
236 ProcessingHistory(const std::string jobnm, const std::string jid);
246 bool is_empty() const;
249 bool is_raw() const;
252 bool is_origin() const;
255 bool is_volatile() const;
258 bool is_saved() const;
276 size_t number_of_stages() override;
323 void set_as_origin(const std::string alg, const std::string algid,
324 const std::string uuid, const AtomicType typ,
325 bool define_as_raw = false);
373 std::string
374 new_ensemble_process(const std::string alg, const std::string algid,
375 const AtomicType typ,
376 const std::vector<ProcessingHistory *> parents,
377 const bool create_newid = true);
394 void add_one_input(const ProcessingHistory &data_to_add);
403 void add_many_inputs(const std::vector<ProcessingHistory *> &d);
404
410 void merge(const ProcessingHistory &data_to_add);
429 void accumulate(const std::string alg, const std::string algid,
430 const AtomicType typ, const ProcessingHistory &newinput);
456 std::string clean_accumulate_uuids();
480 std::string
481 new_map(const std::string alg, const std::string algid, const AtomicType typ,
482 const ProcessingStatus newstatus = ProcessingStatus::VOLATILE);
511 std::string
512 new_map(const std::string alg, const std::string algid, const AtomicType typ,
513 const ProcessingHistory &data_to_clone,
514 const ProcessingStatus newstatus = ProcessingStatus::VOLATILE);
515
551 std::string map_as_saved(const std::string alg, const std::string algid,
552 const AtomicType typ);
554 void clear();
561 std::multimap<std::string, mspass::utility::NodeData> get_nodes() const;
562
571 int stage() const { return current_stage; };
573 ProcessingStatus status() const { return current_status; };
579 std::string id() const { return current_id; };
581 std::pair<std::string, std::string> created_by() const {
582 std::pair<std::string, std::string> result(algorithm, algid);
583 return result;
584 }
598 std::string newid();
604 int number_inputs() const;
613 int number_inputs(const std::string uuidstr) const;
614
623 void set_id(const std::string newid);
624
637 std::list<mspass::utility::NodeData>
638 inputs(const std::string id_to_find) const;
639
642 /* We make this protected to simplify expected extensions. In particular,
643 the process of reconstructing history is a complicated process we don't
644 want to add as baggage to regular data. Hence, tools to reconstruct history
645 (provenance) are expected to extend this class. */
646protected:
647 /* This map defines connections of each data object to others. Key is the
648 uuid of a given object and the values (second) associated with
649 that key are the inputs used to create the data defined by the key uuid */
650 std::multimap<std::string, mspass::utility::NodeData> nodes;
651
652private:
653 /* This set of private variables are the values of attributes for
654 the same concepts in the NodeData struct/class. We break them out as
655 single variables because they are not always set lumped together. Hence
656 there are also separate getters and setters for each. */
657 ProcessingStatus current_status;
658 /* uuid of current data object */
659 std::string current_id;
660 int current_stage;
661 AtomicType mytype;
662 std::string algorithm;
663 std::string algid;
664
665 friend boost::serialization::access;
666 template <class Archive>
667 void serialize(Archive &ar, const unsigned int version) {
668 ar &boost::serialization::base_object<BasicProcessingHistory>(*this);
669 ar & nodes;
670 ar & current_status;
671 ar & current_id;
672 ar & current_stage;
673 ar & mytype;
674 ar & algorithm;
675 ar & algid;
676 ar & elog;
677 };
678};
679/* function prototypes of helpers */
680
696template <typename Tdata>
697void append_input(const Tdata &d, ProcessingHistory &his) {
698 if (d.live()) {
699 const ProcessingHistory *ptr = dynamic_cast<const ProcessingHistory *>(&d);
700 his.add_one_input(*ptr);
701 }
702};
703/* this pair of functions did things that were methods in an earlier
704prototype. What they do is still useful but making them functions
705reduced the baggage in the ProcessingHistory class. */
706
720std::list<std::tuple<int, std::string, std::string>>
721algorithm_history(const ProcessingHistory &h);
741std::list<std::string> algorithm_outputs(const ProcessingHistory &h,
742 const std::string alg,
743 const std::string algid);
744} // namespace utility
745} // namespace mspass
746#endif
Definition ProcessingHistory.h:56
virtual size_t number_of_stages()
Definition ProcessingHistory.h:79
Container to hold error logs for a data object.
Definition ErrorLogger.h:60
Holds properties of data used as input to algorithm that created this object.
Definition ProcessingHistory.h:113
std::string algorithm
Name of the algorithm applied at this stage.
Definition ProcessingHistory.h:132
std::string uuid
Definition ProcessingHistory.h:118
mspass::utility::ProcessingStatus status
Definition ProcessingHistory.h:116
int stage
Definition ProcessingHistory.h:123
std::string algid
Definition ProcessingHistory.h:142
mspass::utility::AtomicType type
Definition ProcessingHistory.h:120
Lightweight class to preserve the processing chain of atomic objects.
Definition ProcessingHistory.h:226
size_t number_of_stages() override
Return number of processing stages that have been applied to this object.
Definition ProcessingHistory.cc:139
void set_as_origin(const std::string alg, const std::string algid, const std::string uuid, const AtomicType typ, bool define_as_raw=false)
Definition ProcessingHistory.cc:149
NodeData current_nodedata() const
Definition ProcessingHistory.cc:659
ProcessingStatus status() const
Definition ProcessingHistory.h:573
ProcessingHistory(const std::string jobnm, const std::string jid)
bool is_saved() const
Definition ProcessingHistory.cc:133
void clear()
Definition ProcessingHistory.cc:631
bool is_raw() const
Definition ProcessingHistory.cc:114
bool is_origin() const
Definition ProcessingHistory.cc:120
void merge(const ProcessingHistory &data_to_add)
Merge the history nodes from another.
Definition ProcessingHistory.cc:439
std::multimap< std::string, mspass::utility::NodeData > get_nodes() const
Definition ProcessingHistory.cc:612
void add_many_inputs(const std::vector< ProcessingHistory * > &d)
Define several data objects as inputs.
Definition ProcessingHistory.cc:304
std::string new_map(const std::string alg, const std::string algid, const AtomicType typ, const ProcessingStatus newstatus=ProcessingStatus::VOLATILE)
Define this algorithm as a one-to-one map of same type data.
ProcessingHistory()
Definition ProcessingHistory.cc:82
int number_inputs() const
Definition ProcessingHistory.cc:648
std::pair< std::string, std::string > created_by() const
Definition ProcessingHistory.h:581
bool is_volatile() const
Definition ProcessingHistory.cc:127
bool is_empty() const
Definition ProcessingHistory.cc:109
int number_inputs(const std::string uuidstr) const
std::string new_ensemble_process(const std::string alg, const std::string algid, const AtomicType typ, const std::vector< ProcessingHistory * > parents, const bool create_newid=true)
Definition ProcessingHistory.cc:172
std::string newid()
Definition ProcessingHistory.cc:651
std::string map_as_saved(const std::string alg, const std::string algid, const AtomicType typ)
Prepare the current data for saving.
Definition ProcessingHistory.cc:395
ProcessingHistory & operator=(const ProcessingHistory &parent)
Definition ProcessingHistory.cc:687
void add_one_input(const ProcessingHistory &data_to_add)
Add one datum as an input for current data.
Definition ProcessingHistory.cc:263
std::string id() const
Definition ProcessingHistory.h:579
std::string new_map(const std::string alg, const std::string algid, const AtomicType typ, const ProcessingHistory &data_to_clone, const ProcessingStatus newstatus=ProcessingStatus::VOLATILE)
Define this algorithm as a one-to-one map.
void set_id(const std::string newid)
Definition ProcessingHistory.cc:658
std::list< mspass::utility::NodeData > inputs(const std::string id_to_find) const
Return a list of data that define the inputs to a given uuid.
Definition ProcessingHistory.cc:670
std::string clean_accumulate_uuids()
Clean up inconsistent uuids that can be produced by reduce.
Definition ProcessingHistory.cc:546
void accumulate(const std::string alg, const std::string algid, const AtomicType typ, const ProcessingHistory &newinput)
Method to use with a spark reduce algorithm.
Definition ProcessingHistory.cc:474
int stage() const
Definition ProcessingHistory.h:571