def str_to_parameters_dict(parameter_str):
    """
    Parse a user-defined parameter string into an ordered dict.

    The string is expected to look like ``"a, b, c=d, e=f, ..."``. Every
    space character is removed before the string is split on commas.
    Tokens without an ``=`` are treated as positional and are stored under
    generated keys ``arg_0``, ``arg_1``, ... in order of appearance; tokens
    of the form ``key=value`` are stored under ``key``. All values are kept
    as strings.

    :param parameter_str: a parameter string defined by user
    :return: An OrderedDict of parameters and arguments.
    :raises MsPASSError: if a token contains more than one ``=``.
    """
    parsed = collections.OrderedDict()
    positional_count = 0
    for token in parameter_str.replace(" ", "").split(","):
        parts = token.split("=")
        # str.split always yields at least one piece, so only "too many"
        # needs to be rejected here.
        if len(parts) > 2:
            raise MsPASSError(
                "Wrong parameter string format: " + parameter_str + " Fatal"
            )
        if len(parts) == 2:
            name, argument = parts
        else:
            # Un-keyworded argument: synthesize a positional key.
            name = "arg_{arg_index:d}".format(arg_index=positional_count)
            argument = parts[0]
            positional_count += 1
        parsed[name] = argument
    return parsed
def params_to_parameters_dict(*args, **kwargs):
    """
    Capture a function's call arguments in an ordered dict of strings.

    Positional arguments come first and are stored under the generated keys
    ``arg_0``, ``arg_1``, ...; keyword arguments follow under their own
    names. Every value is converted with ``str()``. No file parsing is done
    by this function itself.

    :param args: Non-keyworded arguments
    :param kwargs: Keyworded arguments
    :return: An OrderedDict of parameters and arguments.
    """
    # Positional arguments get synthetic, index-based keys.
    captured = collections.OrderedDict(
        ("arg_{index:d}".format(index=position), str(argument))
        for position, argument in enumerate(args)
    )
    # Keyword arguments keep their own names (and overwrite on a name clash).
    for name, argument in kwargs.items():
        captured[name] = str(argument)
    return captured
def parse_filepath_in_parameters(parameters_dict):
    """
    Replace file-path values in a parameter dict with the parsed file content.

    Each value that is a ``str``, ``bytes`` or ``os.PathLike`` naming an
    existing regular file is replaced in place:

    * ``*.pf`` files are loaded with ``AntelopePf`` and converted with
      ``AntelopePf2dict``;
    * ``*.yaml`` / ``*.yml`` files are loaded with ``yaml.safe_load``.

    Every other value is left untouched.

    :param parameters_dict: parameter dict of a function; it is modified in
        place.
    :return: the same ``parameters_dict`` object, with file paths replaced
        by their parsed content.
    :raises MsPASSError: if a value names an existing file whose extension
        is not one of the supported ones.
    """

    def check_and_parse_file(arg):
        # Anything that is not a path to an existing file passes through.
        if not isinstance(arg, (str, bytes, os.PathLike)):
            return arg
        if not os.path.isfile(arg):
            return arg
        # os.fsdecode turns str / bytes / PathLike into a real str path;
        # str(b"x.pf") would yield "b'x.pf'" and defeat the suffix tests.
        file_path = os.fsdecode(arg)
        if file_path.endswith(".pf"):
            # Convert the PF into a dict so it can be stored in the GTree.
            return AntelopePf2dict(AntelopePf(file_path))
        if file_path.endswith((".yaml", ".yml")):
            with open(file_path, "r") as yaml_file:
                return yaml.safe_load(yaml_file)
        # Currently only pf and yaml files are supported.
        raise MsPASSError("Cannot handle file: " + file_path + " Fatal")

    # Only existing keys are reassigned, so the dict does not change size
    # while it is being iterated.
    for key, value in parameters_dict.items():
        parameters_dict[key] = check_and_parse_file(value)
    return parameters_dict
def parameter_to_GTree(*args, parameters_str=None, **kwargs):
    """
    Build a ParameterGTree describing the parameters of a function call.

    When ``parameters_str`` is given (and non-empty) it is parsed with
    ``str_to_parameters_dict`` and ``args`` / ``kwargs`` are ignored.
    Otherwise the parameters are captured from ``args`` and ``kwargs`` with
    ``params_to_parameters_dict``. In both cases the resulting dict is then
    passed through ``parse_filepath_in_parameters`` before the tree is
    built.

    :param args: Non-keyworded arguments
    :param parameters_str: a parameter string defined by user
    :param kwargs: Keyworded arguments
    :return: a ParameterGTree built from the parameters and arguments.
    """
    if not parameters_str:
        parameters_dict = params_to_parameters_dict(*args, **kwargs)
    else:
        parameters_dict = str_to_parameters_dict(parameters_str)
    return ParameterGTree(parse_filepath_in_parameters(parameters_dict))
class ParameterGTree(collections.OrderedDict):
    """
    Base class for family of objects used to hold an abstraction of a set
    of control parameters for a data processing function.

    The base class abstracts the concept of storing such data in a g-tree
    structure. In the documentation here the tree should be pictured as
    upright in the biological tree analog, i.e. up means higher levels in
    the tree and down means dropping to lower levels in the tree.

    This class inherits from collections.OrderedDict, so users can access
    the data with the common index operation, for example:
    gTree['phases']['travel_time_calculator']['taup']['model_name']
    Adding, deleting and updating entries work the same way as on an
    OrderedDict, with two differences implemented below: a key cannot be
    switched between leaf and branch by assignment, and indexing a missing
    key raises MsPASSError.

    Each node of the g-tree may have leaves and/or a set of branches. A
    value stored at a node is a branch when it is a ParameterGTree and a
    leaf otherwise. Most simple algorithms need only one node with leaves
    made up of name-value pairs. More complex algorithms often need the
    tree structure to describe more complicated data control structures.
    """

    def __init__(self, doc=None):
        """
        Construct from a MongoDB document, which with pymongo is equivalent
        to a python dict. Branches are defined by subdocuments.

        :param doc: an OrderedDict used to populate the tree, or None for an
            empty tree. Values that are OrderedDict instances are wrapped in
            a new ParameterGTree; every other value becomes a leaf.
        :raises MsPASSError: if doc is neither None nor an OrderedDict.
        """
        if doc is not None and not isinstance(doc, collections.OrderedDict):
            raise MsPASSError("[Error] Doc must be an OrderedDict.")
        super().__init__()
        self.control = doc
        if doc is not None:
            for key, val in doc.items():
                if isinstance(val, collections.OrderedDict):
                    # Always build a fresh sub-tree from the subdocument.
                    self[key] = ParameterGTree(val)
                else:
                    self[key] = val

    def _is_branch(self, key):
        """Return True if key exists at this level and holds a sub-tree."""
        return key in self and isinstance(
            collections.OrderedDict.__getitem__(self, key), ParameterGTree
        )

    def _is_leaf(self, key):
        """Return True if key exists at this level and holds a leaf value."""
        return key in self and not isinstance(
            collections.OrderedDict.__getitem__(self, key), ParameterGTree
        )

    def __setitem__(self, key, value):
        """
        Store value under key, turning OrderedDict values into branches.

        :raises MsPASSError: if a branch would replace an existing leaf with
            the same key, or a leaf would replace an existing branch.
        """
        if isinstance(value, collections.OrderedDict):
            # A value that is already a GTree is stored as is; any other
            # OrderedDict is converted first.
            if isinstance(value, ParameterGTree):
                branch = value
            else:
                branch = ParameterGTree(value)
            if self._is_leaf(key):
                raise MsPASSError(
                    "[Warning] There already exists a leaf in this GTree with key '{leaf_key}', Please check again.".format(
                        leaf_key=key
                    )
                )
            collections.OrderedDict.__setitem__(self, key, branch)
        else:
            if self._is_branch(key):
                raise MsPASSError(
                    "[Warning] There already exists a branch in this GTree with key '{branch_key}', Please check again.".format(
                        branch_key=key
                    )
                )
            collections.OrderedDict.__setitem__(self, key, value)

    def __getitem__(self, key):
        """
        Return the value stored under key at this level.

        :raises MsPASSError: if key is not present (instead of KeyError).
        """
        if key not in self:
            raise MsPASSError(
                "[Warning] The key provided ({branch_key}) is not in this GTree, Please check again.".format(
                    branch_key=key
                )
            )
        return collections.OrderedDict.__getitem__(self, key)

    def update_control(self):
        """
        Update the control doc according to the children in this level.

        As the hierarchy data may change in deeper levels, every sub tree is
        checked: the control doc is rebuilt as a copy of this level in which
        each branch is replaced by the result of that branch's asdict(),
        which in turn refreshes the branch's own control doc.
        """
        new_control = self.copy()
        for key in self.get_branch_keys():
            new_control[key] = self[key].asdict()
        self.control = new_control

    def asdict(self):
        """
        Return the dictionary representation of the ParameterGTree instance.

        This function first refreshes the internal control doc with
        update_control() and then returns it.

        NOTE(review): the control doc is produced with self.copy(), so the
        returned object is presumably itself a ParameterGTree (an
        OrderedDict subclass) rather than a plain dict - confirm before
        relying on the exact type. Since ParameterGTree inherits from
        OrderedDict, an instance can also be converted to a
        dict/OrderedDict directly without calling this function.
        """
        self.update_control()
        return self.control

    def get_leaf_keys(self):
        """
        Return the keys of all leaves at the current level of the tree.

        :return: a list of keys, in insertion order, whose values are not
            ParameterGTree instances.
        """
        return [
            key for key, value in self.items() if not isinstance(value, ParameterGTree)
        ]

    def get_branch_keys(self):
        """
        Return the keys for all branches from this level.

        Branches are keyed with a keyword string like leaves.

        :return: a list of keys, in insertion order, whose values are
            ParameterGTree instances.
        """
        return [
            key for key, value in self.items() if isinstance(value, ParameterGTree)
        ]

    def get_branch(self, key):
        """
        Return the branch stored under key at this level.

        The stored ParameterGTree object itself is returned (not a copy), so
        changes made through it are visible in this tree.

        :raises MsPASSError: if key does not name a branch at this level.
        """
        if not self._is_branch(key):
            raise MsPASSError("[Error] Wrong Key, Please check your input key again.")
        return self[key]

    def get_leaf(self, key):
        """
        Return the leaf value stored under key at this level.

        This function only searches this layer and will not return values
        stored in other levels. To search along a path through the tree,
        use "get".

        :raises MsPASSError: if key does not name a leaf at this level.
        """
        if not self._is_leaf(key):
            raise MsPASSError("[Error] Wrong Key, Please check your input key again.")
        return self[key]

    def prune(self, key):
        """
        Remove the branch or leaf defined by key from self.

        :return: the branch/leaf that was removed (like
            get_branch/get_leaf, but self is altered).
        :raises MsPASSError: if key is not present at this level.
        """
        if key not in self:
            raise MsPASSError("[Error] Wrong Key, Please check your input key again.")
        ret_val = collections.OrderedDict.__getitem__(self, key)
        # Delete exactly the requested entry. OrderedDict.popitem() takes a
        # "last" flag, not a key, so it cannot be used for this.
        collections.OrderedDict.__delitem__(self, key)
        return ret_val

    def get(self, key, seperator="."):
        """
        Fetch a leaf value defined by key.

        For leaves at the root node the key can be a simple string. For a
        leaf attached at another level we specify a chain of one or more
        branch names with keys between the specified seperator.

        Examples (all use the default value of seperator):

        1. If we had a leaf node with the key 'name' under the branch name
           'phases' we use the compound key 'phases.name'. Such a tag could,
           for example, contain 'P' to define this set of parameters as
           related to the P phase.
        2. Suppose the phases branch contained a branch with the key
           'travel_time_calculator' that in turn had a branch 'taup' with
           terminal leaf keys under it, e.g. 'model_name'. We would refer to
           that leaf with the string
           'phases.travel_time_calculator.taup.model_name'. That key might,
           for example, have the value 'iasp91' which could be passed to
           obspy's taup calculator.

        The key may also be given as a python list holding the path of keys
        to the desired leaf. The two examples above would then be:

        1. ['phases','name']
        2. ['phases','travel_time_calculator','taup','model_name']

        Users can also use the built-in index operation to access the
        children elements, which is more natural, for example:

        1. ['phases']['name']
        2. ['phases']['travel_time_calculator']['taup']['model_name']

        Note that this overrides dict.get: the second argument is the
        seperator for compound string keys, not a default value.

        :param key: compound string key or list of keys.
        :param seperator: delimiter used to split a string key.
        :return: the leaf value found at the end of the path.
        :raises MsPASSError: if the key is empty, an intermediate key is not
            a branch, or the final key is not a leaf.
        """
        key_list = key.split(seperator) if isinstance(key, str) else key
        if len(key_list) == 0:
            raise MsPASSError("The key is empty, please check again.")
        root = self
        # Walk through every branch named before the final (leaf) key.
        for branch_level in key_list[:-1]:
            root = root.get_branch(branch_level)
        return root.get_leaf(key_list[-1])

    def put(self, key, value, separator="."):
        """
        Putter with the same compound-key behavior as the get method.

        A put creates any branch implied by the key that is not already
        present. As with the setter, users can also use indexing to put new
        data in the GTree; note that when indexing is used, new branches are
        not created automatically and the intermediate branches must be
        added by the user.

        :param key: compound string key or list of keys.
        :param value: the value to store under the final key.
        :param separator: delimiter used to split a string key.
        :raises MsPASSError: if the key is empty, an intermediate key names
            an existing leaf, or the final key names an existing branch.
        """
        key_list = key.split(separator) if isinstance(key, str) else key
        if len(key_list) == 0:
            raise MsPASSError("The key is empty, please check again.")
        root = self
        for branch_level in key_list[:-1]:
            if root._is_leaf(branch_level):
                raise MsPASSError(
                    "[Error] Invalid compound Key, there is a leaf with the same name in level "
                    + branch_level
                    + ". Please check your input key again."
                )
            if not root._is_branch(branch_level):
                # Create the missing intermediate branch on the fly.
                root[branch_level] = ParameterGTree()
            root = root.get_branch(branch_level)
        leaf_key = key_list[-1]
        if root._is_branch(leaf_key):
            raise MsPASSError(
                "[Error] Invalid compound Key, there is a branch with the same name in "
                + leaf_key
                + ". Please check your input key again."
            )
        root[leaf_key] = value
''' This function here is not necessary, user can just use index to add new data, for example: root[branch_level] = ParameterGTree() def sprout(self, key, seperator="."): """ Add an empty branch with tag key. Compound keys are as described in get method above. Similar to the putter method, but create a branch in the end. """ key_list = key if isinstance(key, str): key_list = key.split(seperator) if len(key_list) == 0: raise MsPASSError("The key is empty, please check again.") root = self for i in range(len(key_list) - 1): branch_level = key_list[i] if branch_level in root.get_leaf_keys(): raise MsPASSError( "[Error] Invalid compound Key, there is a leaf with the same name in level " + branch_level + ". Please check your input key again." ) if branch_level not in root.get_branch_keys(): root[branch_level] = ParameterGTree() root = root.get_branch(branch_level) branch_key = key_list[-1] if branch_key in root.get_leaf_keys(): raise MsPASSError( "[Error] Invalid compound Key, there is a leaf with the same name in " + branch_key + ". Please check your input key again." ) root[branch_key] = ParameterGTree() '''