Source code for mspasspy.util.seispp

import os

import yaml
import numpy as np

from mspasspy.ccore.utility import MetadataDefinitions
from mspasspy.ccore.utility import MDtype
from mspasspy.ccore.utility import MsPASSError


def index_data(filebase, db, ext="d3C", verbose=False):
    """
    Import function for data from antelope export_to_mspass.

    This function is an import function for Seismogram objects created
    by the antelope program export_to_mspass.  That program writes
    header data as a yaml file and the sample data as a raw binary
    fwrite of the data matrix (stored in fortran order but written
    as a contiguous block of 3*npts (number of samples) double values).
    This function parses the yaml file and adds three critical metadata
    entries:  dfile, dir, and foff.  The foff values are obtained by
    walking the binary data file one record at a time and calling tell
    at the start of each record.  One document per yaml entry is then
    inserted into the wf collection of the database.  Readers that want
    to read this raw data will need to use dir, dfile, and foff to find
    the right file and read point.

    Only keys known to the default MetadataDefinitions schema are copied,
    each cast to the python type matching its schema type.  Keys that are
    unknown to the schema, or whose schema type is not handled here, are
    skipped with a warning that is always printed.

    :param filebase: is the base name of the dataset to be read and indexed.
        The function will look for filebase.yaml for the header data and
        filebase.ext (Arg 3 defaulting to d3C).
    :param db: is the MongoDB database handler
    :param ext: is the file extension for the sample data (default is 'd3C').
    :param verbose: when True print progress messages (default is False).
        Warnings about skipped attributes are printed regardless.
    :return: None.  The result is the set of documents inserted in db.wf.
    :raises KeyError: if an entry yields no 'npts' attribute (needed to
        compute the size of its sample block), or if verbose is True and
        an entry lacks 'sta' or 'evid'.
    """
    # This loads default mspass schema
    mdef = MetadataDefinitions()
    yamlfile = filebase + ".yaml"
    # NOTE(review): FullLoader can construct more than plain scalars and
    # containers.  If these yaml files can ever come from an untrusted
    # source consider yaml.safe_load instead.
    with open(yamlfile) as yaml_fh:
        d = yaml.load(yaml_fh, Loader=yaml.FullLoader)
    if verbose:
        print("Read data for ", len(d), " objects")
    # Set up to add to wf collection
    # This is not general, but works of this test with mongo running under docker
    collection = db.wf
    data_path = filebase + "." + ext
    # Each sample is stored as an 8 byte float; the item size is what
    # converts a sample count into a byte count below.
    dtyp = np.dtype("f8")
    real_path = os.path.realpath(data_path)
    data_dir = os.path.dirname(real_path)
    dfile = os.path.basename(real_path)
    if verbose:
        print("Setting dir =", data_dir, " and dfile=", dfile, " for all input")
        print("Make sure this file exists before trying to read these data")
        print("This program only builds the wf collection in the database")
        print(
            "Readers of the raw data will access the sample data from the dir+dfile path"
        )
    # Binary mode is required for tell/seek to be true byte offsets, and
    # the with block guarantees the handle is released on any exit path.
    with open(data_path, "rb") as fh:
        # Indexing (rather than iterating d directly) is kept on purpose so
        # the access pattern on the parsed yaml container is unchanged.
        for i in range(len(d)):
            record = d[i]
            pyd = {}
            # Since the source is assumed an antelope css3.0 database we
            # assume these will be defined.   Relating them back to the
            # original source would be impossible without these in css3.0
            # so shouldn't be an issue
            if verbose:
                print("Working on sta=", record["sta"], " and evid=", record["evid"])
            for k in record:
                try:
                    typ = mdef.type(k)
                    if typ in (MDtype.Double, MDtype.Real64, MDtype.Real32):
                        pyd[k] = float(record[k])
                    elif typ in (MDtype.Int64, MDtype.Int32):
                        pyd[k] = int(record[k])
                    elif typ == MDtype.String:
                        pyd[k] = str(record[k])
                    elif typ == MDtype.Boolean:
                        # The schema type (typ) must be tested here.  Testing
                        # the builtin type never matches and would silently
                        # drop every boolean attribute.
                        pyd[k] = bool(record[k])
                    else:
                        # These are not optional - always print these if
                        # this happens to warn user
                        print("Warning(index_data):  undefined type for key=", k)
                        print("attribute will not be copied to database")
                except MsPASSError:
                    # as above always print this as a warning
                    print("Warning(index_data): key =", k, " is undefined - skipped")

            pyd["dir"] = data_dir
            pyd["dfile"] = dfile
            ns3c = 3 * pyd["npts"]
            # Offset of the first byte of this record's sample block
            pyd["foff"] = fh.tell()
            collection.insert_one(pyd)
            # Step over the sample block without loading it into memory;
            # only the position of the next record is needed.
            fh.seek(ns3c * dtyp.itemsize, os.SEEK_CUR)
    if verbose:
        print("Finished with file=", dfile)
        print(
            "Size of wf collection is now ",
            collection.count_documents({}),
            " documents",
        )