from mspasspy.ccore.seismic import (
TimeSeries,
Seismogram,
TimeSeriesEnsemble,
SeismogramEnsemble,
)
from mspasspy.ccore.utility import Metadata, ErrorSeverity
from mspasspy.ccore.algorithms.basic import TimeWindow
from bson import json_util
import numpy as np
[docs]def number_live(ensemble) -> int:
"""
Scans an ensemble and returns the number of live members. If the
ensemble is marked dead it immediately return 0. Otherwise it loops
through the members countinng the number live.
:param ensemble: ensemble to be scanned
:type ensemble: Must be a `TimeSeriesEnsemble` or `SeismogramEnsemble` or
it will throw a TypeError exception.
"""
if not isinstance(ensemble, (TimeSeriesEnsemble, SeismogramEnsemble)):
message = "number_live: illegal type for arg0={}\n".format(type(ensemble))
message += "Must be a TimeSeriesEnsemble or SeismogramEnsemble\n"
raise TypeError(message)
if ensemble.dead():
return 0
nlive = 0
for d in ensemble.member:
if d.live:
nlive += 1
return nlive
[docs]def regularize_sampling(ensemble, dt_expected, Nsamp=10000, abort_on_error=False):
"""
This is a utility function that can be used to validate that all the members
of an ensemble have a sample interval that is indistinguishable from a specified
constant (dt_expected) The test for constant is soft to allow handling
data created by some older digitizers that skewed the recorded sample interval to force
time computed from N*dt to match time stamps on successive data packets.
The formula used is the datum dt is declared constant if the difference from
the expected dt is less than or equal to dt_expected/2*(Nsamp-1). That means
the computed endtime difference from that using dt_expected is less than
or equal to dt/2.
The function by default will kill members that have mismatched sample
intervals and log an informational message to the datum's elog container.
In this mode the entire ensemble can end up marked dead if all the members
are killed (That situation can easily happen if the entire data set has the wrong dt.);
If the argument `abort_on_errors` is set True a ValueError exception will be
thrown if ANY member of the input ensemble.
An important alternative to this function is to pass a data set
through the MsPASS `resample` function found in mspasspy.algorithms.resample.
That function will guarantee all live data have the same sample interval
and not kill them like this function will. Use this function for
algorithms that can't be certain the input data will have been resampled
and need to be robust for what might otherwise be considered a user error.
:param ensemble: ensemble container of data to be scanned for irregular
sampling.
:type ensemble: `TimeSeriesEnsemble` or `SeismogramEnsemble`. The
function does not test for type and will abort with an undefined method
error if sent anything else.
:param dt_expected: constant data sample interval expected.
:type dt_expected: float
:param Nsamp: Nominal number of samples expected in the ensemble members.
This number is used to compute the soft test to allow for slippery clocks
discussed above. (Default 10000) e = regularize_sampling(e,dt)
assert e.live
:type Nsamp: integer
:param abort_on_error: Controls what the function does if it
encountered a datum with a sample interval different that dt_expected.
When True the function aborts with a ValueError exception if ANY
ensemble member does not have a matching sample interval. When
False (the default) the function uses the MsPASS error logging
to hold a message an kills any member datum with a problem.
:type abort_on_error: boolean
"""
alg = "regularize_sampling"
if ensemble.dead():
return ensemble
# this formula will flag any ensemble member for which the sample
# rate yields a computed end time that differs from the beam
# by more than one half sample
delta_dt_cutoff = dt_expected / (2.0 * (Nsamp - 1))
for i in range(len(ensemble.member)):
d = ensemble.member[i]
if d.live:
if not np.isclose(dt_expected, d.dt):
if abs(dt_expected - d.dt) > delta_dt_cutoff:
message = str()
message = "Member {} of input ensemble has different sample rate={} than expected dt={}".format(
i, d.dt, dt_expected
)
if abort_on_error:
raise ValueError(message)
else:
ensemble.member[i].elog.log_error(
alg, message, ErrorSeverity.Invalid
)
ensemble.member[i].kill()
if not abort_on_error:
nlive = number_live(ensemble)
if nlive <= 0:
message = "All members of this ensemble were killed.\n"
message += "expected dt may be wrong or you need to run the resample function on this data set"
ensemble.elog.log_error(alg, message, ErrorSeverity.Invalid)
ensemble.kill()
return ensemble
[docs]def ensemble_time_range(ensemble, metric="inner") -> TimeWindow:
"""
Scans a Seismic data ensemble returning a measure of the
time span of members. The metric returned ban be either
smallest time range containing all the data, the range
defined by the minimum start time and maximum endtime,
or an average defined by either the median or the
arithmetic mean of the vector of startime and endtime
values.
:param ensemble: ensemble container to be scanned for
time range.
:type ensemble: `TimeSeriesEnsemble` or `SeismogramEnsemble`.
:param metric: measure to use to define the time range.
Accepted values are:
"inner" - (default) return range defined by largest
start time to smallest end time.
"outer" - return range defined by minimum start time and
largest end time (maximum time span of data)
"median" - return range as the median of the extracted
vectors of start time and end time values.
"mean" - return range as arithmetic average of
start and end time vectors
:return: `TimeWindow` object with start and end times. If the
ensemble has all dead member the default constructed TimeWindow
object will be returned which has zero length.
"""
if not isinstance(ensemble, (TimeSeriesEnsemble, SeismogramEnsemble)):
message = "ensemble_time_range: illegal type for arg0={}\n".format(
type(ensemble)
)
message += "Must be a TimeSeriesEnsemble or SeismogramEnsemble\n"
raise TypeError(message)
if metric not in ["inner", "outer", "median", "mean"]:
message = "ensemble_time_range: illegal value for metric={}\n".format(metric)
message += "Must be one of: inner, outer, median, or mean"
raise ValueError(message)
stvector = []
etvector = []
for d in ensemble.member:
if d.live:
stvector.append(d.t0)
etvector.append(d.endtime())
if len(stvector) == 0:
return TimeWindow()
if metric == "inner":
stime = max(stvector)
etime = min(etvector)
elif metric == "outer":
stime = min(stvector)
etime = max(etvector)
elif metric == "median":
stime = np.median(stvector)
etime = np.median(etvector)
elif metric == "mean":
# note numpy's mean an average are different with average being
# more generic - intentionally use mean which is also consistent with
# the keyword used
stime = np.mean(stvector)
etime = np.mean(etvector)
# Intentionally not using else to allow an easier extension
return TimeWindow(stime, etime)