Source code for EQTransformer.core.mseed_predictor

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Jun 21 21:55:54 2020

@author: mostafamousavi

last update: 05/27/2021

"""

from __future__ import print_function
from __future__ import division
import os
os.environ['KERAS_BACKEND']='tensorflow'
from tensorflow.keras import backend as K
from tensorflow.keras.models import load_model
from tensorflow.keras.optimizers import Adam
import tensorflow as tf
import matplotlib
matplotlib.use('agg')
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import math
import csv
from tensorflow import keras
import time
import h5py
from os import listdir
import platform
import shutil
from tqdm import tqdm
from datetime import datetime, timedelta
import contextlib
import sys
import warnings
from scipy import signal
from matplotlib.lines import Line2D
from obspy import read
from os.path import join
import json
import pickle
import faulthandler; faulthandler.enable()
import obspy
import logging
from obspy.signal.trigger import trigger_onset
from .EqT_utils import f1, SeqSelfAttention, FeedForward, LayerNormalization
warnings.filterwarnings("ignore")
from tensorflow.python.util import deprecation
deprecation._PRINT_DEPRECATION_WARNINGS = False

try:
    with open('setup.py') as f:
        for li, l in enumerate(f):
            if li == 8:
                EQT_VERSION = l.split('"')[1]
except Exception:
    EQT_VERSION = "0.1.61"
    

def mseed_predictor(input_dir='downloads_mseeds',
                    input_model="sampleData&Model/EqT1D8pre_048.h5",
                    stations_json="station_list.json",
                    output_dir="detections",
                    detection_threshold=0.3,
                    P_threshold=0.1,
                    S_threshold=0.1,
                    number_of_plots=10,
                    plot_mode='time',
                    loss_weights=[0.03, 0.40, 0.58],
                    loss_types=['binary_crossentropy', 'binary_crossentropy', 'binary_crossentropy'],
                    normalization_mode='std',
                    batch_size=500,
                    overlap=0.3,
                    gpuid=None,
                    gpu_limit=None,
                    overwrite=False,
                    output_probabilities=False):

    """
    Performs fast detection and picking directly on MiniSEED data.

    Parameters
    ----------
    input_dir: str
        Directory containing one subdirectory of MiniSEED files per station.

    input_model: str
        Path to a trained model.

    stations_json: str
        Path to a JSON file containing station information.

    output_dir: str
        Output directory that will be generated.

    detection_threshold: float, default=0.3
        A value above which detection probabilities are considered an event.

    P_threshold: float, default=0.1
        A value above which P probabilities are considered a P arrival.

    S_threshold: float, default=0.1
        A value above which S probabilities are considered an S arrival.

    number_of_plots: int, default=10
        The number of plots of detected events output for each station.

    plot_mode: str, default='time'
        The type of plot: 'time' for time series only, or 'time_frequency' for time series plus spectrograms.

    loss_weights: list, default=[0.03, 0.40, 0.58]
        Loss weights for detection, P picking, and S picking, respectively.

    loss_types: list, default=['binary_crossentropy', 'binary_crossentropy', 'binary_crossentropy']
        Loss types for detection, P picking, and S picking, respectively.

    normalization_mode: str, default='std'
        Mode of normalization for data preprocessing: 'max' (maximum amplitude among the three components) or 'std' (standard deviation).

    batch_size: int, default=500
        Batch size. This won't affect the speed much but can affect the performance. A value between 200 and 1000 is recommended.

    overlap: float, default=0.3
        If set, detection and picking are performed in overlapping windows.

    gpuid: int
        Id of the GPU used for the prediction. If using CPU, set to None.

    gpu_limit: int
        The maximum fraction of memory usage allowed for the GPU.

    overwrite: bool, default=False
        Overwrite your previous results automatically.

    output_probabilities: bool, default=False
        If True, writes the prediction probabilities to output_dir/STATION_outputs/prediction_probabilities.hdf5 for future plotting.
        Structure: prediction_probabilities.hdf5{begintime: {Earthquake: probability, P_arrival: probability, S_arrival: probability}}
        Notice: turning this on generates large files (a test produced a ~150 MB file for one three-component station over 3 months).

    Returns
    --------
    output_dir/STATION_OUTPUT/X_prediction_results.csv: A table containing all the detection and picking results. Duplicated events are already removed.

    output_dir/STATION_OUTPUT/X_report.txt: A summary of the parameters used for the prediction and performance.

    output_dir/STATION_OUTPUT/figures: A folder containing plots of detected events and picked arrival times.

    time_tracks.pkl: A file containing the time track of the continuous data and its component type.

    Note
    --------
    This does not allow uncertainty estimation; the uncertainty columns in the output CSV are left empty.

    """
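    # A minimal example call (paths here are hypothetical and depend on your setup):
    #
    #     from EQTransformer.core.mseed_predictor import mseed_predictor
    #     mseed_predictor(input_dir='downloads_mseeds',
    #                     input_model='sampleData&Model/EqT1D8pre_048.h5',
    #                     stations_json='station_list.json',
    #                     output_dir='detections',
    #                     detection_threshold=0.3, P_threshold=0.1, S_threshold=0.1,
    #                     overlap=0.3, batch_size=500)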
""" args = { "input_dir": input_dir, "input_model": input_model, "stations_json": stations_json, "output_dir": output_dir, "detection_threshold": detection_threshold, "P_threshold": P_threshold, "S_threshold": S_threshold, "number_of_plots": number_of_plots, "plot_mode": plot_mode, "loss_weights": loss_weights, "loss_types": loss_types, "normalization_mode": normalization_mode, "overlap": overlap, "batch_size": batch_size, "gpuid": gpuid, "gpu_limit": gpu_limit, "output_probabilities": output_probabilities } if args['gpuid']: os.environ['CUDA_VISIBLE_DEVICES'] = '{}'.format(args['gpuid']) tf.Session(config=tf.ConfigProto(log_device_placement=True)) config = tf.ConfigProto() config.gpu_options.allow_growth = True config.gpu_options.per_process_gpu_memory_fraction = float(args['gpu_limit']) K.tensorflow_backend.set_session(tf.Session(config=config)) logging.basicConfig(level=logging.DEBUG, format='%(asctime)s [%(levelname)s] [%(name)s] %(message)s', datefmt='%m-%d %H:%M') class DummyFile(object): file = None def __init__(self, file): self.file = file def write(self, x): # Avoid print() second call (useless \n) if len(x.rstrip()) > 0: tqdm.write(x, file=self.file) @contextlib.contextmanager def nostdout(): save_stdout = sys.stdout sys.stdout = DummyFile(sys.stdout) yield sys.stdout = save_stdout eqt_logger = logging.getLogger("EQTransformer") eqt_logger.info(f"Running EqTransformer {EQT_VERSION}") eqt_logger.info(f"*** Loading the model ...") model = load_model(args['input_model'], custom_objects={'SeqSelfAttention': SeqSelfAttention, 'FeedForward': FeedForward, 'LayerNormalization': LayerNormalization, 'f1': f1 }) model.compile(loss = args['loss_types'], loss_weights = args['loss_weights'], optimizer = Adam(lr = 0.001), metrics = [f1]) eqt_logger.info(f"*** Loading is complete!") out_dir = os.path.join(os.getcwd(), str(args['output_dir'])) if os.path.isdir(out_dir): eqt_logger.info(f"*** {out_dir} already exists!") if overwrite == True: inp = "y" eqt_logger.info(f"Overwriting your previous results") else: inp = input(" --> Type (Yes or y) to create a new empty directory! 
This will erase your previous results so make a copy if you want them.") if inp.lower() == "yes" or inp.lower() == "y": shutil.rmtree(out_dir) os.makedirs(out_dir) else: print("Okay.") return if platform.system() == 'Windows': station_list = [ev.split(".")[0] for ev in listdir(args['input_dir']) if ev.split("\\")[-1] != ".DS_Store"]; else: station_list = [ev.split(".")[0] for ev in listdir(args['input_dir']) if ev.split("/")[-1] != ".DS_Store"]; station_list = sorted(set(station_list)) data_track = dict() eqt_logger.info(f"There are files for {len(station_list)} stations in {args['input_dir']} directory.") for ct, st in enumerate(station_list): save_dir = os.path.join(out_dir, str(st)+'_outputs') out_probs = os.path.join(save_dir, 'prediction_probabilities.hdf5') save_figs = os.path.join(save_dir, 'figures') if os.path.isdir(save_dir): shutil.rmtree(save_dir) os.makedirs(save_dir) try: os.remove(out_probs) except Exception: pass if args['number_of_plots']: os.makedirs(save_figs) if args['output_probabilities']: HDF_PROB = h5py.File(out_probs, 'a') plt_n = 0 csvPr_gen = open(os.path.join(save_dir,'X_prediction_results.csv'), 'w') predict_writer = csv.writer(csvPr_gen, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL) predict_writer.writerow(['file_name', 'network', 'station', 'instrument_type', 'station_lat', 'station_lon', 'station_elv', 'event_start_time', 'event_end_time', 'detection_probability', 'detection_uncertainty', 'p_arrival_time', 'p_probability', 'p_uncertainty', 'p_snr', 's_arrival_time', 's_probability', 's_uncertainty', 's_snr' ]) csvPr_gen.flush() eqt_logger.info(f"Started working on {st}, {ct+1} out of {len(station_list)} ...") start_Predicting = time.time() if platform.system() == 'Windows': file_list = [join(st, ev) for ev in listdir(args["input_dir"]+"\\"+st) if ev.split("\\")[-1].split(".")[-1].lower() == "mseed"]; else: file_list = [join(st, ev) for ev in listdir(args["input_dir"]+"/"+st) if ev.split("/")[-1].split(".")[-1].lower() == "mseed"]; mon = [ev.split('__')[1]+'__'+ev.split('__')[2] for ev in file_list ]; uni_list = list(set(mon)) uni_list.sort() time_slots, comp_types = [], [] for _, month in enumerate(uni_list): eqt_logger.info(f"{month}") matching = [s for s in file_list if month in s] meta, time_slots, comp_types, data_set = _mseed2nparry(args, matching, time_slots, comp_types, st) params_pred = {'batch_size': args['batch_size'], 'norm_mode': args['normalization_mode']} pred_generator = PreLoadGeneratorTest(meta["trace_start_time"], data_set, **params_pred) predD, predP, predS = model.predict_generator(pred_generator) detection_memory = [] for ix in range(len(predD)): matches, pick_errors, yh3 = _picker(args, predD[ix][:, 0], predP[ix][:, 0], predS[ix][:, 0]) if (len(matches) >= 1) and ((matches[list(matches)[0]][3] or matches[list(matches)[0]][6])): snr = [_get_snr(data_set[meta["trace_start_time"][ix]], matches[list(matches)[0]][3], window = 100), _get_snr(data_set[meta["trace_start_time"][ix]], matches[list(matches)[0]][6], window = 100)] pre_write = len(detection_memory) detection_memory=_output_writter_prediction(meta, predict_writer, csvPr_gen, matches, snr, detection_memory, ix) post_write = len(detection_memory) if args['output_probabilities']: HDF_PROB.create_dataset(f'{meta["trace_start_time"][ix]}/Earthquake', data=predD[ix][:, 0], dtype= np.float32) HDF_PROB.create_dataset(f'{meta["trace_start_time"][ix]}/P_arrival', data=predP[ix][:, 0], dtype= np.float32) HDF_PROB.create_dataset(f'{meta["trace_start_time"][ix]}/S_arrival', 
data=predS[ix][:, 0], dtype= np.float32) HDF_PROB.flush() if plt_n < args['number_of_plots'] and post_write > pre_write: _plotter_prediction(data_set[meta["trace_start_time"][ix]], args, save_figs, predD[ix][:, 0], predP[ix][:, 0], predS[ix][:, 0], meta["trace_start_time"][ix], matches) plt_n += 1 end_Predicting = time.time() data_track[st]=[time_slots, comp_types] delta = (end_Predicting - start_Predicting) hour = int(delta / 3600) delta -= hour * 3600 minute = int(delta / 60) delta -= minute * 60 seconds = delta if args['output_probabilities']: HDF_PROB.close() dd = pd.read_csv(os.path.join(save_dir,'X_prediction_results.csv')) print(f'\n', flush=True) eqt_logger.info(f"Finished the prediction in: {hour} hours and {minute} minutes and {round(seconds, 2)} seconds.") eqt_logger.info(f'*** Detected: '+str(len(dd))+' events.') eqt_logger.info(f' *** Wrote the results into --> " ' + str(save_dir)+' "') # print(' *** Finished the prediction in: {} hours and {} minutes and {} seconds.'.format(hour, minute, round(seconds, 2)), flush=True) # print(' *** Detected: '+str(len(dd))+' events.', flush=True) # print(' *** Wrote the results into --> " ' + str(save_dir)+' "', flush=True) with open(os.path.join(save_dir,'X_report.txt'), 'a') as the_file: the_file.write('================== PREDICTION FROM MSEED ===================='+'\n') the_file.write('================== Overal Info =============================='+'\n') the_file.write('date of report: '+str(datetime.now())+'\n') the_file.write('input_model: '+str(args['input_model'])+'\n') the_file.write('input_dir: '+str(args['input_dir'])+'\n') the_file.write('output_dir: '+str(save_dir)+'\n') the_file.write('================== Prediction Parameters ====================='+'\n') the_file.write('finished the prediction in: {} hours and {} minutes and {} seconds \n'.format(hour, minute, round(seconds, 2))) the_file.write('detected: '+str(len(dd))+' events.'+'\n') the_file.write('loss_types: '+str(args['loss_types'])+'\n') the_file.write('loss_weights: '+str(args['loss_weights'])+'\n') the_file.write('================== Other Parameters =========================='+'\n') the_file.write('normalization_mode: '+str(args['normalization_mode'])+'\n') the_file.write('overlap: '+str(args['overlap'])+'\n') the_file.write('batch_size: '+str(args['batch_size'])+'\n') the_file.write('detection_threshold: '+str(args['detection_threshold'])+'\n') the_file.write('P_threshold: '+str(args['P_threshold'])+'\n') the_file.write('S_threshold: '+str(args['S_threshold'])+'\n') the_file.write('number_of_plots: '+str(args['number_of_plots'])+'\n') the_file.write('gpuid: '+str(args['gpuid'])+'\n') the_file.write('gpu_limit: '+str(args['gpu_limit'])+'\n') with open('time_tracks.pkl', 'wb') as f: pickle.dump(data_track, f, pickle.HIGHEST_PROTOCOL)
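
# A hypothetical post-processing sketch (not part of this module): reading a
# station's X_prediction_results.csv back with pandas to count events and picks;
# the station directory name 'CA06_outputs' is made up.
#
#     import pandas as pd
#     df = pd.read_csv('detections/CA06_outputs/X_prediction_results.csv')
#     print(len(df), 'events;', df['p_arrival_time'].notna().sum(), 'P picks')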
def _mseed2nparry(args, matching, time_slots, comp_types, st_name):
    'Reads MiniSEED files from a list of file names and returns the meta data, time-slot and component-type lists, and a dictionary of NumPy arrays.'

    with open(args['stations_json']) as json_file:
        stations_ = json.load(json_file)

    st = obspy.core.Stream()
    tsw = False
    for m in matching:
        temp_st = read(os.path.join(str(args['input_dir']), m), debug_headers=True)
        if tsw == False and temp_st:
            tsw = True
            for tr in temp_st:
                time_slots.append((tr.stats.starttime, tr.stats.endtime))
        try:
            temp_st.merge(fill_value=0)
        except Exception:
            temp_st = _resampling(temp_st)
            temp_st.merge(fill_value=0)
        temp_st.detrend('demean')
        st += temp_st

    st.filter(type='bandpass', freqmin=1.0, freqmax=45, corners=2, zerophase=True)
    st.taper(max_percentage=0.001, type='cosine', max_length=2)
    if len([tr for tr in st if tr.stats.sampling_rate != 100.0]) != 0:
        try:
            st.interpolate(100, method="linear")
        except Exception:
            st = _resampling(st)

    st.trim(min([tr.stats.starttime for tr in st]),
            max([tr.stats.endtime for tr in st]),
            pad=True, fill_value=0)
    start_time = st[0].stats.starttime
    end_time = st[0].stats.endtime
    # note: m here is the last file name from the loop above
    meta = {"start_time": start_time,
            "end_time": end_time,
            "trace_name": m}

    chanL = [tr.stats.channel[-1] for tr in st]
    comp_types.append(len(chanL))
    tim_shift = int(60-(args['overlap']*60))
    next_slice = start_time+60

    data_set = {}
    sl = 0
    st_times = []
    while next_slice <= end_time:
        npz_data = np.zeros([6000, 3])
        st_times.append(str(start_time).replace('T', ' ').replace('Z', ''))
        w = st.slice(start_time, next_slice)
        if 'Z' in chanL:
            npz_data[:, 2] = w[chanL.index('Z')].data[:6000]
        if ('E' in chanL) or ('1' in chanL):
            try:
                npz_data[:, 0] = w[chanL.index('E')].data[:6000]
            except Exception:
                npz_data[:, 0] = w[chanL.index('1')].data[:6000]
        if ('N' in chanL) or ('2' in chanL):
            try:
                npz_data[:, 1] = w[chanL.index('N')].data[:6000]
            except Exception:
                npz_data[:, 1] = w[chanL.index('2')].data[:6000]
        data_set.update({str(start_time).replace('T', ' ').replace('Z', ''): npz_data})
        start_time = start_time+tim_shift
        next_slice = next_slice+tim_shift
        sl += 1

    meta["trace_start_time"] = st_times
    try:
        meta["receiver_code"] = st[0].stats.station
        meta["instrument_type"] = st[0].stats.channel[:2]
        meta["network_code"] = stations_[st[0].stats.station]['network']
        meta["receiver_latitude"] = stations_[st[0].stats.station]['coords'][0]
        meta["receiver_longitude"] = stations_[st[0].stats.station]['coords'][1]
        meta["receiver_elevation_m"] = stations_[st[0].stats.station]['coords'][2]
    except Exception:
        meta["receiver_code"] = st_name
        meta["instrument_type"] = stations_[st_name]['channels'][0][:2]
        meta["network_code"] = stations_[st_name]['network']
        meta["receiver_latitude"] = stations_[st_name]['coords'][0]
        meta["receiver_longitude"] = stations_[st_name]['coords'][1]
        meta["receiver_elevation_m"] = stations_[st_name]['coords'][2]

    return meta, time_slots, comp_types, data_set
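
# The slicing above advances by tim_shift = 60 - overlap*60 seconds per 60 s
# window, so with the default overlap=0.3 consecutive windows start 42 s apart.
# A quick standalone sanity check of that arithmetic (not used by this module):
#
#     overlap = 0.3
#     tim_shift = int(60 - overlap * 60)                 # 42 s hop between window starts
#     n_windows = (1 * 3600 - 60) // tim_shift + 1       # ~85 windows per hour of data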
class PreLoadGeneratorTest(keras.utils.Sequence):

    """
    Keras generator with preprocessing. For testing. Pre-load version.

    Parameters
    ----------
    list_IDs: list
        List of trace start times used as keys into the pre-loaded data.

    inp_data: dict
        Dictionary of pre-loaded waveforms, one 6000x3 array per trace start time.

    batch_size: int, default=32
        Batch size.

    norm_mode: str, default=std
        The mode of normalization, 'max' or 'std'.

    Returns
    --------
    Batches of one dictionary: {'input': X}: pre-processed waveforms as input.

    """

    def __init__(self, list_IDs, inp_data, batch_size=32, norm_mode='std'):
        'Initialization'
        self.batch_size = batch_size
        self.list_IDs = list_IDs
        self.inp_data = inp_data
        self.on_epoch_end()
        self.norm_mode = norm_mode

    def __len__(self):
        'Denotes the number of batches per epoch'
        try:
            return int(np.floor(len(self.list_IDs) / self.batch_size))
        except ZeroDivisionError:
            print("Your data duration in the mseed file is too short! Try either longer files or reducing the batch_size.")

    def __getitem__(self, index):
        'Generates one batch of data'
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]
        list_IDs_temp = [self.list_IDs[k] for k in indexes]
        X = self.__data_generation(list_IDs_temp)
        return ({'input': X})
    def on_epoch_end(self):
        'Updates indexes after each epoch'
        self.indexes = np.arange(len(self.list_IDs))
    def _normalize(self, data, mode='max'):
        'Normalizes a waveform window component-wise'
        data -= np.mean(data, axis=0, keepdims=True)
        if mode == 'max':
            max_data = np.max(data, axis=0, keepdims=True)
            assert(max_data.shape[-1] == data.shape[-1])
            max_data[max_data == 0] = 1
            data /= max_data
        elif mode == 'std':
            std_data = np.std(data, axis=0, keepdims=True)
            assert(std_data.shape[-1] == data.shape[-1])
            std_data[std_data == 0] = 1
            data /= std_data
        return data

    def __data_generation(self, list_IDs_temp):
        'Reads the waveforms'
        X = np.zeros((self.batch_size, 6000, 3))
        # Generate data
        for i, ID in enumerate(list_IDs_temp):
            data = self.inp_data[ID]
            data = self._normalize(data, self.norm_mode)
            X[i, :, :] = data
        return X
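
# A minimal standalone sketch of driving the generator with synthetic data
# (the keys and arrays mimic what _mseed2nparry produces; the values are made up):
#
#     import numpy as np
#     keys = ['2020-01-01 00:00:00', '2020-01-01 00:00:42']
#     waves = {k: np.random.randn(6000, 3) for k in keys}
#     gen = PreLoadGeneratorTest(keys, waves, batch_size=2, norm_mode='std')
#     batch = gen[0]   # {'input': array of shape (2, 6000, 3)}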
def _output_writter_prediction(meta, predict_writer, csvPr, matches, snr, detection_memory, idx):

    """
    Writes the detection and picking results into a CSV file.

    Parameters
    ----------
    meta: dict
        Metadata of the trace, including station information and trace start times.

    predict_writer: obj
        For writing out the detection/picking results in the CSV file.

    csvPr: obj
        For writing out the detection/picking results in the CSV file.

    matches: dic
        It contains the information for the detected and picked event.

    snr: list of two floats
        Estimated signal-to-noise ratios for the picked P and S phases.

    detection_memory: list
        Keeps track of detected events.

    Returns
    -------
    detection_memory: list
        Keeps track of detected events.

    """

    station_name = meta["receiver_code"]
    station_lat = meta["receiver_latitude"]
    station_lon = meta["receiver_longitude"]
    station_elv = meta["receiver_elevation_m"]
    start_time = meta["trace_start_time"][idx]
    station_name = "{:<4}".format(station_name)
    network_name = meta["network_code"]
    network_name = "{:<2}".format(network_name)
    instrument_type = meta["instrument_type"]
    instrument_type = "{:<2}".format(instrument_type)

    try:
        start_time = datetime.strptime(start_time, '%Y-%m-%d %H:%M:%S.%f')
    except Exception:
        start_time = datetime.strptime(start_time, '%Y-%m-%d %H:%M:%S')

    def _date_convertor(r):
        if isinstance(r, str):
            mls = r.split('.')
            if len(mls) == 1:
                new_t = datetime.strptime(r, '%Y-%m-%d %H:%M:%S')
            else:
                new_t = datetime.strptime(r, '%Y-%m-%d %H:%M:%S.%f')
        else:
            new_t = r
        return new_t

    for match, match_value in matches.items():
        ev_strt = start_time+timedelta(seconds=match/100)
        ev_end = start_time+timedelta(seconds=match_value[0]/100)

        doublet = [st for st in detection_memory
                   if abs((st-ev_strt).total_seconds()) < 2]

        if len(doublet) == 0:
            det_prob = round(match_value[1], 2)

            if match_value[3]:
                p_time = start_time+timedelta(seconds=match_value[3]/100)
            else:
                p_time = None
            p_prob = match_value[4]
            if p_prob:
                p_prob = round(p_prob, 2)

            if match_value[6]:
                s_time = start_time+timedelta(seconds=match_value[6]/100)
            else:
                s_time = None
            s_prob = match_value[7]
            if s_prob:
                s_prob = round(s_prob, 2)

            predict_writer.writerow([meta["trace_name"],
                                     network_name,
                                     station_name,
                                     instrument_type,
                                     station_lat,
                                     station_lon,
                                     station_elv,
                                     _date_convertor(ev_strt),
                                     _date_convertor(ev_end),
                                     det_prob,
                                     None,
                                     _date_convertor(p_time),
                                     p_prob,
                                     None,
                                     snr[0],
                                     _date_convertor(s_time),
                                     s_prob,
                                     None,
                                     snr[1]])
            csvPr.flush()
            detection_memory.append(ev_strt)

    return detection_memory


def _get_snr(data, pat, window=200):

    """
    Estimates SNR.

    Parameters
    ----------
    data: NumPy array
        3 component data.

    pat: positive integer
        Sample point where a specific phase arrives.

    window: positive integer, default=200
        The length of the window for calculating the SNR (in samples).

    Returns
    --------
    snr: {float, None}
        Estimated SNR in dB.

    """
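    # The estimate below is 10*log10((signal/noise)**2) on 95th-percentile
    # amplitudes of the windows after and before the pick. For example, with
    # made-up percentiles of 200 counts (signal) and 20 counts (noise), the
    # ratio is 10, so SNR = 10*log10(100) = 20 dB.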
""" snr = None if pat: try: if int(pat) >= window and (int(pat)+window) < len(data): nw1 = data[int(pat)-window : int(pat)]; sw1 = data[int(pat) : int(pat)+window]; snr = round(10*math.log10((np.percentile(sw1,95)/np.percentile(nw1,95))**2), 1) elif int(pat) < window and (int(pat)+window) < len(data): window = int(pat) nw1 = data[int(pat)-window : int(pat)]; sw1 = data[int(pat) : int(pat)+window]; snr = round(10*math.log10((np.percentile(sw1,95)/np.percentile(nw1,95))**2), 1) elif (int(pat)+window) > len(data): window = len(data)-int(pat) nw1 = data[int(pat)-window : int(pat)]; sw1 = data[int(pat) : int(pat)+window]; snr = round(10*math.log10((np.percentile(sw1,95)/np.percentile(nw1,95))**2), 1) except Exception: pass return snr def _detect_peaks(x, mph=None, mpd=1, threshold=0, edge='rising', kpsh=False, valley=False): """ Detect peaks in data based on their amplitude and other features. Parameters ---------- x : 1D array_like data. mph : {None, number}, default=None detect peaks that are greater than minimum peak height. mpd : int, default=1 detect peaks that are at least separated by minimum peak distance (in number of data). threshold : int, default=0 detect peaks (valleys) that are greater (smaller) than `threshold in relation to their immediate neighbors. edge : str, default=rising for a flat peak, keep only the rising edge ('rising'), only the falling edge ('falling'), both edges ('both'), or don't detect a flat peak (None). kpsh : bool, default=False keep peaks with same height even if they are closer than `mpd`. valley : bool, default=False if True (1), detect valleys (local minima) instead of peaks. Returns ------- ind : 1D array_like indeces of the peaks in `x`. Modified from ---------- .. [1] http://nbviewer.ipython.org/github/demotu/BMC/blob/master/notebooks/DetectPeaks.ipynb """ x = np.atleast_1d(x).astype('float64') if x.size < 3: return np.array([], dtype=int) if valley: x = -x # find indices of all peaks dx = x[1:] - x[:-1] # handle NaN's indnan = np.where(np.isnan(x))[0] if indnan.size: x[indnan] = np.inf dx[np.where(np.isnan(dx))[0]] = np.inf ine, ire, ife = np.array([[], [], []], dtype=int) if not edge: ine = np.where((np.hstack((dx, 0)) < 0) & (np.hstack((0, dx)) > 0))[0] else: if edge.lower() in ['rising', 'both']: ire = np.where((np.hstack((dx, 0)) <= 0) & (np.hstack((0, dx)) > 0))[0] if edge.lower() in ['falling', 'both']: ife = np.where((np.hstack((dx, 0)) < 0) & (np.hstack((0, dx)) >= 0))[0] ind = np.unique(np.hstack((ine, ire, ife))) # handle NaN's if ind.size and indnan.size: # NaN's and values close to NaN's cannot be peaks ind = ind[np.in1d(ind, np.unique(np.hstack((indnan, indnan-1, indnan+1))), invert=True)] # first and last values of x cannot be peaks if ind.size and ind[0] == 0: ind = ind[1:] if ind.size and ind[-1] == x.size-1: ind = ind[:-1] # remove peaks < minimum peak height if ind.size and mph is not None: ind = ind[x[ind] >= mph] # remove peaks - neighbors < threshold if ind.size and threshold > 0: dx = np.min(np.vstack([x[ind]-x[ind-1], x[ind]-x[ind+1]]), axis=0) ind = np.delete(ind, np.where(dx < threshold)[0]) # detect small peaks closer than minimum peak distance if ind.size and mpd > 1: ind = ind[np.argsort(x[ind])][::-1] # sort ind by peak height idel = np.zeros(ind.size, dtype=bool) for i in range(ind.size): if not idel[i]: # keep peaks with the same height if kpsh is True idel = idel | (ind >= ind[i] - mpd) & (ind <= ind[i] + mpd) \ & (x[ind[i]] > x[ind] if kpsh else True) idel[i] = 0 # Keep current peak # remove the small peaks and sort 
def _picker(args, yh1, yh2, yh3):

    """
    Performs detection and picking.

    Parameters
    ----------
    args: dic
        A dictionary containing all of the input parameters.

    yh1: 1D array
        Detection probabilities.

    yh2: 1D array
        P arrival probabilities.

    yh3: 1D array
        S arrival probabilities.

    Returns
    -------
    matches: dic
        Contains the information for the detected and picked event:
        {detection start-time: [detection end-time, detection probability, detection uncertainty,
        P arrival, P probability, P uncertainty, S arrival, S probability, S uncertainty]}

    pick_errors: dic
        Pick errors (left empty here).

    yh3: 1D array
        normalized S probability.

    """

    detection = trigger_onset(yh1, args['detection_threshold'], args['detection_threshold'])
    pp_arr = _detect_peaks(yh2, mph=args['P_threshold'], mpd=1)
    ss_arr = _detect_peaks(yh3, mph=args['S_threshold'], mpd=1)

    P_PICKS = {}
    S_PICKS = {}
    EVENTS = {}
    matches = {}
    pick_errors = {}

    if len(pp_arr) > 0:
        P_uncertainty = None
        for pick in range(len(pp_arr)):
            pauto = pp_arr[pick]
            if pauto:
                P_prob = np.round(yh2[int(pauto)], 3)
                P_PICKS.update({pauto: [P_prob, P_uncertainty]})

    if len(ss_arr) > 0:
        S_uncertainty = None
        for pick in range(len(ss_arr)):
            sauto = ss_arr[pick]
            if sauto:
                S_prob = np.round(yh3[int(sauto)], 3)
                S_PICKS.update({sauto: [S_prob, S_uncertainty]})

    if len(detection) > 0:
        D_uncertainty = None
        for ev in range(len(detection)):
            D_prob = np.mean(yh1[detection[ev][0]:detection[ev][1]])
            D_prob = np.round(D_prob, 3)
            EVENTS.update({detection[ev][0]: [D_prob, D_uncertainty, detection[ev][1]]})

    # matching the detections and picks
    def pair_PS(l1, l2, dist):
        l1.sort()
        l2.sort()
        b = 0
        e = 0
        ans = []
        for a in l1:
            # bounds check must come before indexing into l2
            while b < len(l2) and l2[b] and a - l2[b] > dist:
                b += 1
            while e < len(l2) and l2[e] and l2[e] - a <= dist:
                e += 1
            ans.extend([[a, x] for x in l2[b:e]])

        best_pair = None
        for pr in ans:
            ds = pr[1]-pr[0]
            if abs(ds) < dist:
                best_pair = pr
                dist = ds
        return best_pair

    for ev in EVENTS:
        bg = ev
        ed = EVENTS[ev][2]
        if int(ed-bg) >= 10:

            candidate_Ss = {}
            for Ss, S_val in S_PICKS.items():
                if Ss > bg and Ss < ed:
                    candidate_Ss.update({Ss: S_val})

            if len(candidate_Ss) > 1:
                candidate_Ss = {list(candidate_Ss.keys())[0]: candidate_Ss[list(candidate_Ss.keys())[0]]}

            if len(candidate_Ss) == 0:
                candidate_Ss = {None: [None, None]}

            candidate_Ps = {}
            for Ps, P_val in P_PICKS.items():
                if list(candidate_Ss)[0]:
                    if Ps > bg-100 and Ps < list(candidate_Ss)[0]-10:
                        candidate_Ps.update({Ps: P_val})
                else:
                    if Ps > bg-100 and Ps < ed:
                        candidate_Ps.update({Ps: P_val})

            if len(candidate_Ps) > 1:
                Pr_st = 0
                buffer = {}
                for PsCan, P_valCan in candidate_Ps.items():
                    if P_valCan[0] > Pr_st:
                        buffer = {PsCan: P_valCan}
                        Pr_st = P_valCan[0]
                candidate_Ps = buffer

            if len(candidate_Ps) == 0:
                candidate_Ps = {None: [None, None]}

            if list(candidate_Ss)[0] or list(candidate_Ps)[0]:
                matches.update({bg: [ed,
                                     EVENTS[ev][0],
                                     EVENTS[ev][1],
                                     list(candidate_Ps)[0],
                                     candidate_Ps[list(candidate_Ps)[0]][0],
                                     candidate_Ps[list(candidate_Ps)[0]][1],
                                     list(candidate_Ss)[0],
                                     candidate_Ss[list(candidate_Ss)[0]][0],
                                     candidate_Ss[list(candidate_Ss)[0]][1]]})

    return matches, pick_errors, yh3
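
# Sample indices are at 100 Hz, so a matches entry such as (values made up)
#
#     {3100: [4500, 0.92, None, 3150, 0.75, None, 3600, 0.81, None]}
#
# reads: event from sample 3100 to 4500 (31.0 s to 45.0 s into the window,
# detection probability 0.92), P at sample 3150 (probability 0.75), S at
# sample 3600 (probability 0.81); the None fields are unestimated uncertainties.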
def _resampling(st):
    'Performs resampling on ObsPy stream objects'

    need_resampling = [tr for tr in st if tr.stats.sampling_rate != 100.0]
    if len(need_resampling) > 0:
        # print('resampling ...', flush=True)
        for indx, tr in enumerate(need_resampling):
            if tr.stats.delta < 0.01:
                tr.filter('lowpass', freq=45, zerophase=True)
            tr.resample(100)
            tr.stats.sampling_rate = 100
            tr.stats.delta = 0.01
            # convert (rather than reinterpret) the samples to int32
            tr.data = tr.data.astype('int32')
            st.remove(tr)
            st.append(tr)
    return st


def _normalize(data, mode='max'):

    """
    Normalizes 3D arrays.

    Parameters
    ----------
    data: 3D numpy array
        3 component traces.

    mode: str, default='max'
        Mode of normalization. 'max' or 'std'

    Returns
    -------
    data: 3D numpy array
        normalized data.

    """

    data -= np.mean(data, axis=0, keepdims=True)
    if mode == 'max':
        max_data = np.max(data, axis=0, keepdims=True)
        assert(max_data.shape[-1] == data.shape[-1])
        max_data[max_data == 0] = 1
        data /= max_data

    elif mode == 'std':
        std_data = np.std(data, axis=0, keepdims=True)
        assert(std_data.shape[-1] == data.shape[-1])
        std_data[std_data == 0] = 1
        data /= std_data
    return data
def _plotter_prediction(data, args, save_figs, yh1, yh2, yh3, evi, matches):

    """
    Generates plots of detected events with the prediction probabilities and arrival picks.

    Parameters
    ----------
    data: NumPy array
        3 component raw waveform.

    args: dic
        A dictionary containing all of the input parameters.

    save_figs: str
        Path to the folder for saving the plots.

    yh1: 1D array
        Detection probabilities.

    yh2: 1D array
        P arrival probabilities.

    yh3: 1D array
        S arrival probabilities.

    evi: str
        Trace name.

    matches: dic
        Contains the information for the detected and picked event.

    """

    font0 = {'family': 'serif',
             'color': 'white',
             'stretch': 'condensed',
             'weight': 'normal',
             'size': 12,
             }

    spt, sst, detected_events = [], [], []
    for match, match_value in matches.items():
        detected_events.append([match, match_value[0]])
        if match_value[3]:
            spt.append(match_value[3])
        else:
            spt.append(None)
        if match_value[6]:
            sst.append(match_value[6])
        else:
            sst.append(None)

    if args['plot_mode'] == 'time_frequency':

        fig = plt.figure(constrained_layout=False)
        widths = [6, 1]
        heights = [1, 1, 1, 1, 1, 1, 1.8]
        spec5 = fig.add_gridspec(ncols=2, nrows=7, width_ratios=widths,
                                 height_ratios=heights, left=0.1, right=0.9, hspace=0.1)

        ax = fig.add_subplot(spec5[0, 0])
        plt.plot(data[:, 0], 'k')
        plt.xlim(0, 6000)
        x = np.arange(6000)
        if platform.system() == 'Windows':
            plt.title(save_figs.split("\\")[-2].split("_")[0]+":"+str(evi))
        else:
            plt.title(save_figs.split("/")[-2].split("_")[0]+":"+str(evi))
        ax.set_xticks([])
        plt.rcParams["figure.figsize"] = (10, 10)
        legend_properties = {'weight': 'bold'}

        pl = None
        sl = None
        if len(spt) > 0 and np.count_nonzero(data[:, 0]) > 10:
            ymin, ymax = ax.get_ylim()
            for ipt, pt in enumerate(spt):
                if pt and ipt == 0:
                    pl = plt.vlines(int(pt), ymin, ymax, color='c', linewidth=2, label='Picked P')
                elif pt and ipt > 0:
                    pl = plt.vlines(int(pt), ymin, ymax, color='c', linewidth=2)
        if len(sst) > 0 and np.count_nonzero(data[:, 0]) > 10:
            for ist, st in enumerate(sst):
                if st and ist == 0:
                    sl = plt.vlines(int(st), ymin, ymax, color='m', linewidth=2, label='Picked S')
                elif st and ist > 0:
                    sl = plt.vlines(int(st), ymin, ymax, color='m', linewidth=2)

        ax = fig.add_subplot(spec5[0, 1])
        if pl or sl:
            custom_lines = [Line2D([0], [0], color='k', lw=0),
                            Line2D([0], [0], color='c', lw=2),
                            Line2D([0], [0], color='m', lw=2)]
            plt.legend(custom_lines, ['E', 'Picked P', 'Picked S'], fancybox=True, shadow=True)
        plt.axis('off')

        ax = fig.add_subplot(spec5[1, 0])
        f, t, Pxx = signal.stft(data[:, 0], fs=100, nperseg=80)
        Pxx = np.abs(Pxx)
        plt.pcolormesh(t, f, Pxx, alpha=None, cmap='hot', shading='flat', antialiased=True)
        plt.ylim(0, 40)
        plt.text(1, 1, 'STFT', fontdict=font0)
        plt.ylabel('Hz', fontsize=12)
        ax.set_xticks([])

        ax = fig.add_subplot(spec5[2, 0])
        plt.plot(data[:, 1], 'k')
        plt.xlim(0, 6000)
        ax.set_xticks([])
        if len(spt) > 0 and np.count_nonzero(data[:, 1]) > 10:
            ymin, ymax = ax.get_ylim()
            for ipt, pt in enumerate(spt):
                if pt and ipt == 0:
                    pl = plt.vlines(int(pt), ymin, ymax, color='c', linewidth=2, label='Picked P')
                elif pt and ipt > 0:
                    pl = plt.vlines(int(pt), ymin, ymax, color='c', linewidth=2)
        if len(sst) > 0 and np.count_nonzero(data[:, 1]) > 10:
            for ist, st in enumerate(sst):
                if st and ist == 0:
                    sl = plt.vlines(int(st), ymin, ymax, color='m', linewidth=2, label='Picked S')
                elif st and ist > 0:
                    sl = plt.vlines(int(st), ymin, ymax, color='m', linewidth=2)

        ax = fig.add_subplot(spec5[2, 1])
        if pl or sl:
            custom_lines = [Line2D([0], [0], color='k', lw=0),
                            Line2D([0], [0], color='c', lw=2),
                            Line2D([0], [0], color='m', lw=2)]
            plt.legend(custom_lines, ['N', 'Picked P', 'Picked S'], fancybox=True, shadow=True)
        plt.axis('off')

        ax = fig.add_subplot(spec5[3, 0])
        f, t, Pxx = signal.stft(data[:, 1], fs=100, nperseg=80)
        Pxx = np.abs(Pxx)
        plt.pcolormesh(t, f, Pxx, alpha=None, cmap='hot', shading='flat', antialiased=True)
        plt.ylim(0, 40)
        plt.text(1, 1, 'STFT', fontdict=font0)
        plt.ylabel('Hz', fontsize=12)
        ax.set_xticks([])

        ax = fig.add_subplot(spec5[4, 0])
        plt.plot(data[:, 2], 'k')
        plt.xlim(0, 6000)
        ax.set_xticks([])
        if len(spt) > 0 and np.count_nonzero(data[:, 2]) > 10:
            ymin, ymax = ax.get_ylim()
            for ipt, pt in enumerate(spt):
                if pt and ipt == 0:
                    pl = plt.vlines(int(pt), ymin, ymax, color='c', linewidth=2, label='Picked P')
                elif pt and ipt > 0:
                    pl = plt.vlines(int(pt), ymin, ymax, color='c', linewidth=2)
        if len(sst) > 0 and np.count_nonzero(data[:, 2]) > 10:
            for ist, st in enumerate(sst):
                if st and ist == 0:
                    sl = plt.vlines(int(st), ymin, ymax, color='m', linewidth=2, label='Picked S')
                elif st and ist > 0:
                    sl = plt.vlines(int(st), ymin, ymax, color='m', linewidth=2)

        ax = fig.add_subplot(spec5[4, 1])
        if pl or sl:
            custom_lines = [Line2D([0], [0], color='k', lw=0),
                            Line2D([0], [0], color='c', lw=2),
                            Line2D([0], [0], color='m', lw=2)]
            plt.legend(custom_lines, ['Z', 'Picked P', 'Picked S'], fancybox=True, shadow=True)
        plt.axis('off')

        ax = fig.add_subplot(spec5[5, 0])
        f, t, Pxx = signal.stft(data[:, 2], fs=100, nperseg=80)
        Pxx = np.abs(Pxx)
        plt.pcolormesh(t, f, Pxx, alpha=None, cmap='hot', shading='flat', antialiased=True)
        plt.ylim(0, 40)
        plt.text(1, 1, 'STFT', fontdict=font0)
        plt.ylabel('Hz', fontsize=12)
        ax.set_xticks([])

        ax = fig.add_subplot(spec5[6, 0])
        x = np.linspace(0, data.shape[0], data.shape[0], endpoint=True)
        plt.plot(x, yh1, '--', color='g', alpha=0.5, linewidth=2, label='Earthquake')
        plt.plot(x, yh2, '--', color='b', alpha=0.5, linewidth=2, label='P_arrival')
        plt.plot(x, yh3, '--', color='r', alpha=0.5, linewidth=2, label='S_arrival')
        plt.tight_layout()
        plt.ylim((-0.1, 1.1))
        plt.xlim(0, 6000)
        plt.ylabel('Probability', fontsize=12)
        plt.xlabel('Sample', fontsize=12)
        plt.yticks(np.arange(0, 1.1, step=0.2))
        axes = plt.gca()
        axes.yaxis.grid(color='lightgray')

        ax = fig.add_subplot(spec5[6, 1])
        custom_lines = [Line2D([0], [0], linestyle='--', color='g', lw=2),
                        Line2D([0], [0], linestyle='--', color='b', lw=2),
                        Line2D([0], [0], linestyle='--', color='r', lw=2)]
        plt.legend(custom_lines, ['Earthquake', 'P_arrival', 'S_arrival'], fancybox=True, shadow=True)
        plt.axis('off')

        font = {'family': 'serif',
                'color': 'dimgrey',
                'style': 'italic',
                'stretch': 'condensed',
                'weight': 'normal',
                'size': 12,
                }
        plt.text(1, 0.2, 'EQTransformer', fontdict=font)
        if EQT_VERSION:
            plt.text(2000, 0.05, str(EQT_VERSION), fontdict=font)
        plt.xlim(0, 6000)
        fig.tight_layout()
        fig.savefig(os.path.join(save_figs, str(evi).replace(':', '-')+'.png'))
        plt.close(fig)
        plt.clf()

    else:
        # ######################################## plotting only in the time domain
        fig = plt.figure(constrained_layout=True)
        widths = [1]
        heights = [1.6, 1.6, 1.6, 2.5]
        spec5 = fig.add_gridspec(ncols=1, nrows=4, width_ratios=widths, height_ratios=heights)

        ax = fig.add_subplot(spec5[0, 0])
        plt.plot(data[:, 0], 'k')
        x = np.arange(6000)
        plt.xlim(0, 6000)
        if platform.system() == 'Windows':
            plt.title(save_figs.split("\\")[-2].split("_")[0]+":"+str(evi))
        else:
            plt.title(save_figs.split("/")[-2].split("_")[0]+":"+str(evi))
        plt.ylabel('Amplitude\nCounts')
        plt.rcParams["figure.figsize"] = (8, 6)
        legend_properties = {'weight': 'bold'}

        pl = sl = None
        if len(spt) > 0 and np.count_nonzero(data[:, 0]) > 10:
            ymin, ymax = ax.get_ylim()
            for ipt, pt in enumerate(spt):
                if pt and ipt == 0:
                    pl = plt.vlines(int(pt), ymin, ymax, color='c', linewidth=2, label='Picked P')
                elif pt and ipt > 0:
                    pl = plt.vlines(int(pt), ymin, ymax, color='c', linewidth=2)
        if len(sst) > 0 and np.count_nonzero(data[:, 0]) > 10:
            for ist, st in enumerate(sst):
                if st and ist == 0:
                    sl = plt.vlines(int(st), ymin, ymax, color='m', linewidth=2, label='Picked S')
                elif st and ist > 0:
                    sl = plt.vlines(int(st), ymin, ymax, color='m', linewidth=2)
        if pl or sl:
            box = ax.get_position()
            ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
            custom_lines = [Line2D([0], [0], color='k', lw=0),
                            Line2D([0], [0], color='c', lw=2),
                            Line2D([0], [0], color='m', lw=2)]
            plt.legend(custom_lines, ['E', 'Picked P', 'Picked S'],
                       loc='center left', bbox_to_anchor=(1, 0.5), fancybox=True, shadow=True)

        ax = fig.add_subplot(spec5[1, 0])
        plt.plot(data[:, 1], 'k')
        plt.xlim(0, 6000)
        plt.ylabel('Amplitude\nCounts')
        if len(spt) > 0 and np.count_nonzero(data[:, 1]) > 10:
            ymin, ymax = ax.get_ylim()
            for ipt, pt in enumerate(spt):
                if pt and ipt == 0:
                    pl = plt.vlines(int(pt), ymin, ymax, color='c', linewidth=2, label='Picked P')
                elif pt and ipt > 0:
                    pl = plt.vlines(int(pt), ymin, ymax, color='c', linewidth=2)
        if len(sst) > 0 and np.count_nonzero(data[:, 1]) > 10:
            for ist, st in enumerate(sst):
                if st and ist == 0:
                    sl = plt.vlines(int(st), ymin, ymax, color='m', linewidth=2, label='Picked S')
                elif st and ist > 0:
                    sl = plt.vlines(int(st), ymin, ymax, color='m', linewidth=2)
        if pl or sl:
            box = ax.get_position()
            ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
            custom_lines = [Line2D([0], [0], color='k', lw=0),
                            Line2D([0], [0], color='c', lw=2),
                            Line2D([0], [0], color='m', lw=2)]
            plt.legend(custom_lines, ['N', 'Picked P', 'Picked S'],
                       loc='center left', bbox_to_anchor=(1, 0.5), fancybox=True, shadow=True)

        ax = fig.add_subplot(spec5[2, 0])
        plt.plot(data[:, 2], 'k')
        plt.xlim(0, 6000)
        plt.ylabel('Amplitude\nCounts')
        ax.set_xticks([])
        if len(spt) > 0 and np.count_nonzero(data[:, 2]) > 10:
            ymin, ymax = ax.get_ylim()
            for ipt, pt in enumerate(spt):
                if pt and ipt == 0:
                    pl = plt.vlines(int(pt), ymin, ymax, color='c', linewidth=2, label='Picked P')
                elif pt and ipt > 0:
                    pl = plt.vlines(int(pt), ymin, ymax, color='c', linewidth=2)
        if len(sst) > 0 and np.count_nonzero(data[:, 2]) > 10:
            for ist, st in enumerate(sst):
                if st and ist == 0:
                    sl = plt.vlines(int(st), ymin, ymax, color='m', linewidth=2, label='Picked S')
                elif st and ist > 0:
                    sl = plt.vlines(int(st), ymin, ymax, color='m', linewidth=2)
        if pl or sl:
            box = ax.get_position()
            ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
            custom_lines = [Line2D([0], [0], color='k', lw=0),
                            Line2D([0], [0], color='c', lw=2),
                            Line2D([0], [0], color='m', lw=2)]
            plt.legend(custom_lines, ['Z', 'Picked P', 'Picked S'],
                       loc='center left', bbox_to_anchor=(1, 0.5), fancybox=True, shadow=True)
        ax = fig.add_subplot(spec5[3, 0])
        x = np.linspace(0, data.shape[0], data.shape[0], endpoint=True)
        plt.plot(x, yh1, '--', color='g', alpha=0.5, linewidth=1.5, label='Earthquake')
        plt.plot(x, yh2, '--', color='b', alpha=0.5, linewidth=1.5, label='P_arrival')
        plt.plot(x, yh3, '--', color='r', alpha=0.5, linewidth=1.5, label='S_arrival')
        plt.tight_layout()
        plt.ylim((-0.1, 1.1))
        plt.xlim(0, 6000)
        plt.ylabel('Probability')
        plt.xlabel('Sample')
        plt.legend(loc='lower center', bbox_to_anchor=(0., 1.17, 1., .102),
                   ncol=3, mode="expand", prop=legend_properties,
                   borderaxespad=0., fancybox=True, shadow=True)
        plt.yticks(np.arange(0, 1.1, step=0.2))
        axes = plt.gca()
        axes.yaxis.grid(color='lightgray')

        font = {'family': 'serif',
                'color': 'dimgrey',
                'style': 'italic',
                'stretch': 'condensed',
                'weight': 'normal',
                'size': 12,
                }
        plt.text(6500, 0.5, 'EQTransformer', fontdict=font)
        if EQT_VERSION:
            plt.text(7000, 0.1, str(EQT_VERSION), fontdict=font)

        fig.tight_layout()
        fig.savefig(os.path.join(save_figs, str(evi).replace(':', '-')+'.png'))
        plt.close(fig)
        plt.clf()
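
# If output_probabilities=True, the per-window probabilities written by
# mseed_predictor can be read back with h5py. A hypothetical sketch; the
# station directory and the group key (one trace start time) are made up:
#
#     import h5py
#     with h5py.File('detections/CA06_outputs/prediction_probabilities.hdf5', 'r') as hf:
#         grp = hf['2020-01-01 00:00:00.000000']
#         eq, p, s = grp['Earthquake'][:], grp['P_arrival'][:], grp['S_arrival'][:]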