diff --git a/.gitignore b/.gitignore index 6a5b616..29d7646 100644 --- a/.gitignore +++ b/.gitignore @@ -133,3 +133,9 @@ dmypy.json # Pyre type checker .pyre/ + +# macOS specific files +.DS_Store + +# don't sync test outputs +testdata/*.png diff --git a/physioqc/metrics/cardiac.py b/physioqc/metrics/cardiac.py index 5eb08ee..2f8613f 100644 --- a/physioqc/metrics/cardiac.py +++ b/physioqc/metrics/cardiac.py @@ -1,8 +1,439 @@ """Denoising metrics for cardio recordings.""" +import matplotlib.pyplot as plt import numpy as np +from matplotlib.patches import Polygon +from numpy.polynomial import Polynomial +from peakdet import operations +from scipy.signal import resample, welch +from scipy.stats import kurtosis, pearsonr, skew from .. import references from ..due import due +from .chest_belt import romanosqi +from .utils import butterbpfiltfilt, butterlpfiltfilt, hamming -pass + +def cardiacprefilter(rawcard, Fs, lowerpass=0.1, upperpass=10.0, order=1, debug=False): + if debug: + print( + f"cardiacprefilter: Fs={Fs} order={order}, lowerpass={lowerpass}, upperpass={upperpass}" + ) + return butterbpfiltfilt(Fs, lowerpass, upperpass, rawcard, order, debug=debug) + + +def cardenvelopefilter(squarevals, Fs, upperpass=0.25, order=8, debug=False): + if debug: + print(f"cardenvelopefilter: Fs={Fs} order={order}, upperpass={upperpass}") + return butterlpfiltfilt(Fs, upperpass, squarevals, order, debug=debug) + + +def trendgen(thexvals, thefitcoffs, demean): + """ + + Parameters + ---------- + thexvals: array-like + The x-axis value at which to calculate the trend + thefitcoffs: array-like + The polynomial coefficients used to calculate the trend + demean: bool + Flag to determine if data should be demeaned + + Returns + ------- + + """ + theshape = thefitcoffs.shape + order = theshape[0] - 1 + thepoly = thexvals + thefit = 0.0 * thexvals + if order > 0: + for i in range(1, order + 1): + thefit += thefitcoffs[order - i] * thepoly + thepoly = np.multiply(thepoly, thexvals) + if demean: + thefit = thefit + thefitcoffs[order] + return thefit + + +def detrend(inputdata, order=1, demean=False): + """ + + Parameters + ---------- + inputdata: array-like + The waveform to be corrected + order: int + The order of the polynomial + demean: bool + Flag to determine if data should be demeaned + + Returns + ------- + outputdata: array-like + The input waveform with the polynomial baseline removed + + """ + thetimepoints = np.arange(0.0, len(inputdata), 1.0) - len(inputdata) / 2.0 + try: + thecoffs = Polynomial.fit(thetimepoints, inputdata, order).convert().coef[::-1] + except np.lib.polynomial.RankWarning: + thecoffs = [0.0, 0.0] + thefittc = trendgen(thetimepoints, thecoffs, demean) + return inputdata - thefittc + + +@due.dcite(references.ELGENDI_2016) +def calcplethskeqwness( + waveform, + Fs, + S_windowsecs=5.0, + detrendorder=8, + debug=False, +): + """ + + Parameters + ---------- + waveform: array-like + The cardiac waveform to be assessed + Fs: float + The sample rate of the data + S_windowsecs: float + Skewness window duration in seconds. Defaults to 5.0 (optimal for discrimination of "good" from "acceptable" + and "unfit" according to Elgendi) + detrendorder: int + Order of detrending polynomial to apply to plethysmogram. + debug: boolean + Turn on extended output + + Returns + ------- + S_sqi_mean: float + The mean value of the quality index over all time + S_std_mean: float + The standard deviation of the quality index over all time + S_waveform: array + The quality metric over all timepoints + + + Calculates the windowed skewness quality metric described in Elgendi, M. in + "Optimal Signal Quality Index for Photoplethysmogram Signals". Bioengineering 2016, Vol. 3, Page 21 3, 21 (2016). + """ + + # waveform = physio_or_numpy(waveform) + + # detrend the waveform + dt_waveform = detrend(waveform, order=detrendorder, demean=True) + + # calculate S_sqi and K_sqi over a sliding window. Window size should be an odd number of points. + S_windowpts = int(np.round(S_windowsecs * Fs, 0)) + S_windowpts += 1 - S_windowpts % 2 + S_waveform = dt_waveform * 0.0 + + if debug: + print("S_windowsecs, S_windowpts:", S_windowsecs, S_windowpts) + for i in range(0, len(dt_waveform)): + startpt = np.max([0, i - S_windowpts // 2]) + endpt = np.min([i + S_windowpts // 2, len(dt_waveform)]) + S_waveform[i] = skew(dt_waveform[startpt : endpt + 1], nan_policy="omit") + + S_sqi_mean = np.mean(S_waveform) + S_sqi_std = np.std(S_waveform) + + return S_sqi_mean, S_sqi_std, S_waveform + + +@due.dcite(references.ELGENDI_2016) +def calcplethkurtosis( + waveform, + Fs, + K_windowsecs=60.0, + detrendorder=8, + debug=False, +): + """ + + Parameters + ---------- + waveform: array-like + The cardiac waveform to be assessed + Fs: float + The sample rate of the data + K_windowsecs: float + Skewness window duration in seconds. Defaults to 2.0 (after Selveraj) + detrendorder: int + Order of detrending polynomial to apply to plethysmogram. + debug: boolean + Turn on extended output + + Returns + ------- + K_sqi_mean: float + The mean value of the quality index over all time + K_std_mean: float + The standard deviation of the quality index over all time + K_waveform: array + The quality metric over all timepoints + + + Calculates the windowed kurtosis quality metric described in Elgendi, M. in + "Optimal Signal Quality Index for Photoplethysmogram Signals". Bioengineering 2016, Vol. 3, Page 21 3, 21 (2016). + """ + # waveform = physio_or_numpy(waveform) + + # detrend the waveform + dt_waveform = detrend(waveform, order=detrendorder, demean=True) + + # calculate S_sqi and K_sqi over a sliding window. Window size should be an odd number of points. + K_windowpts = int(np.round(K_windowsecs * Fs, 0)) + K_windowpts += 1 - K_windowpts % 2 + K_waveform = dt_waveform * 0.0 + + if debug: + print("K_windowsecs, K_windowpts:", K_windowsecs, K_windowpts) + for i in range(0, len(dt_waveform)): + startpt = np.max([0, i - K_windowpts // 2]) + endpt = np.min([i + K_windowpts // 2, len(dt_waveform)]) + K_waveform[i] = kurtosis(dt_waveform[startpt : endpt + 1], fisher=False) + + K_sqi_mean = np.mean(K_waveform) + K_sqi_std = np.std(K_waveform) + + return K_sqi_mean, K_sqi_std, K_waveform + + +def approximateentropy(waveform, m, r): + def _maxdist(x_i, x_j): + return max([abs(ua - va) for ua, va in zip(x_i, x_j)]) + + def _phi(m): + x = [[waveform[j] for j in range(i, i + m - 1 + 1)] for i in range(N - m + 1)] + C = [ + len([1 for x_j in x if _maxdist(x_i, x_j) <= r]) / (N - m + 1.0) + for x_i in x + ] + return (N - m + 1.0) ** (-1) * sum(np.log(C)) + + N = len(waveform) + + return abs(_phi(m + 1) - _phi(m)) + + +@due.dcite(references.ELGENDI_2016) +def calcplethentropy( + waveform, + Fs, + E_windowsecs=1.0, + detrendorder=8, + debug=False, +): + """ + + Parameters + ---------- + waveform: array-like + The cardiac waveform to be assessed + Fs: float + The sample rate of the data + E_windowsecs: float + Entropy window duration in seconds. Defaults to 0.5 (after Selveraj) + detrendorder: int + Order of detrending polynomial to apply to plethysmogram. + debug: boolean + Turn on extended output + + Returns + ------- + E_sqi_mean: float + The mean value of the quality index over all time + E_std_mean: float + The standard deviation of the quality index over all time + E_waveform: array + The quality metric over all timepoints + + + Calculates the windowed entropy quality metric described in Elgendi, M. in + "Optimal Signal Quality Index for Photoplethysmogram Signals". Bioengineering 2016, Vol. 3, Page 21 3, 21 (2016). + """ + # waveform = physio_or_numpy(waveform) + + # detrend the waveform + dt_waveform = detrend(waveform, order=detrendorder, demean=True) + + # calculate S_sqi and K_sqi over a sliding window. Window size should be an odd number of points. + E_windowpts = int(np.round(E_windowsecs * Fs, 0)) + E_windowpts += 1 - E_windowpts % 2 + E_waveform = dt_waveform * 0.0 + + if debug: + print("E_windowsecs, E_windowpts:", E_windowsecs, E_windowpts) + for i in range(0, len(dt_waveform)): + startpt = np.max([0, i - E_windowpts // 2]) + endpt = np.min([i + E_windowpts // 2, len(dt_waveform)]) + r = 0.2 * np.std(dt_waveform[startpt : endpt + 1]) + E_waveform[i] = approximateentropy(dt_waveform[startpt : endpt + 1], 2, r) + + E_sqi_mean = np.mean(E_waveform) + E_sqi_std = np.std(E_waveform) + + return E_sqi_mean, E_sqi_std, E_waveform + + +def cardiacsqi(rawresp, debug=False): + """ + Calculate the breath by breath quality of the respiratory signal + Parameters + ---------- + rawresp: physio_like + The raw respiratory signal + debug: bool + Print debug information and plot intermediate results + + Returns + ------- + breathlist: list + List of "breathinfo" dictionaries for each detected breath. Each breathinfo dictionary contains: + "startime", "centertime", and "endtime" of each detected breath in seconds from the start of the waveform, + and "correlation" - the Pearson correlation of that breath waveform with the average waveform. + + """ + targetfs = 50.0 + prefilterlimits = [0.01, 5.0] + seglength = 4.0 + segstep = 0.5 + envelopelpffreq = 0.1 + slidingfilterpctwidth = 10.0 + # The fastest credible cardiac rate is 200 bpm -> 0.3 seconds/heartbeat, so set the dist to be 75% + # of that in points + minperiod = 0.3 + distfrac = 0.75 + + return romanosqi( + rawresp, + targetfs=targetfs, + prefilterlimits=prefilterlimits, + envelopelpffreq=envelopelpffreq, + slidingfilterpctwidth=slidingfilterpctwidth, + minperiod=minperiod, + distfrac=distfrac, + seglength=seglength, + segstep=segstep, + label="cardiac", + debug=debug, + ) + + +def plotheartbeatqualities(heartbeatlist, totaltime=None): + # set up the color codes + color_0p9 = "#888888" + color_0p8 = "#aa6666" + color_0p7 = "#cc4444" + color_bad = "#ff0000" + + if totaltime is None: + totaltime = heartbeatlist[-1]["endtime"] + + # unpack the heartbeat information + numheartbeats = len(heartbeatlist) + theheartbeatlocs = np.zeros((numheartbeats), dtype=np.float64) + theheartbeatcorrs = np.zeros((numheartbeats), dtype=np.float64) + for thisheartbeat in range(numheartbeats): + theheartbeatlocs[thisheartbeat] = heartbeatlist[thisheartbeat]["centertime"] + theheartbeatcorrs[thisheartbeat] = heartbeatlist[thisheartbeat]["correlation"] + + # plot the heartbeat correlations, with lines indicating thresholds + plt.plot(theheartbeatlocs, theheartbeatcorrs, ls="-", marker="o") + plt.hlines( + [0.9, 0.8, 0.7, 0.6], + 0.0, + totaltime, + colors=[color_0p9, color_0p8, color_0p7, color_bad], + linestyles="solid", + label=["th=0.9", "th=0.8", "th=0.7", "th=0.6"], + ) + plt.title("Quality evaluation for each heartbeat") + plt.xlabel("Time in seconds") + plt.ylabel("Correlation with average heartbeat") + plt.xlim([0, totaltime]) + plt.ylim([0, 1.05]) + plt.show() + + +def plotcardiacwaveformwithquality(waveform, heartbeatlist, Fs, plottype="rectangle"): + # now plot the cardiac waveform, color coded for quality + + # set up the color codes + color_0p9 = "#ff000000" + color_0p8 = "#ff000044" + color_0p7 = "#ff000088" + color_bad = "#ff0000aa" + + # unpack the heartbeat information + numheartbeats = len(heartbeatlist) + theheartbeatlocs = np.zeros((numheartbeats), dtype=np.float64) + theheartbeatcorrs = np.zeros((numheartbeats), dtype=np.float64) + for thisheartbeat in range(numheartbeats): + theheartbeatlocs[thisheartbeat] = heartbeatlist[thisheartbeat]["centertime"] + theheartbeatcorrs[thisheartbeat] = heartbeatlist[thisheartbeat]["correlation"] + + # initialize the plot + fig = plt.figure() + ax = fig.add_subplot(111) + + # plot the whole line if we're doing rectangle occlusion + if plottype == "rectangle": + xvals = np.linspace(0.0, len(waveform) / Fs, len(waveform), endpoint=False) + plt.plot(xvals, waveform, color="black") + yrange = np.max(waveform) - np.min(waveform) + ymax = np.min(waveform) + yrange * 1.05 + ymin = np.max(waveform) - yrange * 1.05 + for thisheartbeat in range(numheartbeats): + if theheartbeatcorrs[thisheartbeat] > 0.9: + thecolor = color_0p9 + elif theheartbeatcorrs[thisheartbeat] > 0.8: + thecolor = color_0p8 + elif theheartbeatcorrs[thisheartbeat] > 0.7: + thecolor = color_0p7 + else: + thecolor = color_bad + startpt = int(heartbeatlist[thisheartbeat]["starttime"] * Fs) + endpt = int(heartbeatlist[thisheartbeat]["endtime"] * Fs) + if plottype == "rectangle": + therectangle = Polygon( + ( + (heartbeatlist[thisheartbeat]["starttime"], ymin), + (heartbeatlist[thisheartbeat]["starttime"], ymax), + (heartbeatlist[thisheartbeat]["endtime"], ymax), + (heartbeatlist[thisheartbeat]["endtime"], ymin), + ), + fc=thecolor, + ec=thecolor, + lw=0, + ) + ax.add_patch(therectangle) + pass + else: + if endpt == len(waveform) - 1: + endpt -= 1 + xvals = np.linspace( + startpt / Fs, (endpt + 1) / Fs, endpt - startpt + 1, endpoint=False + ) + yvals = waveform[startpt : endpt + 1] + plt.plot(xvals, yvals, color=thecolor) + plt.ylim([ymin, ymax]) + plt.title("Cardiac waveform, color coded by quantifiability") + plt.xlabel("Time in seconds") + plt.ylabel("Amplitude (arbitrary units)") + plt.xlim([0.0, len(waveform) / Fs]) + plt.show() + + +def summarizeheartbeats(heartbeatlist): + numheartbeats = len(heartbeatlist) + for thisheartbeat in range(numheartbeats): + theheartbeatinfo = heartbeatlist[thisheartbeat] + print( + f"{thisheartbeat},{theheartbeatinfo['starttime']},{theheartbeatinfo['endtime']},{theheartbeatinfo['centertime']},{theheartbeatinfo['correlation']}" + ) diff --git a/physioqc/metrics/chest_belt.py b/physioqc/metrics/chest_belt.py index 6875da7..975edce 100644 --- a/physioqc/metrics/chest_belt.py +++ b/physioqc/metrics/chest_belt.py @@ -1,8 +1,483 @@ """Denoising metrics for chest belt recordings.""" +import matplotlib.pyplot as plt import numpy as np +from matplotlib.patches import Polygon +from peakdet import Physio, operations +from scipy.signal import resample, welch +from scipy.stats import pearsonr from .. import references from ..due import due +from .utils import hamming -pass + +def envelopedetect(data, upperpass=0.05, order=5): + """ + Detects the amplitude envelope of a pseudoperiodic waveform (respiration, cardiac, etc.) + Parameters + ---------- + data + upperpass: float + Upper pass cutoff frequency in Hz + order: int + Butterworth filter order + Returns + ------- + einferior: ndarray + The lower edge of the signal envelope + esuperior: ndarray + The upper edge of the signal envelope + + """ + npdata = data.data + targetfs = data.fs + esuperior = ( + 2.0 + * operations.filter_physio( + Physio(np.square(np.where(npdata > 0.0, npdata, 0.0)), fs=targetfs), + [upperpass], + "lowpass", + order=order, + ).data + ) + esuperior = np.sqrt(np.where(esuperior > 0.0, esuperior, 0.0)) + einferior = ( + 2.0 + * operations.filter_physio( + Physio(np.square(np.where(npdata < 0.0, npdata, 0.0)), fs=targetfs), + [upperpass], + "lowpass", + order=order, + ).data + ) + einferior = -np.sqrt(np.where(einferior > 0.0, einferior, 0.0)) + return einferior, esuperior + + +def respiratorysqi(rawresp, debug=False): + """ + Calculate the breath by breath quality of the respiratory signal + Parameters + ---------- + rawresp: physio_like + The raw respiratory signal + debug: bool + Print debug information and plot intermediate results + + Returns + ------- + breathlist: list + List of "breathinfo" dictionaries for each detected breath. Each breathinfo dictionary contains: + "startime", "centertime", and "endtime" of each detected breath in seconds from the start of the waveform, + and "correlation" - the Pearson correlation of that breath waveform with the average waveform. + + """ + targetfs = 25.0 + prefilterlimits = [0.01, 2.0] + seglength = 12.0 + segstep = 2.0 + envelopelpffreq = 0.05 + slidingfilterpctwidth = 10.0 + + # The fastest credible breathing rate is 20 breaths/min -> 3 seconds/breath, so set the dist to be 50% + # of that in points + minperiod = 3.0 + distfrac = 0.5 + + return romanosqi( + rawresp, + targetfs=targetfs, + prefilterlimits=prefilterlimits, + envelopelpffreq=envelopelpffreq, + slidingfilterpctwidth=slidingfilterpctwidth, + minperiod=minperiod, + distfrac=distfrac, + seglength=seglength, + segstep=segstep, + label="respiratory", + debug=debug, + ) + + +@due.dcite(references.ROMANO_2023) +def romanosqi( + rawresp, + targetfs=25.0, + prefilterlimits=(0.01, 2.0), + envelopelpffreq=0.05, + slidingfilterpctwidth=10.0, + minperiod=3.0, + distfrac=0.5, + seglength=12.0, + segstep=2.0, + label="respiratory", + debug=False, +): + """ + Implementation of Romano's method from A Signal Quality Index for Improving the Estimation of + Breath-by-Breath Respiratory Rate During Sport and Exercise, + IEEE SENSORS JOURNAL, VOL. 23, NO. 24, 15 DECEMBER 2023 + + NB: In part A, Romano does not specify the upper pass frequency for the respiratory envelope filter. + 0.05Hz looks pretty good for respiration. + In part B, the width of the sliding window bandpass filter is not specified. We use a range of +/- 5% of the + center frequency. + + Parameters + ---------- + rawresp: physio_like + The raw respiration signal + targetfs: float + Sample rate for internal calculations + prefilterlimits: tuple + Lower and upper frequency limits for prefiltering the input signal + envelopelpffreq: float + Cutoff frequency of the RMS envelope filter in Hz + slidingfilterpctwidth: float + Width of the sliding window bandpass filter in percent of the center frequency + seglength: float + Length of the window for measuring the waveform center frequency in seconds + segstep: float + Step size of the window for measuring the waveform center frequency in seconds + debug: bool + Print debug information and plot intermediate results + + Returns + ------- + breathlist: list + List of "breathinfo" dictionaries for each detected breath. Each breathinfo dictionary contains: + "startime", "centertime", and "endtime" of each detected breath in seconds from the start of the waveform, + and "correlation" - the Pearson correlation of that breath waveform with the average waveform. + """ + + # get the sample frequency down to around 25 Hz for respiratory waveforms + rawresp = operations.interpolate_physio(rawresp, target_fs=targetfs) + timeaxis = np.linspace( + 0.0, rawresp.data.shape[0] / targetfs, num=rawresp.data.shape[0], endpoint=False + ) + + # A. Signal Preprocessing + # Apply third order Butterworth bandpass, 0.01-2Hz + prefiltered = operations.filter_physio( + rawresp, + prefilterlimits, + "bandpass", + order=3, + ) + if debug: + plt.plot(timeaxis, rawresp.data) + plt.plot(timeaxis, prefiltered.data) + plt.title(f"Raw and prefiltered {label} signal") + plt.legend(["Raw", "Prefiltered"]) + plt.show() + if debug: + print("prefiltered: ", prefiltered.data) + + # calculate the derivative + derivative = np.gradient(prefiltered.data, 1.0 / targetfs) + + # normalize the derivative to the range of ~-1 to 1 + derivmax = np.max(derivative) + derivmin = np.min(derivative) + derivrange = derivmax - derivmin + if debug: + print(f"{derivmax=}, {derivmin=}, {derivrange=}") + normderiv = 2.0 * (derivative - derivmin) / derivrange - 1.0 + if debug: + plt.plot(timeaxis, normderiv) + plt.title(f"Normalized derivative of {label} signal") + plt.legend(["Normalized derivative"]) + plt.show() + + # amplitude correct by flattening the envelope function + einferior, esuperior = envelopedetect( + Physio(normderiv, fs=targetfs), upperpass=envelopelpffreq, order=3 + ) + if debug: + plt.plot(timeaxis, normderiv) + plt.plot(timeaxis, esuperior) + plt.plot(timeaxis, einferior) + plt.legend(["Normalized derivative", "esuperior", "einferior"]) + plt.show() + rmsnormderiv = (normderiv - einferior) / (esuperior - einferior) + if debug: + plt.plot(timeaxis, rmsnormderiv) + plt.title(f"Normalized derivative of {label} signal after envelope correction") + plt.show() + + # B. Detection of peaks in sliding window + segsamples = int(seglength * targetfs) + stepsamples = int(segstep * targetfs) + totaltclength = len(rawresp) + numsegs = int((totaltclength - segsamples) // stepsamples) + if (totaltclength - segsamples) % segsamples != 0: + numsegs += 1 + peakfreqs = np.zeros((numsegs), dtype=np.float64) + respfilteredderivs = rmsnormderiv * 0.0 + respfilteredweights = rmsnormderiv * 0.0 + segpeaks = [] + for i in range(numsegs): + if i < numsegs - 1: + segstart = i * stepsamples + segend = segstart + segsamples + else: + segstart = len(rawresp) - segsamples + segend = segstart + segsamples + segment = rmsnormderiv[segstart:segend] + 0.0 + segment *= hamming(segsamples) + segment -= np.mean(segment) + thex, they = welch(segment, targetfs, nfft=4096) + peakfreqs[i] = thex[np.argmax(they)] + respfilterorder = 1 + lowerfac = 1.0 - slidingfilterpctwidth / 200.0 + upperfac = 1.0 + slidingfilterpctwidth / 200.0 + lowerpass = peakfreqs[i] * lowerfac + upperpass = peakfreqs[i] * upperfac + if debug: + print(peakfreqs[i], lowerfac, lowerpass, upperfac, upperpass) + filteredsegment = operations.filter_physio( + Physio(segment, fs=targetfs), + [upperpass], + "lowpass", + order=respfilterorder, + ).data + thedist = int(targetfs * distfrac / peakfreqs[i]) + segpeaks += ( + ( + operations.peakfind_physio( + filteredsegment, thresh=0.05, dist=thedist + ).peaks + + segstart + ) + / targetfs + ).tolist() + filteredsegment -= np.mean(filteredsegment) + if i < numsegs - 1: + respfilteredderivs[ + i * stepsamples : (i * stepsamples) + segsamples + ] += filteredsegment + respfilteredweights[i * stepsamples : (i * stepsamples) + segsamples] += 1.0 + else: + respfilteredderivs[-segsamples:] += filteredsegment + respfilteredweights[-segsamples:] += 1.0 + respfilteredderivs /= respfilteredweights + respfilteredderivs /= np.std(respfilteredderivs) + if debug: + print(peakfreqs) + plt.plot(timeaxis, rmsnormderiv) + plt.plot(timeaxis, respfilteredderivs) + plt.title("Normalized derivative before and after targeted bandpass filtering") + plt.legend(["Normalized derivative", "Filtered normalized derivative"]) + plt.show() + + # C. Breaths segmentation + # The fastest credible breathing rate is 20 breaths/min -> 3 seconds/breath, so set the dist to be 50% + # of that in points + minperiod = 1.0 / np.min(peakfreqs) + thedist = int(minperiod * targetfs * distfrac) + peakinfo = operations.peakfind_physio(respfilteredderivs, thresh=0.05, dist=thedist) + if debug: + ax = operations.plot_physio(peakinfo) + plt.show() + thepeaks = peakinfo.peaks + + # D. Similarity Analysis and Exclusion of Unreliable Breaths + numbreaths = len(thepeaks) - 1 + scaledpeaklength = 100 + thescaledbreaths = np.zeros((numbreaths, scaledpeaklength), dtype=np.float64) + thebreathlocs = np.zeros((numbreaths), dtype=np.float64) + thebreathcorrs = np.zeros((numbreaths), dtype=np.float64) + breathlist = [] + for thisbreath in range(numbreaths): + breathinfo = {} + startpt = thepeaks[thisbreath] + endpt = thepeaks[thisbreath + 1] + thebreathlocs[thisbreath] = (endpt + startpt) / (targetfs * 2.0) + breathinfo["starttime"] = startpt / targetfs + breathinfo["endtime"] = endpt / targetfs + breathinfo["centertime"] = thebreathlocs[thisbreath] + thescaledbreaths[thisbreath, :] = resample( + respfilteredderivs[startpt:endpt], scaledpeaklength + ) + thescaledbreaths[thisbreath, :] -= np.min(thescaledbreaths[thisbreath, :]) + thescaledbreaths[thisbreath, :] /= np.max(thescaledbreaths[thisbreath, :]) + breathlist.append(breathinfo) + if debug: + plt.plot(thescaledbreaths[thisbreath, :], lw=0.1, color="#888888") + averagebreath = np.mean(thescaledbreaths, axis=0) + if debug: + plt.plot(averagebreath, lw=2, color="black") + plt.show() + for thisbreath in range(numbreaths): + thebreathcorrs[thisbreath] = pearsonr( + averagebreath, thescaledbreaths[thisbreath, :] + ).statistic + breathlist[thisbreath]["correlation"] = thebreathcorrs[thisbreath] + return breathlist + + +def plotbreathqualities(breathlist, totaltime=None): + """ + Make an informational plot of quantifiability vs time for each detected breath + + Parameters + ---------- + breathlist: list + A list of breathinfo dictionaries output from respiratorysqi + totaltime: float + The maximum time to include in the plot. If totaltime is None (default), plot every breath. + + Returns + ------- + + """ + # set up the color codes + color_0p9 = "#888888" + color_0p8 = "#aa6666" + color_0p7 = "#cc4444" + color_bad = "#ff0000" + + if totaltime is None: + totaltime = breathlist[-1]["endtime"] + + # unpack the breath information + numbreaths = len(breathlist) + thebreathlocs = np.zeros((numbreaths), dtype=np.float64) + thebreathcorrs = np.zeros((numbreaths), dtype=np.float64) + for thisbreath in range(numbreaths): + thebreathlocs[thisbreath] = breathlist[thisbreath]["centertime"] + thebreathcorrs[thisbreath] = breathlist[thisbreath]["correlation"] + + # plot the breath correlations, with lines indicating thresholds + plt.plot(thebreathlocs, thebreathcorrs, ls="-", marker="o") + plt.hlines( + [0.9, 0.8, 0.7, 0.6], + 0.0, + totaltime, + colors=[color_0p9, color_0p8, color_0p7, color_bad], + linestyles="solid", + label=["th=0.9", "th=0.8", "th=0.7", "th=0.6"], + ) + plt.title("Quality evaluation for each breath") + plt.xlabel("Time in seconds") + plt.ylabel("Correlation with average breath") + plt.xlim([0, totaltime]) + plt.ylim([0, 1.05]) + plt.show() + + +def plotbreathwaveformwithquality( + data, breathlist, label="respiratory", plottype="rectangle" +): + """ + Make an informational plot of the respiratory waveform with the quantifiability of each detected breath as a + color overlay. + + Parameters + ---------- + data: physio_like + The respiratory waveform + breathlist: list + A list of breathinfo dictionaries output from respiratorysqi + plottype: str + The type of plot to make. If plottype is rectangle (default), overlay a colored rectangle to show the + quantifiability of each detected breath. If anything else, use the waveform line color to indicate the + quantifiability. + Returns + ------- + + """ + waveform = data.data + Fs = data.fs + + # now plot the respiratory waveform, color coded for quality + + # set up the color codes + color_0p9 = "#ff000000" + color_0p8 = "#ff000044" + color_0p7 = "#ff000088" + color_bad = "#ff0000aa" + + # unpack the breath information + numbreaths = len(breathlist) + thebreathlocs = np.zeros((numbreaths), dtype=np.float64) + thebreathcorrs = np.zeros((numbreaths), dtype=np.float64) + for thisbreath in range(numbreaths): + thebreathlocs[thisbreath] = breathlist[thisbreath]["centertime"] + thebreathcorrs[thisbreath] = breathlist[thisbreath]["correlation"] + + # initialize the plot + fig = plt.figure() + ax = fig.add_subplot(111) + + # plot the whole line if we're doing rectangle occlusion + if plottype == "rectangle": + xvals = np.linspace(0.0, len(waveform) / Fs, len(waveform), endpoint=False) + plt.plot(xvals, waveform, color="black") + yrange = np.max(waveform) - np.min(waveform) + ymax = np.min(waveform) + yrange * 1.05 + ymin = np.max(waveform) - yrange * 1.05 + for thisbreath in range(numbreaths): + if thebreathcorrs[thisbreath] > 0.9: + if plottype == "rectangle": + thecolor = color_0p9 + else: + thecolor = "black" + elif thebreathcorrs[thisbreath] > 0.8: + thecolor = color_0p8 + elif thebreathcorrs[thisbreath] > 0.7: + thecolor = color_0p7 + else: + thecolor = color_bad + startpt = int(breathlist[thisbreath]["starttime"] * Fs) + endpt = int(breathlist[thisbreath]["endtime"] * Fs) + if plottype == "rectangle": + therectangle = Polygon( + ( + (breathlist[thisbreath]["starttime"], ymin), + (breathlist[thisbreath]["starttime"], ymax), + (breathlist[thisbreath]["endtime"], ymax), + (breathlist[thisbreath]["endtime"], ymin), + ), + fc=thecolor, + ec=thecolor, + lw=0, + ) + ax.add_patch(therectangle) + pass + else: + if endpt == len(waveform) - 1: + endpt -= 1 + xvals = np.linspace( + startpt / Fs, (endpt + 1) / Fs, endpt - startpt + 1, endpoint=False + ) + yvals = waveform[startpt : endpt + 1] + plt.plot(xvals, yvals, color=thecolor) + plt.ylim([ymin, ymax]) + plt.title(f"{label} waveform, color coded by quantifiability") + plt.xlabel("Time in seconds") + plt.ylabel("Amplitude (arbitrary units)") + plt.xlim([0.0, len(waveform) / Fs]) + plt.show() + + +def summarizebreaths(breathlist): + """ + Print summary information about each detected breath. + + Parameters + ---------- + breathlist: list + A list of breathinfo dictionaries output from respiratorysqi + + Returns + ------- + + """ + numbreaths = len(breathlist) + for thisbreath in range(numbreaths): + thebreathinfo = breathlist[thisbreath] + print( + f"{thisbreath},{thebreathinfo['starttime']},{thebreathinfo['endtime']},{thebreathinfo['centertime']},{thebreathinfo['correlation']}" + ) diff --git a/physioqc/metrics/utils.py b/physioqc/metrics/utils.py index 8752c64..11deb9d 100644 --- a/physioqc/metrics/utils.py +++ b/physioqc/metrics/utils.py @@ -1,13 +1,19 @@ """Miscellaneous utility functions for metric calculation.""" import functools +import json import logging +import os +import numpy as np +import pandas as pd from peakdet.physio import Physio LGR = logging.getLogger(__name__) LGR.setLevel(logging.INFO) +from scipy import signal + def print_metric_call(metric, args): """ @@ -85,3 +91,349 @@ def has_peaks(signal: Physio) -> bool: LGR.warn("Signal has only one peak! Better to rerun peak detection.") return signal._metadata["peaks"].size > 1 + + +# - butterworth filters +# @conditionaljit() +def butterlpfiltfilt(Fs, upperpass, inputdata, order, debug=False): + r"""Performs a bidirectional (zero phase) Butterworth lowpass filter on an input vector + and returns the result. Ends are padded to reduce transients. + + Parameters + ---------- + Fs : float + Sample rate in Hz + :param Fs: + + upperpass : float + Upper end of passband in Hz + :param upperpass: + + inputdata : 1D numpy array + Input data to be filtered + :param inputdata: + + order : int + Order of Butterworth filter. + :param order: + + debug : boolean, optional + When True, internal states of the function will be printed to help debugging. + :param debug: + + Returns + ------- + filtereddata : 1D float array + The filtered data + + """ + if upperpass > Fs / 2.0: + upperpass = Fs / 2.0 + if debug: + print( + "butterlpfiltfilt - Fs, upperpass, len(inputdata), order:", + Fs, + upperpass, + len(inputdata), + order, + ) + sos = signal.butter(order, 2.0 * upperpass, btype="lowpass", output="sos", fs=Fs) + return signal.sosfiltfilt(sos, inputdata).real + + +# @conditionaljit() +def butterhpfiltfilt(Fs, lowerpass, inputdata, order, debug=False): + r"""Performs a bidirectional (zero phase) Butterworth highpass filter on an input vector + and returns the result. Ends are padded to reduce transients. + + Parameters + ---------- + Fs : float + Sample rate in Hz + :param Fs: + + lowerpass : float + Lower end of passband in Hz + :param lowerpass: + + inputdata : 1D numpy array + Input data to be filtered + :param inputdata: + + order : int + Order of Butterworth filter. + :param order: + + debug : boolean, optional + When True, internal states of the function will be printed to help debugging. + :param debug: + + Returns + ------- + filtereddata : 1D float array + The filtered data + """ + if lowerpass < 0.0: + lowerpass = 0.0 + if debug: + print( + "butterhpfiltfilt - Fs, lowerpass, len(inputdata), order:", + Fs, + lowerpass, + len(inputdata), + order, + ) + sos = signal.butter(order, 2.0 * lowerpass, btype="highpass", output="sos", fs=Fs) + return signal.sosfiltfilt(sos, inputdata).real + + +# @conditionaljit() +def butterbpfiltfilt(Fs, lowerpass, upperpass, inputdata, order, debug=False): + r"""Performs a bidirectional (zero phase) Butterworth bandpass filter on an input vector + and returns the result. Ends are padded to reduce transients. + + Parameters + ---------- + Fs : float + Sample rate in Hz + :param Fs: + + lowerpass : float + Lower end of passband in Hz + :param lowerpass: + + upperpass : float + Upper end of passband in Hz + :param upperpass: + + inputdata : 1D numpy array + Input data to be filtered + :param inputdata: + + order : int + Order of Butterworth filter. + :param order: + + debug : boolean, optional + When True, internal states of the function will be printed to help debugging. + :param debug: + + Returns + ------- + filtereddata : 1D float array + The filtered data + """ + if upperpass > Fs / 2.0: + upperpass = Fs / 2.0 + if lowerpass < 0.0: + lowerpass = 0.0 + if debug: + print( + f"butterbpfiltfilt - {Fs=}, {lowerpass=}, {upperpass=}, {len(inputdata)=}, {order=}" + ) + sos = signal.butter( + order, [2.0 * lowerpass, 2.0 * upperpass], btype="bandpass", output="sos", fs=Fs + ) + if debug: + print(sos) + return signal.sosfiltfilt(sos, inputdata).real + + +def readbidstsv(inputfilename, colspec=None, warn=True, debug=False): + r"""Read time series out of a BIDS tsv file + + Parameters + ---------- + inputfilename : str + The root name of the tsv and accompanying json file (no extension) + colspec: list + A comma separated list of column names to return + debug : bool + Output additional debugging information + + Returns + ------- + samplerate : float + Sample rate in Hz + starttime : float + Time of first point, in seconds + columns : str array + Names of the timecourses contained in the file + data : 2D numpy array + Timecourses from the file + + NOTE: If file does not exist or is not valid, all return values are None + + """ + thefileroot, theext = os.path.splitext(inputfilename) + if theext == ".gz": + thefileroot, thenextext = os.path.splitext(thefileroot) + if thenextext is not None: + theext = thenextext + theext + + if debug: + print("thefileroot:", thefileroot) + print("theext:", theext) + if os.path.exists(thefileroot + ".json") and ( + os.path.exists(thefileroot + ".tsv.gz") or os.path.exists(thefileroot + ".tsv") + ): + with open(thefileroot + ".json", "r") as json_data: + d = json.load(json_data) + try: + samplerate = float(d["SamplingFrequency"]) + except: + print("no samplerate found in json, setting to 1.0") + samplerate = 1.0 + if warn: + print( + "Warning - SamplingFrequency not found in " + + thefileroot + + ".json. This is not BIDS compliant." + ) + try: + starttime = float(d["StartTime"]) + except: + print("no starttime found in json, setting to 0.0") + starttime = 0.0 + if warn: + print( + "Warning - StartTime not found in " + + thefileroot + + ".json. This is not BIDS compliant." + ) + try: + columns = d["Columns"] + except: + if debug: + print( + "no columns found in json, will take labels from the tsv file" + ) + columns = None + if warn: + print( + "Warning - Columns not found in " + + thefileroot + + ".json. This is not BIDS compliant." + ) + else: + columnsource = "json" + if os.path.exists(thefileroot + ".tsv.gz"): + compression = "gzip" + theextension = ".tsv.gz" + else: + compression = None + theextension = ".tsv" + if warn: + print( + "Warning - " + + thefileroot + + ".tsv is uncompressed. This is not BIDS compliant." + ) + + df = pd.read_csv( + thefileroot + theextension, + compression=compression, + names=columns, + header=None, + sep="\t", + quotechar='"', + ) + + # check for header line + if any(df.iloc[0].apply(lambda x: isinstance(x, str))): + headerlinefound = True + # reread the data, skipping the first row + df = pd.read_csv( + thefileroot + theextension, + compression=compression, + names=columns, + header=0, + sep="\t", + quotechar='"', + ) + if warn: + print( + "Warning - Column header line found in " + + thefileroot + + ".tsv. This is not BIDS compliant." + ) + else: + headerlinefound = False + + if columns is None: + columns = list(df.columns.values) + columnsource = "tsv" + if debug: + print( + samplerate, + starttime, + columns, + np.transpose(df.to_numpy()).shape, + (compression == "gzip"), + warn, + headerlinefound, + ) + + # select a subset of columns if they were specified + if colspec is None: + return ( + samplerate, + starttime, + columns, + np.transpose(df.to_numpy()), + (compression == "gzip"), + columnsource, + ) + else: + collist = colspec.split(",") + try: + selectedcols = df[collist] + except KeyError: + print("specified column list cannot be found in", inputfilename) + return [None, None, None, None, None, None] + columns = list(selectedcols.columns.values) + return ( + samplerate, + starttime, + columns, + np.transpose(selectedcols.to_numpy()), + (compression == "gzip"), + columnsource, + ) + else: + print("file pair does not exist") + return [None, None, None, None, None, None] + + +hammingwindows = {} + + +def hamming(length, debug=False): + # return 0.54 - 0.46 * np.cos((np.arange(0.0, float(length), 1.0) / float(length)) * 2.0 * np.pi) + r"""Returns a Hamming window function of the specified length. Once calculated, windows + are cached for speed. + + Parameters + ---------- + length : int + The length of the window function + :param length: + + debug : boolean, optional + When True, internal states of the function will be printed to help debugging. + :param debug: + + Returns + ------- + windowfunc : 1D float array + The window function + """ + try: + return hammingwindows[str(length)] + except KeyError: + hammingwindows[str(length)] = 0.54 - 0.46 * np.cos( + (np.arange(0.0, float(length), 1.0) / float(length)) * 2.0 * np.pi + ) + if debug: + print("initialized hamming window for length", length) + return hammingwindows[str(length)] diff --git a/physioqc/references.py b/physioqc/references.py index a5d2e02..7968e4c 100644 --- a/physioqc/references.py +++ b/physioqc/references.py @@ -2,6 +2,10 @@ from .due import Doi +ELGENDI_2016 = Doi("10.3390/bioengineering3040021") + +ROMANO_2023 = Doi("10.1109/JSEN.2023.3330444") + CHANG_GLOVER_2009 = Doi("10.1016/j.neuroimage.2009.04.048") POWER_2018 = Doi("10.1073/pnas.1720985115") diff --git a/simplecardtest b/simplecardtest new file mode 100755 index 0000000..6eec92a --- /dev/null +++ b/simplecardtest @@ -0,0 +1,21 @@ +#!/usr/bin/env python + +import numpy as np +from peakdet import Physio + +import physioqc.metrics.cardiac as cardiac +import physioqc.metrics.utils as utils + +samplerate, starttime, columns, data, compression, columnsource = utils.readbidstsv( + "testdata/sub-007_ses-05_task-rest_run-01_physio.tsv.gz", colspec="cardiac" +) + +rawcard = Physio(np.nan_to_num(data.flatten()), fs=samplerate) + +print(rawcard.data) +print(rawcard.data.shape) + +cardiaclist = cardiac.cardiacsqi(rawcard, debug=True) +cardiac.plotheartbeatqualities(cardiaclist, totaltime=(len(rawcard.data) / samplerate)) +cardiac.plotcardiacwaveformwithquality(rawcard, cardiaclist, samplerate) +cardiac.summarizeheartbeats(cardiaclist) diff --git a/simpleresptest b/simpleresptest new file mode 100755 index 0000000..9b759bb --- /dev/null +++ b/simpleresptest @@ -0,0 +1,22 @@ +#!/usr/bin/env python + +import numpy as np +from peakdet import Physio + +import physioqc.metrics.chest_belt as chest_belt +import physioqc.metrics.utils as utils + +samplerate, starttime, columns, data, compression, columnsource = utils.readbidstsv( + "testdata/sub-007_ses-05_task-rest_run-01_physio.tsv.gz", + colspec="respiratory_effort", +) + +rawresp = Physio(np.nan_to_num(data.flatten()), fs=samplerate) + +print(rawresp.data) + +breathlist = chest_belt.respiratorysqi(rawresp, debug=True) +chest_belt.plotbreathqualities(breathlist, totaltime=(len(rawresp) / samplerate)) +chest_belt.plotbreathwaveformwithquality(rawresp, breathlist) +chest_belt.plotbreathwaveformwithquality(rawresp, breathlist, plottype="other") +chest_belt.summarizebreaths(breathlist) diff --git a/testdata/sub-007_ses-05_task-rest_run-01_physio.json b/testdata/sub-007_ses-05_task-rest_run-01_physio.json new file mode 100644 index 0000000..bffbacf --- /dev/null +++ b/testdata/sub-007_ses-05_task-rest_run-01_physio.json @@ -0,0 +1,12 @@ +{ + "SamplingFrequency": 10000.0, + "StartTime": 37.3526, + "Columns": [ + "time", + "respiratory_effort", + "trigger", + "cardiac", + "respiratory_CO2", + "respiratory_O2" + ] +} diff --git a/testdata/sub-007_ses-05_task-rest_run-01_physio.tsv.gz b/testdata/sub-007_ses-05_task-rest_run-01_physio.tsv.gz new file mode 100644 index 0000000..f12764a Binary files /dev/null and b/testdata/sub-007_ses-05_task-rest_run-01_physio.tsv.gz differ