Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
bdb22fe
docstring for hr_to_mr function
berrakozer Sep 9, 2021
2cbf60c
test for hr_to_mr function
berrakozer Sep 9, 2021
f5b5f87
hr_to_mr function added to utils.py
berrakozer Sep 9, 2021
8ae23f6
docstring for mr_to_hr function
berrakozer Sep 9, 2021
c574b32
test for mr_to_hr function
berrakozer Sep 9, 2021
c90c4f0
mr_to_hr function added to utils.py and testfor mr_to_hr edited, test…
berrakozer Sep 9, 2021
e2d9d8e
renamed functions and introduced numpy conventions to the docstrings
berrakozer Sep 13, 2021
f2600bd
edited the parameters and returns in docstrings of both functions
berrakozer Sep 13, 2021
72b2687
altered hr_to_mr_number_and_esd function and test to accept string in…
berrakozer Sep 13, 2021
4aa333e
edited hr_to_mr function and test
berrakozer Sep 14, 2021
11eb646
edited mr_to_hr function and test
berrakozer Sep 14, 2021
259bbf1
docstring for the (new) function round_number_esd
berrakozer Sep 23, 2021
8b05b71
added test_round_number_esd function
berrakozer Sep 23, 2021
67a0b4a
added round_number_esd function to utils.py, test passes
berrakozer Sep 23, 2021
e294ce3
edited the mr_to_hr_number_and_esd docstring
berrakozer Sep 23, 2021
1411959
edited test_mr_to_hr_number_and_esd
berrakozer Sep 23, 2021
fb1e2e5
edited mr_to_hr_number_and_esd and the test, test passing
berrakozer Sep 23, 2021
f91eef3
Edited round_number_esd function and the test, test passes
berrakozer Sep 24, 2021
b5209ec
Edited test_mr_to_hr_number_and_esd, test passes
berrakozer Sep 24, 2021
ccd1faf
adjusted the cases where value is smaller than value error in utils.…
berrakozer Sep 24, 2021
fc9d266
edited hr_to_mr_number_and_esd function and test
berrakozer Sep 24, 2021
fc72a66
moved round_number_esd upwards and used it in hr_to_mr_number_and_esd…
berrakozer Sep 28, 2021
70e6bd5
edited docstring of hr_to_mr_number_and_esd
berrakozer Sep 28, 2021
c3e4709
edited docstring of mr_to_hr_number_and_esd, included proper rounding…
berrakozer Sep 28, 2021
94a66a2
Update test_utils.py
sbillinge Sep 30, 2021
e4d6f78
Update test_utils.py
sbillinge Sep 30, 2021
52b2288
fixed test_mr_to_hr_number_and_esd, test passes
berrakozer Oct 5, 2021
5bd70fb
Merge branch 'main' into sd_function_issue54
sbillinge Dec 26, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
163 changes: 162 additions & 1 deletion pydatarecognition/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,168 @@ def get_formatted_crossref_reference(doi):
return ref, ref_date


def correlate(y1, y2, corr_type='pearson'):
def round_number_esd(number, esd):
    '''
    Rounds each element in number and each element in esd (estimated standard deviation) arrays.

    The esd is rounded to two significant figures if its leading digits are <= 1.44
    (e.g. 0.144 -> 0.14) and to one significant figure otherwise (NB: 1.45E**x becomes 2E**x).
    The number is rounded to the same decimal place as the rounded esd.
    If the magnitude of the number is smaller than its esd, the number is set to 0 and the esd
    is rounded to one significant figure.
    If the rounded esd is >= 1, both the number and the esd are returned as integers.

    Parameters
    ----------
    number : array-like
      The array containing numbers to be rounded.
    esd : array-like
      The array containing esds to be rounded. Every esd must be positive and finite.

    Returns
    -------
    list
      The list containing rounded numbers as floats and/or integers.
    list
      The list containing rounded esds as floats and/or integers.

    Raises
    ------
    ValueError
      If an esd is zero, negative, NaN or infinite.

    '''
    number_rounded, esd_rounded = [], []

    for i, val in enumerate(number):
        val_err = esd[i]

        # An esd of zero (or a non-finite one) has no order of magnitude to round to.
        if not (val_err > 0 and np.isfinite(val_err)):
            raise ValueError(f"esd must be a positive, finite number, got {val_err!r}")

        # Scientific notation exposes the leading digits and the decimal exponent,
        # e.g. 0.145 -> mantissa '1.45000', exponent '-01'.
        mantissa, exponent = f"{val_err:.5E}".split("E")
        first, second, third = int(mantissa[0]), int(mantissa[2]), int(mantissa[3])

        if abs(val) < val_err:
            # The number is insignificant compared to its esd.
            # abs() keeps negative numbers from being zeroed unconditionally.
            val, sig_figs = 0, 1
        elif first == 1 and (second < 4 or (second == 4 and third < 5)):
            # Leading digits 1.00 .. 1.44: keep two significant figures.
            sig_figs = 2
        elif first == 1 and second == 4:
            # Leading digits 1.45 .. 1.49: bump the second digit so that the
            # half-up rounding below yields 2E**x rather than 1E**x.
            val_err = float(f"1.5{mantissa[3:]}E{exponent}")
            sig_figs = 1
        else:
            sig_figs = 1

        # Number of digits in front of the decimal point (<= 0 for esd < 1).
        # Taken from the formatted exponent so that exact powers of ten
        # (0.1, 0.01, ...) land in the same decade as e.g. 0.11 or 0.011.
        n = int(exponent) + 1

        # Scale such that (value * scale) has its last kept digit in the ones place.
        power = sig_figs - n
        scale = 10 ** power

        # Round half up: floor(x + 0.5).
        err_rounded = np.floor(val_err * scale + 0.5) / scale

        # An esd >= 1 is reported as an integer, so the number has to be rounded
        # (not truncated) to an integer as well.
        if err_rounded >= 1 and power > 0:
            scale = 1
            err_rounded = np.floor(val_err + 0.5)

        val_rounded = np.floor(val * scale + 0.5) / scale

        # Both values are whole numbers here; drop the trailing '.0'.
        if err_rounded >= 1:
            val_rounded, err_rounded = int(val_rounded), int(err_rounded)

        number_rounded.append(val_rounded)
        esd_rounded.append(err_rounded)

    return number_rounded, esd_rounded


def hr_to_mr_number_and_esd(number_esd):
    '''
    Splits human readable numbers with estimated standard deviations (e.g. 343.44(45)) into
    rounded machine readable numbers and estimated standard deviations (e.g. 343.4 and 0.5).

    The digits in parentheses are scaled to the last decimal place of the number
    (no decimals -> the esd is taken as is) and both values are then passed through
    round_number_esd.

    Parameters
    ----------
    number_esd : array_like
      The array-like object that contains numbers with their estimated standard deviations as
      strings in the following format: ["343.44(45)", "324908.435(67)", "0.0783(1)"]

    Returns
    -------
    list
      The list with the rounded numbers, e.g. [343.4, 324908.44, 0.0783]

    list
      The list with rounded estimated standard deviations, e.g. [0.5, 0.07, 0.0001]

    '''
    # Text in front of the opening parenthesis is the number,
    # text between the parentheses holds the esd digits.
    value_texts = [entry.split("(")[0] for entry in number_esd]
    esd_digits = [entry.split("(")[1].split(")")[0] for entry in number_esd]

    # Place value of the last written digit of each number.
    place_values = []
    for text in value_texts:
        parts = text.split(".")
        place_values.append(1 if len(parts) == 1 else 10 ** -len(parts[1]))

    esd_values = np.array(esd_digits, dtype='float') * np.array(place_values, dtype='float')
    values = [float(text) for text in value_texts]

    return round_number_esd(values, list(esd_values))


def mr_to_hr_number_and_esd(number, esd):
    '''
    Rounds and merges machine readable numbers and estimated standard deviations
    (e.g. 343.4 and 0.5) into human readable numbers with estimated standard deviations
    (e.g. 343.4(5)).

    Parameters
    ----------
    number : array-like
      The array that contains numbers.
    esd : array-like
      The array that contains esds.

    Returns
    -------
    list
      The list of strings with human readable rounded numbers and esds.

    '''
    number_rounded, esd_rounded = round_number_esd(number, esd)

    number_esd = []
    for num, err in zip(number_rounded, esd_rounded):
        if err < 1:
            # Positional formatting never switches to scientific notation,
            # so esds below 1e-4 (e.g. 1e-05 -> '0.00001') can be split safely.
            decimals = np.format_float_positional(err, trim="-").split(".")[1]
            # Print the number with as many decimals as the esd so that the digits
            # in parentheses line up with its last digits (2.10(14), not 2.1(14)).
            number_esd.append(f"{num:.{len(decimals)}f}({int(decimals)})")
        else:
            # For esd >= 1 the rounded values are written out unchanged.
            number_esd.append(f"{num}({err})")

    return number_esd


def correlate(y1, y2, corr_type='pearson'):
'''

Parameters
Expand Down
33 changes: 33 additions & 0 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
import pytest
from datetime import date
from habanero import Crossref
from pydatarecognition.utils import data_sample, pearson_correlate, xy_resample, get_formatted_crossref_reference, \
hr_to_mr_number_and_esd, mr_to_hr_number_and_esd, round_number_esd

from scipy.stats import pearsonr, spearmanr, kendalltau
from pydatarecognition.utils import (data_sample, pearson_correlate, xy_resample, get_formatted_crossref_reference,
correlate)
Expand Down Expand Up @@ -72,6 +75,36 @@ def mockreturn(*args, **kwargs):
assert actual == expected


def test_hr_to_mr_number_and_esd():
    # Human readable inputs and the rounded values they should split into.
    inputs = ["343.44(45)", "324908.435(67)", "0.0783(1)", "11(1)", "51(13)", "243(6)", "3205(300)"]
    expected_numbers = [343.4, 324908.44, 0.0783, 11, 51, 243, 3200]
    expected_esds = [0.5, 0.07, 0.0001, 1, 13, 6, 300]
    assert hr_to_mr_number_and_esd(inputs) == (expected_numbers, expected_esds)


def test_mr_to_hr_number_and_esd():
    # One row per case: (number, esd, expected human readable string).
    table = [
        (123.5, 0.5, '123.5(5)'),
        (123.3, 0.5, '123.3(5)'),
        (123.417, 0.326, '123.4(3)'),
        (123.367, 0.2, '123.4(2)'),
        (123.12, 0.13, '123.12(13)'),
        (132.1, 0.236, '132.1(2)'),
        (19, 2, '19(2)'),
        (125, 20, '130(20)'),
        (154, 20, '150(20)'),
        (1200, 207, '1200(200)'),
        (2, 1, '2(1)'),
        (1, 1.4, '0(1)'),
        (2.1, 0.2, '2.1(2)'),
        (2.1, 0.25, '2.1(3)'),
        (2.14, 0.14, '2.14(14)'),
        (7.26, 100, '0(100)'),
        (50, 100, '0(100)'),
        (11, 2, '11(2)'),
    ]
    number = [row[0] for row in table]
    esd = [row[1] for row in table]
    expected = [row[2] for row in table]
    assert mr_to_hr_number_and_esd(number, esd) == expected


def test_round_number_esd():
    # One row per case: (number, esd, expected rounded number, expected rounded esd).
    table = [
        (123.45, 0.4521, 123.5, 0.5),
        (123.3, 0.4673, 123.3, 0.5),
        (123.35, 0.309, 123.4, 0.3),
        (123.31, 0.213, 123.3, 0.2),
        (123.12, 0.125, 123.12, 0.13),
        (132.124, 0.145, 132.1, 0.2),
        (19, 2.4, 19, 2),
        (123, 21.32, 120, 20),
        (145, 14.5, 150, 20),
        (1234, 145, 1200, 200),
        (1.99, 0.99, 2, 1),
        (1, 1.11, 0, 1),
        (2.145, 0.145, 2.1, 0.2),
        (2.146, 0.146, 2.1, 0.2),
        (2.144, 0.144, 2.14, 0.14),
        (10, 100.99, 0, 100),
        (11, 111, 0, 100),
        (10.6, 1.72, 11, 2),
    ]
    number = [row[0] for row in table]
    esd = [row[1] for row in table]
    number_exp = [row[2] for row in table]
    esd_exp = [row[3] for row in table]
    assert round_number_esd(number, esd) == (number_exp, esd_exp)


def test_correlate():
y1, y2 = np.linspace(0, 10, 11), [0.1, 0.9, 2, 3.2, 4.3, 4.8, 5.9, 7, 7.9, 9, 9.8]
actual = correlate(y1, y2)
Expand Down