# survey/data_cleaning_abbreviation.py at main · grazingrays/survey · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
import pandas as pd
from pathlib import Path


# Directory (relative to the working directory) that holds the survey data.
responseloc = Path("./data")

# Raw survey export; the rename below relies on its column headers being the
# question texts listed in `columns`.
responses = pd.read_csv(responseloc / "GISAS_instrument_survey.csv")

# Column name abbreviations: short name -> column header as it appears in the
# CSV. Several headers end in a newline character, which is part of the key
# and must be kept. "type_group" and "hardware_group" map to themselves, so
# renaming leaves a column of that name unchanged.
columns = {"type": 'What type of instrument is it?',
           "specialty": 'What characteristic sets your instrument apart from all or most others?\n',
           "type_group": "type_group",
           "hardware_group": "hardware_group",
           "hardware": 'What type of hardware allows you to control the relative positions and orientations of beam and sample?\n',
           "orientation": 'What sample orientations do you use on a regular basis?\n',
           "software": 'What interface(s) do you have to control your instrument?\n',
           "making_changes": 'If you wanted to change a procedure at your instrument, e.g. a sequence of scans for alignment, how is that possible?\n',
           "alignment": 'What sequence of steps do you use to align individual samples?\n',
           "alignment_specialty": 'Is there an aspect of sample alignment you payed particular attention to? Something you might do differently than anybody else?',
           "alignment_script": 'Please share your alignment routine (or representative section thereof) via \n- link to a public repo or \n- by contributing to this gist: \n  https://gist.github.com/ahoermann/d44605100109964ccdbd2f...',
           "beamshadow": 'What measures are available to quantify shadowing of the beam?\n',
           "sample_holder": 'What do you use as a sample holder?\n',
           "calibration": 'What signal do you use for angle and sample-to-detector calibration?\n',
           "calibration_specialty": 'Is there an aspect of calibration you payed particular attention to? Something you might do differently than anybody else?',
           "calibration_script": 'Please share your calibration routine (or representative section thereof) via \n- link to a public repo or \n- by contributing to this gist: \n  https://gist.github.com/ahoermann/d44605100109964ccdbd...',
           "institution": "What large scale facility or other organization is your instrument located at?\n",
           "name": "What is the name of your instrument?\n",
           "standardization_need": "How great a need do you see for standardization in GISAS?\n",
           "beam_measurement": "What option(s) do you have to measure the direct beam?\n",
           "specular_measurement": 'What option(s) do you have to measure the specularly reflected beam?\n',
           "participation?": 'Would you like to participate in this standardization project beyond participation in this survey?\n',
           "recommended_resource": 'Have we overlooked anything? Is there a resource you’d like to point us to?\n'
           }

# Invert to {column header: short name}, the direction DataFrame.rename
# expects, then switch the table over to the short names.
column_mapping = {v: k for k, v in columns.items()}
responses = responses.rename(columns = column_mapping)
def group_type(response):
    """Collapse a free-text instrument type answer into one of three groups.

    The checks are substring tests applied in order: an answer containing
    "SANS" is a neutron instrument; otherwise an answer mentioning
    "total scattering" or "at a synchrotron" is a synchrotron instrument;
    everything else is counted as a lab instrument.
    """
    if "SANS" in response:
        return "(GI)SANS"
    synchrotron_markers = ("total scattering", "at a synchrotron")
    if any(marker in response for marker in synchrotron_markers):
        return "(GI)SAXS at a synchrotron"
    return "lab (GI)SAXS"

# personal communication: "liquid surface spectrometer" identified as OPLS
responses["name"] = responses["name"].str.replace(
    "liquid surface spectrometer", "SMI (OPLS endstation)")

# Coarse instrument category derived from the free-text "type" answer.
responses["type_group"] = responses["type"].apply(group_type)

# Short labels for the sample-orientation answers. This is a frame-wide
# replace: any cell, in any column, whose value equals one of the keys is
# replaced.
orientation_labels = {
    'Sample normal parallel to x axis as incident angle 0;': 'vertical',
    'Sample normal parallel to y axis as incident angle 0;': 'horizontal',
    'Sample normal parallel to y axis as incident angle 0;Sample normal parallel to x axis as incident angle 0;': 'both',
    'Sample normal parallel to y axis as incident angle 0;parallel to x possible but not by default;': 'horizontal',
    'Sample normal parallel in both x & y;': 'both',
    "Conventional sans. For the grazing, I would prefer horizontally mounted samples;": "horizontal",
}
responses.replace(to_replace=orientation_labels, inplace=True)

# Unify the spelling of institution names; applied in the order listed.
institution_fixes = (
    ("NSLS II", "National Synchrotron Light Source II"),
    ("NSLS-II", "National Synchrotron Light Source II"),
    (", UK", ""),
    ("Lab SAXS instrument at Anton Paar", "Anton Paar"),
)
institution = responses["institution"]
for old, new in institution_fixes:
    institution = institution.str.replace(old, new)
responses["institution"] = institution

# Institution names, spelled exactly as they are looked up, grouped by country.
_INSTITUTIONS_BY_COUNTRY = {
    "USA": ["National Synchrotron Light Source II",
            "Advanced Light Souce LBNL",
            "Advanced Photon Source, Argonne National Lab",
            "University of Colorado",
            "Advanced Photon Source (APS-U), Argonne National Laboratory",
            ],
    "Italy": ["Synchrotron"],
    "France": ["ESRF", "Laboratoire Léon Brillouin, CEA Saclay",
               "synchrotron SOLEIL"],
    "UK": ["Diamond Light Source", "ISIS"],
    "Canada": ["Canadian Light Source"],
    "Spain": ["ALBA synchrotron"],
    "Austria": ["Institute of Science and Technology Austria",
                "Graz University of Technology",
                "Anton Paar"],
    "Germany": ["BESSY II / PTB", "PETRA III", "PETRA III (DESY, Hamburg)"],
    "Sweden": ["ESS"],
    "Australia": ["ANSTO"],
    "Japan": ["BL15A  PF KEK Japan"],
}

# Inverted once at import time, so a lookup does not rebuild the table on
# every call (the function is applied to every row of the survey).
_COUNTRY_BY_INSTITUTION = {
    institution_name: country_name
    for country_name, institution_names in _INSTITUTIONS_BY_COUNTRY.items()
    for institution_name in institution_names
}


def country(institution):
    """Return the country in which a survey institution is located.

    Args:
        institution: Institution name; it must match an entry of the lookup
            table exactly (case- and whitespace-sensitive).

    Returns:
        The country name as a string, e.g. "France".

    Raises:
        KeyError: If ``institution`` is not in the lookup table. The message
            names the offending value so it can be added to the table.
    """
    try:
        return _COUNTRY_BY_INSTITUTION[institution]
    except KeyError:
        raise KeyError(
            "no country known for institution {!r}".format(institution)
        ) from None

# Country of each instrument's host institution; an institution missing from
# the lookup table raises KeyError.
responses["country"] = responses["institution"].apply(country)

# Shorten the software answers. The patterns contain "[", "]", "+" and ".",
# which are regex metacharacters; regex=False forces literal matching so the
# result does not depend on the pandas version (pandas < 2.0 treated
# multi-character patterns as regular expressions by default).
responses["software"] = responses["software"].str.replace(
    "Home-grown scripts in [language];", "Home-grown scripts", regex=False)
responses["software"] = responses["software"].str.replace(
    "Other, please specify:;", "Other", regex=False)
responses["software"] = responses["software"].str.replace(
    "Bluesky + EPICS;", "EPICS + Bluesky;", regex=False)

responses["software"] = responses["software"].str.replace(
    "In the near future, BLISS.", "Bliss", regex=False)

def group_hardware(response):
    """Map a ';'-separated hardware answer onto the known hardware groups.

    Each ';'-separated part of the answer is checked, in order, for the marker
    texts below; every hit contributes its group label. The labels are joined
    with ';'. An answer without any hit yields "other".
    """
    marker_to_group = (
        ("Individual motor stages for translation and rotation of the sample (beam fixed)",
         "motor stages (sample)"),
        ("A hexapod, moving the sample",
         "hexapod (sample)"),
        ("influencing both sample and beam",
         "motor stages (sample and beam)"),
    )
    matched = [
        group
        for part in response.split(";")
        for marker, group in marker_to_group
        if marker in part
    ]
    return ";".join(matched) if matched else "other"

# Group first: group_hardware matches the original answer texts, so it has to
# run before those texts are abbreviated below.
responses["hardware_group"] = responses["hardware"].apply(group_hardware)

# Abbreviate the hardware answers. The patterns contain "(", ")" and ".",
# which are regex metacharacters; regex=False forces literal matching so the
# result does not depend on the pandas version (pandas < 2.0 treated
# multi-character patterns as regular expressions by default, in which case
# "(beam fixed)" would never match the literal parentheses).
responses["hardware"] = responses["hardware"].str.replace(
    "Individual motor stages for translation and rotation of the sample (beam fixed)",
    "motor stages (sample)",
    regex=False)
responses["hardware"] = responses["hardware"].str.replace(
    "A hexapod, moving the sample",
    "hexapod (sample)",
    regex=False)
responses["hardware"] = responses["hardware"].str.replace(
    "A linear rail under the hexapod head to allow for different positions along the sample to be scanned, avoiding beam damaged areas;",
    "linear rail under the hexapod;",
    regex=False)

responses["hardware"] = responses["hardware"].str.replace(
    "Anton Paar GISAXS Stage 2.0 (beam is always tilted);",
    "Anton Paar GISAXS Stage 2.0 (beam tilted);",
    regex=False)

# Abbreviate the free-text alignment answers.
# regex=False forces literal matching: the patterns contain "." and the
# replacements contain backslashes (r"$\alpha$", r"$\varphi$"), which regex
# substitution would interpret as escape sequences. pandas < 2.0 treated
# multi-character patterns as regular expressions by default.
# Order matters: the last two replacements also act on text ("Alternating
# rocking", "roll alignment") that the first two replacements introduce.
responses["alignment"] = responses["alignment"].str.replace(
    "alternation of rocking and height scans on the direct and at the reflected for refinement. Eventually, if necessary, roll and yaw scans and lateral translations.",
    "Alternating rocking and y scans (direct + reflected).\nOptional: roll, yaw scans, lateral translations",
    regex=False)
responses["alignment"] = responses["alignment"].str.replace(
    "Knife edge scans, rocking curves and roll alignment with also potentially yaw alignment if required;",
    "Knife edge scans, rocking curves, roll alignment.\nOptional: yaw alignment;",
    regex=False)

responses["alignment"] = responses["alignment"].str.replace(
    "Have not done for gisans",
    " N/A",
    regex=False)
responses["alignment"] = responses["alignment"].str.replace(
    "Using the specular reflection;",
    "specular reflection;",
    regex=False)
# The replacements below are LaTeX-style math snippets.
responses["alignment"] = responses["alignment"].str.replace(
    "Alternating rocking",
    r"$\alpha$",
    regex=False)
responses["alignment"] = responses["alignment"].str.replace(
    "roll alignment",
    r"$\varphi$ alignment",
    regex=False)


# Unify the wording of the beam-shadowing answers; applied in the order listed.
beamshadow_fixes = (
    ("x-ray eye;", "X-ray eye;"),
    ("And direct beam ;", ";"),
    ("None, we align the sample using the specular reflection only;", "specular reflection;"),
    ("Additional specular alignment;", "specular reflection;"),
)
beamshadow = responses["beamshadow"]
for old, new in beamshadow_fixes:
    beamshadow = beamshadow.str.replace(old, new)
responses["beamshadow"] = beamshadow

# Abbreviate the calibration answers. Two patterns contain ".", which matches
# any character in a regular expression; regex=False forces literal matching
# so the result does not depend on the pandas version (pandas < 2.0 treated
# multi-character patterns as regular expressions by default).
responses["calibration"] = responses["calibration"].str.replace(
    "The position of the specular reflection.;",
    "specular reflection;",
    regex=False)
responses["calibration"] = responses["calibration"].str.replace(
    "Known positions of peaks or rings of a calibrant.;",
    "calibrant;",
    regex=False)
responses["calibration"] = responses["calibration"].str.replace(
    "Peaks or rings of a calibrant measured at different distances;",
    "calibrant at different distances;",
    regex=False)
# The replacement inserts a line break into the long answer.
responses["calibration"] = responses["calibration"].str.replace(
    "and position of direct beam and sample/detector distance measurement;",
    "position of direct beam and\nsample/detector distance measurement;",
    regex=False)


# Write the cleaned table to the data directory.
responses.to_csv("./data/gisas_instrument_survey_abbrv.csv")

# Inspect one multiple-choice column: drop trailing ';', split each answer on
# ';' into one column per part, then flatten all parts into a single Series.
selected = "alignment_specialty"
separated = (
    responses[selected]
    .str.rstrip(";")
    .str.split(pat=";", expand=True)
)
conc = pd.Series(separated.values.flatten())

# Distinct raw answers in the selected column, and how many there are.
print(responses[selected].unique())
print(responses[selected].nunique())