@@ -27,6 +27,7 @@ class _GMT_DATASET(ctp.Structure): # noqa: N801
     >>> with GMTTempFile(suffix=".txt") as tmpfile:
     ...     # Prepare the sample data file
     ...     with Path(tmpfile.name).open(mode="w") as fp:
+    ...         print("# x y z name", file=fp)
     ...         print(">", file=fp)
     ...         print("1.0 2.0 3.0 TEXT1 TEXT23", file=fp)
     ...         print("4.0 5.0 6.0 TEXT4 TEXT567", file=fp)
@@ -43,7 +44,8 @@ class _GMT_DATASET(ctp.Structure): # noqa: N801
     ...             print(ds.min[: ds.n_columns], ds.max[: ds.n_columns])
     ...             # The table
     ...             tbl = ds.table[0].contents
-    ...             print(tbl.n_columns, tbl.n_segments, tbl.n_records)
+    ...             print(tbl.n_columns, tbl.n_segments, tbl.n_records, tbl.n_headers)
+    ...             print(tbl.header[: tbl.n_headers])
     ...             print(tbl.min[: tbl.n_columns], ds.max[: tbl.n_columns])
     ...             for i in range(tbl.n_segments):
     ...                 seg = tbl.segment[i].contents
@@ -52,7 +54,8 @@ class _GMT_DATASET(ctp.Structure): # noqa: N801
     ...                 print(seg.text[: seg.n_rows])
     1 3 2
     [1.0, 2.0, 3.0] [10.0, 11.0, 12.0]
-    3 2 4
+    3 2 4 1
+    [b'x y z name']
     [1.0, 2.0, 3.0] [10.0, 11.0, 12.0]
     [1.0, 4.0]
     [2.0, 5.0]
@@ -169,6 +172,7 @@ def to_strings(self) -> np.ndarray[Any, np.dtype[np.str_]]:
 
     def to_dataframe(
         self,
+        header: int | None = None,
         column_names: pd.Index | None = None,
         dtype: type | Mapping[Any, type] | None = None,
         index_col: str | int | None = None,
@@ -187,6 +191,10 @@ def to_dataframe(
         ----------
         column_names
             A list of column names.
+        header
+            Row number containing column names. ``header=None`` means not to parse
+            the column names from the table header. Ignored if the row number is
+            larger than the number of headers in the table.
         dtype
             Data type. Can be a single type for all columns or a dictionary mapping
             column names to types.
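
A standalone sketch of the behaviour documented above (plain Python, no GMT session; the `headers` list and the `pick_column_names` helper are illustrative stand-ins for the table's header records and the selection rule, not pygmt API):

```python
# header=None: do not parse column names; an out-of-range row number is
# ignored; otherwise the chosen header record is split into column names.
headers = [b"col1 col2 col3 colstr"]  # one header record, as GMT stores it


def pick_column_names(header, headers):
    if header is None or header >= len(headers):
        return None  # keep the default numeric column labels
    return headers[header].decode().split()


print(pick_column_names(0, headers))  # ['col1', 'col2', 'col3', 'colstr']
print(pick_column_names(5, headers))  # None: row number out of range, ignored
print(pick_column_names(None, headers))  # None: header not parsed
```
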
@@ -207,6 +215,7 @@ def to_dataframe(
         >>> with GMTTempFile(suffix=".txt") as tmpfile:
         ...     # prepare the sample data file
         ...     with Path(tmpfile.name).open(mode="w") as fp:
+        ...         print("# col1 col2 col3 colstr", file=fp)
         ...         print(">", file=fp)
         ...         print("1.0 2.0 3.0 TEXT1 TEXT23", file=fp)
         ...         print("4.0 5.0 6.0 TEXT4 TEXT567", file=fp)
@@ -218,12 +227,12 @@ def to_dataframe(
         ...             lib.call_module("read", f"{tmpfile.name} {vouttbl} -Td")
         ...             ds = lib.read_virtualfile(vouttbl, kind="dataset")
         ...             text = ds.contents.to_strings()
-        ...             df = ds.contents.to_dataframe()
+        ...             df = ds.contents.to_dataframe(header=0)
         >>> text
         array(['TEXT1 TEXT23', 'TEXT4 TEXT567', 'TEXT8 TEXT90',
                'TEXT123 TEXT456789'], dtype='<U18')
         >>> df
-              0     1     2                   3
+           col1  col2  col3              colstr
         0   1.0   2.0   3.0        TEXT1 TEXT23
         1   4.0   5.0   6.0       TEXT4 TEXT567
         2   7.0   8.0   9.0        TEXT8 TEXT90
@@ -248,14 +257,19 @@ def to_dataframe(
         if len(textvector) != 0:
             vectors.append(pd.Series(data=textvector, dtype=pd.StringDtype()))
 
+        if header is not None:
+            tbl = self.table[0].contents  # Use the first table!
+            if header < tbl.n_headers:
+                column_names = tbl.header[header].decode().split()
+
         if len(vectors) == 0:
             # Return an empty DataFrame if no columns are found.
             df = pd.DataFrame(columns=column_names)
         else:
             # Create a DataFrame object by concatenating multiple columns
             df = pd.concat(objs=vectors, axis="columns")
             if column_names is not None:  # Assign column names
-                df.columns = column_names
+                df.columns = column_names[: df.shape[1]]
         if dtype is not None:  # Set dtype for the whole dataset or individual columns
             df = df.astype(dtype)
         if index_col is not None:  # Use a specific column as index
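
A minimal pandas sketch of the `column_names[: df.shape[1]]` guard in the last hunk (the values below are made up for illustration): when the parsed header carries more names than the assembled columns, the extra names are dropped instead of raising a length-mismatch error.

```python
import pandas as pd

# Hypothetical case: the header names four columns, but only three numeric
# vectors were assembled, so the names are truncated to the column count.
column_names = ["x", "y", "z", "name"]
df = pd.concat(
    objs=[pd.Series([1.0, 4.0]), pd.Series([2.0, 5.0]), pd.Series([3.0, 6.0])],
    axis="columns",
)
df.columns = column_names[: df.shape[1]]  # keeps 'x', 'y', 'z'
print(df.columns.tolist())  # ['x', 'y', 'z']
```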