Skip to content

Commit 2f2449d

Browse files
committed
implemented read_stata and LArray.to_stata
1 parent 0593fd9 commit 2f2449d

File tree

5 files changed

+92
-1
lines changed

5 files changed

+92
-1
lines changed

doc/source/api.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -625,6 +625,7 @@ Read
625625
read_hdf
626626
read_eurostat
627627
read_sas
628+
read_stata
628629

629630
Write
630631
-----
@@ -635,6 +636,7 @@ Write
635636
LArray.to_csv
636637
LArray.to_excel
637638
LArray.to_hdf
639+
LArray.to_stata
638640

639641
Excel
640642
=====

doc/source/changes/version_0_30.rst.inc

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,9 @@ Backward incompatible changes
101101
New features
102102
^^^^^^^^^^^^
103103

104+
* implemented :py:obj:`read_stata()` and :py:obj:`LArray.to_stata()` to read arrays from and write arrays to Stata .dta
105+
files.
106+
104107
* added :py:obj:`LArray.isin()` method to check whether each element of an array is contained in a list (or array) of
105108
values.
106109

larray/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
from larray.inout.excel import read_excel
2929
from larray.inout.hdf import read_hdf
3030
from larray.inout.sas import read_sas
31+
from larray.inout.stata import read_stata
3132
from larray.inout.xw_excel import open_excel, Workbook
3233

3334
# just make sure handlers for .pkl and .pickle are initialized
@@ -74,7 +75,7 @@
7475
'real_if_close', 'interp', 'isnan', 'isinf', 'inverse',
7576
# inout
7677
'from_lists', 'from_string', 'from_frame', 'from_series', 'read_csv', 'read_tsv',
77-
'read_eurostat', 'read_excel', 'read_hdf', 'read_sas', 'open_excel', 'Workbook',
78+
'read_eurostat', 'read_excel', 'read_hdf', 'read_sas', 'read_stata', 'open_excel', 'Workbook',
7879
# utils
7980
'get_options', 'set_options',
8081
# viewer

larray/core/array.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6266,6 +6266,38 @@ def to_hdf(self, filepath, key):
62666266
attrs.writer = 'LArray'
62676267
self.meta.to_hdf(store, key)
62686268

6269+
def to_stata(self, filepath_or_buffer, **kwargs):
6270+
r"""
6271+
Writes array to a Stata .dta file.
6272+
6273+
Parameters
6274+
----------
6275+
filepath_or_buffer : str or file-like object
6276+
Path to .dta file or a file handle.
6277+
6278+
See Also
6279+
--------
6280+
read_stata
6281+
6282+
Notes
6283+
-----
6284+
The round trip to Stata (LArray.to_stata followed by read_stata) loses the name of the "column" axis.
6285+
6286+
Examples
6287+
--------
6288+
>>> axes = [Axis(3, 'row'), Axis('column=country,sex')] # doctest: +SKIP
6289+
>>> arr = LArray([['BE', 'F'],
6290+
... ['FR', 'M'],
6291+
... ['FR', 'F']], axes=axes) # doctest: +SKIP
6292+
>>> arr # doctest: +SKIP
6293+
row*\column country sex
6294+
0 BE F
6295+
1 FR M
6296+
2 FR F
6297+
>>> arr.to_stata('test.dta') # doctest: +SKIP
6298+
"""
6299+
self.to_frame().to_stata(filepath_or_buffer, **kwargs)
6300+
62696301
@deprecate_kwarg('sheet_name', 'sheet')
62706302
def to_excel(self, filepath=None, sheet=None, position='A1', overwrite_file=False, clear_sheet=False,
62716303
header=True, transpose=False, wide=True, value_name='value', engine=None, *args, **kwargs):

larray/inout/stata.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
from __future__ import absolute_import, print_function
2+
3+
import pandas as pd
4+
5+
from larray.inout.pandas import from_frame
6+
7+
__all__ = ['read_stata']
8+
9+
10+
def read_stata(filepath_or_buffer, index_col=None, sort_rows=False, sort_columns=False, **kwargs):
11+
r"""
12+
Reads a Stata .dta file and returns an LArray with its contents.
13+
14+
Parameters
15+
----------
16+
filepath_or_buffer : str or file-like object
17+
Path to .dta file or a file handle.
18+
index_col : str or None, optional
19+
Name of column to set as index. Defaults to None.
20+
sort_rows : bool, optional
21+
Whether or not to sort the rows alphabetically (sorting is more efficient than not sorting).
22+
This only makes sense in combination with index_col. Defaults to False.
23+
sort_columns : bool, optional
24+
Whether or not to sort the columns alphabetically (sorting is more efficient than not sorting).
25+
Defaults to False.
26+
27+
Returns
28+
-------
29+
LArray
30+
31+
See Also
32+
--------
33+
LArray.to_stata
34+
35+
Notes
36+
-----
37+
The round trip to Stata (LArray.to_stata followed by read_stata) loses the name of the "column" axis.
38+
39+
Examples
40+
--------
41+
>>> read_stata('test.dta') # doctest: +SKIP
42+
{0}\{1} row country sex
43+
0 0 BE F
44+
1 1 FR M
45+
2 2 FR F
46+
>>> read_stata('test.dta', index_col='row') # doctest: +SKIP
47+
row\{1} country sex
48+
0 BE F
49+
1 FR M
50+
2 FR F
51+
"""
52+
df = pd.read_stata(filepath_or_buffer, index_col=index_col, **kwargs)
53+
return from_frame(df, sort_rows=sort_rows, sort_columns=sort_columns)

0 commit comments

Comments
 (0)