From 589098c3573c1ffde77b3e3b273713f37a8b7d2b Mon Sep 17 00:00:00 2001 From: peadarcoyle Date: Mon, 27 Oct 2014 23:33:26 +0100 Subject: [PATCH 01/23] Updating tests file Cleaning up --- pandas/tests/test_groupby.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index b0b521141c92c..bd05cb8c2d2e5 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -1961,6 +1961,9 @@ def test_groupby_level(self): # raise exception for non-MultiIndex self.assertRaises(ValueError, self.df.groupby, level=1) + + + def test_groupby_level_index_names(self): ## GH4014 this used to raise ValueError since 'exp'>1 (in py2) df = DataFrame({'exp' : ['A']*3 + ['B']*3, 'var1' : lrange(6),}).set_index('exp') @@ -1999,6 +2002,27 @@ def test_groupby_level_apply(self): result = frame['A'].groupby(level=0).count() self.assertEqual(result.index.name, 'first') + +#PR8618 and issue 8015 + def test_groupby_args(self): + frame = self.mframe + def g(): + frame.groupby(level=None).count() + self.assertRaisesRegexp(TypeError, g, "You have to supply one of 'by' or 'level'") + + def k(): + frame.groupby(by=None).count() + self.assertRaisesRegexp(TypeError, k, "You have to supply one of 'by' or 'level'") + + def j(): + frame.groupby() + self.assertRaisesRegexp(TypeError, j, "You have to supply one of 'by' or 'level'") + + def i(): + frame.groupby(axes=None) + self.assertRaisesRegexp(TypeError, i, "You have to supply one of 'by' or 'level'") + + def test_groupby_level_mapper(self): frame = self.mframe deleveled = frame.reset_index() From 2e3c35b97db709d6b05822c1fa058806524abc36 Mon Sep 17 00:00:00 2001 From: peadarcoyle Date: Thu, 30 Oct 2014 09:34:13 +0100 Subject: [PATCH 02/23] Updating generic file to include change --- pandas/core/generic.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 71668a73d9286..c0af2b6f1e259 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2868,9 +2868,17 @@ def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True, """ from pandas.core.groupby import groupby - axis = self._get_axis_number(axis) - return groupby(self, by, axis=axis, level=level, as_index=as_index, - sort=sort, group_keys=group_keys, squeeze=squeeze) + if axis is not None: + axis = self._get_axis_number(axis) + return groupby(self, by, axis=axis, level=level, as_index=as_index, + sort=sort, group_keys=group_keys, squeeze=squeeze) + elif level is not None: + raise TypeError('You have to specify one of "by" or "level"') + elif by is not None: + raise TypeError('You have to specify one of "by" or "level"') + else: + raise TypeError('You have to specify one of "by" or "level"') + def asfreq(self, freq, method=None, how=None, normalize=False): """ From f41ef3d2b3579d5370b86baf75cf58fd727cae1d Mon Sep 17 00:00:00 2001 From: peadarcoyle Date: Thu, 30 Oct 2014 13:22:39 +0100 Subject: [PATCH 03/23] Pushing code for review --- pandas/core/generic.py | 11 +++++------ pandas/tests/test_groupby.py | 20 +++++--------------- 2 files changed, 10 insertions(+), 21 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index c0af2b6f1e259..a555bf2138b13 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2868,17 +2868,16 @@ def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True, """ from pandas.core.groupby import groupby - if axis is not None: - axis = self._get_axis_number(axis) - return groupby(self, by, axis=axis, level=level, as_index=as_index, - sort=sort, group_keys=group_keys, squeeze=squeeze) + if level is None and by is None: + raise TypeError('You have to specify one of "by" or "level"') elif level is not None: raise TypeError('You have to specify one of "by" or "level"') elif by is not None: raise TypeError('You have to specify one of "by" or "level"') else: - raise TypeError('You have to specify one of "by" or "level"') - + axis = self._get_axis_number(axis) + return groupby(self, by=by, axis=axis, level=level, as_index=as_index, + sort=sort, group_keys=group_keys, squeeze=squeeze) def asfreq(self, freq, method=None, how=None, normalize=False): """ diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index bd05cb8c2d2e5..924c37a6fafe5 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -2003,24 +2003,14 @@ def test_groupby_level_apply(self): self.assertEqual(result.index.name, 'first') -#PR8618 and issue 8015 + #PR8618 and issue 8015 def test_groupby_args(self): frame = self.mframe - def g(): - frame.groupby(level=None).count() - self.assertRaisesRegexp(TypeError, g, "You have to supply one of 'by' or 'level'") - def k(): - frame.groupby(by=None).count() - self.assertRaisesRegexp(TypeError, k, "You have to supply one of 'by' or 'level'") - - def j(): - frame.groupby() - self.assertRaisesRegexp(TypeError, j, "You have to supply one of 'by' or 'level'") - - def i(): - frame.groupby(axes=None) - self.assertRaisesRegexp(TypeError, i, "You have to supply one of 'by' or 'level'") + + + result = frame.groupby() + self.assertRaisesRegexp(TypeError, result, "You have to supply one of 'by' or 'level'") def test_groupby_level_mapper(self): From 168b83a2efafb8e0f3c5bfb1d8d1f2855dbf7682 Mon Sep 17 00:00:00 2001 From: peadarcoyle Date: Thu, 30 Oct 2014 21:38:57 +0100 Subject: [PATCH 04/23] updating after tests fail --- pandas/core/generic.py | 11 ++++------- pandas/tests/test_groupby.py | 8 +++----- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index a555bf2138b13..662c9a814a603 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2868,16 +2868,13 @@ def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True, """ from pandas.core.groupby import groupby - if level is None and by is None: - raise TypeError('You have to specify one of "by" or "level"') - elif level is not None: - raise TypeError('You have to specify one of "by" or "level"') - elif by is not None: - raise TypeError('You have to specify one of "by" or "level"') - else: + if level is not None and by is not None: axis = self._get_axis_number(axis) return groupby(self, by=by, axis=axis, level=level, as_index=as_index, sort=sort, group_keys=group_keys, squeeze=squeeze) + elif by is None: + raise ValueError('You have to specify one of "by" or "level"') + def asfreq(self, freq, method=None, how=None, normalize=False): """ diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index 924c37a6fafe5..451d1d6f03eed 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -2002,16 +2002,14 @@ def test_groupby_level_apply(self): result = frame['A'].groupby(level=0).count() self.assertEqual(result.index.name, 'first') - +""" #PR8618 and issue 8015 def test_groupby_args(self): frame = self.mframe - - - result = frame.groupby() + result = frame.groupby(by=None, level=None) self.assertRaisesRegexp(TypeError, result, "You have to supply one of 'by' or 'level'") - +""" def test_groupby_level_mapper(self): frame = self.mframe From 6cc47ca91f2ceea8bb1d89477a62697635d72af4 Mon Sep 17 00:00:00 2001 From: peadarcoyle Date: Fri, 31 Oct 2014 18:19:19 +0100 Subject: [PATCH 05/23] Updating due to errors --- pandas/core/generic.py | 13 +++++++------ pandas/tests/test_groupby.py | 8 -------- 2 files changed, 7 insertions(+), 14 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 662c9a814a603..90905dac8055f 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2866,15 +2866,16 @@ def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True, GroupBy object """ - from pandas.core.groupby import groupby - if level is not None and by is not None: + + if axis is not 0: axis = self._get_axis_number(axis) return groupby(self, by=by, axis=axis, level=level, as_index=as_index, - sort=sort, group_keys=group_keys, squeeze=squeeze) - elif by is None: - raise ValueError('You have to specify one of "by" or "level"') - + sort=sort, group_keys=group_keys, squeeze=squeeze) + elif level is None and by is None: + raise TypeError('You have to specify one of "by" or "level"') + elif level is None: + raise TypeError('You have to specify one of "by" or "level"') def asfreq(self, freq, method=None, how=None, normalize=False): """ diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index 451d1d6f03eed..fb57115be30b1 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -2002,14 +2002,6 @@ def test_groupby_level_apply(self): result = frame['A'].groupby(level=0).count() self.assertEqual(result.index.name, 'first') -""" - #PR8618 and issue 8015 - def test_groupby_args(self): - frame = self.mframe - - result = frame.groupby(by=None, level=None) - self.assertRaisesRegexp(TypeError, result, "You have to supply one of 'by' or 'level'") -""" def test_groupby_level_mapper(self): frame = self.mframe From 968d68f1b2f07a33217a2471f29a6c73afeec7b3 Mon Sep 17 00:00:00 2001 From: peadarcoyle Date: Sat, 1 Nov 2014 14:59:29 +0100 Subject: [PATCH 06/23] Updating including test fails --- pandas/core/generic.py | 11 ++++------- pandas/tests/test_groupby.py | 7 +++++++ 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 90905dac8055f..0bf1927d621d1 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2868,14 +2868,11 @@ def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True, """ from pandas.core.groupby import groupby - if axis is not 0: - axis = self._get_axis_number(axis) - return groupby(self, by=by, axis=axis, level=level, as_index=as_index, + if level is None and by is None: + raise TypeError('You have to specify at least one of "by" and "level"') + axis = self._get_axis_number(axis) + return groupby(self, by=by, axis=axis, level=level, as_index=as_index, sort=sort, group_keys=group_keys, squeeze=squeeze) - elif level is None and by is None: - raise TypeError('You have to specify one of "by" or "level"') - elif level is None: - raise TypeError('You have to specify one of "by" or "level"') def asfreq(self, freq, method=None, how=None, normalize=False): """ diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index fb57115be30b1..6e872e62908e4 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -2002,6 +2002,13 @@ def test_groupby_level_apply(self): result = frame['A'].groupby(level=0).count() self.assertEqual(result.index.name, 'first') + #PR8618 and issue 8015 + def test_groupby_args(self): + frame = self.mframe + + def k(): + frame.groupby(by=None, level=None) + self.assertRaisesRegexp(TypeError, k, "You have to supply one of 'by' and 'level'") def test_groupby_level_mapper(self): frame = self.mframe From 6bf83c5dc575f52c84783d6bd6c4b9713b6201ab Mon Sep 17 00:00:00 2001 From: Scott E Lasley Date: Fri, 7 Nov 2014 15:31:53 -0500 Subject: [PATCH 07/23] BUG CSV: fix problem with trailing whitespace in skipped rows, issues 8661, 8679 ENH CSV: Reduce memory usage when skiprows is an integer in read_csv, issue 8681 --- doc/source/whatsnew/v0.15.2.txt | 2 + pandas/io/tests/test_parsers.py | 23 ++++++++ pandas/parser.pyx | 12 ++-- pandas/src/parser/tokenizer.c | 101 ++++++++++++++++++++++++++------ pandas/src/parser/tokenizer.h | 4 ++ vb_suite/io_bench.py | 16 +++++ 6 files changed, 135 insertions(+), 23 deletions(-) diff --git a/doc/source/whatsnew/v0.15.2.txt b/doc/source/whatsnew/v0.15.2.txt index d6d36fd8d14ba..1e84762b60caa 100644 --- a/doc/source/whatsnew/v0.15.2.txt +++ b/doc/source/whatsnew/v0.15.2.txt @@ -74,6 +74,7 @@ Enhancements Performance ~~~~~~~~~~~ +- Reduce memory usage when skiprows is an integer in read_csv (:issue:`8681`) .. _whatsnew_0152.experimental: @@ -155,3 +156,4 @@ Bug Fixes of the level names are numbers (:issue:`8584`). - Bug in ``MultiIndex`` where ``__contains__`` returns wrong result if index is not lexically sorted or unique (:issue:`7724`) +- BUG CSV: fix problem with trailing whitespace in skipped rows, (:issue:`8679`), (:issue:`8661`) diff --git a/pandas/io/tests/test_parsers.py b/pandas/io/tests/test_parsers.py index 228dad984bb3c..59647b4c781e5 100644 --- a/pandas/io/tests/test_parsers.py +++ b/pandas/io/tests/test_parsers.py @@ -3048,6 +3048,29 @@ def test_comment_skiprows(self): df = self.read_csv(StringIO(data), comment='#', skiprows=4) tm.assert_almost_equal(df.values, expected) + def test_trailing_spaces(self): + data = """skip +random line with trailing spaces +skip +1,2,3 +1,2.,4. +random line with trailing tabs\t\t\t + +5.,NaN,10.0 +""" + expected = pd.DataFrame([[1., 2., 4.], + [5., np.nan, 10.]]) + # this should ignore six lines including lines with trailing + # whitespace and blank lines. issues 8661, 8679 + df = self.read_csv(StringIO(data.replace(',', ' ')), + header=None, delim_whitespace=True, + skiprows=[0,1,2,3,5,6], skip_blank_lines=True) + tm.assert_frame_equal(df, expected) + df = self.read_table(StringIO(data.replace(',', ' ')), + header=None, delim_whitespace=True, + skiprows=[0,1,2,3,5,6], skip_blank_lines=True) + tm.assert_frame_equal(df, expected) + def test_comment_header(self): data = """# empty # second empty line diff --git a/pandas/parser.pyx b/pandas/parser.pyx index afaa5219ab0cd..0409ee56f22bb 100644 --- a/pandas/parser.pyx +++ b/pandas/parser.pyx @@ -86,6 +86,7 @@ cdef extern from "parser/tokenizer.h": EAT_COMMENT EAT_LINE_COMMENT WHITESPACE_LINE + SKIP_LINE FINISHED enum: ERROR_OVERFLOW @@ -158,6 +159,7 @@ cdef extern from "parser/tokenizer.h": int header_end # header row end void *skipset + int64_t skip_first_N_rows int skip_footer double (*converter)(const char *, char **, char, char, char, int) @@ -181,6 +183,8 @@ cdef extern from "parser/tokenizer.h": void parser_free(parser_t *self) nogil int parser_add_skiprow(parser_t *self, int64_t row) + int parser_set_skipfirstnrows(parser_t *self, int64_t nrows) + void parser_set_default_options(parser_t *self) int parser_consume_rows(parser_t *self, size_t nrows) @@ -524,10 +528,10 @@ cdef class TextReader: cdef _make_skiprow_set(self): if isinstance(self.skiprows, (int, np.integer)): - self.skiprows = range(self.skiprows) - - for i in self.skiprows: - parser_add_skiprow(self.parser, i) + parser_set_skipfirstnrows(self.parser, self.skiprows) + else: + for i in self.skiprows: + parser_add_skiprow(self.parser, i) cdef _setup_parser_source(self, source): cdef: diff --git a/pandas/src/parser/tokenizer.c b/pandas/src/parser/tokenizer.c index 9a7303b6874db..fc96cc5429775 100644 --- a/pandas/src/parser/tokenizer.c +++ b/pandas/src/parser/tokenizer.c @@ -156,6 +156,7 @@ void parser_set_default_options(parser_t *self) { self->thousands = '\0'; self->skipset = NULL; + self-> skip_first_N_rows = -1; self->skip_footer = 0; } @@ -444,21 +445,17 @@ static int end_line(parser_t *self) { } } - if (self->skipset != NULL) { - k = kh_get_int64((kh_int64_t*) self->skipset, self->file_lines); - - if (k != ((kh_int64_t*)self->skipset)->n_buckets) { - TRACE(("Skipping row %d\n", self->file_lines)); - // increment file line count - self->file_lines++; - - // skip the tokens from this bad line - self->line_start[self->lines] += fields; + if (self->state == SKIP_LINE) { + TRACE(("Skipping row %d\n", self->file_lines)); + // increment file line count + self->file_lines++; + + // skip the tokens from this bad line + self->line_start[self->lines] += fields; - // reset field count - self->line_fields[self->lines] = 0; - return 0; - } + // reset field count + self->line_fields[self->lines] = 0; + return 0; } /* printf("Line: %d, Fields: %d, Ex-fields: %d\n", self->lines, fields, ex_fields); */ @@ -556,6 +553,15 @@ int parser_add_skiprow(parser_t *self, int64_t row) { return 0; } +int parser_set_skipfirstnrows(parser_t *self, int64_t nrows) { + // self->file_lines is zero based so subtract 1 from nrows + if (nrows > 0) { + self->skip_first_N_rows = nrows - 1; + } + + return 0; +} + static int parser_buffer_bytes(parser_t *self, size_t nbytes) { int status; size_t bytes_read; @@ -656,6 +662,15 @@ typedef int (*parser_op)(parser_t *self, size_t line_limit); TRACE(("datapos: %d, datalen: %d\n", self->datapos, self->datalen)); +int skip_this_line(parser_t *self, int64_t rownum) { + if (self->skipset != NULL) { + return ( kh_get_int64((kh_int64_t*) self->skipset, self->file_lines) != + ((kh_int64_t*)self->skipset)->n_buckets ); + } + else { + return ( rownum <= self->skip_first_N_rows ); + } +} int tokenize_delimited(parser_t *self, size_t line_limit) { @@ -688,10 +703,25 @@ int tokenize_delimited(parser_t *self, size_t line_limit) switch(self->state) { + case SKIP_LINE: +// TRACE(("tokenize_delimited SKIP_LINE %c, state %d\n", c, self->state)); + if (c == '\n') { + END_LINE(); + } + break; + case START_RECORD: // start of record - - if (c == '\n') { + if (skip_this_line(self, self->file_lines)) { + if (c == '\n') { + END_LINE() + } + else { + self->state = SKIP_LINE; + } + break; + } + else if (c == '\n') { // \n\r possible? if (self->skip_empty_lines) { @@ -1006,9 +1036,26 @@ int tokenize_delim_customterm(parser_t *self, size_t line_limit) self->state)); switch(self->state) { + + case SKIP_LINE: +// TRACE(("tokenize_delim_customterm SKIP_LINE %c, state %d\n", c, self->state)); + if (c == self->lineterminator) { + END_LINE(); + } + break; + case START_RECORD: // start of record - if (c == self->lineterminator) { + if (skip_this_line(self, self->file_lines)) { + if (c == self->lineterminator) { + END_LINE() + } + else { + self->state = SKIP_LINE; + } + break; + } + else if (c == self->lineterminator) { // \n\r possible? if (self->skip_empty_lines) { @@ -1252,6 +1299,14 @@ int tokenize_whitespace(parser_t *self, size_t line_limit) self->state)); switch(self->state) { + + case SKIP_LINE: +// TRACE(("tokenize_whitespace SKIP_LINE %c, state %d\n", c, self->state)); + if (c == '\n') { + END_LINE(); + } + break; + case WHITESPACE_LINE: if (c == '\n') { self->file_lines++; @@ -1283,9 +1338,17 @@ int tokenize_whitespace(parser_t *self, size_t line_limit) case START_RECORD: // start of record - if (c == '\n') { - // \n\r possible? + if (skip_this_line(self, self->file_lines)) { + if (c == '\n') { + END_LINE() + } + else { + self->state = SKIP_LINE; + } + break; + } else if (c == '\n') { if (self->skip_empty_lines) + // \n\r possible? { self->file_lines++; } diff --git a/pandas/src/parser/tokenizer.h b/pandas/src/parser/tokenizer.h index 0947315fbe6b7..07f4153038dd8 100644 --- a/pandas/src/parser/tokenizer.h +++ b/pandas/src/parser/tokenizer.h @@ -127,6 +127,7 @@ typedef enum { EAT_COMMENT, EAT_LINE_COMMENT, WHITESPACE_LINE, + SKIP_LINE, FINISHED } ParserState; @@ -203,6 +204,7 @@ typedef struct parser_t { int header_end; // header row end void *skipset; + int64_t skip_first_N_rows; int skip_footer; double (*converter)(const char *, char **, char, char, char, int); @@ -240,6 +242,8 @@ int parser_trim_buffers(parser_t *self); int parser_add_skiprow(parser_t *self, int64_t row); +int parser_set_skipfirstnrows(parser_t *self, int64_t nrows); + void parser_free(parser_t *self); void parser_set_default_options(parser_t *self); diff --git a/vb_suite/io_bench.py b/vb_suite/io_bench.py index 0b9f68f0e6ed5..a70c543ca59eb 100644 --- a/vb_suite/io_bench.py +++ b/vb_suite/io_bench.py @@ -21,6 +21,22 @@ read_csv_standard = Benchmark("read_csv('__test__.csv')", setup1, start_date=datetime(2011, 9, 15)) +#---------------------------------- +# skiprows + +setup1 = common_setup + """ +index = tm.makeStringIndex(20000) +df = DataFrame({'float1' : randn(20000), + 'float2' : randn(20000), + 'string1' : ['foo'] * 20000, + 'bool1' : [True] * 20000, + 'int1' : np.random.randint(0, 200000, size=20000)}, + index=index) +df.to_csv('__test__.csv') +""" + +read_csv_skiprows = Benchmark("read_csv('__test__.csv', skiprows=10000)", setup1, + start_date=datetime(2011, 9, 15)) #---------------------------------------------------------------------- # write_csv From 0ef5c075c45e782321833c71c410bceb792a313d Mon Sep 17 00:00:00 2001 From: behzad nouri Date: Wed, 26 Nov 2014 21:19:36 -0500 Subject: [PATCH 08/23] BUG: DatetimeIndex with time as key --- doc/source/whatsnew/v0.15.2.txt | 1 + pandas/index.pyx | 10 ++++++++-- pandas/tests/test_index.py | 21 +++++++++++++++++++++ pandas/tseries/index.py | 14 +++++++------- 4 files changed, 37 insertions(+), 9 deletions(-) diff --git a/doc/source/whatsnew/v0.15.2.txt b/doc/source/whatsnew/v0.15.2.txt index 1e84762b60caa..b740ac948dc05 100644 --- a/doc/source/whatsnew/v0.15.2.txt +++ b/doc/source/whatsnew/v0.15.2.txt @@ -97,6 +97,7 @@ Bug Fixes - ``sql_schema`` now generates dialect appropriate ``CREATE TABLE`` statements (:issue:`8697`) - ``slice`` string method now takes step into account (:issue:`8754`) - Bug in ``BlockManager`` where setting values with different type would break block integrity (:issue:`8850`) +- Bug in ``DatetimeIndex`` when using ``time`` object as key (:issue:`8667`) - Fix negative step support for label-based slices (:issue:`8753`) Old behavior: diff --git a/pandas/index.pyx b/pandas/index.pyx index 73d886f10b241..9be7e7404f3fe 100644 --- a/pandas/index.pyx +++ b/pandas/index.pyx @@ -545,8 +545,14 @@ cdef class DatetimeEngine(Int64Engine): val = _to_i8(val) return self._get_loc_duplicates(val) values = self._get_index_values() - conv = _to_i8(val) - loc = values.searchsorted(conv, side='left') + + try: + conv = _to_i8(val) + loc = values.searchsorted(conv, side='left') + except TypeError: + self._date_check_type(val) + raise KeyError(val) + if loc == len(values) or util.get_value_at(values, loc) != conv: raise KeyError(val) return loc diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py index b7a18da3924c8..5265318d2c831 100644 --- a/pandas/tests/test_index.py +++ b/pandas/tests/test_index.py @@ -1886,6 +1886,27 @@ def test_reindex_preserves_tz_if_target_is_empty_list_or_array(self): self.assertEqual(str(index.reindex([])[0].tz), 'US/Eastern') self.assertEqual(str(index.reindex(np.array([]))[0].tz), 'US/Eastern') + def test_time_loc(self): # GH8667 + from datetime import time + from pandas.index import _SIZE_CUTOFF + + ns = _SIZE_CUTOFF + np.array([-100, 100]) + key = time(15, 11, 30) + start = key.hour * 3600 + key.minute * 60 + key.second + step = 24 * 3600 + + for n in ns: + idx = pd.date_range('2014-11-26', periods=n, freq='S') + ts = pd.Series(np.random.randn(n), index=idx) + i = np.arange(start, n, step) + + tm.assert_array_equal(ts.index.get_loc(key), i) + tm.assert_series_equal(ts[key], ts.iloc[i]) + + left, right = ts.copy(), ts.copy() + left[key] *= -10 + right.iloc[i] *= -10 + tm.assert_series_equal(left, right) class TestPeriodIndex(Base, tm.TestCase): _holder = PeriodIndex diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 202e30cc2eb5e..e7c001ac57c0a 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -1210,6 +1210,10 @@ def get_value(self, series, key): return self.get_value_maybe_box(series, key) + if isinstance(key, time): + locs = self.indexer_at_time(key) + return series.take(locs) + try: return _maybe_box(self, Index.get_value(self, series, key), series, key) except KeyError: @@ -1219,10 +1223,6 @@ def get_value(self, series, key): except (TypeError, ValueError, KeyError): pass - if isinstance(key, time): - locs = self.indexer_at_time(key) - return series.take(locs) - try: return self.get_value_maybe_box(series, key) except (TypeError, ValueError, KeyError): @@ -1250,6 +1250,9 @@ def get_loc(self, key): stamp = Timestamp(key, tz=self.tz) return self._engine.get_loc(stamp) + if isinstance(key, time): + return self.indexer_at_time(key) + try: return Index.get_loc(self, key) except (KeyError, ValueError): @@ -1258,9 +1261,6 @@ def get_loc(self, key): except (TypeError, KeyError, ValueError): pass - if isinstance(key, time): - return self.indexer_at_time(key) - try: stamp = Timestamp(key, tz=self.tz) return self._engine.get_loc(stamp) From 1e5d25a262e563828fa13bd7f682479cacb26c65 Mon Sep 17 00:00:00 2001 From: broessli Date: Sun, 16 Nov 2014 15:26:47 +0100 Subject: [PATCH 09/23] Fix unrecognized 'Z' UTC designator --- doc/source/whatsnew/v0.15.2.txt | 2 +- pandas/src/datetime/np_datetime_strings.c | 11 ++++++++--- pandas/tseries/tests/test_tslib.py | 5 ++++- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v0.15.2.txt b/doc/source/whatsnew/v0.15.2.txt index 1e84762b60caa..3aa50ad609064 100644 --- a/doc/source/whatsnew/v0.15.2.txt +++ b/doc/source/whatsnew/v0.15.2.txt @@ -151,9 +151,9 @@ Bug Fixes - Bug in `pd.infer_freq`/`DataFrame.inferred_freq` that prevented proper sub-daily frequency inference when the index contained DST days (:issue:`8772`). - Bug where index name was still used when plotting a series with ``use_index=False`` (:issue:`8558`). - - Bugs when trying to stack multiple columns, when some (or all) of the level names are numbers (:issue:`8584`). - Bug in ``MultiIndex`` where ``__contains__`` returns wrong result if index is not lexically sorted or unique (:issue:`7724`) - BUG CSV: fix problem with trailing whitespace in skipped rows, (:issue:`8679`), (:issue:`8661`) +- Regression in ``Timestamp`` does not parse 'Z' zone designator for UTC (:issue:`8771`) diff --git a/pandas/src/datetime/np_datetime_strings.c b/pandas/src/datetime/np_datetime_strings.c index 3f09de851e231..44363fd930510 100644 --- a/pandas/src/datetime/np_datetime_strings.c +++ b/pandas/src/datetime/np_datetime_strings.c @@ -363,7 +363,8 @@ convert_datetimestruct_local_to_utc(pandas_datetimestruct *out_dts_utc, * to be cast to the 'unit' parameter. * * 'out' gets filled with the parsed date-time. - * 'out_local' gets whether returned value contains timezone. 0 for UTC, 1 for local time. + * 'out_local' gets set to 1 if the parsed time contains timezone, + * to 0 otherwise. * 'out_tzoffset' gets set to timezone offset by minutes * if the parsed time was in local time, * to 0 otherwise. The values 'now' and 'today' don't get counted @@ -785,11 +786,15 @@ parse_iso_8601_datetime(char *str, int len, /* UTC specifier */ if (*substr == 'Z') { - /* "Z" means not local */ + /* "Z" should be equivalent to tz offset "+00:00" */ if (out_local != NULL) { - *out_local = 0; + *out_local = 1; } + if (out_tzoffset != NULL) { + *out_tzoffset = 0; + } + if (sublen == 1) { goto finish; } diff --git a/pandas/tseries/tests/test_tslib.py b/pandas/tseries/tests/test_tslib.py index 9adcbb4ea4a41..6c358bd99e620 100644 --- a/pandas/tseries/tests/test_tslib.py +++ b/pandas/tseries/tests/test_tslib.py @@ -6,7 +6,7 @@ import datetime from pandas.core.api import Timestamp, Series -from pandas.tslib import period_asfreq, period_ordinal +from pandas.tslib import period_asfreq, period_ordinal, get_timezone from pandas.tseries.index import date_range from pandas.tseries.frequencies import get_freq import pandas.tseries.offsets as offsets @@ -298,6 +298,9 @@ def test_barely_oob_dts(self): # One us more than the maximum is an error self.assertRaises(ValueError, Timestamp, max_ts_us + one_us) + def test_utc_z_designator(self): + self.assertEqual(get_timezone(Timestamp('2014-11-02 01:00Z').tzinfo), 'UTC') + class TestDatetimeParsingWrappers(tm.TestCase): def test_does_not_convert_mixed_integer(self): From c8e36d421917110e246c9f52187e1f3138a16669 Mon Sep 17 00:00:00 2001 From: Victor Chaves Date: Thu, 27 Nov 2014 18:05:28 -0200 Subject: [PATCH 10/23] Doc change for Issue #8805 --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 237012a71aeb4..a464b687209cb 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3279,7 +3279,7 @@ def update(self, other, join='left', overwrite=True, filter_func=None, Parameters ---------- other : DataFrame, or object coercible into a DataFrame - join : {'left', 'right', 'outer', 'inner'}, default 'left' + join : {'left'}, default 'left' overwrite : boolean, default True If True then overwrite values for common keys in the calling frame filter_func : callable(1d-array) -> 1d-array, default None From 3b2089bb4a6c59888bfdea4f4de76b07954d1fee Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sat, 29 Nov 2014 00:35:31 +0000 Subject: [PATCH 11/23] DOC: specify return type in to_datetime --- pandas/tseries/tools.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pandas/tseries/tools.py b/pandas/tseries/tools.py index 45bea00ac104f..f29ab14ed8745 100644 --- a/pandas/tseries/tools.py +++ b/pandas/tseries/tools.py @@ -177,7 +177,7 @@ def to_datetime(arg, errors='ignore', dayfirst=False, utc=None, box=True, format=None, coerce=False, unit='ns', infer_datetime_format=False): """ - Convert argument to datetime + Convert argument to datetime. Parameters ---------- @@ -198,13 +198,16 @@ def to_datetime(arg, errors='ignore', dayfirst=False, utc=None, box=True, coerce : force errors to NaT (False by default) unit : unit of the arg (D,s,ms,us,ns) denote the unit in epoch (e.g. a unix timestamp), which is an integer/float number - infer_datetime_format: boolean, default False + infer_datetime_format : boolean, default False If no `format` is given, try to infer the format based on the first datetime string. Provides a large speed-up in many cases. Returns ------- - ret : datetime if parsing succeeded + ret : datetime if parsing succeeded. Return type depends on input: + - list-like: DatetimeIndex + - Series: Series of datetime64 dtype + - scalar: Timestamp Examples -------- From cfcda5f8a2beb58ab87b7a29e8a97aafdb6caeb7 Mon Sep 17 00:00:00 2001 From: Angelos Evripiotis Date: Sat, 29 Nov 2014 11:39:09 +0000 Subject: [PATCH 12/23] CLN: move import to top of file For consistency with [PEP8][1]: Imports are always put at the top of the file, just after any module comments and docstrings, and before module globals and constants. [1]: https://www.python.org/dev/peps/pep-0008#id17 --- pandas/core/config.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/config.py b/pandas/core/config.py index 60dc1d7d0341e..6768e0af0dfb6 100644 --- a/pandas/core/config.py +++ b/pandas/core/config.py @@ -51,6 +51,7 @@ import re from collections import namedtuple +from contextlib import contextmanager import warnings from pandas.compat import map, lmap, u import pandas.compat as compat @@ -681,8 +682,6 @@ def pp(name, ks): # # helpers -from contextlib import contextmanager - @contextmanager def config_prefix(prefix): From f21539bdd565fe03e5a92af945f78c45dca1676b Mon Sep 17 00:00:00 2001 From: Henry Kleynhans Date: Sat, 29 Nov 2014 13:09:49 +0000 Subject: [PATCH 13/23] BUG: Option context applies on __enter__ Option context no longer overrides options when used outside a `with` statement. Added test TestConfig.test_option_config_scope Closes #8514 --- doc/source/whatsnew/v0.15.2.txt | 2 +- pandas/core/config.py | 11 +++++------ pandas/tests/test_config.py | 21 +++++++++++++++++++++ 3 files changed, 27 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v0.15.2.txt b/doc/source/whatsnew/v0.15.2.txt index 3aa50ad609064..d299121092987 100644 --- a/doc/source/whatsnew/v0.15.2.txt +++ b/doc/source/whatsnew/v0.15.2.txt @@ -142,7 +142,7 @@ Bug Fixes - +- BUG: Option context applies on __enter__ (:issue:`8514`) diff --git a/pandas/core/config.py b/pandas/core/config.py index 60dc1d7d0341e..2c1865730874d 100644 --- a/pandas/core/config.py +++ b/pandas/core/config.py @@ -384,19 +384,18 @@ def __init__(self, *args): 'option_context(pat, val, [(pat, val), ...)).' ) - ops = list(zip(args[::2], args[1::2])) + self.ops = list(zip(args[::2], args[1::2])) + + def __enter__(self): undo = [] - for pat, val in ops: + for pat, val in self.ops: undo.append((pat, _get_option(pat, silent=True))) self.undo = undo - for pat, val in ops: + for pat, val in self.ops: _set_option(pat, val, silent=True) - def __enter__(self): - pass - def __exit__(self, *args): if self.undo: for pat, val in self.undo: diff --git a/pandas/tests/test_config.py b/pandas/tests/test_config.py index dc5e9a67bdb65..3a8fdd877f5a0 100644 --- a/pandas/tests/test_config.py +++ b/pandas/tests/test_config.py @@ -425,3 +425,24 @@ def f3(key): options.c = 1 self.assertEqual(len(holder), 1) + def test_option_context_scope(self): + # Ensure that creating a context does not affect the existing + # environment as it is supposed to be used with the `with` statement. + # See https://github.com/pydata/pandas/issues/8514 + + original_value = 60 + context_value = 10 + option_name = 'a' + + self.cf.register_option(option_name, original_value) + + # Ensure creating contexts didn't affect the current context. + ctx = self.cf.option_context(option_name, context_value) + self.assertEqual(self.cf.get_option(option_name), original_value) + + # Ensure the correct value is available inside the context. + with ctx: + self.assertEqual(self.cf.get_option(option_name), context_value) + + # Ensure the current context is reset + self.assertEqual(self.cf.get_option(option_name), original_value) From fef4b0970bc353c4eaa37056d72dba039e334ea3 Mon Sep 17 00:00:00 2001 From: Rupert Thompson Date: Sat, 29 Nov 2014 16:55:33 +0000 Subject: [PATCH 14/23] BUG: fix doctests in pandas.core.common $ nosetests pandas/core/common.py --with-doc -v --- pandas/core/common.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/pandas/core/common.py b/pandas/core/common.py index 759f5f1dfaf7a..6aff67412d677 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -404,9 +404,13 @@ def array_equivalent(left, right, strict_nan=False): Examples -------- - >>> array_equivalent(np.array([1, 2, nan]), np.array([1, 2, nan])) + >>> array_equivalent( + ... np.array([1, 2, np.nan]), + ... np.array([1, 2, np.nan])) True - >>> array_equivalent(np.array([1, nan, 2]), np.array([1, 2, nan])) + >>> array_equivalent( + ... np.array([1, np.nan, 2]), + ... np.array([1, 2, np.nan])) False """ @@ -2171,8 +2175,8 @@ def iterpairs(seq): Examples -------- - >>> iterpairs([1, 2, 3, 4]) - [(1, 2), (2, 3), (3, 4) + >>> list(iterpairs([1, 2, 3, 4])) + [(1, 2), (2, 3), (3, 4)] """ # input may not be sliceable seq_it = iter(seq) From e759d99f520e1217f7ce6ca8bfc33e4c10c8d29f Mon Sep 17 00:00:00 2001 From: jreback Date: Sun, 30 Nov 2014 09:13:30 -0500 Subject: [PATCH 15/23] TST: 32-bit construction fix re GH8907 --- pandas/tests/test_index.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py index 5265318d2c831..3c57dd764e3aa 100644 --- a/pandas/tests/test_index.py +++ b/pandas/tests/test_index.py @@ -1890,7 +1890,7 @@ def test_time_loc(self): # GH8667 from datetime import time from pandas.index import _SIZE_CUTOFF - ns = _SIZE_CUTOFF + np.array([-100, 100]) + ns = _SIZE_CUTOFF + np.array([-100, 100],dtype=np.int64) key = time(15, 11, 30) start = key.hour * 3600 + key.minute * 60 + key.second step = 24 * 3600 From bcaf7fd34437a7902258d6cbb15ca6c39c658378 Mon Sep 17 00:00:00 2001 From: peadarcoyle Date: Mon, 27 Oct 2014 23:33:26 +0100 Subject: [PATCH 16/23] Updating tests file Cleaning up --- pandas/tests/test_groupby.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index ef3fc03fc8d22..436edafd62a27 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -1961,6 +1961,9 @@ def test_groupby_level(self): # raise exception for non-MultiIndex self.assertRaises(ValueError, self.df.groupby, level=1) + + + def test_groupby_level_index_names(self): ## GH4014 this used to raise ValueError since 'exp'>1 (in py2) df = DataFrame({'exp' : ['A']*3 + ['B']*3, 'var1' : lrange(6),}).set_index('exp') @@ -1999,6 +2002,27 @@ def test_groupby_level_apply(self): result = frame['A'].groupby(level=0).count() self.assertEqual(result.index.name, 'first') + +#PR8618 and issue 8015 + def test_groupby_args(self): + frame = self.mframe + def g(): + frame.groupby(level=None).count() + self.assertRaisesRegexp(TypeError, g, "You have to supply one of 'by' or 'level'") + + def k(): + frame.groupby(by=None).count() + self.assertRaisesRegexp(TypeError, k, "You have to supply one of 'by' or 'level'") + + def j(): + frame.groupby() + self.assertRaisesRegexp(TypeError, j, "You have to supply one of 'by' or 'level'") + + def i(): + frame.groupby(axes=None) + self.assertRaisesRegexp(TypeError, i, "You have to supply one of 'by' or 'level'") + + def test_groupby_level_mapper(self): frame = self.mframe deleveled = frame.reset_index() From d55f58221c074d3362b4b4a5b8a2096e50461ec9 Mon Sep 17 00:00:00 2001 From: peadarcoyle Date: Thu, 30 Oct 2014 09:34:13 +0100 Subject: [PATCH 17/23] Updating generic file to include change --- pandas/core/generic.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 7201428e6b935..cf9fef16767a1 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2874,9 +2874,17 @@ def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True, """ from pandas.core.groupby import groupby - axis = self._get_axis_number(axis) - return groupby(self, by, axis=axis, level=level, as_index=as_index, - sort=sort, group_keys=group_keys, squeeze=squeeze) + if axis is not None: + axis = self._get_axis_number(axis) + return groupby(self, by, axis=axis, level=level, as_index=as_index, + sort=sort, group_keys=group_keys, squeeze=squeeze) + elif level is not None: + raise TypeError('You have to specify one of "by" or "level"') + elif by is not None: + raise TypeError('You have to specify one of "by" or "level"') + else: + raise TypeError('You have to specify one of "by" or "level"') + def asfreq(self, freq, method=None, how=None, normalize=False): """ From b1168b585ed3ccadbb31588c47d8d82b5d3b339a Mon Sep 17 00:00:00 2001 From: peadarcoyle Date: Thu, 30 Oct 2014 13:22:39 +0100 Subject: [PATCH 18/23] Pushing code for review --- pandas/core/generic.py | 11 +++++------ pandas/tests/test_groupby.py | 20 +++++--------------- 2 files changed, 10 insertions(+), 21 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index cf9fef16767a1..44af8113e744c 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2874,17 +2874,16 @@ def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True, """ from pandas.core.groupby import groupby - if axis is not None: - axis = self._get_axis_number(axis) - return groupby(self, by, axis=axis, level=level, as_index=as_index, - sort=sort, group_keys=group_keys, squeeze=squeeze) + if level is None and by is None: + raise TypeError('You have to specify one of "by" or "level"') elif level is not None: raise TypeError('You have to specify one of "by" or "level"') elif by is not None: raise TypeError('You have to specify one of "by" or "level"') else: - raise TypeError('You have to specify one of "by" or "level"') - + axis = self._get_axis_number(axis) + return groupby(self, by=by, axis=axis, level=level, as_index=as_index, + sort=sort, group_keys=group_keys, squeeze=squeeze) def asfreq(self, freq, method=None, how=None, normalize=False): """ diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index 436edafd62a27..98bf99b8d9ffa 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -2003,24 +2003,14 @@ def test_groupby_level_apply(self): self.assertEqual(result.index.name, 'first') -#PR8618 and issue 8015 + #PR8618 and issue 8015 def test_groupby_args(self): frame = self.mframe - def g(): - frame.groupby(level=None).count() - self.assertRaisesRegexp(TypeError, g, "You have to supply one of 'by' or 'level'") - def k(): - frame.groupby(by=None).count() - self.assertRaisesRegexp(TypeError, k, "You have to supply one of 'by' or 'level'") - - def j(): - frame.groupby() - self.assertRaisesRegexp(TypeError, j, "You have to supply one of 'by' or 'level'") - - def i(): - frame.groupby(axes=None) - self.assertRaisesRegexp(TypeError, i, "You have to supply one of 'by' or 'level'") + + + result = frame.groupby() + self.assertRaisesRegexp(TypeError, result, "You have to supply one of 'by' or 'level'") def test_groupby_level_mapper(self): From ca70dbeee72d702492242beb3e90b690b465a0a3 Mon Sep 17 00:00:00 2001 From: peadarcoyle Date: Thu, 30 Oct 2014 21:38:57 +0100 Subject: [PATCH 19/23] updating after tests fail --- pandas/core/generic.py | 11 ++++------- pandas/tests/test_groupby.py | 8 +++----- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 44af8113e744c..9286ec742c4c2 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2874,16 +2874,13 @@ def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True, """ from pandas.core.groupby import groupby - if level is None and by is None: - raise TypeError('You have to specify one of "by" or "level"') - elif level is not None: - raise TypeError('You have to specify one of "by" or "level"') - elif by is not None: - raise TypeError('You have to specify one of "by" or "level"') - else: + if level is not None and by is not None: axis = self._get_axis_number(axis) return groupby(self, by=by, axis=axis, level=level, as_index=as_index, sort=sort, group_keys=group_keys, squeeze=squeeze) + elif by is None: + raise ValueError('You have to specify one of "by" or "level"') + def asfreq(self, freq, method=None, how=None, normalize=False): """ diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index 98bf99b8d9ffa..48a07b5f1fb96 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -2002,16 +2002,14 @@ def test_groupby_level_apply(self): result = frame['A'].groupby(level=0).count() self.assertEqual(result.index.name, 'first') - +""" #PR8618 and issue 8015 def test_groupby_args(self): frame = self.mframe - - - result = frame.groupby() + result = frame.groupby(by=None, level=None) self.assertRaisesRegexp(TypeError, result, "You have to supply one of 'by' or 'level'") - +""" def test_groupby_level_mapper(self): frame = self.mframe From 11c53a7ee90673bff39d9562756630334620584e Mon Sep 17 00:00:00 2001 From: peadarcoyle Date: Fri, 31 Oct 2014 18:19:19 +0100 Subject: [PATCH 20/23] Updating due to errors --- pandas/core/generic.py | 13 +++++++------ pandas/tests/test_groupby.py | 8 -------- 2 files changed, 7 insertions(+), 14 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 9286ec742c4c2..de3b94346e3dd 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2872,15 +2872,16 @@ def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True, GroupBy object """ - from pandas.core.groupby import groupby - if level is not None and by is not None: + + if axis is not 0: axis = self._get_axis_number(axis) return groupby(self, by=by, axis=axis, level=level, as_index=as_index, - sort=sort, group_keys=group_keys, squeeze=squeeze) - elif by is None: - raise ValueError('You have to specify one of "by" or "level"') - + sort=sort, group_keys=group_keys, squeeze=squeeze) + elif level is None and by is None: + raise TypeError('You have to specify one of "by" or "level"') + elif level is None: + raise TypeError('You have to specify one of "by" or "level"') def asfreq(self, freq, method=None, how=None, normalize=False): """ diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index 48a07b5f1fb96..562cfb9b38aa0 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -2002,14 +2002,6 @@ def test_groupby_level_apply(self): result = frame['A'].groupby(level=0).count() self.assertEqual(result.index.name, 'first') -""" - #PR8618 and issue 8015 - def test_groupby_args(self): - frame = self.mframe - - result = frame.groupby(by=None, level=None) - self.assertRaisesRegexp(TypeError, result, "You have to supply one of 'by' or 'level'") -""" def test_groupby_level_mapper(self): frame = self.mframe From baa8cd6d51378daa588f237b63a9acabafc7ab4c Mon Sep 17 00:00:00 2001 From: peadarcoyle Date: Sat, 1 Nov 2014 14:59:29 +0100 Subject: [PATCH 21/23] Updating including test fails --- pandas/core/generic.py | 11 ++++------- pandas/tests/test_groupby.py | 7 +++++++ 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index de3b94346e3dd..7b9ce2b86f730 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2874,14 +2874,11 @@ def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True, """ from pandas.core.groupby import groupby - if axis is not 0: - axis = self._get_axis_number(axis) - return groupby(self, by=by, axis=axis, level=level, as_index=as_index, + if level is None and by is None: + raise TypeError('You have to specify at least one of "by" and "level"') + axis = self._get_axis_number(axis) + return groupby(self, by=by, axis=axis, level=level, as_index=as_index, sort=sort, group_keys=group_keys, squeeze=squeeze) - elif level is None and by is None: - raise TypeError('You have to specify one of "by" or "level"') - elif level is None: - raise TypeError('You have to specify one of "by" or "level"') def asfreq(self, freq, method=None, how=None, normalize=False): """ diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index 562cfb9b38aa0..75d0b02444b68 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -2002,6 +2002,13 @@ def test_groupby_level_apply(self): result = frame['A'].groupby(level=0).count() self.assertEqual(result.index.name, 'first') + #PR8618 and issue 8015 + def test_groupby_args(self): + frame = self.mframe + + def k(): + frame.groupby(by=None, level=None) + self.assertRaisesRegexp(TypeError, k, "You have to supply one of 'by' and 'level'") def test_groupby_level_mapper(self): frame = self.mframe From ab1c90f3f75d94374a39f13862ee8d3f76c64a08 Mon Sep 17 00:00:00 2001 From: peadarcoyle Date: Sun, 30 Nov 2014 18:41:31 +0100 Subject: [PATCH 22/23] Updating test files --- pandas/core/generic.py | 2 +- pandas/tests/test_groupby.py | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 7b9ce2b86f730..b7bf10f5d33a9 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2875,7 +2875,7 @@ def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True, from pandas.core.groupby import groupby if level is None and by is None: - raise TypeError('You have to specify at least one of "by" and "level"') + raise TypeError("You have to supply one of 'by' and 'level'") axis = self._get_axis_number(axis) return groupby(self, by=by, axis=axis, level=level, as_index=as_index, sort=sort, group_keys=group_keys, squeeze=squeeze) diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index 75d0b02444b68..70ddac82b25e0 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -2005,10 +2005,9 @@ def test_groupby_level_apply(self): #PR8618 and issue 8015 def test_groupby_args(self): frame = self.mframe - - def k(): - frame.groupby(by=None, level=None) - self.assertRaisesRegexp(TypeError, k, "You have to supply one of 'by' and 'level'") + def j(): + frame.groupby() + self.assertRaisesRegexp(TypeError, "You have to supply one of 'by' and 'level'", j) def test_groupby_level_mapper(self): frame = self.mframe @@ -3700,8 +3699,9 @@ def test_cumcount(self): assert_series_equal(expected, sg.cumcount()) def test_cumcount_empty(self): - ge = DataFrame().groupby() - se = Series().groupby() + dfe = DataFrame() + ge = dfe.groupby(dfe.index) + se = Series().groupby(by=1, level=2) e = Series(dtype='int64') # edge case, as this is usually considered float From 49bd3732ff9bf864455ae2f19324f7e8eba6c349 Mon Sep 17 00:00:00 2001 From: peadarcoyle Date: Sun, 30 Nov 2014 19:38:12 +0100 Subject: [PATCH 23/23] Refactoring slightly --- pandas/core/generic.py | 4 ---- pandas/tests/test_groupby.py | 7 ++----- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index c606abbf25d9a..7b9ce2b86f730 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2875,11 +2875,7 @@ def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True, from pandas.core.groupby import groupby if level is None and by is None: -<<<<<<< HEAD - raise TypeError("You have to supply one of 'by' and 'level'") -======= raise TypeError('You have to specify at least one of "by" and "level"') ->>>>>>> 968d68f1b2f07a33217a2471f29a6c73afeec7b3 axis = self._get_axis_number(axis) return groupby(self, by=by, axis=axis, level=level, as_index=as_index, sort=sort, group_keys=group_keys, squeeze=squeeze) diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index a1b8da171300f..cd768423e492a 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -2002,19 +2002,16 @@ def test_groupby_level_apply(self): result = frame['A'].groupby(level=0).count() self.assertEqual(result.index.name, 'first') - #PR8618 and issue 8015 def test_groupby_args(self): + #PR8618 and issue 8015 frame = self.mframe -<<<<<<< HEAD def j(): frame.groupby() self.assertRaisesRegexp(TypeError, "You have to supply one of 'by' and 'level'", j) -======= def k(): frame.groupby(by=None, level=None) - self.assertRaisesRegexp(TypeError, k, "You have to supply one of 'by' and 'level'") ->>>>>>> 968d68f1b2f07a33217a2471f29a6c73afeec7b3 + self.assertRaisesRegexp(TypeError, "You have to supply one of 'by' and 'level'", k) def test_groupby_level_mapper(self): frame = self.mframe