@@ -1706,7 +1706,7 @@ def test_utf16_example(self):
1706
1706
self .assertEquals (len (result ), 50 )
1707
1707
1708
1708
def test_converters_corner_with_nas (self ):
1709
- # skip aberration observed on Win64 Python 3.2.2
1709
+ # skip aberration observed on Win64 Python 3.2.2
1710
1710
if hash (np .int64 (- 1 )) != - 2 :
1711
1711
raise nose .SkipTest ("skipping because of windows hash on Python"
1712
1712
" 3.2.2" )
@@ -2078,19 +2078,19 @@ def test_fwf(self):
2078
2078
read_fwf (StringIO (data3 ), colspecs = colspecs , widths = [6 , 10 , 10 , 7 ])
2079
2079
2080
2080
with tm .assertRaisesRegexp (ValueError , "Must specify either" ):
2081
- read_fwf (StringIO (data3 ))
2081
+ read_fwf (StringIO (data3 ), colspecs = None , widths = None )
2082
2082
2083
2083
def test_fwf_colspecs_is_list_or_tuple (self ): # a dict given where the column specifications go must raise the "must be a list or tuple" TypeError
2084
2084
with tm .assertRaisesRegexp (TypeError ,
2085
2085
'column specifications must be a list or '
2086
2086
'tuple.+' ):
2087
- fwr = pd .io .parsers .FixedWidthReader (StringIO (self .data1 ),
2088
- {'a' : 1 }, ',' )
2087
+ pd .io .parsers .FixedWidthReader (StringIO (self .data1 ),
2088
+ {'a' : 1 }, ',' , '# ' )
2089
2089
2090
2090
def test_fwf_colspecs_is_list_or_tuple_of_two_element_tuples (self ): # the list entry ('a', 1) is expected to trigger the "Each column specification must be..." TypeError
2091
2091
with tm .assertRaisesRegexp (TypeError ,
2092
2092
'Each column specification must be.+' ):
2093
- read_fwf (StringIO (self .data1 ), { 'a' : 1 } )
2093
+ read_fwf (StringIO (self .data1 ), [( 'a' , 1 )] )
2094
2094
2095
2095
def test_fwf_regression (self ):
2096
2096
# GH 3594
@@ -2223,6 +2223,107 @@ def test_iteration_open_handle(self):
2223
2223
expected = Series (['DDD' , 'EEE' , 'FFF' , 'GGG' ])
2224
2224
tm .assert_series_equal (result , expected )
2225
2225
2226
+
2227
+ class TestFwfColspaceSniffing (unittest .TestCase ): # each test checks that read_fwf without colspecs matches read_fwf with hand-written colspecs
2228
+ def test_full_file (self ):
2229
+ # File with all values (every row has all four fields)
2230
+ test = '''index A B C
2231
+ 2000-01-03T00:00:00 0.980268513777 3 foo
2232
+ 2000-01-04T00:00:00 1.04791624281 -4 bar
2233
+ 2000-01-05T00:00:00 0.498580885705 73 baz
2234
+ 2000-01-06T00:00:00 1.12020151869 1 foo
2235
+ 2000-01-07T00:00:00 0.487094399463 0 bar
2236
+ 2000-01-10T00:00:00 0.836648671666 2 baz
2237
+ 2000-01-11T00:00:00 0.157160753327 34 foo'''
2238
+ colspecs = ((0 , 19 ), (21 , 35 ), (38 , 40 ), (42 , 45 )) # NOTE(review): these offsets presumably depend on the exact spacing of the text above - confirm against the upstream file
2239
+ expected = read_fwf (StringIO (test ), colspecs = colspecs )
2240
+ tm .assert_frame_equal (expected , read_fwf (StringIO (test )))
2241
+
2242
+ def test_full_file_with_missing (self ):
2243
+ # File with missing values (some rows lack one or more fields)
2244
+ test = '''index A B C
2245
+ 2000-01-03T00:00:00 0.980268513777 3 foo
2246
+ 2000-01-04T00:00:00 1.04791624281 -4 bar
2247
+ 0.498580885705 73 baz
2248
+ 2000-01-06T00:00:00 1.12020151869 1 foo
2249
+ 2000-01-07T00:00:00 0 bar
2250
+ 2000-01-10T00:00:00 0.836648671666 2 baz
2251
+ 34'''
2252
+ colspecs = ((0 , 19 ), (21 , 35 ), (38 , 40 ), (42 , 45 ))
2253
+ expected = read_fwf (StringIO (test ), colspecs = colspecs )
2254
+ tm .assert_frame_equal (expected , read_fwf (StringIO (test )))
2255
+
2256
+ def test_full_file_with_spaces (self ):
2257
+ # File with spaces in columns (e.g. multi-word names)
2258
+ test = '''
2259
+ Account Name Balance CreditLimit AccountCreated
2260
+ 101 Keanu Reeves 9315.45 10000.00 1/17/1998
2261
+ 312 Gerard Butler 90.00 1000.00 8/6/2003
2262
+ 868 Jennifer Love Hewitt 0 17000.00 5/25/1985
2263
+ 761 Jada Pinkett-Smith 49654.87 100000.00 12/5/2006
2264
+ 317 Bill Murray 789.65 5000.00 2/5/2007
2265
+ ''' .strip ('\r \n ' )
2266
+ colspecs = ((0 , 7 ), (8 , 28 ), (30 , 38 ), (42 , 53 ), (56 , 70 ))
2267
+ expected = read_fwf (StringIO (test ), colspecs = colspecs )
2268
+ tm .assert_frame_equal (expected , read_fwf (StringIO (test )))
2269
+
2270
+ def test_full_file_with_spaces_and_missing (self ):
2271
+ # File with spaces and missing values in columns
2272
+ test = '''
2273
+ Account Name Balance CreditLimit AccountCreated
2274
+ 101 10000.00 1/17/1998
2275
+ 312 Gerard Butler 90.00 1000.00 8/6/2003
2276
+ 868 5/25/1985
2277
+ 761 Jada Pinkett-Smith 49654.87 100000.00 12/5/2006
2278
+ 317 Bill Murray 789.65
2279
+ ''' .strip ('\r \n ' )
2280
+ colspecs = ((0 , 7 ), (8 , 28 ), (30 , 38 ), (42 , 53 ), (56 , 70 ))
2281
+ expected = read_fwf (StringIO (test ), colspecs = colspecs )
2282
+ tm .assert_frame_equal (expected , read_fwf (StringIO (test )))
2283
+
2284
+ def test_messed_up_data (self ):
2285
+ # Completely messed up file: spaces in headers, missing values, an empty line
2286
+ test = '''
2287
+ Account Name Balance Credit Limit Account Created
2288
+ 101 10000.00 1/17/1998
2289
+ 312 Gerard Butler 90.00 1000.00
2290
+
2291
+ 761 Jada Pinkett-Smith 49654.87 100000.00 12/5/2006
2292
+ 317 Bill Murray 789.65
2293
+ ''' .strip ('\r \n ' )
2294
+ colspecs = ((2 , 10 ), (15 , 33 ), (37 , 45 ), (49 , 61 ), (64 , 79 ))
2295
+ expected = read_fwf (StringIO (test ), colspecs = colspecs )
2296
+ tm .assert_frame_equal (expected , read_fwf (StringIO (test )))
2297
+
2298
+ def test_multiple_delimiters (self ): # same comparison, with an identical delimiter argument passed to both read_fwf calls
2299
+ test = r'''
2300
+ col1~~~~~col2 col3++++++++++++++++++col4
2301
+ ~~22.....11.0+++foo~~~~~~~~~~Keanu Reeves
2302
+ 33+++122.33\\\bar.........Gerard Butler
2303
+ ++44~~~~12.01 baz~~Jennifer Love Hewitt
2304
+ ~~55 11+++foo++++Jada Pinkett-Smith
2305
+ ..66++++++.03~~~bar Bill Murray
2306
+ ''' .strip ('\r \n ' )
2307
+ colspecs = ((0 , 4 ), (7 , 13 ), (15 , 19 ), (21 , 41 ))
2308
+ expected = read_fwf (StringIO (test ), colspecs = colspecs ,
2309
+ delimiter = ' +~.\\ ' )
2310
+ tm .assert_frame_equal (expected , read_fwf (StringIO (test ),
2311
+ delimiter = ' +~.\\ ' ))
2312
+
2313
+ def test_variable_width_unicode (self ): # same comparison on UTF-8 encoded bytes with header=None; skipped unless compat.PY3
2314
+ if not compat .PY3 :
2315
+ raise nose .SkipTest ('Bytes-related test - only needs to work on Python 3' )
2316
+ test = '''
2317
+ שלום שלום
2318
+ ום שלל
2319
+ של ום
2320
+ ''' .strip ('\r \n ' )
2321
+ expected = pd .read_fwf (BytesIO (test .encode ('utf8' )),
2322
+ colspecs = [(0 , 4 ), (5 , 9 )], header = None )
2323
+ tm .assert_frame_equal (expected , read_fwf (BytesIO (test .encode ('utf8' )),
2324
+ header = None ))
2325
+
2326
+
2226
2327
class TestCParserHighMemory (ParserTests , unittest .TestCase ):
2227
2328
2228
2329
def read_csv (self , * args , ** kwds ):
0 commit comments