@@ -1747,7 +1747,7 @@ def check_col_names_unique(cols_x, col_y=None):
17471747 return None
17481748
17491749
1750- def create_instances_from_lists (x , y = None , name = "data" , cols_x = None , col_y = None ):
1750+ def create_instances_from_lists (x , y = None , name = "data" , cols_x = None , col_y = None , nominal_x = None , nominal_y = False ):
17511751 """
17521752 Allows the generation of an Instances object from a list of lists for X and a list for Y (optional).
17531753 Data can be numeric, string or bytes. Attributes can be converted to nominal with the
@@ -1764,6 +1764,10 @@ def create_instances_from_lists(x, y=None, name="data", cols_x=None, col_y=None)
17641764 :type cols_x: list
17651765 :param col_y: the column name to use for the output variable (y)
17661766 :type col_y: str
1767+ :param nominal_x: the list of 0-based column indices to treat as nominal ones, ignored if None
1768+ :type nominal_x: list
1769+ :param nominal_y: whether the y column is to be treated as nominal
1770+ :type nominal_y: bool
17671771 :return: the generated dataset
17681772 :rtype: Instances
17691773 """
@@ -1783,6 +1787,16 @@ def create_instances_from_lists(x, y=None, name="data", cols_x=None, col_y=None)
17831787 if msg is not None :
17841788 raise Exception (msg )
17851789
1790+ # nominal x columns?
1791+ nominal_x_values = None
1792+ if nominal_x is not None :
1793+ nominal_x_values = dict ()
1794+ for nominal_col in nominal_x :
1795+ labels = set ()
1796+ for n in range (len (x )):
1797+ labels .add (typeconv .to_string (x [n ][nominal_col ]))
1798+ nominal_x_values [nominal_col ] = sorted (list (labels ))
1799+
17861800 # create header
17871801 atts = []
17881802 type_x = []
@@ -1792,6 +1806,9 @@ def create_instances_from_lists(x, y=None, name="data", cols_x=None, col_y=None)
17921806 for n in range (len (x )):
17931807 if x [n ][i ] is None :
17941808 continue
1809+ if (nominal_x_values is not None ) and (i in nominal_x_values ):
1810+ type_x [i ] = "C"
1811+ break
17951812 if isinstance (x [n ][i ], float ) or isinstance (x [n ][i ], int ):
17961813 type_x [i ] = "N"
17971814 break
@@ -1811,15 +1828,29 @@ def create_instances_from_lists(x, y=None, name="data", cols_x=None, col_y=None)
18111828 atts .append (Attribute .create_string (cols_x [i ]))
18121829 elif type_x [i ] == "S" :
18131830 atts .append (Attribute .create_string (cols_x [i ]))
1831+ elif type_x [i ] == "C" :
1832+ atts .append (Attribute .create_nominal (cols_x [i ], nominal_x_values [i ]))
18141833 else :
18151834 print ("WARNING: Failed to determine data type for column #%d" % i )
18161835 atts .append (Attribute .create_numeric (cols_x [i ]))
18171836
18181837 type_y = ""
18191838 if y is not None :
1839+ # nominal y column?
1840+ nominal_y_values = None
1841+ if nominal_y :
1842+ labels = set ()
1843+ for n in range (len (y )):
1844+ labels .add (typeconv .to_string (y [n ]))
1845+ nominal_y_values = sorted (list (labels ))
1846+
18201847 for n in range (len (y )):
18211848 if y [n ] is None :
18221849 continue
1850+ if nominal_y :
1851+ type_y = "C"
1852+ atts .append (Attribute .create_nominal (col_y , nominal_y_values ))
1853+ break
18231854 if isinstance (y [n ], float ) or isinstance (y [n ], int ):
18241855 type_y = "N"
18251856 atts .append (Attribute .create_numeric (col_y ))
@@ -1849,7 +1880,9 @@ def create_instances_from_lists(x, y=None, name="data", cols_x=None, col_y=None)
18491880 if type_x [n ] == "N" :
18501881 values .append (x [i ][n ])
18511882 elif type_x [n ] == "B" :
1852- values .append (result .attribute (n ).add_string_value (x [i ][n ].decode ("utf-8" )))
1883+ values .append (result .attribute (n ).add_string_value (typeconv .to_string (x [i ][n ])))
1884+ elif type_x [n ] == "C" :
1885+ values .append (result .attribute (n ).index_of (typeconv .to_string (x [i ][n ])))
18531886 else :
18541887 values .append (result .attribute (n ).add_string_value (x [i ][n ]))
18551888
@@ -1860,7 +1893,9 @@ def create_instances_from_lists(x, y=None, name="data", cols_x=None, col_y=None)
18601893 if type_y == "N" :
18611894 values .append (y [i ])
18621895 elif type_y == "B" :
1863- values .append (result .attribute (result .num_attributes - 1 ).add_string_value (y [i ].decode ("utf-8" )))
1896+ values .append (result .attribute (result .num_attributes - 1 ).add_string_value (typeconv .to_string (y [i ])))
1897+ elif type_y == "C" :
1898+ values .append (result .attribute (result .num_attributes - 1 ).index_of (typeconv .to_string (y [i ])))
18641899 else :
18651900 values .append (result .attribute (result .num_attributes - 1 ).add_string_value (y [i ]))
18661901
@@ -1869,7 +1904,7 @@ def create_instances_from_lists(x, y=None, name="data", cols_x=None, col_y=None)
18691904 return result
18701905
18711906
1872- def create_instances_from_matrices (x , y = None , name = "data" , cols_x = None , col_y = None ):
1907+ def create_instances_from_matrices (x , y = None , name = "data" , cols_x = None , col_y = None , nominal_x = None , nominal_y = False ):
18731908 """
18741909 Allows the generation of an Instances object from a 2-dimensional matrix for X and a
18751910 1-dimensional matrix for Y (optional).
@@ -1887,6 +1922,10 @@ def create_instances_from_matrices(x, y=None, name="data", cols_x=None, col_y=No
18871922 :type cols_x: list
18881923 :param col_y: the column name to use for the output variable (y)
18891924 :type col_y: str
1925+ :param nominal_x: the list of 0-based column indices to treat as nominal ones, ignored if None
1926+ :type nominal_x: list
1927+ :param nominal_y: whether the y column is to be treated as nominal
1928+ :type nominal_y: bool
18901929 :return: the generated dataset
18911930 :rtype: Instances
18921931 """
@@ -1906,11 +1945,25 @@ def create_instances_from_matrices(x, y=None, name="data", cols_x=None, col_y=No
19061945 if msg is not None :
19071946 raise Exception (msg )
19081947
1948+ # nominal x columns?
1949+ nominal_x_values = None
1950+ if nominal_x is not None :
1951+ nominal_x_values = dict ()
1952+ for nominal_col in nominal_x :
1953+ labels = set ()
1954+ for n in range (len (x )):
1955+ labels .add (typeconv .to_string (x [n ][nominal_col ]))
1956+ nominal_x_values [nominal_col ] = sorted (list (labels ))
1957+
19091958 # create header
19101959 atts = []
19111960 type_x = []
19121961 for i in range (len (x [0 ])):
19131962 try :
1963+ if (nominal_x_values is not None ) and (i in nominal_x_values ):
1964+ type_x .append ("C" ) # nominal
1965+ atts .append (Attribute .create_nominal (cols_x [i ], nominal_x_values [i ]))
1966+ continue
19141967 len (x .dtype )
19151968 if np .issubdtype (x .dtype [i ], np .number ):
19161969 type_x .append ("N" ) # number
@@ -1926,7 +1979,18 @@ def create_instances_from_matrices(x, y=None, name="data", cols_x=None, col_y=No
19261979 atts .append (Attribute .create_numeric (cols_x [i ]))
19271980 type_y = ""
19281981 if y is not None :
1929- if np .issubdtype (y .dtype , np .number ):
1982+ # nominal y column?
1983+ nominal_y_values = None
1984+ if nominal_y :
1985+ labels = set ()
1986+ for n in range (len (y )):
1987+ labels .add (typeconv .to_string (y [n ]))
1988+ nominal_y_values = sorted (list (labels ))
1989+
1990+ if nominal_y :
1991+ type_y = "C"
1992+ atts .append (Attribute .create_nominal (col_y , nominal_y_values ))
1993+ elif np .issubdtype (y .dtype , np .number ):
19301994 type_y = "N" # number
19311995 atts .append (Attribute .create_numeric (col_y ))
19321996 elif np .issubdtype (y .dtype , np .str_ ):
@@ -1949,8 +2013,10 @@ def create_instances_from_matrices(x, y=None, name="data", cols_x=None, col_y=No
19492013 values .append (x [i ][n ])
19502014 elif type_x [n ] == "S" :
19512015 values .append (result .attribute (n ).add_string_value (x [i ][n ]))
2016+ elif type_x [n ] == "C" :
2017+ values .append (result .attribute (n ).index_of (typeconv .to_string (x [i ][n ])))
19522018 else :
1953- values .append (result .attribute (n ).add_string_value (x [i ][n ]. decode ( "utf-8" )))
2019+ values .append (result .attribute (n ).add_string_value (typeconv . to_string ( x [i ][n ])))
19542020
19552021 if y is not None :
19562022 if isinstance (y [i ], float ) and np .isnan (y [i ]):
@@ -1959,8 +2025,10 @@ def create_instances_from_matrices(x, y=None, name="data", cols_x=None, col_y=No
19592025 values .append (y [i ])
19602026 elif type_y == "S" :
19612027 values .append (result .attribute (result .num_attributes - 1 ).add_string_value (y [i ]))
2028+ elif type_y == "C" :
2029+ values .append (result .attribute (result .num_attributes - 1 ).index_of (typeconv .to_string (y [i ])))
19622030 else :
1963- values .append (result .attribute (result .num_attributes - 1 ).add_string_value (y [i ]. decode ( "utf-8" )))
2031+ values .append (result .attribute (result .num_attributes - 1 ).add_string_value (typeconv . to_string ( y [i ])))
19642032
19652033 result .add_instance (Instance .create_instance (values ))
19662034
0 commit comments