sightmachine · mks-sight · Dec 6, 2022 · Dec 7, 2022 · Dec 12, 2022 · Dec 12, 2022
diff --git a/smsdk/client.py b/smsdk/client.py
@@ -46,7 +46,7 @@ def dict_to_df(data, normalize=True):
                 # machine type stats are list
                 cols = [*data[0]]
                 cols.remove('stats')
-                df = json_normalize(data, 'stats', cols, record_prefix='stats.')
+                df = json_normalize(data, 'stats', cols, record_prefix='stats.', errors='ignore')
         else:
             try:
                 df = json_normalize(data)
@@ -62,7 +62,7 @@ def dict_to_df(data, normalize=True):
 
         if 'id' in df.columns:
             df.set_index('id', inplace=True)
-
+            
     return df
 
 
@@ -154,13 +154,17 @@ def get_data_v1(self, ename, util_name, normalize=True, *args, **kwargs):
             # dict params strictly follow {'key':'value'} format
 
             # sub_kwargs = kwargs
-            if util_name in ['get_cycles', 'get_downtime', 'get_parts']:
+            if util_name in ['get_cycles', 'get_downtime', 'get_parts', 'get_factories', 'get_machines', 'get_machine_types']:
                 sub_kwargs = [kwargs]
             else:
                 sub_kwargs = self.fix_only(kwargs)
 
             if len(sub_kwargs) == 1:
-                data = dict_to_df(getattr(cls, util_name)(*args, **sub_kwargs[0]), normalize)
+                if util_name in ['get_factories', 'get_machines', 'get_machine_types']:
+                    # data = dict_to_df(getattr(cls, util_name)(*args, **sub_kwargs[0]), normalize)
+                    return getattr(cls, util_name)(normalize, *args, **sub_kwargs[0])
+                else:
+                    data = dict_to_df(getattr(cls, util_name)(*args, **sub_kwargs[0]), normalize)
             else:
                 data = dict_to_df(getattr(cls, util_name)(*args, **sub_kwargs[0]), normalize)
                 for sub in sub_kwargs[1:]:
@@ -225,3 +229,118 @@ def get_machine_schema(self, machine_source, types=[], return_mtype=False, **kwa
                             f"Unknow stat schema identified :: machine_type {machine_source} - "
                             f"title_prefix :: {stat.get('display', {}).get('title_prefix', '')}")
         return fields
+
+    def _get_factories(self, normalize=True, *args, **kwargs):
+        """
+        Get list of factories and associated metadata.  Note this includes extensive internal metadata.  
+
+        :param normalize: Flatten nested data structures
+        :type normalize: bool
+        :return: pandas dataframe
+        """
+        return self.get_data_v1('factory_v1', 'get_factories', normalize, *args, **kwargs)
+
+    def _get_machines(self, normalize=True, *args, **kwargs) -> pd.DataFrame:
+        """
+        Get list of machines and associated metadata.  Note this includes extensive internal metadata.  If you only want to get a list of machine names
+        then see also get_machine_names(). 
+
+        :param normalize: Flatten nested data structures
+        :type normalize: bool
+        :return: pandas dataframe
+        """
+        return self.get_data_v1('machine_v1', 'get_machines', normalize, *args, **kwargs)
+
+    def _get_machine_types(self, normalize=True, *args, **kwargs):
+        """
+        Get list of machine types and associated metadata.  Note this includes extensive internal metadata.  If you only want to get a list of machine type names
+        then see also get_machine_type_names(). 
+
+        :param normalize: Flatten nested data structures
+        :type normalize: bool
+        :return: pandas dataframe
+        """
+
+        return self.get_data_v1('machine_type_v1', 'get_machine_types', normalize, *args, **kwargs)
+
+    def get_factories(self, normalize=True, *args, **kwargs):
+        generator = self._get_factories(normalize=normalize, *args, **kwargs)
+        data = []
+        for page in generator:
+            try:
+                data.append(page)
+            except Exception as e:
+                print(e)
+        data = pd.concat(data)
+        return data
+
+    def get_machines(self, normalize=True, *args, **kwargs):
+        generator = self._get_machines(normalize=normalize, *args, **kwargs)
+        data = []
+        for page in generator:
+            try:
+                data.append(page)
+            except Exception as e:
+                print(e)
+        data = pd.concat(data)
+        return data
+
+    def get_machine_types(self, normalize=True, *args, **kwargs):
+        generator = self._get_machine_types(normalize=normalize, *args, **kwargs)
+        data = []
+        for page in generator:
+            try:
+                data.append(page)
+            except Exception as e:
+                print(e)
+        data = pd.concat(data)
+        return data
+
+    def get_machine_names(self, source_type=None, clean_strings_out=True):
+        """
+        Get a list of machine names.  This is a simplified version of get_machines().  
+
+        :param source_type: filter the list to only the specified source_type
+        :type source_type: str
+        :param clean_strings_out: If true, return the list using the UI-based display names.  If false, the list contains the Sight Machine internal machine names.
+        :return: list
+        """
+
+        query_params = {'_only': ['source', 'source_clean', 'source_type'],
+                        '_order_by': 'source_clean'}
+
+        if source_type:
+            # Double check the type
+            mt = self.get_machine_types(source_type=source_type)
+            # If it was found, then no action to take, otherwise try looking up from clean string
+            mt = self.get_machine_types(source_type_clean=source_type) if not len(mt) else []
+            if len(mt):
+                source_type = mt['source_type'].iloc[0]
+            else:
+                log.error('Machine Type not found')
+                return []
+
+            query_params['source_type'] = source_type
+
+        machines = self.get_data_v1('machine_v1', 'get_machines', normalize=True, **query_params)
+
+        if clean_strings_out:
+            return machines['source_clean'].to_list()
+        else:
+            return machines['source'].to_list()
+
+    def get_machine_type_names(self, clean_strings_out=True):
+        """
+        Get a list of machine type names.  This is a simplified version of get_machine_types().  
+
+        :param clean_strings_out: If true, return the list using the UI-based display names.  If false, the list contains the Sight Machine internal machine types.
+        :return: list
+        """
+        query_params = {'_only': ['source_type', 'source_type_clean'],
+                        '_order_by': 'source_type_clean'}
+        machine_types = self.get_data_v1('machine_type_v1', 'get_machine_types', normalize=True, **query_params)
+
+        if clean_strings_out:
+            return machine_types['source_type_clean'].to_list()
+        else:
+            return machine_types['source_type'].to_list()
diff --git a/smsdk/client_v0.py b/smsdk/client_v0.py
@@ -439,6 +439,7 @@ def inner(self, machine_source, types=[], return_mtype=False, **kwargs):
                         log.error(f'Unable to find machine type for {machine_source}')
                         return
             try:
+                stats = self.get_machine_types(normalize=False, _limit=1, source_type=machine_type)
                 stats = self.get_machine_types(normalize=False, _limit=1, source_type=machine_type)['stats'][0]
             except KeyError:
                 # explicitly embed string to machine type names esp JCP
@@ -591,7 +592,7 @@ def get_factories(self, normalize=True, *args, **kwargs):
         """
         return self.get_data('factory', 'get_factories', normalize, *args, **kwargs)
 
-    def get_machines(self, normalize=True, *args, **kwargs):
+    def get_machines(self, normalize=True, *args, **kwargs) -> pd.DataFrame:
         """
         Get list of machines and associated metadata.  Note this includes extensive internal metadata.  If you only want to get a list of machine names
         then see also get_machine_names(). 
@@ -619,13 +620,12 @@ def get_machine_names(self, source_type=None, clean_strings_out=True):
             # Double check the type
             mt = self.get_machine_types(source_type=source_type)
             # If it was found, then no action to take, otherwise try looking up from clean string
-            if not len(mt):
-                mt = self.get_machine_types(source_type_clean=source_type)
-                if len(mt):
-                    source_type = mt['source_type'].iloc[0]
-                else:
-                    log.error('Machine Type not found')
-                    return []
+            mt = self.get_machine_types(source_type_clean=source_type) if not len(mt) else []
+            if len(mt):
+                source_type = mt['source_type'].iloc[0]
+            else:
+                log.error('Machine Type not found')
+                return []
 
             query_params['source_type'] = source_type
 

diff --git a/smsdk/config/api_endpoints.json b/smsdk/config/api_endpoints.json
@@ -8,13 +8,16 @@
     "alt_url": "/api/cycle"
   },
   "Factory": {
-    "url" : "/api/factory"
+    "url" : "/api/factory",
+    "url_v1" : "/v1/obj/factory"
   },
   "MachineType": {
-    "url" : "/api/machinetype"
+    "url" : "/api/machinetype",
+    "url_v1" : "/v1/obj/machine_type"
   },
   "Machine": {
-    "url" : "/api/machine"
+    "url" : "/api/machine",
+    "url_v1" : "/v1/obj/machine"
   },
   "Parts": {
     "url" : "/api/part",

diff --git a/smsdk/ma_session.py b/smsdk/ma_session.py
@@ -4,6 +4,8 @@
 import requests
 
 import numpy as np
+import pandas as pd
+from pandas import json_normalize
 
 from requests.structures import CaseInsensitiveDict
 from requests.sessions import Session
@@ -22,6 +24,35 @@
 SM_AUTH_HEADER_SECRET_ID_OLD = RESOURCE_CONFIG["auth_header-api-secret_old"]
 SM_AUTH_HEADER_KEY_ID = RESOURCE_CONFIG["auth_header-api-key"]
 
+def dict_to_df(data, normalize=True):
+    """
+    Convert a list of API records into a pandas dataframe.
+
+    :param data: records to convert; when normalize is True the first record is inspected
+        for a 'stats' key, so it is indexed as a sequence of dicts
+    :param normalize: if True flatten nested structures with json_normalize, otherwise build
+        the dataframe directly with pd.DataFrame
+    :type normalize: bool
+    :return: pandas dataframe.  When it is non-empty the index is set from the '_id' column
+        and then from the 'id' column, for whichever of them exist.  If json_normalize fails
+        on data without a 'stats' block, a dataframe with a single 'values' column is
+        returned immediately, without that index step.
+    """
+    if normalize:
+        # special case to handle the 'stats' block
+        if data and 'stats' in data[0]:
+            if isinstance(data[0]['stats'], dict):
+                # part stats are dict
+                df = json_normalize(data)
+            else:
+                # machine type stats are list
+                # every key of the first record except 'stats' is passed as a meta column
+                cols = [*data[0]]
+                cols.remove('stats')
+                # errors='ignore': per the pandas docs, do not raise KeyError when a meta key
+                # is not present in every record
+                df = json_normalize(data, 'stats', cols, record_prefix='stats.', errors='ignore')
+        else:
+            try:
+                df = json_normalize(data)
+            # NOTE(review): bare except also traps KeyboardInterrupt/SystemExit
+            except:
+                # From cases like _distinct which don't have a "normal" return format
+                return pd.DataFrame({'values': data})
+    else:
+        df = pd.DataFrame(data)
+
+    if len(df) > 0:
+        if '_id' in df.columns:
+            df.set_index('_id', inplace=True)
+
+        # NOTE(review): when both columns exist this second call replaces the '_id' index
+        if 'id' in df.columns:
+            df.set_index('id', inplace=True)
+    return df
+
 import logging
 log = logging.getLogger(__name__)
 
@@ -96,8 +127,7 @@ def _get_records(
 
             except:
                 import traceback
-
-                print(traceback.print_exc())
+                log.error(traceback.print_exc())
                 return records
 
     def _get_schema(
@@ -203,7 +233,7 @@ def _get_records_v1(
             except:
                 import traceback
 
-                print(traceback.print_exc())
+                log.error(traceback.print_exc())
                 return records
 
     def get_json_headers(self):
@@ -243,3 +273,83 @@ def get_starttime_endtime_keys(self, **kwargs):
                     continue
 
         return starttime_key, endtime_key
+
+    def _get_records_mongo_v1(
+        self,
+        endpoint,
+        normalize=True,
+        method="get",
+        limit=np.Inf,
+        offset=1,
+        **url_params
+    ):
+        """
+        Function to get api call and fetch data from MA APIs
+        :param endpoint: complete url endpoint
+        :param method: Reqested method. Default = get
+        :param enable_pagination: if pagination is enabled then
+        the records are fetched with limit offset pagination
+        :param limit: Limit the number of records for pagination
+        :param offset: pagination offset
+        :param url_params: dict of params for API ex filtering, columns etc
+        :return: List of records
+        """
+        next_page = ""
+        offset = int(offset)
+        try:
+            limit = int(limit)
+        except:
+            limit = float(limit)
+
+        if 'machine_type' in url_params:
+            url_params.pop('machine_type')
+        max_page_size = 2000
+        limit = min(max_page_size, limit)
+        if not url_params.get("per_page"):
+            url_params["per_page"] = 5
+
+        def _fetch_data(endpoint, url_params):
+            response = getattr(self.session, method.lower())(
+                    endpoint, params=url_params
+                )
+            if response.text:
+                if response.status_code not in [200, 201]:
+                    raise ValueError("Error - {}".format(response.text))
+                try:
+                    data = response.json()
+                    try:
+                        next_page = data["next_page"]
+                    except:
+                        next_page = ""
+                    if data["success"]:
+                        data = data['objects']
+                except JSONDecodeError as e:
+                    print(f'No valid JSON returned {e}')
+                    data = []
+            else:
+                data = []
+            return data, next_page
+        while limit > 0:
+            if next_page:
+                data, next_page = _fetch_data(endpoint=next_page, url_params={})
+                if not next_page:
+                    limit = 0
+                else:
+                    limit -= len(data)
+            else:
+                data, next_page = _fetch_data(endpoint=endpoint, url_params=url_params)
+                if not next_page:
+                    limit = 0
+                else:
+                    limit -= len(data)
+            data = dict_to_df(data, normalize=normalize)
+
+            # To keep consistent, rename columns back from '.' to '__'
+            data.columns = [name.replace('.', '__') for name in data.columns]
+
+            if 'endtime' in data.columns:
+                data['endtime'] = pd.to_datetime(data['endtime'])
+            if 'starttime' in data.columns:
+                data['starttime'] = pd.to_datetime(data['starttime'])
+
+            yield data
diff --git a/smsdk/smsdk_entities/cycle/cycleV1.py b/smsdk/smsdk_entities/cycle/cycleV1.py
@@ -8,7 +8,7 @@
     import importlib_resources as pkg_resources
 
 from smsdk.tool_register import SmsdkEntities, smsdkentities
-from smsdk.utils import module_utility
+from smsdk.utils import module_utility, check_kw
 from smsdk import config
 from smsdk.ma_session import MaSession
 from datetime import datetime, timedelta
@@ -96,7 +96,8 @@ def modify_input_params(self, **kwargs):
             where.append({'name': end_key.split('__')[0], 'op': end_key.split('__')[-1], 'value': endtime.isoformat()})
 
         for kw in kwargs:
-            if kw[0] != '_' and 'machine_type' not in kw and 'Machine' not in kw and 'machine__source' not in kw and 'End Time' not in kw and 'endtime' not in kw and 'Start Time' not in kw and 'starttime' not in kw:
+            if check_kw(kw):
+            # if kw[0] != '_' and 'machine_type' not in kw and 'Machine' not in kw and 'machine__source' not in kw and 'End Time' not in kw and 'endtime' not in kw and 'Start Time' not in kw and 'starttime' not in kw:
                 if '__' not in kw:
                     where.append({'name': kw, 'op': 'eq', 'value': kwargs[kw]})
                 else: