Skip to content

Commit f047161

Browse files
committed
Fix bug #5001 and clean up debugging code in notebooks
1 parent 8101db1 commit f047161

File tree

5 files changed

+5
-5
lines changed

5 files changed

+5
-5
lines changed

Deployment/scripts/fabric_scripts/notebooks/00_process_json_files.ipynb

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

Deployment/scripts/fabric_scripts/notebooks/01_process_audio_files.ipynb

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

Deployment/scripts/fabric_scripts/notebooks/02_enrich_audio_data.ipynb

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

Deployment/scripts/fabric_scripts/notebooks/03_post_processing.ipynb

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
{"cells":[{"cell_type":"markdown","id":"b337af2d-17af-471a-81e9-92b75e40b46e","metadata":{"nteract":{"transient":{"deleting":false}}},"source":["Copyright (c) Microsoft Corporation.\n","\n","Licensed under the MIT License."]},{"cell_type":"code","execution_count":null,"id":"7a1b029b-00ab-489a-b8fc-c6cc2cff94ef","metadata":{"jupyter":{"outputs_hidden":false,"source_hidden":false},"microsoft":{"language":"python","language_group":"synapse_pyspark"},"nteract":{"transient":{"deleting":false}}},"outputs":[],"source":["import pandas as pd\n","import numpy as np\n","from datetime import datetime, timedelta\n","\n","#set start and end date variables\n","start_dt='01-01-2020'\n","end_dt='12-31-2026'\n","\n","df = pd.DataFrame()\n","#create base date range\n","\n","df['dt'] = pd.date_range(start=start_dt, end=end_dt, freq='D')\n","\n","#year as int\n","df['y'] = pd.DatetimeIndex(df['dt']).year\n","\n","#month as int\n","df['m'] = pd.DatetimeIndex(df['dt']).month\n","\n","#calendar day as int\n","df['d'] = pd.DatetimeIndex(df['dt']).day\n","\n","#yearmonth as int\n","df['ym'] = df['y']*100 + df['m']\n","\n","#date in yyyymmdd as int\n","df['dt_int'] = df['y']*10000 + df['m']*100 + df['d']\n","\n","#day of week name (Monday, Tuesday, ...)\n","df['dow_name'] = df['dt'].dt.day_name()\n","\n","#day of week number as int (Monday=0, Sunday=6)\n","df['dow'] = df['dt'].dt.dayofweek\n","\n","#day of year number as int\n","df['doy'] = df['dt'].dt.dayofyear\n","\n","#month name (January, February, ...)\n","df['m_name'] = df['dt'].dt.month_name()\n","\n","#week number of year, using iso conventions (Monday is first DOW)\n","df['iso_week'] = 18 #df['dt'].isocalendar()[1] #df['dt'].dt.week\n","\n","#quarter number of year\n","df['q'] = ((df['m']-1) // 3) + 1\n","\n","#yearquarter as int\n","df['yq'] = df['y']*10+df['q']\n","\n","#half number of year\n","df['h'] = ((df['q']-1) // 2) + 1\n","\n","#yearhalf as int\n","df['yh'] = df['y']*10+df['h']\n","\n","#yearmonth name\n","df['ym_name'] = 
df['m_name'] + ', ' + df['y'].apply(lambda x: str(x))\n","\n","#is weekday (1=True, 0=False)\n","df['is_weekd'] = np.where(df['dow'].isin([0,1,2,3,4,]), 1, 0)\n","\n","#weekdays in yearmonth through date\n","df['weekdom'] = df[['ym','is_weekd']].groupby('ym')['is_weekd'].cumsum()\n","\n","#timestamp when the calendar table was generated by this script\n","# df['created_on'] = datetime.now()"]},{"cell_type":"code","execution_count":null,"id":"d30c6627-cc0d-436e-bb4f-3e0fc07e4713","metadata":{"jupyter":{"outputs_hidden":false,"source_hidden":false},"microsoft":{"language":"python","language_group":"synapse_pyspark"},"nteract":{"transient":{"deleting":false}}},"outputs":[],"source":["calendar_df = spark.createDataFrame(df)"]},{"cell_type":"code","execution_count":null,"id":"46ff2c8f-5f50-4bcd-80c4-a3815f8a6fd4","metadata":{"jupyter":{"outputs_hidden":false,"source_hidden":false},"microsoft":{"language":"python","language_group":"synapse_pyspark"},"nteract":{"transient":{"deleting":false}}},"outputs":[],"source":["calendar_df.write.format('delta').mode('overwrite').option(\"overwriteSchema\", \"true\").saveAsTable('ckm_calendar')"]}],"metadata":{"dependencies":{"lakehouse":{"default_lakehouse":"e6ad9dad-e3da-4da5-bca6-6572c466b69a","default_lakehouse_name":"ckm_lakehouse","default_lakehouse_workspace_id":"0d98d480-171b-4b4d-a8e7-80fbd031d1a6"}},"description":null,"kernel_info":{"name":"synapse_pyspark"},"kernelspec":{"display_name":"Synapse PySpark","language":"Python","name":"synapse_pyspark"},"language_info":{"name":"python"},"microsoft":{"language":"python","language_group":"synapse_pyspark","ms_spell_check":{"ms_spell_check_language":"en"}},"nteract":{"version":"[email protected]"},"save_output":true,"spark_compute":{"compute_id":"/trident/default"},"synapse_widget":{"state":{},"version":"0.1"},"widgets":{}},"nbformat":4,"nbformat_minor":5}
1+
{"cells":[{"cell_type":"markdown","source":["Copyright (c) Microsoft Corporation.\n","\n","Licensed under the MIT License."],"metadata":{"nteract":{"transient":{"deleting":false}}},"id":"b337af2d-17af-471a-81e9-92b75e40b46e"},{"cell_type":"code","source":["import pandas as pd\n","import numpy as np\n","from datetime import datetime, timedelta\n","\n","#set start and end date variables\n","start_dt='01-01-2020'\n","end_dt='12-31-2026'\n","\n","df = pd.DataFrame()\n","#create base date range\n","\n","df['dt'] = pd.date_range(start=start_dt, end=end_dt, freq='D')\n","\n","#year as int\n","df['y'] = pd.DatetimeIndex(df['dt']).year\n","\n","#month as int\n","df['m'] = pd.DatetimeIndex(df['dt']).month\n","\n","#calendar day as int\n","df['d'] = pd.DatetimeIndex(df['dt']).day\n","\n","#yearmonth as int\n","df['ym'] = df['y']*100 + df['m']\n","\n","#date in yyyymmdd as int\n","df['dt_int'] = df['y']*10000 + df['m']*100 + df['d']\n","\n","#day of week name (Monday, Tuesday, ...)\n","df['dow_name'] = df['dt'].dt.day_name()\n","\n","#day of week number as int (Monday=0, Sunday=6)\n","df['dow'] = df['dt'].dt.dayofweek\n","\n","#day of year number as int\n","df['doy'] = df['dt'].dt.dayofyear\n","\n","#month name (January, February, ...)\n","df['m_name'] = df['dt'].dt.month_name()\n","\n","#week number of year, using iso conventions (Monday is first DOW)\n","df['iso_week'] = 18 #df['dt'].isocalendar()[1] #df['dt'].dt.week\n","\n","#quarter number of year\n","df['q'] = ((df['m']-1) // 3) + 1\n","\n","#yearquarter as int\n","df['yq'] = df['y']*10+df['q']\n","\n","#half number of year\n","df['h'] = ((df['q']-1) // 2) + 1\n","\n","#yearhalf as int\n","df['yh'] = df['y']*10+df['h']\n","\n","#yearmonth name\n","df['ym_name'] = df['m_name'] + ', ' + df['y'].apply(lambda x: str(x))\n","\n","#is weekday (1=True, 0=False)\n","df['is_weekd'] = np.where(df['dow'].isin([0,1,2,3,4,]), 1, 0)\n","\n","#weekdays in yearmonth through date\n","df['weekdom'] = 
df[['ym','is_weekd']].groupby('ym')['is_weekd'].cumsum()\n","\n","#timestamp when the calendar table was generated by this script\n","# df['created_on'] = datetime.now()"],"outputs":[],"execution_count":null,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"7a1b029b-00ab-489a-b8fc-c6cc2cff94ef"},{"cell_type":"code","source":["calendar_df = spark.createDataFrame(df)"],"outputs":[],"execution_count":null,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"d30c6627-cc0d-436e-bb4f-3e0fc07e4713"},{"cell_type":"code","source":["calendar_df.write.format('delta').mode('overwrite').option(\"overwriteSchema\", \"true\").saveAsTable('ckm_calendar')"],"outputs":[],"execution_count":null,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"46ff2c8f-5f50-4bcd-80c4-a3815f8a6fd4"}],"metadata":{"kernelspec":{"name":"synapse_pyspark","language":"Python","display_name":"Synapse PySpark"},"language_info":{"name":"python"},"microsoft":{"language":"python","ms_spell_check":{"ms_spell_check_language":"en"},"language_group":"synapse_pyspark"},"widgets":{},"kernel_info":{"name":"synapse_pyspark"},"nteract":{"version":"[email protected]"},"description":null,"synapse_widget":{"version":"0.1","state":{}},"save_output":true,"spark_compute":{"compute_id":"/trident/default"},"dependencies":{"lakehouse":{"default_lakehouse":"e6ad9dad-e3da-4da5-bca6-6572c466b69a","default_lakehouse_name":"ckm_lakehouse","default_lakehouse_workspace_id":"0d98d480-171b-4b4d-a8e7-80fbd031d1a6"}}},"nbformat":4,"nbformat_minor":5}

0 commit comments

Comments
 (0)