{ "cells": [ { "cell_type": "markdown", "id": "2913c9d5", "metadata": {}, "source": [ "# Some Usecases On A Test Dataset From SAP" ] }, { "cell_type": "code", "execution_count": 1, "id": "8f816aaf", "metadata": {}, "outputs": [], "source": [ "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 2, "id": "d14f9562", "metadata": {}, "outputs": [], "source": [ "model = pd.read_csv('sap-dataset.csv')" ] }, { "cell_type": "code", "execution_count": 3, "id": "d3f79565", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
AccountIdAccountDescriptionAccountParentIdStoreIdStoreDescriptionLocationIdLocationDescriptionProductIdProductDescriptionSales_ManagerId...DateDATEDateYearDateQuarterDateMonthDateDAYCategoryVersionCategoryCategoryCategoryTypeValueValueUnit
0Original_Sales_PriceOriginal Sales PriceNaNST43franchescini HandCT14SalemPD23IPASM1...2014-12-012014201442014121ActualActualspublic1779.763910USD
1Original_Sales_PriceOriginal Sales PriceNaNST35Quality ShoppingCT14SalemPD2PepsiSM4...2014-10-012014201442014101ActualActualspublic130.957113USD
2Original_Sales_PriceOriginal Sales PriceNaNST98Walter Store IncCT14SalemPD24AmberSM1...2014-06-012014201422014061ActualActualspublic10226.449395USD
3Original_Sales_PriceOriginal Sales PriceNaNST188Arrowhead Corner StoreCT1Los AngelesPD19Coconut WaterSM8...2016-11-012016201642016111ActualActualspublic383.078689USD
4Original_Sales_PriceOriginal Sales PriceNaNST107Spire MarketCT5San JosePD15Pineapple JuiceSM5...2015-12-012015201542015121ActualActualspublic3110.303679USD
..................................................................
1068Quantity_soldQuantity soldNaNST4GstoreCT13PortlandPD26Red WineSM4...2015-12-012015201542015121ActualActualspublic250.000000NaN
1069Original_Sales_PriceOriginal Sales PriceNaNST49English Bay depotCT4SacramentoPD27White WineSM5...2015-01-012015201512015011ActualActualspublic11348.592904USD
1070DiscountDiscountNaNST36PriceLow ArenaCT10RenoPD22LagerSM3...2016-05-012016201622016051ActualActualspublic4351.045830USD
1071Original_Sales_PriceOriginal Sales PriceNaNST7Main Street CoCT4SacramentoPD16Watermelon JuiceSM5...2015-12-012015201542015121ActualActualspublic7905.590606USD
1072Quantity_soldQuantity soldNaNST98Walter Store IncCT14SalemPD12LemonadeSM1...2015-07-012015201532015071ActualActualspublic100.000000NaN
\n", "

1073 rows × 21 columns

\n", "
" ], "text/plain": [ " AccountId AccountDescription AccountParentId StoreId \\\n", "0 Original_Sales_Price Original Sales Price NaN ST43 \n", "1 Original_Sales_Price Original Sales Price NaN ST35 \n", "2 Original_Sales_Price Original Sales Price NaN ST98 \n", "3 Original_Sales_Price Original Sales Price NaN ST188 \n", "4 Original_Sales_Price Original Sales Price NaN ST107 \n", "... ... ... ... ... \n", "1068 Quantity_sold Quantity sold NaN ST4 \n", "1069 Original_Sales_Price Original Sales Price NaN ST49 \n", "1070 Discount Discount NaN ST36 \n", "1071 Original_Sales_Price Original Sales Price NaN ST7 \n", "1072 Quantity_sold Quantity sold NaN ST98 \n", "\n", " StoreDescription LocationId LocationDescription ProductId \\\n", "0 franchescini Hand CT14 Salem PD23 \n", "1 Quality Shopping CT14 Salem PD2 \n", "2 Walter Store Inc CT14 Salem PD24 \n", "3 Arrowhead Corner Store CT1 Los Angeles PD19 \n", "4 Spire Market CT5 San Jose PD15 \n", "... ... ... ... ... \n", "1068 Gstore CT13 Portland PD26 \n", "1069 English Bay depot CT4 Sacramento PD27 \n", "1070 PriceLow Arena CT10 Reno PD22 \n", "1071 Main Street Co CT4 Sacramento PD16 \n", "1072 Walter Store Inc CT14 Salem PD12 \n", "\n", " ProductDescription Sales_ManagerId ... DateDATE DateYear \\\n", "0 IPA SM1 ... 2014-12-01 2014 \n", "1 Pepsi SM4 ... 2014-10-01 2014 \n", "2 Amber SM1 ... 2014-06-01 2014 \n", "3 Coconut Water SM8 ... 2016-11-01 2016 \n", "4 Pineapple Juice SM5 ... 2015-12-01 2015 \n", "... ... ... ... ... ... \n", "1068 Red Wine SM4 ... 2015-12-01 2015 \n", "1069 White Wine SM5 ... 2015-01-01 2015 \n", "1070 Lager SM3 ... 2016-05-01 2016 \n", "1071 Watermelon Juice SM5 ... 2015-12-01 2015 \n", "1072 Lemonade SM1 ... 2015-07-01 2015 \n", "\n", " DateQuarter DateMonth DateDAY CategoryVersion CategoryCategory \\\n", "0 20144 201412 1 Actual Actuals \n", "1 20144 201410 1 Actual Actuals \n", "2 20142 201406 1 Actual Actuals \n", "3 20164 201611 1 Actual Actuals \n", "4 20154 201512 1 Actual Actuals \n", "... ... ... ... ... ... \n", "1068 20154 201512 1 Actual Actuals \n", "1069 20151 201501 1 Actual Actuals \n", "1070 20162 201605 1 Actual Actuals \n", "1071 20154 201512 1 Actual Actuals \n", "1072 20153 201507 1 Actual Actuals \n", "\n", " CategoryType Value ValueUnit \n", "0 public 1779.763910 USD \n", "1 public 130.957113 USD \n", "2 public 10226.449395 USD \n", "3 public 383.078689 USD \n", "4 public 3110.303679 USD \n", "... ... ... ... \n", "1068 public 250.000000 NaN \n", "1069 public 11348.592904 USD \n", "1070 public 4351.045830 USD \n", "1071 public 7905.590606 USD \n", "1072 public 100.000000 NaN \n", "\n", "[1073 rows x 21 columns]" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model" ] }, { "cell_type": "markdown", "id": "642b7955", "metadata": {}, "source": [ "## Dropping Columns That Contain Only NaN Values" ] }, { "cell_type": "markdown", "id": "e5e63075", "metadata": {}, "source": [ "``DataFrame.drop()`` accepts a ``inplace`` parameter.\n", "\n", "* When not given, ``drop()`` returns a modified *copy* of the frame\n", "* When given, it modified the frame in place" ] }, { "cell_type": "code", "execution_count": 4, "id": "9e9f22c3", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
AccountIdAccountDescriptionStoreIdStoreDescriptionLocationIdLocationDescriptionProductIdProductDescriptionSales_ManagerIdSales_ManagerDescriptionDateDATEDateYearDateQuarterDateMonthDateDAYCategoryVersionCategoryCategoryCategoryTypeValueValueUnit
0Original_Sales_PriceOriginal Sales PriceST43franchescini HandCT14SalemPD23IPASM1Janet Bury2014-12-012014201442014121ActualActualspublic1779.763910USD
1Original_Sales_PriceOriginal Sales PriceST35Quality ShoppingCT14SalemPD2PepsiSM4Nancy Miller2014-10-012014201442014101ActualActualspublic130.957113USD
2Original_Sales_PriceOriginal Sales PriceST98Walter Store IncCT14SalemPD24AmberSM1Janet Bury2014-06-012014201422014061ActualActualspublic10226.449395USD
\n", "
" ], "text/plain": [ " AccountId AccountDescription StoreId StoreDescription \\\n", "0 Original_Sales_Price Original Sales Price ST43 franchescini Hand \n", "1 Original_Sales_Price Original Sales Price ST35 Quality Shopping \n", "2 Original_Sales_Price Original Sales Price ST98 Walter Store Inc \n", "\n", " LocationId LocationDescription ProductId ProductDescription Sales_ManagerId \\\n", "0 CT14 Salem PD23 IPA SM1 \n", "1 CT14 Salem PD2 Pepsi SM4 \n", "2 CT14 Salem PD24 Amber SM1 \n", "\n", " Sales_ManagerDescription DateDATE DateYear DateQuarter DateMonth \\\n", "0 Janet Bury 2014-12-01 2014 20144 201412 \n", "1 Nancy Miller 2014-10-01 2014 20144 201410 \n", "2 Janet Bury 2014-06-01 2014 20142 201406 \n", "\n", " DateDAY CategoryVersion CategoryCategory CategoryType Value \\\n", "0 1 Actual Actuals public 1779.763910 \n", "1 1 Actual Actuals public 130.957113 \n", "2 1 Actual Actuals public 10226.449395 \n", "\n", " ValueUnit \n", "0 USD \n", "1 USD \n", "2 USD " ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "modified_model = model.drop(model.columns[model.isna().all()], axis='columns')\n", "modified_model.head(3)" ] }, { "cell_type": "markdown", "id": "50794e0b", "metadata": {}, "source": [ "Apparently the ``AccountParentId`` column has been dropped. So how did this work? Lets dissect this setp by step." ] }, { "cell_type": "markdown", "id": "4ddefc0d", "metadata": {}, "source": [ "### ``DataFrame.isna()``" ] }, { "cell_type": "markdown", "id": "98de2572", "metadata": {}, "source": [ "``isna()`` on a frame returns an equally shaped matrix, only with ``bool`` values representing the ``Nan``-ness of the respective field in the original frame" ] }, { "cell_type": "code", "execution_count": 5, "id": "7612fda7", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
AccountIdAccountDescriptionAccountParentIdStoreIdStoreDescriptionLocationIdLocationDescriptionProductIdProductDescriptionSales_ManagerId...DateDATEDateYearDateQuarterDateMonthDateDAYCategoryVersionCategoryCategoryCategoryTypeValueValueUnit
0FalseFalseTrueFalseFalseFalseFalseFalseFalseFalse...FalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
1FalseFalseTrueFalseFalseFalseFalseFalseFalseFalse...FalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
2FalseFalseTrueFalseFalseFalseFalseFalseFalseFalse...FalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
3FalseFalseTrueFalseFalseFalseFalseFalseFalseFalse...FalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
4FalseFalseTrueFalseFalseFalseFalseFalseFalseFalse...FalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
\n", "

5 rows × 21 columns

\n", "
" ], "text/plain": [ " AccountId AccountDescription AccountParentId StoreId StoreDescription \\\n", "0 False False True False False \n", "1 False False True False False \n", "2 False False True False False \n", "3 False False True False False \n", "4 False False True False False \n", "\n", " LocationId LocationDescription ProductId ProductDescription \\\n", "0 False False False False \n", "1 False False False False \n", "2 False False False False \n", "3 False False False False \n", "4 False False False False \n", "\n", " Sales_ManagerId ... DateDATE DateYear DateQuarter DateMonth DateDAY \\\n", "0 False ... False False False False False \n", "1 False ... False False False False False \n", "2 False ... False False False False False \n", "3 False ... False False False False False \n", "4 False ... False False False False False \n", "\n", " CategoryVersion CategoryCategory CategoryType Value ValueUnit \n", "0 False False False False False \n", "1 False False False False False \n", "2 False False False False False \n", "3 False False False False False \n", "4 False False False False False \n", "\n", "[5 rows x 21 columns]" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "nan_fields = model.isna()\n", "nan_fields.head(5)" ] }, { "cell_type": "markdown", "id": "09ec3fa7", "metadata": {}, "source": [ "### ``DataFrame.all()`` (and ``any()``)" ] }, { "cell_type": "markdown", "id": "dab18bde", "metadata": {}, "source": [ "Returns a bitfield (err, a ``bool`` type ``Series`` object) to indicate whether *all* elements are ``True``. By default, it operates on *columns* - *all* elements of a *column* are ``True``." ] }, { "cell_type": "markdown", "id": "86036453", "metadata": {}, "source": [ "The companion method, ``any()``, indicates whether there is *at least one* element whose value is ``True``. " ] }, { "cell_type": "code", "execution_count": 6, "id": "ecc29f7e", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "AccountId False\n", "AccountDescription False\n", "AccountParentId True\n", "StoreId False\n", "StoreDescription False\n", "LocationId False\n", "LocationDescription False\n", "ProductId False\n", "ProductDescription False\n", "Sales_ManagerId False\n", "Sales_ManagerDescription False\n", "DateDATE False\n", "DateYear False\n", "DateQuarter False\n", "DateMonth False\n", "DateDAY False\n", "CategoryVersion False\n", "CategoryCategory False\n", "CategoryType False\n", "Value False\n", "ValueUnit False\n", "dtype: bool" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "nan_fields.all()" ] }, { "cell_type": "markdown", "id": "46a01112", "metadata": {}, "source": [ "### Column Selection" ] }, { "cell_type": "markdown", "id": "ab4137e0", "metadata": {}, "source": [ "A bitfied applied to a ``Series`` (using the ``[]`` operator of ``Series``) selects the elements.\n", "jjj link to **filters**" ] }, { "cell_type": "code", "execution_count": 7, "id": "27492288", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['AccountId', 'AccountDescription', 'AccountParentId', 'StoreId',\n", " 'StoreDescription', 'LocationId', 'LocationDescription', 'ProductId',\n", " 'ProductDescription', 'Sales_ManagerId', 'Sales_ManagerDescription',\n", " 'DateDATE', 'DateYear', 'DateQuarter', 'DateMonth', 'DateDAY',\n", " 'CategoryVersion', 'CategoryCategory', 'CategoryType', 'Value',\n", " 'ValueUnit'],\n", " dtype='object')" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model.columns" ] }, { "cell_type": "code", "execution_count": 8, "id": "bf7c6719", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['AccountParentId'], dtype='object')" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "nan_columns = model.columns[model.isna().all()]\n", "nan_columns" ] }, { "cell_type": "markdown", "id": "be032616", "metadata": {}, "source": [ "### Dropping Columns" ] }, { "cell_type": "code", "execution_count": 9, "id": "2a65f5ae", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
AccountIdAccountDescriptionStoreIdStoreDescriptionLocationIdLocationDescriptionProductIdProductDescriptionSales_ManagerIdSales_ManagerDescriptionDateDATEDateYearDateQuarterDateMonthDateDAYCategoryVersionCategoryCategoryCategoryTypeValueValueUnit
0Original_Sales_PriceOriginal Sales PriceST43franchescini HandCT14SalemPD23IPASM1Janet Bury2014-12-012014201442014121ActualActualspublic1779.763910USD
1Original_Sales_PriceOriginal Sales PriceST35Quality ShoppingCT14SalemPD2PepsiSM4Nancy Miller2014-10-012014201442014101ActualActualspublic130.957113USD
2Original_Sales_PriceOriginal Sales PriceST98Walter Store IncCT14SalemPD24AmberSM1Janet Bury2014-06-012014201422014061ActualActualspublic10226.449395USD
3Original_Sales_PriceOriginal Sales PriceST188Arrowhead Corner StoreCT1Los AngelesPD19Coconut WaterSM8Gabriel Walton2016-11-012016201642016111ActualActualspublic383.078689USD
4Original_Sales_PriceOriginal Sales PriceST107Spire MarketCT5San JosePD15Pineapple JuiceSM5David Carl2015-12-012015201542015121ActualActualspublic3110.303679USD
\n", "
" ], "text/plain": [ " AccountId AccountDescription StoreId StoreDescription \\\n", "0 Original_Sales_Price Original Sales Price ST43 franchescini Hand \n", "1 Original_Sales_Price Original Sales Price ST35 Quality Shopping \n", "2 Original_Sales_Price Original Sales Price ST98 Walter Store Inc \n", "3 Original_Sales_Price Original Sales Price ST188 Arrowhead Corner Store \n", "4 Original_Sales_Price Original Sales Price ST107 Spire Market \n", "\n", " LocationId LocationDescription ProductId ProductDescription Sales_ManagerId \\\n", "0 CT14 Salem PD23 IPA SM1 \n", "1 CT14 Salem PD2 Pepsi SM4 \n", "2 CT14 Salem PD24 Amber SM1 \n", "3 CT1 Los Angeles PD19 Coconut Water SM8 \n", "4 CT5 San Jose PD15 Pineapple Juice SM5 \n", "\n", " Sales_ManagerDescription DateDATE DateYear DateQuarter DateMonth \\\n", "0 Janet Bury 2014-12-01 2014 20144 201412 \n", "1 Nancy Miller 2014-10-01 2014 20144 201410 \n", "2 Janet Bury 2014-06-01 2014 20142 201406 \n", "3 Gabriel Walton 2016-11-01 2016 20164 201611 \n", "4 David Carl 2015-12-01 2015 20154 201512 \n", "\n", " DateDAY CategoryVersion CategoryCategory CategoryType Value \\\n", "0 1 Actual Actuals public 1779.763910 \n", "1 1 Actual Actuals public 130.957113 \n", "2 1 Actual Actuals public 10226.449395 \n", "3 1 Actual Actuals public 383.078689 \n", "4 1 Actual Actuals public 3110.303679 \n", "\n", " ValueUnit \n", "0 USD \n", "1 USD \n", "2 USD \n", "3 USD \n", "4 USD " ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "modified_model = model.drop(model.columns[model.isna().all()], axis='columns')\n", "modified_model.head(5)" ] }, { "cell_type": "markdown", "id": "29f52d3a", "metadata": {}, "source": [ "**Now for real:**" ] }, { "cell_type": "code", "execution_count": 10, "id": "bb7791df", "metadata": {}, "outputs": [], "source": [ "model.drop(model.columns[model.isna().all()], axis='columns', inplace=True)" ] }, { "cell_type": "markdown", "id": "8724ae31", "metadata": {}, "source": [ "## Fixing Date Values/Columns, And Operating With Date And Time" ] }, { "cell_type": "markdown", "id": "5b2fe935", "metadata": {}, "source": [ "The ``DateDATE`` column contains string values. Convert it to contain real timestamps that one can work with, numerically (e.g. add and subtract)." ] }, { "cell_type": "code", "execution_count": 11, "id": "fca4bd05", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 2014-12-01\n", "1 2014-10-01\n", "2 2014-06-01\n", "3 2016-11-01\n", "4 2015-12-01\n", " ... \n", "1068 2015-12-01\n", "1069 2015-01-01\n", "1070 2016-05-01\n", "1071 2015-12-01\n", "1072 2015-07-01\n", "Name: DateDATE, Length: 1073, dtype: object" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model['DateDATE']" ] }, { "cell_type": "code", "execution_count": 12, "id": "ffecd01b", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'2014-12-01'" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model['DateDATE'].iloc[0]" ] }, { "cell_type": "code", "execution_count": 13, "id": "da3d44d5", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "str" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "type(model['DateDATE'].iloc[0])" ] }, { "cell_type": "markdown", "id": "012d7b2a", "metadata": {}, "source": [ "### ``pandas.to_datetime()``" ] }, { "cell_type": "markdown", "id": "669510fd", "metadata": {}, "source": [ "Converts anything to a working timestamp value (or a vectorized version thereof): a ``str``, a ``Series``, ..." ] }, { "cell_type": "code", "execution_count": 14, "id": "99c1625f", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Timestamp('2023-03-13 13:02:01')" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ts = pd.to_datetime('2023-03-13 13:02:01') # str\n", "ts" ] }, { "cell_type": "code", "execution_count": 15, "id": "b4b4dc84", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Timestamp('2023-03-13 00:00:00')" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.to_datetime('2023-03-13') # str" ] }, { "cell_type": "markdown", "id": "41bbb578", "metadata": {}, "source": [ "``pandas.Timestamp`` is compatible with ``datetime.datetime`` jjjjj linke to both" ] }, { "cell_type": "code", "execution_count": 16, "id": "1b3cf134", "metadata": {}, "outputs": [], "source": [ "import datetime" ] }, { "cell_type": "code", "execution_count": 17, "id": "fc2ec4e4", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "datetime.datetime(2016, 3, 13, 0, 0)" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ref_ts = datetime.datetime(year=2016, month=3, day=13)\n", "ref_ts" ] }, { "cell_type": "code", "execution_count": 18, "id": "f452a153", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "False" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ts < ref_ts" ] }, { "cell_type": "code", "execution_count": 19, "id": "42f891b9", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ts > ref_ts" ] }, { "cell_type": "markdown", "id": "16ef3cfe", "metadata": {}, "source": [ "### Converting The Entire ``DateDATE`` Column To A Workable Type (And Working With It)" ] }, { "cell_type": "markdown", "id": "73cdb433", "metadata": {}, "source": [ "The following converts a ``Series`` object (of ``str``) to a ``Series`` object. That contains ``numpy.datetime64`` objects - which in turn go well together with any of ``pandas.Timestamp`` and ``datetime``." ] }, { "cell_type": "markdown", "id": "88ebbe41", "metadata": {}, "source": [ "#### Conversion" ] }, { "cell_type": "code", "execution_count": 20, "id": "2cca31c4", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 2014-12-01\n", "1 2014-10-01\n", "2 2014-06-01\n", "3 2016-11-01\n", "4 2015-12-01\n", " ... \n", "1068 2015-12-01\n", "1069 2015-01-01\n", "1070 2016-05-01\n", "1071 2015-12-01\n", "1072 2015-07-01\n", "Name: DateDATE, Length: 1073, dtype: datetime64[ns]" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.to_datetime(model['DateDATE'])" ] }, { "cell_type": "markdown", "id": "1d736699", "metadata": {}, "source": [ "Long story short: convert damn column!" ] }, { "cell_type": "code", "execution_count": 21, "id": "c605814e", "metadata": {}, "outputs": [], "source": [ "model['DateDATE'] = pd.to_datetime(model['DateDATE'])" ] }, { "cell_type": "code", "execution_count": 22, "id": "f18d72f1", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
AccountIdAccountDescriptionStoreIdStoreDescriptionLocationIdLocationDescriptionProductIdProductDescriptionSales_ManagerIdSales_ManagerDescriptionDateDATEDateYearDateQuarterDateMonthDateDAYCategoryVersionCategoryCategoryCategoryTypeValueValueUnit
0Original_Sales_PriceOriginal Sales PriceST43franchescini HandCT14SalemPD23IPASM1Janet Bury2014-12-012014201442014121ActualActualspublic1779.763910USD
1Original_Sales_PriceOriginal Sales PriceST35Quality ShoppingCT14SalemPD2PepsiSM4Nancy Miller2014-10-012014201442014101ActualActualspublic130.957113USD
2Original_Sales_PriceOriginal Sales PriceST98Walter Store IncCT14SalemPD24AmberSM1Janet Bury2014-06-012014201422014061ActualActualspublic10226.449395USD
3Original_Sales_PriceOriginal Sales PriceST188Arrowhead Corner StoreCT1Los AngelesPD19Coconut WaterSM8Gabriel Walton2016-11-012016201642016111ActualActualspublic383.078689USD
4Original_Sales_PriceOriginal Sales PriceST107Spire MarketCT5San JosePD15Pineapple JuiceSM5David Carl2015-12-012015201542015121ActualActualspublic3110.303679USD
\n", "
" ], "text/plain": [ " AccountId AccountDescription StoreId StoreDescription \\\n", "0 Original_Sales_Price Original Sales Price ST43 franchescini Hand \n", "1 Original_Sales_Price Original Sales Price ST35 Quality Shopping \n", "2 Original_Sales_Price Original Sales Price ST98 Walter Store Inc \n", "3 Original_Sales_Price Original Sales Price ST188 Arrowhead Corner Store \n", "4 Original_Sales_Price Original Sales Price ST107 Spire Market \n", "\n", " LocationId LocationDescription ProductId ProductDescription Sales_ManagerId \\\n", "0 CT14 Salem PD23 IPA SM1 \n", "1 CT14 Salem PD2 Pepsi SM4 \n", "2 CT14 Salem PD24 Amber SM1 \n", "3 CT1 Los Angeles PD19 Coconut Water SM8 \n", "4 CT5 San Jose PD15 Pineapple Juice SM5 \n", "\n", " Sales_ManagerDescription DateDATE DateYear DateQuarter DateMonth \\\n", "0 Janet Bury 2014-12-01 2014 20144 201412 \n", "1 Nancy Miller 2014-10-01 2014 20144 201410 \n", "2 Janet Bury 2014-06-01 2014 20142 201406 \n", "3 Gabriel Walton 2016-11-01 2016 20164 201611 \n", "4 David Carl 2015-12-01 2015 20154 201512 \n", "\n", " DateDAY CategoryVersion CategoryCategory CategoryType Value \\\n", "0 1 Actual Actuals public 1779.763910 \n", "1 1 Actual Actuals public 130.957113 \n", "2 1 Actual Actuals public 10226.449395 \n", "3 1 Actual Actuals public 383.078689 \n", "4 1 Actual Actuals public 3110.303679 \n", "\n", " ValueUnit \n", "0 USD \n", "1 USD \n", "2 USD \n", "3 USD \n", "4 USD " ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model.head(5)" ] }, { "cell_type": "markdown", "id": "63be6668", "metadata": {}, "source": [ "#### Usage: A Filter" ] }, { "cell_type": "markdown", "id": "d3a5fa29", "metadata": {}, "source": [ "Now we are in a position to define a filter that works with *time* (and not with *strings*)" ] }, { "cell_type": "code", "execution_count": 23, "id": "21690a38", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Timestamp('2015-12-04 00:00:00')" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ref_2015_12_04 = pd.to_datetime('2015-12-04') # or use datetime.datetime()\n", "ref_2015_12_04" ] }, { "cell_type": "code", "execution_count": 24, "id": "afe633ab", "metadata": {}, "outputs": [], "source": [ "flt_before_2015_12_04 = model['DateDATE'] < ref_2015_12_04" ] }, { "cell_type": "code", "execution_count": 25, "id": "13d79149", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 True\n", "1 True\n", "2 True\n", "3 False\n", "4 True\n", " ... \n", "1068 True\n", "1069 True\n", "1070 False\n", "1071 True\n", "1072 True\n", "Name: DateDATE, Length: 1073, dtype: bool" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "flt_before_2015_12_04" ] }, { "cell_type": "markdown", "id": "a79c2159", "metadata": {}, "source": [ "Select row based on filter" ] }, { "cell_type": "code", "execution_count": 26, "id": "44c1481d", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
AccountIdAccountDescriptionStoreIdStoreDescriptionLocationIdLocationDescriptionProductIdProductDescriptionSales_ManagerIdSales_ManagerDescriptionDateDATEDateYearDateQuarterDateMonthDateDAYCategoryVersionCategoryCategoryCategoryTypeValueValueUnit
0Original_Sales_PriceOriginal Sales PriceST43franchescini HandCT14SalemPD23IPASM1Janet Bury2014-12-012014201442014121ActualActualspublic1779.763910USD
1Original_Sales_PriceOriginal Sales PriceST35Quality ShoppingCT14SalemPD2PepsiSM4Nancy Miller2014-10-012014201442014101ActualActualspublic130.957113USD
2Original_Sales_PriceOriginal Sales PriceST98Walter Store IncCT14SalemPD24AmberSM1Janet Bury2014-06-012014201422014061ActualActualspublic10226.449395USD
4Original_Sales_PriceOriginal Sales PriceST107Spire MarketCT5San JosePD15Pineapple JuiceSM5David Carl2015-12-012015201542015121ActualActualspublic3110.303679USD
6Original_Sales_PriceOriginal Sales PriceST119Pay N GoCT2San FranciscoPD11Orange no pulpSM7Kiran Raj2015-03-012015201512015031ActualActualspublic548.226847USD
\n", "
" ], "text/plain": [ " AccountId AccountDescription StoreId StoreDescription \\\n", "0 Original_Sales_Price Original Sales Price ST43 franchescini Hand \n", "1 Original_Sales_Price Original Sales Price ST35 Quality Shopping \n", "2 Original_Sales_Price Original Sales Price ST98 Walter Store Inc \n", "4 Original_Sales_Price Original Sales Price ST107 Spire Market \n", "6 Original_Sales_Price Original Sales Price ST119 Pay N Go \n", "\n", " LocationId LocationDescription ProductId ProductDescription Sales_ManagerId \\\n", "0 CT14 Salem PD23 IPA SM1 \n", "1 CT14 Salem PD2 Pepsi SM4 \n", "2 CT14 Salem PD24 Amber SM1 \n", "4 CT5 San Jose PD15 Pineapple Juice SM5 \n", "6 CT2 San Francisco PD11 Orange no pulp SM7 \n", "\n", " Sales_ManagerDescription DateDATE DateYear DateQuarter DateMonth \\\n", "0 Janet Bury 2014-12-01 2014 20144 201412 \n", "1 Nancy Miller 2014-10-01 2014 20144 201410 \n", "2 Janet Bury 2014-06-01 2014 20142 201406 \n", "4 David Carl 2015-12-01 2015 20154 201512 \n", "6 Kiran Raj 2015-03-01 2015 20151 201503 \n", "\n", " DateDAY CategoryVersion CategoryCategory CategoryType Value \\\n", "0 1 Actual Actuals public 1779.763910 \n", "1 1 Actual Actuals public 130.957113 \n", "2 1 Actual Actuals public 10226.449395 \n", "4 1 Actual Actuals public 3110.303679 \n", "6 1 Actual Actuals public 548.226847 \n", "\n", " ValueUnit \n", "0 USD \n", "1 USD \n", "2 USD \n", "4 USD \n", "6 USD " ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "rows_before_2015_12_04 = model.loc[flt_before_2015_12_04]\n", "rows_before_2015_12_04.head(5)" ] }, { "cell_type": "markdown", "id": "f7ab4afb", "metadata": {}, "source": [ "## Modify Rows: Increase All Prices Of One Particular Store By 10%" ] }, { "cell_type": "markdown", "id": "cc41d5ee", "metadata": {}, "source": [ "Let store be ``ST43``, and increment ``Value`` by 10%" ] }, { "cell_type": "markdown", "id": "b7dab77f", "metadata": {}, "source": [ "See how many different products the store has ..." ] }, { "cell_type": "code", "execution_count": 27, "id": "153c1e79", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "False 1069\n", "True 4\n", "Name: StoreId, dtype: int64" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "(model['StoreId'] == 'ST43').value_counts()" ] }, { "cell_type": "markdown", "id": "e5e67e15", "metadata": {}, "source": [ "Select prices (column ``Value``) of those" ] }, { "cell_type": "code", "execution_count": 28, "id": "dda0ccb4", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 1779.763910\n", "465 13627.793342\n", "782 3427.896898\n", "917 15569.353983\n", "Name: Value, dtype: float64" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model.loc[model['StoreId'] == 'ST43', 'Value']" ] }, { "cell_type": "markdown", "id": "7c5b425f", "metadata": {}, "source": [ "Create a ``Series`` which contains increases prices" ] }, { "cell_type": "code", "execution_count": 29, "id": "eb47c1cf", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 1957.740301\n", "465 14990.572676\n", "782 3770.686587\n", "917 17126.289381\n", "Name: Value, dtype: float64" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model.loc[model['StoreId'] == 'ST43', 'Value'] * 1.1" ] }, { "cell_type": "markdown", "id": "aa043e2a", "metadata": {}, "source": [ "Assign increased prices back into frame" ] }, { "cell_type": "code", "execution_count": 30, "id": "0f3ff18f", "metadata": {}, "outputs": [], "source": [ "model.loc[model['StoreId'] == 'ST43', 'Value'] = model.loc[model['StoreId'] == 'ST43', 'Value'] * 1.1" ] }, { "cell_type": "code", "execution_count": 31, "id": "4d699670", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
AccountIdAccountDescriptionStoreIdStoreDescriptionLocationIdLocationDescriptionProductIdProductDescriptionSales_ManagerIdSales_ManagerDescriptionDateDATEDateYearDateQuarterDateMonthDateDAYCategoryVersionCategoryCategoryCategoryTypeValueValueUnit
0Original_Sales_PriceOriginal Sales PriceST43franchescini HandCT14SalemPD23IPASM1Janet Bury2014-12-012014201442014121ActualActualspublic1957.740301USD
1Original_Sales_PriceOriginal Sales PriceST35Quality ShoppingCT14SalemPD2PepsiSM4Nancy Miller2014-10-012014201442014101ActualActualspublic130.957113USD
2Original_Sales_PriceOriginal Sales PriceST98Walter Store IncCT14SalemPD24AmberSM1Janet Bury2014-06-012014201422014061ActualActualspublic10226.449395USD
3Original_Sales_PriceOriginal Sales PriceST188Arrowhead Corner StoreCT1Los AngelesPD19Coconut WaterSM8Gabriel Walton2016-11-012016201642016111ActualActualspublic383.078689USD
4Original_Sales_PriceOriginal Sales PriceST107Spire MarketCT5San JosePD15Pineapple JuiceSM5David Carl2015-12-012015201542015121ActualActualspublic3110.303679USD
\n", "
" ], "text/plain": [ " AccountId AccountDescription StoreId StoreDescription \\\n", "0 Original_Sales_Price Original Sales Price ST43 franchescini Hand \n", "1 Original_Sales_Price Original Sales Price ST35 Quality Shopping \n", "2 Original_Sales_Price Original Sales Price ST98 Walter Store Inc \n", "3 Original_Sales_Price Original Sales Price ST188 Arrowhead Corner Store \n", "4 Original_Sales_Price Original Sales Price ST107 Spire Market \n", "\n", " LocationId LocationDescription ProductId ProductDescription Sales_ManagerId \\\n", "0 CT14 Salem PD23 IPA SM1 \n", "1 CT14 Salem PD2 Pepsi SM4 \n", "2 CT14 Salem PD24 Amber SM1 \n", "3 CT1 Los Angeles PD19 Coconut Water SM8 \n", "4 CT5 San Jose PD15 Pineapple Juice SM5 \n", "\n", " Sales_ManagerDescription DateDATE DateYear DateQuarter DateMonth \\\n", "0 Janet Bury 2014-12-01 2014 20144 201412 \n", "1 Nancy Miller 2014-10-01 2014 20144 201410 \n", "2 Janet Bury 2014-06-01 2014 20142 201406 \n", "3 Gabriel Walton 2016-11-01 2016 20164 201611 \n", "4 David Carl 2015-12-01 2015 20154 201512 \n", "\n", " DateDAY CategoryVersion CategoryCategory CategoryType Value \\\n", "0 1 Actual Actuals public 1957.740301 \n", "1 1 Actual Actuals public 130.957113 \n", "2 1 Actual Actuals public 10226.449395 \n", "3 1 Actual Actuals public 383.078689 \n", "4 1 Actual Actuals public 3110.303679 \n", "\n", " ValueUnit \n", "0 USD \n", "1 USD \n", "2 USD \n", "3 USD \n", "4 USD " ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model.head(5)" ] }, { "cell_type": "markdown", "id": "9a8ea240", "metadata": {}, "source": [ "## Filter Readability: Combining Filters" ] }, { "cell_type": "code", "execution_count": 32, "id": "989e17e4", "metadata": {}, "outputs": [], "source": [ "flt_product_pd2 = (model['ProductId'] == 'PD2') # only product PD2" ] }, { "cell_type": "code", "execution_count": 33, "id": "92f4d07c", "metadata": {}, "outputs": [], "source": [ "flt_2015 = (model['DateDATE'] < pd.to_datetime('2016')) & \\\n", " (model['DateDATE'] >= pd.to_datetime('2015')) # only rows from 2015" ] }, { "cell_type": "code", "execution_count": 34, "id": "8c0b3aa2", "metadata": {}, "outputs": [], "source": [ "flt_only_sold = (model['AccountId'] == 'Quantity_sold') # only \"sold\" type records" ] }, { "cell_type": "code", "execution_count": 35, "id": "b1034749", "metadata": {}, "outputs": [], "source": [ "flt_combined = flt_product_pd2 & flt_2015 & flt_only_sold" ] }, { "cell_type": "code", "execution_count": 36, "id": "c4ef1f2e", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
AccountIdAccountDescriptionStoreIdStoreDescriptionLocationIdLocationDescriptionProductIdProductDescriptionSales_ManagerIdSales_ManagerDescriptionDateDATEDateYearDateQuarterDateMonthDateDAYCategoryVersionCategoryCategoryCategoryTypeValueValueUnit
160Quantity_soldQuantity soldST139FarlosCT15EugenePD2PepsiSM11James Frank2015-11-012015201542015111ActualActualspublic6858.0NaN
828Quantity_soldQuantity soldST178Rays Super FoodsCT8Beverly HillsPD2PepsiSM4Nancy Miller2015-08-012015201532015081ActualActualspublic200.0NaN
\n", "
" ], "text/plain": [ " AccountId AccountDescription StoreId StoreDescription LocationId \\\n", "160 Quantity_sold Quantity sold ST139 Farlos CT15 \n", "828 Quantity_sold Quantity sold ST178 Rays Super Foods CT8 \n", "\n", " LocationDescription ProductId ProductDescription Sales_ManagerId \\\n", "160 Eugene PD2 Pepsi SM11 \n", "828 Beverly Hills PD2 Pepsi SM4 \n", "\n", " Sales_ManagerDescription DateDATE DateYear DateQuarter DateMonth \\\n", "160 James Frank 2015-11-01 2015 20154 201511 \n", "828 Nancy Miller 2015-08-01 2015 20153 201508 \n", "\n", " DateDAY CategoryVersion CategoryCategory CategoryType Value ValueUnit \n", "160 1 Actual Actuals public 6858.0 NaN \n", "828 1 Actual Actuals public 200.0 NaN " ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model.loc[flt_combined]" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.7" } }, "nbformat": 4, "nbformat_minor": 5 }