{
"cells": [
{
"cell_type": "markdown",
"id": "2913c9d5",
"metadata": {},
"source": [
"# Some Usecases On A Test Dataset From SAP"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "8f816aaf",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "d14f9562",
"metadata": {},
"outputs": [],
"source": [
"model = pd.read_csv('sap-dataset.csv')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "d3f79565",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" AccountId | \n",
" AccountDescription | \n",
" AccountParentId | \n",
" StoreId | \n",
" StoreDescription | \n",
" LocationId | \n",
" LocationDescription | \n",
" ProductId | \n",
" ProductDescription | \n",
" Sales_ManagerId | \n",
" ... | \n",
" DateDATE | \n",
" DateYear | \n",
" DateQuarter | \n",
" DateMonth | \n",
" DateDAY | \n",
" CategoryVersion | \n",
" CategoryCategory | \n",
" CategoryType | \n",
" Value | \n",
" ValueUnit | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Original_Sales_Price | \n",
" Original Sales Price | \n",
" NaN | \n",
" ST43 | \n",
" franchescini Hand | \n",
" CT14 | \n",
" Salem | \n",
" PD23 | \n",
" IPA | \n",
" SM1 | \n",
" ... | \n",
" 2014-12-01 | \n",
" 2014 | \n",
" 20144 | \n",
" 201412 | \n",
" 1 | \n",
" Actual | \n",
" Actuals | \n",
" public | \n",
" 1779.763910 | \n",
" USD | \n",
"
\n",
" \n",
" 1 | \n",
" Original_Sales_Price | \n",
" Original Sales Price | \n",
" NaN | \n",
" ST35 | \n",
" Quality Shopping | \n",
" CT14 | \n",
" Salem | \n",
" PD2 | \n",
" Pepsi | \n",
" SM4 | \n",
" ... | \n",
" 2014-10-01 | \n",
" 2014 | \n",
" 20144 | \n",
" 201410 | \n",
" 1 | \n",
" Actual | \n",
" Actuals | \n",
" public | \n",
" 130.957113 | \n",
" USD | \n",
"
\n",
" \n",
" 2 | \n",
" Original_Sales_Price | \n",
" Original Sales Price | \n",
" NaN | \n",
" ST98 | \n",
" Walter Store Inc | \n",
" CT14 | \n",
" Salem | \n",
" PD24 | \n",
" Amber | \n",
" SM1 | \n",
" ... | \n",
" 2014-06-01 | \n",
" 2014 | \n",
" 20142 | \n",
" 201406 | \n",
" 1 | \n",
" Actual | \n",
" Actuals | \n",
" public | \n",
" 10226.449395 | \n",
" USD | \n",
"
\n",
" \n",
" 3 | \n",
" Original_Sales_Price | \n",
" Original Sales Price | \n",
" NaN | \n",
" ST188 | \n",
" Arrowhead Corner Store | \n",
" CT1 | \n",
" Los Angeles | \n",
" PD19 | \n",
" Coconut Water | \n",
" SM8 | \n",
" ... | \n",
" 2016-11-01 | \n",
" 2016 | \n",
" 20164 | \n",
" 201611 | \n",
" 1 | \n",
" Actual | \n",
" Actuals | \n",
" public | \n",
" 383.078689 | \n",
" USD | \n",
"
\n",
" \n",
" 4 | \n",
" Original_Sales_Price | \n",
" Original Sales Price | \n",
" NaN | \n",
" ST107 | \n",
" Spire Market | \n",
" CT5 | \n",
" San Jose | \n",
" PD15 | \n",
" Pineapple Juice | \n",
" SM5 | \n",
" ... | \n",
" 2015-12-01 | \n",
" 2015 | \n",
" 20154 | \n",
" 201512 | \n",
" 1 | \n",
" Actual | \n",
" Actuals | \n",
" public | \n",
" 3110.303679 | \n",
" USD | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 1068 | \n",
" Quantity_sold | \n",
" Quantity sold | \n",
" NaN | \n",
" ST4 | \n",
" Gstore | \n",
" CT13 | \n",
" Portland | \n",
" PD26 | \n",
" Red Wine | \n",
" SM4 | \n",
" ... | \n",
" 2015-12-01 | \n",
" 2015 | \n",
" 20154 | \n",
" 201512 | \n",
" 1 | \n",
" Actual | \n",
" Actuals | \n",
" public | \n",
" 250.000000 | \n",
" NaN | \n",
"
\n",
" \n",
" 1069 | \n",
" Original_Sales_Price | \n",
" Original Sales Price | \n",
" NaN | \n",
" ST49 | \n",
" English Bay depot | \n",
" CT4 | \n",
" Sacramento | \n",
" PD27 | \n",
" White Wine | \n",
" SM5 | \n",
" ... | \n",
" 2015-01-01 | \n",
" 2015 | \n",
" 20151 | \n",
" 201501 | \n",
" 1 | \n",
" Actual | \n",
" Actuals | \n",
" public | \n",
" 11348.592904 | \n",
" USD | \n",
"
\n",
" \n",
" 1070 | \n",
" Discount | \n",
" Discount | \n",
" NaN | \n",
" ST36 | \n",
" PriceLow Arena | \n",
" CT10 | \n",
" Reno | \n",
" PD22 | \n",
" Lager | \n",
" SM3 | \n",
" ... | \n",
" 2016-05-01 | \n",
" 2016 | \n",
" 20162 | \n",
" 201605 | \n",
" 1 | \n",
" Actual | \n",
" Actuals | \n",
" public | \n",
" 4351.045830 | \n",
" USD | \n",
"
\n",
" \n",
" 1071 | \n",
" Original_Sales_Price | \n",
" Original Sales Price | \n",
" NaN | \n",
" ST7 | \n",
" Main Street Co | \n",
" CT4 | \n",
" Sacramento | \n",
" PD16 | \n",
" Watermelon Juice | \n",
" SM5 | \n",
" ... | \n",
" 2015-12-01 | \n",
" 2015 | \n",
" 20154 | \n",
" 201512 | \n",
" 1 | \n",
" Actual | \n",
" Actuals | \n",
" public | \n",
" 7905.590606 | \n",
" USD | \n",
"
\n",
" \n",
" 1072 | \n",
" Quantity_sold | \n",
" Quantity sold | \n",
" NaN | \n",
" ST98 | \n",
" Walter Store Inc | \n",
" CT14 | \n",
" Salem | \n",
" PD12 | \n",
" Lemonade | \n",
" SM1 | \n",
" ... | \n",
" 2015-07-01 | \n",
" 2015 | \n",
" 20153 | \n",
" 201507 | \n",
" 1 | \n",
" Actual | \n",
" Actuals | \n",
" public | \n",
" 100.000000 | \n",
" NaN | \n",
"
\n",
" \n",
"
\n",
"
1073 rows × 21 columns
\n",
"
"
],
"text/plain": [
" AccountId AccountDescription AccountParentId StoreId \\\n",
"0 Original_Sales_Price Original Sales Price NaN ST43 \n",
"1 Original_Sales_Price Original Sales Price NaN ST35 \n",
"2 Original_Sales_Price Original Sales Price NaN ST98 \n",
"3 Original_Sales_Price Original Sales Price NaN ST188 \n",
"4 Original_Sales_Price Original Sales Price NaN ST107 \n",
"... ... ... ... ... \n",
"1068 Quantity_sold Quantity sold NaN ST4 \n",
"1069 Original_Sales_Price Original Sales Price NaN ST49 \n",
"1070 Discount Discount NaN ST36 \n",
"1071 Original_Sales_Price Original Sales Price NaN ST7 \n",
"1072 Quantity_sold Quantity sold NaN ST98 \n",
"\n",
" StoreDescription LocationId LocationDescription ProductId \\\n",
"0 franchescini Hand CT14 Salem PD23 \n",
"1 Quality Shopping CT14 Salem PD2 \n",
"2 Walter Store Inc CT14 Salem PD24 \n",
"3 Arrowhead Corner Store CT1 Los Angeles PD19 \n",
"4 Spire Market CT5 San Jose PD15 \n",
"... ... ... ... ... \n",
"1068 Gstore CT13 Portland PD26 \n",
"1069 English Bay depot CT4 Sacramento PD27 \n",
"1070 PriceLow Arena CT10 Reno PD22 \n",
"1071 Main Street Co CT4 Sacramento PD16 \n",
"1072 Walter Store Inc CT14 Salem PD12 \n",
"\n",
" ProductDescription Sales_ManagerId ... DateDATE DateYear \\\n",
"0 IPA SM1 ... 2014-12-01 2014 \n",
"1 Pepsi SM4 ... 2014-10-01 2014 \n",
"2 Amber SM1 ... 2014-06-01 2014 \n",
"3 Coconut Water SM8 ... 2016-11-01 2016 \n",
"4 Pineapple Juice SM5 ... 2015-12-01 2015 \n",
"... ... ... ... ... ... \n",
"1068 Red Wine SM4 ... 2015-12-01 2015 \n",
"1069 White Wine SM5 ... 2015-01-01 2015 \n",
"1070 Lager SM3 ... 2016-05-01 2016 \n",
"1071 Watermelon Juice SM5 ... 2015-12-01 2015 \n",
"1072 Lemonade SM1 ... 2015-07-01 2015 \n",
"\n",
" DateQuarter DateMonth DateDAY CategoryVersion CategoryCategory \\\n",
"0 20144 201412 1 Actual Actuals \n",
"1 20144 201410 1 Actual Actuals \n",
"2 20142 201406 1 Actual Actuals \n",
"3 20164 201611 1 Actual Actuals \n",
"4 20154 201512 1 Actual Actuals \n",
"... ... ... ... ... ... \n",
"1068 20154 201512 1 Actual Actuals \n",
"1069 20151 201501 1 Actual Actuals \n",
"1070 20162 201605 1 Actual Actuals \n",
"1071 20154 201512 1 Actual Actuals \n",
"1072 20153 201507 1 Actual Actuals \n",
"\n",
" CategoryType Value ValueUnit \n",
"0 public 1779.763910 USD \n",
"1 public 130.957113 USD \n",
"2 public 10226.449395 USD \n",
"3 public 383.078689 USD \n",
"4 public 3110.303679 USD \n",
"... ... ... ... \n",
"1068 public 250.000000 NaN \n",
"1069 public 11348.592904 USD \n",
"1070 public 4351.045830 USD \n",
"1071 public 7905.590606 USD \n",
"1072 public 100.000000 NaN \n",
"\n",
"[1073 rows x 21 columns]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model"
]
},
{
"cell_type": "markdown",
"id": "642b7955",
"metadata": {},
"source": [
"## Dropping Columns That Contain Only NaN Values"
]
},
{
"cell_type": "markdown",
"id": "e5e63075",
"metadata": {},
"source": [
"``DataFrame.drop()`` accepts a ``inplace`` parameter.\n",
"\n",
"* When not given, ``drop()`` returns a modified *copy* of the frame\n",
"* When given, it modified the frame in place"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "9e9f22c3",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" AccountId | \n",
" AccountDescription | \n",
" StoreId | \n",
" StoreDescription | \n",
" LocationId | \n",
" LocationDescription | \n",
" ProductId | \n",
" ProductDescription | \n",
" Sales_ManagerId | \n",
" Sales_ManagerDescription | \n",
" DateDATE | \n",
" DateYear | \n",
" DateQuarter | \n",
" DateMonth | \n",
" DateDAY | \n",
" CategoryVersion | \n",
" CategoryCategory | \n",
" CategoryType | \n",
" Value | \n",
" ValueUnit | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Original_Sales_Price | \n",
" Original Sales Price | \n",
" ST43 | \n",
" franchescini Hand | \n",
" CT14 | \n",
" Salem | \n",
" PD23 | \n",
" IPA | \n",
" SM1 | \n",
" Janet Bury | \n",
" 2014-12-01 | \n",
" 2014 | \n",
" 20144 | \n",
" 201412 | \n",
" 1 | \n",
" Actual | \n",
" Actuals | \n",
" public | \n",
" 1779.763910 | \n",
" USD | \n",
"
\n",
" \n",
" 1 | \n",
" Original_Sales_Price | \n",
" Original Sales Price | \n",
" ST35 | \n",
" Quality Shopping | \n",
" CT14 | \n",
" Salem | \n",
" PD2 | \n",
" Pepsi | \n",
" SM4 | \n",
" Nancy Miller | \n",
" 2014-10-01 | \n",
" 2014 | \n",
" 20144 | \n",
" 201410 | \n",
" 1 | \n",
" Actual | \n",
" Actuals | \n",
" public | \n",
" 130.957113 | \n",
" USD | \n",
"
\n",
" \n",
" 2 | \n",
" Original_Sales_Price | \n",
" Original Sales Price | \n",
" ST98 | \n",
" Walter Store Inc | \n",
" CT14 | \n",
" Salem | \n",
" PD24 | \n",
" Amber | \n",
" SM1 | \n",
" Janet Bury | \n",
" 2014-06-01 | \n",
" 2014 | \n",
" 20142 | \n",
" 201406 | \n",
" 1 | \n",
" Actual | \n",
" Actuals | \n",
" public | \n",
" 10226.449395 | \n",
" USD | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" AccountId AccountDescription StoreId StoreDescription \\\n",
"0 Original_Sales_Price Original Sales Price ST43 franchescini Hand \n",
"1 Original_Sales_Price Original Sales Price ST35 Quality Shopping \n",
"2 Original_Sales_Price Original Sales Price ST98 Walter Store Inc \n",
"\n",
" LocationId LocationDescription ProductId ProductDescription Sales_ManagerId \\\n",
"0 CT14 Salem PD23 IPA SM1 \n",
"1 CT14 Salem PD2 Pepsi SM4 \n",
"2 CT14 Salem PD24 Amber SM1 \n",
"\n",
" Sales_ManagerDescription DateDATE DateYear DateQuarter DateMonth \\\n",
"0 Janet Bury 2014-12-01 2014 20144 201412 \n",
"1 Nancy Miller 2014-10-01 2014 20144 201410 \n",
"2 Janet Bury 2014-06-01 2014 20142 201406 \n",
"\n",
" DateDAY CategoryVersion CategoryCategory CategoryType Value \\\n",
"0 1 Actual Actuals public 1779.763910 \n",
"1 1 Actual Actuals public 130.957113 \n",
"2 1 Actual Actuals public 10226.449395 \n",
"\n",
" ValueUnit \n",
"0 USD \n",
"1 USD \n",
"2 USD "
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"modified_model = model.drop(model.columns[model.isna().all()], axis='columns')\n",
"modified_model.head(3)"
]
},
{
"cell_type": "markdown",
"id": "50794e0b",
"metadata": {},
"source": [
"Apparently the ``AccountParentId`` column has been dropped. So how did this work? Lets dissect this setp by step."
]
},
{
"cell_type": "markdown",
"id": "4ddefc0d",
"metadata": {},
"source": [
"### ``DataFrame.isna()``"
]
},
{
"cell_type": "markdown",
"id": "98de2572",
"metadata": {},
"source": [
"``isna()`` on a frame returns an equally shaped matrix, only with ``bool`` values representing the ``Nan``-ness of the respective field in the original frame"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "7612fda7",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" AccountId | \n",
" AccountDescription | \n",
" AccountParentId | \n",
" StoreId | \n",
" StoreDescription | \n",
" LocationId | \n",
" LocationDescription | \n",
" ProductId | \n",
" ProductDescription | \n",
" Sales_ManagerId | \n",
" ... | \n",
" DateDATE | \n",
" DateYear | \n",
" DateQuarter | \n",
" DateMonth | \n",
" DateDAY | \n",
" CategoryVersion | \n",
" CategoryCategory | \n",
" CategoryType | \n",
" Value | \n",
" ValueUnit | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" False | \n",
" False | \n",
" True | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" ... | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
"
\n",
" \n",
" 1 | \n",
" False | \n",
" False | \n",
" True | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" ... | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
"
\n",
" \n",
" 2 | \n",
" False | \n",
" False | \n",
" True | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" ... | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
"
\n",
" \n",
" 3 | \n",
" False | \n",
" False | \n",
" True | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" ... | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
"
\n",
" \n",
" 4 | \n",
" False | \n",
" False | \n",
" True | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" ... | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
"
\n",
" \n",
"
\n",
"
5 rows × 21 columns
\n",
"
"
],
"text/plain": [
" AccountId AccountDescription AccountParentId StoreId StoreDescription \\\n",
"0 False False True False False \n",
"1 False False True False False \n",
"2 False False True False False \n",
"3 False False True False False \n",
"4 False False True False False \n",
"\n",
" LocationId LocationDescription ProductId ProductDescription \\\n",
"0 False False False False \n",
"1 False False False False \n",
"2 False False False False \n",
"3 False False False False \n",
"4 False False False False \n",
"\n",
" Sales_ManagerId ... DateDATE DateYear DateQuarter DateMonth DateDAY \\\n",
"0 False ... False False False False False \n",
"1 False ... False False False False False \n",
"2 False ... False False False False False \n",
"3 False ... False False False False False \n",
"4 False ... False False False False False \n",
"\n",
" CategoryVersion CategoryCategory CategoryType Value ValueUnit \n",
"0 False False False False False \n",
"1 False False False False False \n",
"2 False False False False False \n",
"3 False False False False False \n",
"4 False False False False False \n",
"\n",
"[5 rows x 21 columns]"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"nan_fields = model.isna()\n",
"nan_fields.head(5)"
]
},
{
"cell_type": "markdown",
"id": "09ec3fa7",
"metadata": {},
"source": [
"### ``DataFrame.all()`` (and ``any()``)"
]
},
{
"cell_type": "markdown",
"id": "dab18bde",
"metadata": {},
"source": [
"Returns a bitfield (err, a ``bool`` type ``Series`` object) to indicate whether *all* elements are ``True``. By default, it operates on *columns* - *all* elements of a *column* are ``True``."
]
},
{
"cell_type": "markdown",
"id": "86036453",
"metadata": {},
"source": [
"The companion method, ``any()``, indicates whether there is *at least one* element whose value is ``True``. "
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "ecc29f7e",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"AccountId False\n",
"AccountDescription False\n",
"AccountParentId True\n",
"StoreId False\n",
"StoreDescription False\n",
"LocationId False\n",
"LocationDescription False\n",
"ProductId False\n",
"ProductDescription False\n",
"Sales_ManagerId False\n",
"Sales_ManagerDescription False\n",
"DateDATE False\n",
"DateYear False\n",
"DateQuarter False\n",
"DateMonth False\n",
"DateDAY False\n",
"CategoryVersion False\n",
"CategoryCategory False\n",
"CategoryType False\n",
"Value False\n",
"ValueUnit False\n",
"dtype: bool"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"nan_fields.all()"
]
},
{
"cell_type": "markdown",
"id": "46a01112",
"metadata": {},
"source": [
"### Column Selection"
]
},
{
"cell_type": "markdown",
"id": "ab4137e0",
"metadata": {},
"source": [
"A bitfied applied to a ``Series`` (using the ``[]`` operator of ``Series``) selects the elements.\n",
"jjj link to **filters**"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "27492288",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['AccountId', 'AccountDescription', 'AccountParentId', 'StoreId',\n",
" 'StoreDescription', 'LocationId', 'LocationDescription', 'ProductId',\n",
" 'ProductDescription', 'Sales_ManagerId', 'Sales_ManagerDescription',\n",
" 'DateDATE', 'DateYear', 'DateQuarter', 'DateMonth', 'DateDAY',\n",
" 'CategoryVersion', 'CategoryCategory', 'CategoryType', 'Value',\n",
" 'ValueUnit'],\n",
" dtype='object')"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model.columns"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "bf7c6719",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['AccountParentId'], dtype='object')"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"nan_columns = model.columns[model.isna().all()]\n",
"nan_columns"
]
},
{
"cell_type": "markdown",
"id": "be032616",
"metadata": {},
"source": [
"### Dropping Columns"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "2a65f5ae",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" AccountId | \n",
" AccountDescription | \n",
" StoreId | \n",
" StoreDescription | \n",
" LocationId | \n",
" LocationDescription | \n",
" ProductId | \n",
" ProductDescription | \n",
" Sales_ManagerId | \n",
" Sales_ManagerDescription | \n",
" DateDATE | \n",
" DateYear | \n",
" DateQuarter | \n",
" DateMonth | \n",
" DateDAY | \n",
" CategoryVersion | \n",
" CategoryCategory | \n",
" CategoryType | \n",
" Value | \n",
" ValueUnit | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Original_Sales_Price | \n",
" Original Sales Price | \n",
" ST43 | \n",
" franchescini Hand | \n",
" CT14 | \n",
" Salem | \n",
" PD23 | \n",
" IPA | \n",
" SM1 | \n",
" Janet Bury | \n",
" 2014-12-01 | \n",
" 2014 | \n",
" 20144 | \n",
" 201412 | \n",
" 1 | \n",
" Actual | \n",
" Actuals | \n",
" public | \n",
" 1779.763910 | \n",
" USD | \n",
"
\n",
" \n",
" 1 | \n",
" Original_Sales_Price | \n",
" Original Sales Price | \n",
" ST35 | \n",
" Quality Shopping | \n",
" CT14 | \n",
" Salem | \n",
" PD2 | \n",
" Pepsi | \n",
" SM4 | \n",
" Nancy Miller | \n",
" 2014-10-01 | \n",
" 2014 | \n",
" 20144 | \n",
" 201410 | \n",
" 1 | \n",
" Actual | \n",
" Actuals | \n",
" public | \n",
" 130.957113 | \n",
" USD | \n",
"
\n",
" \n",
" 2 | \n",
" Original_Sales_Price | \n",
" Original Sales Price | \n",
" ST98 | \n",
" Walter Store Inc | \n",
" CT14 | \n",
" Salem | \n",
" PD24 | \n",
" Amber | \n",
" SM1 | \n",
" Janet Bury | \n",
" 2014-06-01 | \n",
" 2014 | \n",
" 20142 | \n",
" 201406 | \n",
" 1 | \n",
" Actual | \n",
" Actuals | \n",
" public | \n",
" 10226.449395 | \n",
" USD | \n",
"
\n",
" \n",
" 3 | \n",
" Original_Sales_Price | \n",
" Original Sales Price | \n",
" ST188 | \n",
" Arrowhead Corner Store | \n",
" CT1 | \n",
" Los Angeles | \n",
" PD19 | \n",
" Coconut Water | \n",
" SM8 | \n",
" Gabriel Walton | \n",
" 2016-11-01 | \n",
" 2016 | \n",
" 20164 | \n",
" 201611 | \n",
" 1 | \n",
" Actual | \n",
" Actuals | \n",
" public | \n",
" 383.078689 | \n",
" USD | \n",
"
\n",
" \n",
" 4 | \n",
" Original_Sales_Price | \n",
" Original Sales Price | \n",
" ST107 | \n",
" Spire Market | \n",
" CT5 | \n",
" San Jose | \n",
" PD15 | \n",
" Pineapple Juice | \n",
" SM5 | \n",
" David Carl | \n",
" 2015-12-01 | \n",
" 2015 | \n",
" 20154 | \n",
" 201512 | \n",
" 1 | \n",
" Actual | \n",
" Actuals | \n",
" public | \n",
" 3110.303679 | \n",
" USD | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" AccountId AccountDescription StoreId StoreDescription \\\n",
"0 Original_Sales_Price Original Sales Price ST43 franchescini Hand \n",
"1 Original_Sales_Price Original Sales Price ST35 Quality Shopping \n",
"2 Original_Sales_Price Original Sales Price ST98 Walter Store Inc \n",
"3 Original_Sales_Price Original Sales Price ST188 Arrowhead Corner Store \n",
"4 Original_Sales_Price Original Sales Price ST107 Spire Market \n",
"\n",
" LocationId LocationDescription ProductId ProductDescription Sales_ManagerId \\\n",
"0 CT14 Salem PD23 IPA SM1 \n",
"1 CT14 Salem PD2 Pepsi SM4 \n",
"2 CT14 Salem PD24 Amber SM1 \n",
"3 CT1 Los Angeles PD19 Coconut Water SM8 \n",
"4 CT5 San Jose PD15 Pineapple Juice SM5 \n",
"\n",
" Sales_ManagerDescription DateDATE DateYear DateQuarter DateMonth \\\n",
"0 Janet Bury 2014-12-01 2014 20144 201412 \n",
"1 Nancy Miller 2014-10-01 2014 20144 201410 \n",
"2 Janet Bury 2014-06-01 2014 20142 201406 \n",
"3 Gabriel Walton 2016-11-01 2016 20164 201611 \n",
"4 David Carl 2015-12-01 2015 20154 201512 \n",
"\n",
" DateDAY CategoryVersion CategoryCategory CategoryType Value \\\n",
"0 1 Actual Actuals public 1779.763910 \n",
"1 1 Actual Actuals public 130.957113 \n",
"2 1 Actual Actuals public 10226.449395 \n",
"3 1 Actual Actuals public 383.078689 \n",
"4 1 Actual Actuals public 3110.303679 \n",
"\n",
" ValueUnit \n",
"0 USD \n",
"1 USD \n",
"2 USD \n",
"3 USD \n",
"4 USD "
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"modified_model = model.drop(model.columns[model.isna().all()], axis='columns')\n",
"modified_model.head(5)"
]
},
{
"cell_type": "markdown",
"id": "29f52d3a",
"metadata": {},
"source": [
"**Now for real:**"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "bb7791df",
"metadata": {},
"outputs": [],
"source": [
"model.drop(model.columns[model.isna().all()], axis='columns', inplace=True)"
]
},
{
"cell_type": "markdown",
"id": "8724ae31",
"metadata": {},
"source": [
"## Fixing Date Values/Columns, And Operating With Date And Time"
]
},
{
"cell_type": "markdown",
"id": "5b2fe935",
"metadata": {},
"source": [
"The ``DateDATE`` column contains string values. Convert it to contain real timestamps that one can work with, numerically (e.g. add and subtract)."
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "fca4bd05",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 2014-12-01\n",
"1 2014-10-01\n",
"2 2014-06-01\n",
"3 2016-11-01\n",
"4 2015-12-01\n",
" ... \n",
"1068 2015-12-01\n",
"1069 2015-01-01\n",
"1070 2016-05-01\n",
"1071 2015-12-01\n",
"1072 2015-07-01\n",
"Name: DateDATE, Length: 1073, dtype: object"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model['DateDATE']"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "ffecd01b",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'2014-12-01'"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model['DateDATE'].iloc[0]"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "da3d44d5",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"str"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"type(model['DateDATE'].iloc[0])"
]
},
{
"cell_type": "markdown",
"id": "012d7b2a",
"metadata": {},
"source": [
"### ``pandas.to_datetime()``"
]
},
{
"cell_type": "markdown",
"id": "669510fd",
"metadata": {},
"source": [
"Converts anything to a working timestamp value (or a vectorized version thereof): a ``str``, a ``Series``, ..."
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "99c1625f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Timestamp('2023-03-13 13:02:01')"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ts = pd.to_datetime('2023-03-13 13:02:01') # str\n",
"ts"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "b4b4dc84",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Timestamp('2023-03-13 00:00:00')"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.to_datetime('2023-03-13') # str"
]
},
{
"cell_type": "markdown",
"id": "41bbb578",
"metadata": {},
"source": [
"``pandas.Timestamp`` is compatible with ``datetime.datetime`` jjjjj linke to both"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "1b3cf134",
"metadata": {},
"outputs": [],
"source": [
"import datetime"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "fc2ec4e4",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"datetime.datetime(2016, 3, 13, 0, 0)"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ref_ts = datetime.datetime(year=2016, month=3, day=13)\n",
"ref_ts"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "f452a153",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"False"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ts < ref_ts"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "42f891b9",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ts > ref_ts"
]
},
{
"cell_type": "markdown",
"id": "16ef3cfe",
"metadata": {},
"source": [
"### Converting The Entire ``DateDATE`` Column To A Workable Type (And Working With It)"
]
},
{
"cell_type": "markdown",
"id": "73cdb433",
"metadata": {},
"source": [
"The following converts a ``Series`` object (of ``str``) to a ``Series`` object. That contains ``numpy.datetime64`` objects - which in turn go well together with any of ``pandas.Timestamp`` and ``datetime``."
]
},
{
"cell_type": "markdown",
"id": "88ebbe41",
"metadata": {},
"source": [
"#### Conversion"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "2cca31c4",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 2014-12-01\n",
"1 2014-10-01\n",
"2 2014-06-01\n",
"3 2016-11-01\n",
"4 2015-12-01\n",
" ... \n",
"1068 2015-12-01\n",
"1069 2015-01-01\n",
"1070 2016-05-01\n",
"1071 2015-12-01\n",
"1072 2015-07-01\n",
"Name: DateDATE, Length: 1073, dtype: datetime64[ns]"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.to_datetime(model['DateDATE'])"
]
},
{
"cell_type": "markdown",
"id": "1d736699",
"metadata": {},
"source": [
"Long story short: convert damn column!"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "c605814e",
"metadata": {},
"outputs": [],
"source": [
"model['DateDATE'] = pd.to_datetime(model['DateDATE'])"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "f18d72f1",
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" AccountId | \n",
" AccountDescription | \n",
" StoreId | \n",
" StoreDescription | \n",
" LocationId | \n",
" LocationDescription | \n",
" ProductId | \n",
" ProductDescription | \n",
" Sales_ManagerId | \n",
" Sales_ManagerDescription | \n",
" DateDATE | \n",
" DateYear | \n",
" DateQuarter | \n",
" DateMonth | \n",
" DateDAY | \n",
" CategoryVersion | \n",
" CategoryCategory | \n",
" CategoryType | \n",
" Value | \n",
" ValueUnit | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Original_Sales_Price | \n",
" Original Sales Price | \n",
" ST43 | \n",
" franchescini Hand | \n",
" CT14 | \n",
" Salem | \n",
" PD23 | \n",
" IPA | \n",
" SM1 | \n",
" Janet Bury | \n",
" 2014-12-01 | \n",
" 2014 | \n",
" 20144 | \n",
" 201412 | \n",
" 1 | \n",
" Actual | \n",
" Actuals | \n",
" public | \n",
" 1779.763910 | \n",
" USD | \n",
"
\n",
" \n",
" 1 | \n",
" Original_Sales_Price | \n",
" Original Sales Price | \n",
" ST35 | \n",
" Quality Shopping | \n",
" CT14 | \n",
" Salem | \n",
" PD2 | \n",
" Pepsi | \n",
" SM4 | \n",
" Nancy Miller | \n",
" 2014-10-01 | \n",
" 2014 | \n",
" 20144 | \n",
" 201410 | \n",
" 1 | \n",
" Actual | \n",
" Actuals | \n",
" public | \n",
" 130.957113 | \n",
" USD | \n",
"
\n",
" \n",
" 2 | \n",
" Original_Sales_Price | \n",
" Original Sales Price | \n",
" ST98 | \n",
" Walter Store Inc | \n",
" CT14 | \n",
" Salem | \n",
" PD24 | \n",
" Amber | \n",
" SM1 | \n",
" Janet Bury | \n",
" 2014-06-01 | \n",
" 2014 | \n",
" 20142 | \n",
" 201406 | \n",
" 1 | \n",
" Actual | \n",
" Actuals | \n",
" public | \n",
" 10226.449395 | \n",
" USD | \n",
"
\n",
" \n",
" 3 | \n",
" Original_Sales_Price | \n",
" Original Sales Price | \n",
" ST188 | \n",
" Arrowhead Corner Store | \n",
" CT1 | \n",
" Los Angeles | \n",
" PD19 | \n",
" Coconut Water | \n",
" SM8 | \n",
" Gabriel Walton | \n",
" 2016-11-01 | \n",
" 2016 | \n",
" 20164 | \n",
" 201611 | \n",
" 1 | \n",
" Actual | \n",
" Actuals | \n",
" public | \n",
" 383.078689 | \n",
" USD | \n",
"
\n",
" \n",
" 4 | \n",
" Original_Sales_Price | \n",
" Original Sales Price | \n",
" ST107 | \n",
" Spire Market | \n",
" CT5 | \n",
" San Jose | \n",
" PD15 | \n",
" Pineapple Juice | \n",
" SM5 | \n",
" David Carl | \n",
" 2015-12-01 | \n",
" 2015 | \n",
" 20154 | \n",
" 201512 | \n",
" 1 | \n",
" Actual | \n",
" Actuals | \n",
" public | \n",
" 3110.303679 | \n",
" USD | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" AccountId AccountDescription StoreId StoreDescription \\\n",
"0 Original_Sales_Price Original Sales Price ST43 franchescini Hand \n",
"1 Original_Sales_Price Original Sales Price ST35 Quality Shopping \n",
"2 Original_Sales_Price Original Sales Price ST98 Walter Store Inc \n",
"3 Original_Sales_Price Original Sales Price ST188 Arrowhead Corner Store \n",
"4 Original_Sales_Price Original Sales Price ST107 Spire Market \n",
"\n",
" LocationId LocationDescription ProductId ProductDescription Sales_ManagerId \\\n",
"0 CT14 Salem PD23 IPA SM1 \n",
"1 CT14 Salem PD2 Pepsi SM4 \n",
"2 CT14 Salem PD24 Amber SM1 \n",
"3 CT1 Los Angeles PD19 Coconut Water SM8 \n",
"4 CT5 San Jose PD15 Pineapple Juice SM5 \n",
"\n",
" Sales_ManagerDescription DateDATE DateYear DateQuarter DateMonth \\\n",
"0 Janet Bury 2014-12-01 2014 20144 201412 \n",
"1 Nancy Miller 2014-10-01 2014 20144 201410 \n",
"2 Janet Bury 2014-06-01 2014 20142 201406 \n",
"3 Gabriel Walton 2016-11-01 2016 20164 201611 \n",
"4 David Carl 2015-12-01 2015 20154 201512 \n",
"\n",
" DateDAY CategoryVersion CategoryCategory CategoryType Value \\\n",
"0 1 Actual Actuals public 1779.763910 \n",
"1 1 Actual Actuals public 130.957113 \n",
"2 1 Actual Actuals public 10226.449395 \n",
"3 1 Actual Actuals public 383.078689 \n",
"4 1 Actual Actuals public 3110.303679 \n",
"\n",
" ValueUnit \n",
"0 USD \n",
"1 USD \n",
"2 USD \n",
"3 USD \n",
"4 USD "
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model.head(5)"
]
},
{
"cell_type": "markdown",
"id": "63be6668",
"metadata": {},
"source": [
"#### Usage: A Filter"
]
},
{
"cell_type": "markdown",
"id": "d3a5fa29",
"metadata": {},
"source": [
"Now we are in a position to define a filter that works with *time* (and not with *strings*)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "21690a38",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Timestamp('2015-12-04 00:00:00')"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ref_2015_12_04 = pd.to_datetime('2015-12-04') # or use datetime.datetime()\n",
"ref_2015_12_04"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "afe633ab",
"metadata": {},
"outputs": [],
"source": [
"flt_before_2015_12_04 = model['DateDATE'] < ref_2015_12_04"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "13d79149",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 True\n",
"1 True\n",
"2 True\n",
"3 False\n",
"4 True\n",
" ... \n",
"1068 True\n",
"1069 True\n",
"1070 False\n",
"1071 True\n",
"1072 True\n",
"Name: DateDATE, Length: 1073, dtype: bool"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"flt_before_2015_12_04"
]
},
{
"cell_type": "markdown",
"id": "a79c2159",
"metadata": {},
"source": [
"Select row based on filter"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "44c1481d",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" AccountId | \n",
" AccountDescription | \n",
" StoreId | \n",
" StoreDescription | \n",
" LocationId | \n",
" LocationDescription | \n",
" ProductId | \n",
" ProductDescription | \n",
" Sales_ManagerId | \n",
" Sales_ManagerDescription | \n",
" DateDATE | \n",
" DateYear | \n",
" DateQuarter | \n",
" DateMonth | \n",
" DateDAY | \n",
" CategoryVersion | \n",
" CategoryCategory | \n",
" CategoryType | \n",
" Value | \n",
" ValueUnit | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Original_Sales_Price | \n",
" Original Sales Price | \n",
" ST43 | \n",
" franchescini Hand | \n",
" CT14 | \n",
" Salem | \n",
" PD23 | \n",
" IPA | \n",
" SM1 | \n",
" Janet Bury | \n",
" 2014-12-01 | \n",
" 2014 | \n",
" 20144 | \n",
" 201412 | \n",
" 1 | \n",
" Actual | \n",
" Actuals | \n",
" public | \n",
" 1779.763910 | \n",
" USD | \n",
"
\n",
" \n",
" 1 | \n",
" Original_Sales_Price | \n",
" Original Sales Price | \n",
" ST35 | \n",
" Quality Shopping | \n",
" CT14 | \n",
" Salem | \n",
" PD2 | \n",
" Pepsi | \n",
" SM4 | \n",
" Nancy Miller | \n",
" 2014-10-01 | \n",
" 2014 | \n",
" 20144 | \n",
" 201410 | \n",
" 1 | \n",
" Actual | \n",
" Actuals | \n",
" public | \n",
" 130.957113 | \n",
" USD | \n",
"
\n",
" \n",
" 2 | \n",
" Original_Sales_Price | \n",
" Original Sales Price | \n",
" ST98 | \n",
" Walter Store Inc | \n",
" CT14 | \n",
" Salem | \n",
" PD24 | \n",
" Amber | \n",
" SM1 | \n",
" Janet Bury | \n",
" 2014-06-01 | \n",
" 2014 | \n",
" 20142 | \n",
" 201406 | \n",
" 1 | \n",
" Actual | \n",
" Actuals | \n",
" public | \n",
" 10226.449395 | \n",
" USD | \n",
"
\n",
" \n",
" 4 | \n",
" Original_Sales_Price | \n",
" Original Sales Price | \n",
" ST107 | \n",
" Spire Market | \n",
" CT5 | \n",
" San Jose | \n",
" PD15 | \n",
" Pineapple Juice | \n",
" SM5 | \n",
" David Carl | \n",
" 2015-12-01 | \n",
" 2015 | \n",
" 20154 | \n",
" 201512 | \n",
" 1 | \n",
" Actual | \n",
" Actuals | \n",
" public | \n",
" 3110.303679 | \n",
" USD | \n",
"
\n",
" \n",
" 6 | \n",
" Original_Sales_Price | \n",
" Original Sales Price | \n",
" ST119 | \n",
" Pay N Go | \n",
" CT2 | \n",
" San Francisco | \n",
" PD11 | \n",
" Orange no pulp | \n",
" SM7 | \n",
" Kiran Raj | \n",
" 2015-03-01 | \n",
" 2015 | \n",
" 20151 | \n",
" 201503 | \n",
" 1 | \n",
" Actual | \n",
" Actuals | \n",
" public | \n",
" 548.226847 | \n",
" USD | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" AccountId AccountDescription StoreId StoreDescription \\\n",
"0 Original_Sales_Price Original Sales Price ST43 franchescini Hand \n",
"1 Original_Sales_Price Original Sales Price ST35 Quality Shopping \n",
"2 Original_Sales_Price Original Sales Price ST98 Walter Store Inc \n",
"4 Original_Sales_Price Original Sales Price ST107 Spire Market \n",
"6 Original_Sales_Price Original Sales Price ST119 Pay N Go \n",
"\n",
" LocationId LocationDescription ProductId ProductDescription Sales_ManagerId \\\n",
"0 CT14 Salem PD23 IPA SM1 \n",
"1 CT14 Salem PD2 Pepsi SM4 \n",
"2 CT14 Salem PD24 Amber SM1 \n",
"4 CT5 San Jose PD15 Pineapple Juice SM5 \n",
"6 CT2 San Francisco PD11 Orange no pulp SM7 \n",
"\n",
" Sales_ManagerDescription DateDATE DateYear DateQuarter DateMonth \\\n",
"0 Janet Bury 2014-12-01 2014 20144 201412 \n",
"1 Nancy Miller 2014-10-01 2014 20144 201410 \n",
"2 Janet Bury 2014-06-01 2014 20142 201406 \n",
"4 David Carl 2015-12-01 2015 20154 201512 \n",
"6 Kiran Raj 2015-03-01 2015 20151 201503 \n",
"\n",
" DateDAY CategoryVersion CategoryCategory CategoryType Value \\\n",
"0 1 Actual Actuals public 1779.763910 \n",
"1 1 Actual Actuals public 130.957113 \n",
"2 1 Actual Actuals public 10226.449395 \n",
"4 1 Actual Actuals public 3110.303679 \n",
"6 1 Actual Actuals public 548.226847 \n",
"\n",
" ValueUnit \n",
"0 USD \n",
"1 USD \n",
"2 USD \n",
"4 USD \n",
"6 USD "
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"rows_before_2015_12_04 = model.loc[flt_before_2015_12_04]\n",
"rows_before_2015_12_04.head(5)"
]
},
{
"cell_type": "markdown",
"id": "f7ab4afb",
"metadata": {},
"source": [
"## Modify Rows: Increase All Prices Of One Particular Store By 10%"
]
},
{
"cell_type": "markdown",
"id": "cc41d5ee",
"metadata": {},
"source": [
"Let store be ``ST43``, and increment ``Value`` by 10%"
]
},
{
"cell_type": "markdown",
"id": "b7dab77f",
"metadata": {},
"source": [
"See how many different products the store has ..."
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "153c1e79",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"False 1069\n",
"True 4\n",
"Name: StoreId, dtype: int64"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"(model['StoreId'] == 'ST43').value_counts()"
]
},
{
"cell_type": "markdown",
"id": "e5e67e15",
"metadata": {},
"source": [
"Select prices (column ``Value``) of those"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "dda0ccb4",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 1779.763910\n",
"465 13627.793342\n",
"782 3427.896898\n",
"917 15569.353983\n",
"Name: Value, dtype: float64"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model.loc[model['StoreId'] == 'ST43', 'Value']"
]
},
{
"cell_type": "markdown",
"id": "7c5b425f",
"metadata": {},
"source": [
"Create a ``Series`` which contains increases prices"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "eb47c1cf",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 1957.740301\n",
"465 14990.572676\n",
"782 3770.686587\n",
"917 17126.289381\n",
"Name: Value, dtype: float64"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model.loc[model['StoreId'] == 'ST43', 'Value'] * 1.1"
]
},
{
"cell_type": "markdown",
"id": "aa043e2a",
"metadata": {},
"source": [
"Assign increased prices back into frame"
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "0f3ff18f",
"metadata": {},
"outputs": [],
"source": [
"model.loc[model['StoreId'] == 'ST43', 'Value'] = model.loc[model['StoreId'] == 'ST43', 'Value'] * 1.1"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "4d699670",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" AccountId | \n",
" AccountDescription | \n",
" StoreId | \n",
" StoreDescription | \n",
" LocationId | \n",
" LocationDescription | \n",
" ProductId | \n",
" ProductDescription | \n",
" Sales_ManagerId | \n",
" Sales_ManagerDescription | \n",
" DateDATE | \n",
" DateYear | \n",
" DateQuarter | \n",
" DateMonth | \n",
" DateDAY | \n",
" CategoryVersion | \n",
" CategoryCategory | \n",
" CategoryType | \n",
" Value | \n",
" ValueUnit | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Original_Sales_Price | \n",
" Original Sales Price | \n",
" ST43 | \n",
" franchescini Hand | \n",
" CT14 | \n",
" Salem | \n",
" PD23 | \n",
" IPA | \n",
" SM1 | \n",
" Janet Bury | \n",
" 2014-12-01 | \n",
" 2014 | \n",
" 20144 | \n",
" 201412 | \n",
" 1 | \n",
" Actual | \n",
" Actuals | \n",
" public | \n",
" 1957.740301 | \n",
" USD | \n",
"
\n",
" \n",
" 1 | \n",
" Original_Sales_Price | \n",
" Original Sales Price | \n",
" ST35 | \n",
" Quality Shopping | \n",
" CT14 | \n",
" Salem | \n",
" PD2 | \n",
" Pepsi | \n",
" SM4 | \n",
" Nancy Miller | \n",
" 2014-10-01 | \n",
" 2014 | \n",
" 20144 | \n",
" 201410 | \n",
" 1 | \n",
" Actual | \n",
" Actuals | \n",
" public | \n",
" 130.957113 | \n",
" USD | \n",
"
\n",
" \n",
" 2 | \n",
" Original_Sales_Price | \n",
" Original Sales Price | \n",
" ST98 | \n",
" Walter Store Inc | \n",
" CT14 | \n",
" Salem | \n",
" PD24 | \n",
" Amber | \n",
" SM1 | \n",
" Janet Bury | \n",
" 2014-06-01 | \n",
" 2014 | \n",
" 20142 | \n",
" 201406 | \n",
" 1 | \n",
" Actual | \n",
" Actuals | \n",
" public | \n",
" 10226.449395 | \n",
" USD | \n",
"
\n",
" \n",
" 3 | \n",
" Original_Sales_Price | \n",
" Original Sales Price | \n",
" ST188 | \n",
" Arrowhead Corner Store | \n",
" CT1 | \n",
" Los Angeles | \n",
" PD19 | \n",
" Coconut Water | \n",
" SM8 | \n",
" Gabriel Walton | \n",
" 2016-11-01 | \n",
" 2016 | \n",
" 20164 | \n",
" 201611 | \n",
" 1 | \n",
" Actual | \n",
" Actuals | \n",
" public | \n",
" 383.078689 | \n",
" USD | \n",
"
\n",
" \n",
" 4 | \n",
" Original_Sales_Price | \n",
" Original Sales Price | \n",
" ST107 | \n",
" Spire Market | \n",
" CT5 | \n",
" San Jose | \n",
" PD15 | \n",
" Pineapple Juice | \n",
" SM5 | \n",
" David Carl | \n",
" 2015-12-01 | \n",
" 2015 | \n",
" 20154 | \n",
" 201512 | \n",
" 1 | \n",
" Actual | \n",
" Actuals | \n",
" public | \n",
" 3110.303679 | \n",
" USD | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" AccountId AccountDescription StoreId StoreDescription \\\n",
"0 Original_Sales_Price Original Sales Price ST43 franchescini Hand \n",
"1 Original_Sales_Price Original Sales Price ST35 Quality Shopping \n",
"2 Original_Sales_Price Original Sales Price ST98 Walter Store Inc \n",
"3 Original_Sales_Price Original Sales Price ST188 Arrowhead Corner Store \n",
"4 Original_Sales_Price Original Sales Price ST107 Spire Market \n",
"\n",
" LocationId LocationDescription ProductId ProductDescription Sales_ManagerId \\\n",
"0 CT14 Salem PD23 IPA SM1 \n",
"1 CT14 Salem PD2 Pepsi SM4 \n",
"2 CT14 Salem PD24 Amber SM1 \n",
"3 CT1 Los Angeles PD19 Coconut Water SM8 \n",
"4 CT5 San Jose PD15 Pineapple Juice SM5 \n",
"\n",
" Sales_ManagerDescription DateDATE DateYear DateQuarter DateMonth \\\n",
"0 Janet Bury 2014-12-01 2014 20144 201412 \n",
"1 Nancy Miller 2014-10-01 2014 20144 201410 \n",
"2 Janet Bury 2014-06-01 2014 20142 201406 \n",
"3 Gabriel Walton 2016-11-01 2016 20164 201611 \n",
"4 David Carl 2015-12-01 2015 20154 201512 \n",
"\n",
" DateDAY CategoryVersion CategoryCategory CategoryType Value \\\n",
"0 1 Actual Actuals public 1957.740301 \n",
"1 1 Actual Actuals public 130.957113 \n",
"2 1 Actual Actuals public 10226.449395 \n",
"3 1 Actual Actuals public 383.078689 \n",
"4 1 Actual Actuals public 3110.303679 \n",
"\n",
" ValueUnit \n",
"0 USD \n",
"1 USD \n",
"2 USD \n",
"3 USD \n",
"4 USD "
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model.head(5)"
]
},
{
"cell_type": "markdown",
"id": "9a8ea240",
"metadata": {},
"source": [
"## Filter Readability: Combining Filters"
]
},
{
"cell_type": "code",
"execution_count": 32,
"id": "989e17e4",
"metadata": {},
"outputs": [],
"source": [
"flt_product_pd2 = (model['ProductId'] == 'PD2') # only product PD2"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "92f4d07c",
"metadata": {},
"outputs": [],
"source": [
"flt_2015 = (model['DateDATE'] < pd.to_datetime('2016')) & \\\n",
" (model['DateDATE'] >= pd.to_datetime('2015')) # only rows from 2015"
]
},
{
"cell_type": "code",
"execution_count": 34,
"id": "8c0b3aa2",
"metadata": {},
"outputs": [],
"source": [
"flt_only_sold = (model['AccountId'] == 'Quantity_sold') # only \"sold\" type records"
]
},
{
"cell_type": "code",
"execution_count": 35,
"id": "b1034749",
"metadata": {},
"outputs": [],
"source": [
"flt_combined = flt_product_pd2 & flt_2015 & flt_only_sold"
]
},
{
"cell_type": "code",
"execution_count": 36,
"id": "c4ef1f2e",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" AccountId | \n",
" AccountDescription | \n",
" StoreId | \n",
" StoreDescription | \n",
" LocationId | \n",
" LocationDescription | \n",
" ProductId | \n",
" ProductDescription | \n",
" Sales_ManagerId | \n",
" Sales_ManagerDescription | \n",
" DateDATE | \n",
" DateYear | \n",
" DateQuarter | \n",
" DateMonth | \n",
" DateDAY | \n",
" CategoryVersion | \n",
" CategoryCategory | \n",
" CategoryType | \n",
" Value | \n",
" ValueUnit | \n",
"
\n",
" \n",
" \n",
" \n",
" 160 | \n",
" Quantity_sold | \n",
" Quantity sold | \n",
" ST139 | \n",
" Farlos | \n",
" CT15 | \n",
" Eugene | \n",
" PD2 | \n",
" Pepsi | \n",
" SM11 | \n",
" James Frank | \n",
" 2015-11-01 | \n",
" 2015 | \n",
" 20154 | \n",
" 201511 | \n",
" 1 | \n",
" Actual | \n",
" Actuals | \n",
" public | \n",
" 6858.0 | \n",
" NaN | \n",
"
\n",
" \n",
" 828 | \n",
" Quantity_sold | \n",
" Quantity sold | \n",
" ST178 | \n",
" Rays Super Foods | \n",
" CT8 | \n",
" Beverly Hills | \n",
" PD2 | \n",
" Pepsi | \n",
" SM4 | \n",
" Nancy Miller | \n",
" 2015-08-01 | \n",
" 2015 | \n",
" 20153 | \n",
" 201508 | \n",
" 1 | \n",
" Actual | \n",
" Actuals | \n",
" public | \n",
" 200.0 | \n",
" NaN | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" AccountId AccountDescription StoreId StoreDescription LocationId \\\n",
"160 Quantity_sold Quantity sold ST139 Farlos CT15 \n",
"828 Quantity_sold Quantity sold ST178 Rays Super Foods CT8 \n",
"\n",
" LocationDescription ProductId ProductDescription Sales_ManagerId \\\n",
"160 Eugene PD2 Pepsi SM11 \n",
"828 Beverly Hills PD2 Pepsi SM4 \n",
"\n",
" Sales_ManagerDescription DateDATE DateYear DateQuarter DateMonth \\\n",
"160 James Frank 2015-11-01 2015 20154 201511 \n",
"828 Nancy Miller 2015-08-01 2015 20153 201508 \n",
"\n",
" DateDAY CategoryVersion CategoryCategory CategoryType Value ValueUnit \n",
"160 1 Actual Actuals public 6858.0 NaN \n",
"828 1 Actual Actuals public 200.0 NaN "
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model.loc[flt_combined]"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}