commit bdad942fa806d9869cf30212899f0b35bb0f49e5 Author: Gerardo Marx Date: Thu Jan 29 11:58:42 2026 -0600 session one added diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2fd794b --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +.DS_Store +.venv diff --git a/session-1-intro/main.ipynb b/session-1-intro/main.ipynb new file mode 100644 index 0000000..b18d0fb --- /dev/null +++ b/session-1-intro/main.ipynb @@ -0,0 +1,581 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "7c384478", + "metadata": {}, + "source": [ + "# Título\n", + "## Subtítulo\n", + "### sub-subtítulo\n", + "Esto es un párrafo en **markdown**. La *siguiente* ecuación $f(x)=x^3$ es evaluada en: \n", + "- primer elemento\n", + "- segundo elemento\n", + "- tercer elemento\n", + "\n", + "1. Primer\n", + "2. Segundo\n", + "3. Tercer\n", + " - Sub elemento\n", + " - sub elemento" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "461cd70f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "5\n", + "Hola\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "\n", + "x = 5 \n", + "print(x)\n", + "x= \"Hola\"\n", + "print(x)" + ] + }, + { + "cell_type": "markdown", + "id": "dd0b1c6a", + "metadata": {}, + "source": [ + "# The tips dataset\n", + "This dataset comes from a restaurant and is used to teach EDA. 
Each row represents one bill (table) and records the total bill, the tip, and other details of the service.\n", + "\n", + "- `total_bill`: Total bill amount, excluding the tip.\n", + "- `tip`: Tip amount given.\n", + "- `sex`: Sex of the person who paid the bill.\n", + "- `smoker`: Whether there were smokers in the party.\n", + "- `day`: Day of the week.\n", + "- `time`: Meal time (Lunch/Dinner).\n", + "- `size`: Number of guests in the party." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "a3367dd4", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
total_billtipsexsmokerdaytimesize
016.991.01FemaleNoSunDinner2
110.341.66MaleNoSunDinner3
221.013.50MaleNoSunDinner3
323.683.31MaleNoSunDinner2
424.593.61FemaleNoSunDinner4
........................
23929.035.92MaleNoSatDinner3
24027.182.00FemaleYesSatDinner2
24122.672.00MaleYesSatDinner2
24217.821.75MaleNoSatDinner2
24318.783.00FemaleNoThurDinner2
\n", + "

244 rows × 7 columns

\n", + "
" + ], + "text/plain": [ + " total_bill tip sex smoker day time size\n", + "0 16.99 1.01 Female No Sun Dinner 2\n", + "1 10.34 1.66 Male No Sun Dinner 3\n", + "2 21.01 3.50 Male No Sun Dinner 3\n", + "3 23.68 3.31 Male No Sun Dinner 2\n", + "4 24.59 3.61 Female No Sun Dinner 4\n", + ".. ... ... ... ... ... ... ...\n", + "239 29.03 5.92 Male No Sat Dinner 3\n", + "240 27.18 2.00 Female Yes Sat Dinner 2\n", + "241 22.67 2.00 Male Yes Sat Dinner 2\n", + "242 17.82 1.75 Male No Sat Dinner 2\n", + "243 18.78 3.00 Female No Thur Dinner 2\n", + "\n", + "[244 rows x 7 columns]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# importing the tips dataset\n", + "import seaborn as sns\n", + "df = sns.load_dataset(\"tips\")\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "ab96e102", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 244 entries, 0 to 243\n", + "Data columns (total 7 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 total_bill 244 non-null float64 \n", + " 1 tip 244 non-null float64 \n", + " 2 sex 244 non-null category\n", + " 3 smoker 244 non-null category\n", + " 4 day 244 non-null category\n", + " 5 time 244 non-null category\n", + " 6 size 244 non-null int64 \n", + "dtypes: category(4), float64(2), int64(1)\n", + "memory usage: 7.4 KB\n" + ] + } + ], + "source": [ + "df.head() # dataframe example \n", + "df.info() " + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "ea342b45", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
total_billtipsexsmokerdaytimesize
count244.000000244.000000244244244244244.000000
uniqueNaNNaN2242NaN
topNaNNaNMaleNoSatDinnerNaN
freqNaNNaN15715187176NaN
mean19.7859432.998279NaNNaNNaNNaN2.569672
std8.9024121.383638NaNNaNNaNNaN0.951100
min3.0700001.000000NaNNaNNaNNaN1.000000
25%13.3475002.000000NaNNaNNaNNaN2.000000
50%17.7950002.900000NaNNaNNaNNaN2.000000
75%24.1275003.562500NaNNaNNaNNaN3.000000
max50.81000010.000000NaNNaNNaNNaN6.000000
\n", + "
" + ], + "text/plain": [ + " total_bill tip sex smoker day time size\n", + "count 244.000000 244.000000 244 244 244 244 244.000000\n", + "unique NaN NaN 2 2 4 2 NaN\n", + "top NaN NaN Male No Sat Dinner NaN\n", + "freq NaN NaN 157 151 87 176 NaN\n", + "mean 19.785943 2.998279 NaN NaN NaN NaN 2.569672\n", + "std 8.902412 1.383638 NaN NaN NaN NaN 0.951100\n", + "min 3.070000 1.000000 NaN NaN NaN NaN 1.000000\n", + "25% 13.347500 2.000000 NaN NaN NaN NaN 2.000000\n", + "50% 17.795000 2.900000 NaN NaN NaN NaN 2.000000\n", + "75% 24.127500 3.562500 NaN NaN NaN NaN 3.000000\n", + "max 50.810000 10.000000 NaN NaN NaN NaN 6.000000" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.describe(include='all')" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "ebdbde94", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "total_bill 0\n", + "tip 0\n", + "sex 0\n", + "smoker 0\n", + "day 0\n", + "time 0\n", + "size 0\n", + "dtype: int64" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.isna().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "27cf57bb", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "tb = df['total_bill']\n", + "plt.figure()\n", + "plt.hist(tb, bins=25)\n", + "plt.title(\"Histogram: total_bill\")\n", + "plt.xlabel(\"total_bill\")\n", + "plt.ylabel(\"count\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "03a4ee9a", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plt.figure()\n", + "plt.boxplot(tb, vert=False)\n", + "plt.title(\"Boxplot: total_bill\")\n", + "plt.ylabel(\"total_bill\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "55a01dd2", + "metadata": {}, + "outputs": [], + "source": [ + "# Task 1: Numerically state the quartile values and the IQR\n", + "# Task 2: Scatter plot total_bill vs tip\n", + "# Task 3: Scatter plot tip vs size\n", + "# Task 4: Answer the following questions:\n", + "#   - What does the data represent?\n", + "#   - What are typical ranges?\n", + "#   - Any suspicious values? Why?\n", + "#   - One conclusion in plain language." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5b6f2d01", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv (3.14.2)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.14.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}