{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "tags": [ "remove-cell" ] }, "outputs": [], "source": [ "import sys\n", "import os\n", "if not any(path.endswith('textbook') for path in sys.path):\n", " sys.path.append(os.path.abspath('../../..'))\n", "from textbook_utils import *" ] }, { "cell_type": "markdown", "metadata": { "user_expressions": [] }, "source": [ "(sec:sql_aggregating)=\n", "# Aggregating\n", "\n", "This section introduces grouping and aggregating in SQL.\n", "We'll work with the baby names data, as in the previous section:" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import sqlalchemy\n", "db = sqlalchemy.create_engine('sqlite:///babynames.db')" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | Name | \n", "Sex | \n", "Count | \n", "Year | \n", "
---|---|---|---|---|
0 | \n", "Liam | \n", "M | \n", "19659 | \n", "2020 | \n", "
1 | \n", "Noah | \n", "M | \n", "18252 | \n", "2020 | \n", "
2 | \n", "Oliver | \n", "M | \n", "14147 | \n", "2020 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
7 | \n", "Lucas | \n", "M | \n", "11281 | \n", "2020 | \n", "
8 | \n", "Henry | \n", "M | \n", "10705 | \n", "2020 | \n", "
9 | \n", "Alexander | \n", "M | \n", "10151 | \n", "2020 | \n", "
10 rows × 4 columns
\n", "\n", " | SUM(Count) | \n", "
---|---|
0 | \n", "352554503 | \n", "
\n", " | Year | \n", "SUM(Count) | \n", "
---|---|---|
0 | \n", "1880 | \n", "194419 | \n", "
1 | \n", "1881 | \n", "185772 | \n", "
2 | \n", "1882 | \n", "213385 | \n", "
... | \n", "... | \n", "... | \n", "
138 | \n", "2018 | \n", "3487193 | \n", "
139 | \n", "2019 | \n", "3437438 | \n", "
140 | \n", "2020 | \n", "3287724 | \n", "
141 rows × 2 columns
\n", "\n", " | Year | \n", "Sex | \n", "SUM(Count) | \n", "
---|---|---|---|
0 | \n", "1880 | \n", "F | \n", "83929 | \n", "
1 | \n", "1880 | \n", "M | \n", "110490 | \n", "
2 | \n", "1881 | \n", "F | \n", "85034 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "
279 | \n", "2019 | \n", "M | \n", "1785527 | \n", "
280 | \n", "2020 | \n", "F | \n", "1581301 | \n", "
281 | \n", "2020 | \n", "M | \n", "1706423 | \n", "
282 rows × 3 columns
\n", "\n", " | Year | \n", "MAX(Count) | \n", "
---|---|---|
0 | \n", "1880 | \n", "9655 | \n", "
1 | \n", "1881 | \n", "8769 | \n", "
2 | \n", "1882 | \n", "9557 | \n", "
... | \n", "... | \n", "... | \n", "
138 | \n", "2018 | \n", "19924 | \n", "
139 | \n", "2019 | \n", "20555 | \n", "
140 | \n", "2020 | \n", "19659 | \n", "
141 rows × 2 columns
\n", "