diff --git a/compareDBs.ipynb b/compareDBs.ipynb
index 5ab1d11..ae4e834 100644
--- a/compareDBs.ipynb
+++ b/compareDBs.ipynb
@@ -47,25 +47,16 @@
},
{
"cell_type": "code",
- "execution_count": 68,
+ "execution_count": null,
"id": "ab6c6c81-6ac1-4668-a79b-a9a0341fb35a",
"metadata": {
"tags": []
},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "False"
- ]
- },
- "execution_count": 68,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"import configparser\n",
+ "import time\n",
+ "import timeit\n",
"from datetime import datetime\n",
"\n",
"import duckdb\n",
@@ -98,7 +89,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 42,
"id": "968403e3-2e5e-4834-b969-be4600e2963a",
"metadata": {
"tags": []
@@ -138,19 +129,10 @@
"metadata": {},
"outputs": [],
"source": [
- "%%time\n",
+ "# %%time\n",
"# Load Dataset\n",
- "df = pd.read_csv(\"out.csv\", index_col=0)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "7e7c46b6-90ee-4ca3-8b5a-553b09ece913",
- "metadata": {},
- "outputs": [],
- "source": [
- "# df.head()"
+ "df = pd.read_csv(\"out.csv\", index_col=0)\n",
+ "df.head()"
]
},
{
@@ -202,83 +184,287 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "id": "9cf86669-7722-4a2c-895c-51f0a5eebefc",
+ "execution_count": 44,
+ "id": "c3202bbb-2655-45b2-b166-9f45a3ef854c",
"metadata": {
"tags": []
},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'Database created'"
+ ]
+ },
+ "execution_count": 44,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# !! The official client uses an HTTP driver; in this example we use the recommended\n",
"# third-party library clickhouse_driver, which in turn uses TCP.\n",
- "client = Client(\n",
- " host=ClickHouseUrl,\n",
- " user=ClickHouseUser,\n",
- " password=ClickHouseKey,\n",
- " settings={\"use_numpy\": True},\n",
- ")"
+ "def cHouseConnect():\n",
+ "    \"\"\"Build a fresh ClickHouse client from the notebook's connection settings.\n",
+ "\n",
+ "    Returns a new Client created with host ClickHouseUrl, user ClickHouseUser,\n",
+ "    password ClickHouseKey and the driver setting use_numpy=True.\n",
+ "    \"\"\"\n",
+ "    connection_kwargs = {\n",
+ "        \"host\": ClickHouseUrl,\n",
+ "        \"user\": ClickHouseUser,\n",
+ "        \"password\": ClickHouseKey,\n",
+ "        \"settings\": {\"use_numpy\": True},\n",
+ "    }\n",
+ "    return Client(**connection_kwargs)\n",
+ "\n",
+ "\n",
+ "# Create Tables in ClickHouse\n",
+ "# !! TODO: CHANGE THE COLUMN TYPES !!\n",
+ "# ENGINE: a 'Memory' table disappears when the server is restarted\n",
+ "# (MergeTree is the engine used here).\n",
+ "def cHouseCreateDb(databasename):\n",
+ "    \"\"\"Create the benchmark table if it does not already exist.\n",
+ "\n",
+ "    Despite the parameter name, `databasename` is interpolated as the TABLE\n",
+ "    name of the CREATE TABLE statement. It is inserted with str.format, so\n",
+ "    only pass trusted identifiers.\n",
+ "\n",
+ "    Returns the string \"Database created\".\n",
+ "    \"\"\"\n",
+ "    client = cHouseConnect()\n",
+ "    try:\n",
+ "        client.execute(\n",
+ "            \"CREATE TABLE IF NOT EXISTS {} (id UInt32,\"\n",
+ "            \"from DateTime, at UInt64, to DateTime, open Float64,\"\n",
+ "            \"close Float64, min Float64, max Float64, volume UInt32)\"\n",
+ "            \"ENGINE MergeTree ORDER BY to\".format(databasename)\n",
+ "        )\n",
+ "    finally:\n",
+ "        # Always release the connection, even when the statement fails.\n",
+ "        client.disconnect()\n",
+ "    return \"Database created\"\n",
+ "\n",
+ "\n",
+ "# Write dataframe to db\n",
+ "def cHouseInsertDf(dbName, dataframe):\n",
+ "    \"\"\"Insert `dataframe` into the table `dbName` and return a status string.\n",
+ "\n",
+ "    A new client is opened for the call and disconnected afterwards, also\n",
+ "    when the insert raises. `dbName` is interpolated with str.format, so\n",
+ "    only pass trusted identifiers.\n",
+ "    \"\"\"\n",
+ "    client = cHouseConnect()\n",
+ "    try:\n",
+ "        client.insert_dataframe(\"INSERT INTO {} VALUES\".format(dbName), dataframe)\n",
+ "    finally:\n",
+ "        client.disconnect()\n",
+ "    # Report the row count and table name instead of formatting the whole\n",
+ "    # DataFrame into the message: rendering its repr is work unrelated to the\n",
+ "    # insert and would be included in any timing around this call.\n",
+ "    return \" dataframe ({} rows) inserted in clickhouse database {}\".format(\n",
+ "        len(dataframe), dbName\n",
+ "    )\n",
+ "\n",
+ "\n",
+ "def cHouseQueryDf(databaseName):\n",
+ "    \"\"\"Read every row of table `default.<databaseName>` and return the result.\n",
+ "\n",
+ "    Returns whatever client.query_dataframe produces for the SELECT. A new\n",
+ "    client is opened for the call and disconnected afterwards, also when the\n",
+ "    query raises. `databaseName` is interpolated with str.format, so only\n",
+ "    pass trusted identifiers.\n",
+ "    \"\"\"\n",
+ "    client = cHouseConnect()\n",
+ "    try:\n",
+ "        dfQuery = client.query_dataframe(\n",
+ "            \"SELECT * FROM default.{}\".format(databaseName)\n",
+ "        )  # LIMIT 10000\n",
+ "    finally:\n",
+ "        # Always release the connection, even when the query fails.\n",
+ "        client.disconnect()\n",
+ "    return dfQuery\n",
+ "\n",
+ "\n",
+ "# Create the benchmark table named by `dbname` (CREATE TABLE IF NOT EXISTS);\n",
+ "# as the last expression of the cell, the returned status string is displayed.\n",
+ "cHouseCreateDb(dbname)"
]
},
{
"cell_type": "code",
- "execution_count": null,
- "id": "a0a1f67b-2e63-462e-be66-d322d99837ea",
+ "execution_count": 47,
+ "id": "cc4865b3-a1bc-4a35-9624-15334754b3a1",
"metadata": {},
"outputs": [],
"source": [
- "# Create Tables in ClickHouse\n",
- "# !! ALTERAR TIPOS !!\n",
- "# ENGINE: 'Memory' desaparece quando server é reiniciado\n",
- "client.execute(\n",
- " \"CREATE TABLE IF NOT EXISTS {} (id UInt32,\"\n",
- " \"from DateTime, at UInt64, to DateTime, open Float64,\"\n",
- " \"close Float64, min Float64, max Float64, volume UInt32)\"\n",
- " \"ENGINE MergeTree ORDER BY to\".format(dbname)\n",
- ")"
+ "# Insert df into the ClickHouse table and measure the elapsed time.\n",
+ "# The measured span covers the whole helper call, i.e. opening the client,\n",
+ "# the insert itself and the disconnect. The value is the difference of two\n",
+ "# timeit.default_timer() readings.\n",
+ "start = timeit.default_timer()\n",
+ "cHouseInsertDf(dbname, df)\n",
+ "stop = timeit.default_timer()\n",
+ "cHouse_write_execution_time = stop - start"
]
},
{
"cell_type": "code",
- "execution_count": null,
- "id": "3a029a09-46f4-43c3-b3df-cfbed33fb0dc",
+ "execution_count": 48,
+ "id": "1fac82c1-2d04-44ef-893a-dc13b755e6d8",
"metadata": {},
"outputs": [],
"source": [
- "%%time\n",
- "# Write dataframe to db\n",
- "client.insert_dataframe(\"INSERT INTO {} VALUES\".format(dbname), df)"
+ "# Read the table back from ClickHouse into dfCh and measure the elapsed time.\n",
+ "# The measured span covers the whole helper call, i.e. opening the client,\n",
+ "# the SELECT and the disconnect. The value is the difference of two\n",
+ "# timeit.default_timer() readings.\n",
+ "start = timeit.default_timer()\n",
+ "dfCh = cHouseQueryDf(dbname)\n",
+ "stop = timeit.default_timer()\n",
+ "cHouse_read_execution_time = stop - start"
]
},
{
"cell_type": "code",
- "execution_count": null,
- "id": "17251288-2442-43ee-98f2-ca680c3c4f13",
+ "execution_count": 49,
+ "id": "597ae7bd-2eea-44d7-b379-f0eb7e745c15",
"metadata": {
"tags": []
},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " from | \n",
+ " at | \n",
+ " to | \n",
+ " open | \n",
+ " close | \n",
+ " min | \n",
+ " max | \n",
+ " volume | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 7730801 | \n",
+ " 2023-01-02 15:58:45 | \n",
+ " 1672675140000000000 | \n",
+ " 2023-01-02 15:59:00 | \n",
+ " 1.065995 | \n",
+ " 1.066035 | \n",
+ " 1.065930 | \n",
+ " 1.066070 | \n",
+ " 57 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 7730801 | \n",
+ " 2023-01-02 15:58:45 | \n",
+ " 1672675140000000000 | \n",
+ " 2023-01-02 15:59:00 | \n",
+ " 1.065995 | \n",
+ " 1.066035 | \n",
+ " 1.065930 | \n",
+ " 1.066070 | \n",
+ " 57 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 7730802 | \n",
+ " 2023-01-02 15:59:00 | \n",
+ " 1672675155000000000 | \n",
+ " 2023-01-02 15:59:15 | \n",
+ " 1.066055 | \n",
+ " 1.066085 | \n",
+ " 1.066005 | \n",
+ " 1.066115 | \n",
+ " 52 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 7730802 | \n",
+ " 2023-01-02 15:59:00 | \n",
+ " 1672675155000000000 | \n",
+ " 2023-01-02 15:59:15 | \n",
+ " 1.066055 | \n",
+ " 1.066085 | \n",
+ " 1.066005 | \n",
+ " 1.066115 | \n",
+ " 52 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 7730803 | \n",
+ " 2023-01-02 15:59:15 | \n",
+ " 1672675170000000000 | \n",
+ " 2023-01-02 15:59:30 | \n",
+ " 1.066080 | \n",
+ " 1.066025 | \n",
+ " 1.066025 | \n",
+ " 1.066110 | \n",
+ " 57 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id from at to \\\n",
+ "0 7730801 2023-01-02 15:58:45 1672675140000000000 2023-01-02 15:59:00 \n",
+ "1 7730801 2023-01-02 15:58:45 1672675140000000000 2023-01-02 15:59:00 \n",
+ "2 7730802 2023-01-02 15:59:00 1672675155000000000 2023-01-02 15:59:15 \n",
+ "3 7730802 2023-01-02 15:59:00 1672675155000000000 2023-01-02 15:59:15 \n",
+ "4 7730803 2023-01-02 15:59:15 1672675170000000000 2023-01-02 15:59:30 \n",
+ "\n",
+ " open close min max volume \n",
+ "0 1.065995 1.066035 1.065930 1.066070 57 \n",
+ "1 1.065995 1.066035 1.065930 1.066070 57 \n",
+ "2 1.066055 1.066085 1.066005 1.066115 52 \n",
+ "3 1.066055 1.066085 1.066005 1.066115 52 \n",
+ "4 1.066080 1.066025 1.066025 1.066110 57 "
+ ]
+ },
+ "execution_count": 49,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "%%time\n",
- "client.query_dataframe(\"SELECT * FROM default.{}\".format(dbname)) # LIMIT 10000"
+ "dfCh.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 50,
+ "id": "86794e47-611f-4ca8-a7e8-07e71afafe67",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "5.175396532999002\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(cHouse_read_execution_time)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 51,
+ "id": "e7926062-8e84-4d3f-90a9-32807ce4f3d4",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "6.163630739996734\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(cHouse_write_execution_time)"
]
},
{
"cell_type": "code",
"execution_count": null,
- "id": "51497522-bd6c-44a8-aaea-ec5dda30b95b",
+ "id": "8faa5683-a204-461d-80c3-67644aa714ce",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
- "# %%time\n",
- "# df = pd.DataFrame(client.query_dataframe(\"SELECT * FROM default.{}\".format(dbname)))"
+ "%%time\n",
+ "dfCh = cHouseQueryDf(dbname)"
]
},
{
"cell_type": "markdown",
"id": "1d389546-911f-43f7-aad1-49f7bcc83503",
"metadata": {
+ "jp-MarkdownHeadingCollapsed": true,
"tags": []
},
"source": [
@@ -427,6 +613,14 @@
"conn.close()"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "73de4294-1284-49b0-b31e-45db6e835877",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
{
"cell_type": "code",
"execution_count": null,
@@ -434,7 +628,36 @@
"metadata": {},
"outputs": [],
"source": [
- "# Read"
+ "# Read the whole \"comparedbs\" table through `engine` and time the query.\n",
+ "# NOTE(review): this rebinds `df`, which the notebook earlier loaded from\n",
+ "# out.csv - confirm that later cells expect the query result instead.\n",
+ "# NOTE(review): the ClickHouse cells time with timeit.default_timer() while\n",
+ "# this cell uses time.time() - confirm the numbers are meant to be compared.\n",
+ "# NOTE(review): exec_time is not defined in this part of the notebook;\n",
+ "# presumably it returns the elapsed time between start and end - verify.\n",
+ "start = time.time()\n",
+ "# %%time (cell magic left disabled; timing is done manually here)\n",
+ "# Read\n",
+ "df = pd.read_sql_query('select * from \"comparedbs\"', con=engine)\n",
+ "end = time.time()\n",
+ "postgresql_read_time = exec_time(start, end)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "6d1b7480-5bc7-4f08-8cf3-b9590802d8f7",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "print(postgresql_read_time)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "6acb2959-3255-43bd-aea5-9ef70acc8902",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "df.head()"
]
},
{
@@ -674,7 +897,7 @@
},
{
"cell_type": "code",
- "execution_count": 69,
+ "execution_count": null,
"id": "14f63810-1943-4e28-9bce-2148be6be02d",
"metadata": {},
"outputs": [],
@@ -687,7 +910,7 @@
},
{
"cell_type": "code",
- "execution_count": 70,
+ "execution_count": null,
"id": "8ff6c090-7e02-435a-a179-f2aab81da972",
"metadata": {},
"outputs": [],
@@ -698,7 +921,7 @@
},
{
"cell_type": "code",
- "execution_count": 71,
+ "execution_count": null,
"id": "b4eb8ab9-81e8-4732-8cf7-51f0981d3d57",
"metadata": {
"tags": []
@@ -712,19 +935,10 @@
},
{
"cell_type": "code",
- "execution_count": 75,
+ "execution_count": null,
"id": "97cb6b5b-65a5-46a0-a4ee-e5c535a716ab",
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "CPU times: user 925 ms, sys: 40 ms, total: 965 ms\n",
- "Wall time: 1.43 s\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"%%time\n",
"# send df to the kdb+ in-memory database\n",
@@ -733,21 +947,10 @@
},
{
"cell_type": "code",
- "execution_count": 76,
+ "execution_count": null,
"id": "c2ed2d51-bc8e-4207-892a-35fc55d43570",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "b':/home/sandman/q/tab1'"
- ]
- },
- "execution_count": 76,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"# write to on disk table\n",
"q.sendSync(\"`:/home/sandman/q/tab1 set t\")"
@@ -755,21 +958,12 @@
},
{
"cell_type": "code",
- "execution_count": 77,
+ "execution_count": null,
"id": "9c055a95-f73f-43a3-8fbd-61e42235117e",
"metadata": {
"tags": []
},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "CPU times: user 1.94 ms, sys: 1 µs, total: 1.94 ms\n",
- "Wall time: 426 ms\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"%%time\n",
"# read from on disk table\n",
@@ -778,7 +972,7 @@
},
{
"cell_type": "code",
- "execution_count": 78,
+ "execution_count": null,
"id": "9760de38-9f04-4322-bfff-c7ee12d5dee5",
"metadata": {
"tags": []
@@ -790,21 +984,12 @@
},
{
"cell_type": "code",
- "execution_count": 79,
+ "execution_count": null,
"id": "c06c9222-c69d-4872-9d21-052281a013e2",
"metadata": {
"tags": []
},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "CPU times: user 1.08 s, sys: 116 ms, total: 1.2 s\n",
- "Wall time: 1.27 s\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"%%time\n",
"# load to variable df2\n",
@@ -813,7 +998,7 @@
},
{
"cell_type": "code",
- "execution_count": 80,
+ "execution_count": null,
"id": "8815f01c-fd0a-4f94-ab7f-f8ede84ba4e7",
"metadata": {
"tags": []
@@ -825,144 +1010,12 @@
},
{
"cell_type": "code",
- "execution_count": 82,
+ "execution_count": null,
"id": "e6ed3927-4395-45cd-9a28-88c5db01f2e5",
"metadata": {
"tags": []
},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "CPU times: user 1.25 s, sys: 132 ms, total: 1.39 s\n",
- "Wall time: 1.46 s\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " Unnamed: 0 | \n",
- " id | \n",
- " from | \n",
- " at | \n",
- " to | \n",
- " open | \n",
- " close | \n",
- " min | \n",
- " max | \n",
- " volume | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " 0 | \n",
- " 7730801 | \n",
- " b'2023-01-02 15:58:45' | \n",
- " 1672675140000000000 | \n",
- " b'2023-01-02 15:59:00' | \n",
- " 1.065995 | \n",
- " 1.066035 | \n",
- " 1.065930 | \n",
- " 1.066070 | \n",
- " 57 | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " 1 | \n",
- " 7730802 | \n",
- " b'2023-01-02 15:59:00' | \n",
- " 1672675155000000000 | \n",
- " b'2023-01-02 15:59:15' | \n",
- " 1.066055 | \n",
- " 1.066085 | \n",
- " 1.066005 | \n",
- " 1.066115 | \n",
- " 52 | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " 2 | \n",
- " 7730803 | \n",
- " b'2023-01-02 15:59:15' | \n",
- " 1672675170000000000 | \n",
- " b'2023-01-02 15:59:30' | \n",
- " 1.066080 | \n",
- " 1.066025 | \n",
- " 1.066025 | \n",
- " 1.066110 | \n",
- " 57 | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " 3 | \n",
- " 7730804 | \n",
- " b'2023-01-02 15:59:30' | \n",
- " 1672675185000000000 | \n",
- " b'2023-01-02 15:59:45' | \n",
- " 1.065980 | \n",
- " 1.065985 | \n",
- " 1.065885 | \n",
- " 1.066045 | \n",
- " 64 | \n",
- "
\n",
- " \n",
- " | 4 | \n",
- " 4 | \n",
- " 7730805 | \n",
- " b'2023-01-02 15:59:45' | \n",
- " 1672675200000000000 | \n",
- " b'2023-01-02 16:00:00' | \n",
- " 1.065975 | \n",
- " 1.066055 | \n",
- " 1.065830 | \n",
- " 1.066055 | \n",
- " 50 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " Unnamed: 0 id from at \n",
- "0 0 7730801 b'2023-01-02 15:58:45' 1672675140000000000 \\\n",
- "1 1 7730802 b'2023-01-02 15:59:00' 1672675155000000000 \n",
- "2 2 7730803 b'2023-01-02 15:59:15' 1672675170000000000 \n",
- "3 3 7730804 b'2023-01-02 15:59:30' 1672675185000000000 \n",
- "4 4 7730805 b'2023-01-02 15:59:45' 1672675200000000000 \n",
- "\n",
- " to open close min max volume \n",
- "0 b'2023-01-02 15:59:00' 1.065995 1.066035 1.065930 1.066070 57 \n",
- "1 b'2023-01-02 15:59:15' 1.066055 1.066085 1.066005 1.066115 52 \n",
- "2 b'2023-01-02 15:59:30' 1.066080 1.066025 1.066025 1.066110 57 \n",
- "3 b'2023-01-02 15:59:45' 1.065980 1.065985 1.065885 1.066045 64 \n",
- "4 b'2023-01-02 16:00:00' 1.065975 1.066055 1.065830 1.066055 50 "
- ]
- },
- "execution_count": 82,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"%%time\n",
"# convert to dataframe\n",
@@ -972,21 +1025,12 @@
},
{
"cell_type": "code",
- "execution_count": 83,
+ "execution_count": null,
"id": "0fc7f16b-6c39-4ebe-88d2-ff857e30ab62",
"metadata": {
"tags": []
},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "CPU times: user 1.11 s, sys: 116 ms, total: 1.23 s\n",
- "Wall time: 1.3 s\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"%%time\n",
"# select\n",
@@ -995,7 +1039,7 @@
},
{
"cell_type": "code",
- "execution_count": 84,
+ "execution_count": null,
"id": "c88646ca-3d25-4a85-80b5-f9e559f568dd",
"metadata": {
"tags": []