diff --git a/compareDBs.ipynb b/compareDBs.ipynb
index ef54078..ecec002 100644
--- a/compareDBs.ipynb
+++ b/compareDBs.ipynb
@@ -47,7 +47,7 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": 34,
"id": "ab6c6c81-6ac1-4668-a79b-a9a0341fb35a",
"metadata": {
"tags": []
@@ -59,7 +59,7 @@
"False"
]
},
- "execution_count": 1,
+ "execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
@@ -71,12 +71,13 @@
"from datetime import datetime\n",
"\n",
"import duckdb\n",
- "import influxdb_client\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import pandas as pd\n",
"from clickhouse_driver import Client\n",
"from dotenv import load_dotenv\n",
+ "from influxdb_client import InfluxDBClient\n",
+ "from influxdb_client.client.write_api import SYNCHRONOUS\n",
"from minio import Minio\n",
"from pymongo import MongoClient\n",
"from pytz import timezone\n",
@@ -100,7 +101,7 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 42,
"id": "968403e3-2e5e-4834-b969-be4600e2963a",
"metadata": {
"tags": []
@@ -135,7 +136,7 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": 4,
"id": "3634a4ec-04c2-4f1e-8659-5d22eb17a254",
"metadata": {},
"outputs": [
@@ -252,7 +253,7 @@
"999999 2023-03-03 18:14:45 1.062535 1.062520 1.062520 1.062580 59 "
]
},
- "execution_count": 5,
+ "execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
@@ -291,7 +292,7 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": 5,
"id": "27de1ec8-4de1-440a-b555-b4a46c5ef7ce",
"metadata": {},
"outputs": [],
@@ -305,6 +306,7 @@
"cell_type": "markdown",
"id": "4a8d5703-9bc9-4d38-83ff-457159304d58",
"metadata": {
+ "jp-MarkdownHeadingCollapsed": true,
"tags": []
},
"source": [
@@ -313,7 +315,7 @@
},
{
"cell_type": "code",
- "execution_count": 20,
+ "execution_count": 8,
"id": "c3202bbb-2655-45b2-b166-9f45a3ef854c",
"metadata": {
"tags": []
@@ -325,7 +327,7 @@
"'Database created'"
]
},
- "execution_count": 20,
+ "execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
@@ -379,7 +381,7 @@
},
{
"cell_type": "code",
- "execution_count": 21,
+ "execution_count": 9,
"id": "cc4865b3-a1bc-4a35-9624-15334754b3a1",
"metadata": {},
"outputs": [],
@@ -393,7 +395,7 @@
},
{
"cell_type": "code",
- "execution_count": 22,
+ "execution_count": 10,
"id": "1fac82c1-2d04-44ef-893a-dc13b755e6d8",
"metadata": {},
"outputs": [],
@@ -407,7 +409,7 @@
},
{
"cell_type": "code",
- "execution_count": 23,
+ "execution_count": 11,
"id": "597ae7bd-2eea-44d7-b379-f0eb7e745c15",
"metadata": {
"tags": []
@@ -447,86 +449,86 @@
" \n",
"
\n",
" \n",
- " | 2999995 | \n",
- " 8230798 | \n",
- " 2023-05-03 10:35:00 | \n",
- " 1683110115000000000 | \n",
- " 2023-05-03 10:35:15 | \n",
- " 1.103340 | \n",
- " 1.10330 | \n",
- " 1.103275 | \n",
- " 1.103340 | \n",
- " 61 | \n",
+ " 999995 | \n",
+ " 7857774 | \n",
+ " 2023-02-01 17:06:00 | \n",
+ " 1675271175000000000 | \n",
+ " 2023-02-01 17:06:15 | \n",
+ " 1.091725 | \n",
+ " 1.091670 | \n",
+ " 1.09166 | \n",
+ " 1.09176 | \n",
+ " 84 | \n",
"
\n",
" \n",
- " | 2999996 | \n",
- " 8230799 | \n",
- " 2023-05-03 10:35:15 | \n",
- " 1683110130000000000 | \n",
- " 2023-05-03 10:35:30 | \n",
- " 1.103300 | \n",
- " 1.10341 | \n",
- " 1.103300 | \n",
- " 1.103410 | \n",
- " 44 | \n",
+ " 999996 | \n",
+ " 7857775 | \n",
+ " 2023-02-01 17:06:15 | \n",
+ " 1675271190000000000 | \n",
+ " 2023-02-01 17:06:30 | \n",
+ " 1.091680 | \n",
+ " 1.091660 | \n",
+ " 1.09165 | \n",
+ " 1.09168 | \n",
+ " 51 | \n",
"
\n",
" \n",
- " | 2999997 | \n",
- " 8230799 | \n",
- " 2023-05-03 10:35:15 | \n",
- " 1683110130000000000 | \n",
- " 2023-05-03 10:35:30 | \n",
- " 1.103300 | \n",
- " 1.10341 | \n",
- " 1.103300 | \n",
- " 1.103410 | \n",
- " 44 | \n",
+ " 999997 | \n",
+ " 7857775 | \n",
+ " 2023-02-01 17:06:15 | \n",
+ " 1675271190000000000 | \n",
+ " 2023-02-01 17:06:30 | \n",
+ " 1.091680 | \n",
+ " 1.091660 | \n",
+ " 1.09165 | \n",
+ " 1.09168 | \n",
+ " 51 | \n",
"
\n",
" \n",
- " | 2999998 | \n",
- " 8230800 | \n",
- " 2023-05-03 10:35:30 | \n",
- " 1683110145000000000 | \n",
- " 2023-05-03 10:35:45 | \n",
- " 1.103415 | \n",
- " 1.10351 | \n",
- " 1.103385 | \n",
- " 1.103515 | \n",
- " 51 | \n",
+ " 999998 | \n",
+ " 7857776 | \n",
+ " 2023-02-01 17:06:30 | \n",
+ " 1675271205000000000 | \n",
+ " 2023-02-01 17:06:45 | \n",
+ " 1.091660 | \n",
+ " 1.091655 | \n",
+ " 1.09164 | \n",
+ " 1.09168 | \n",
+ " 63 | \n",
"
\n",
" \n",
- " | 2999999 | \n",
- " 8230800 | \n",
- " 2023-05-03 10:35:30 | \n",
- " 1683110145000000000 | \n",
- " 2023-05-03 10:35:45 | \n",
- " 1.103415 | \n",
- " 1.10351 | \n",
- " 1.103385 | \n",
- " 1.103515 | \n",
- " 51 | \n",
+ " 999999 | \n",
+ " 7857776 | \n",
+ " 2023-02-01 17:06:30 | \n",
+ " 1675271205000000000 | \n",
+ " 2023-02-01 17:06:45 | \n",
+ " 1.091660 | \n",
+ " 1.091655 | \n",
+ " 1.09164 | \n",
+ " 1.09168 | \n",
+ " 63 | \n",
"
\n",
" \n",
"\n",
""
],
"text/plain": [
- " id from at to \\\n",
- "2999995 8230798 2023-05-03 10:35:00 1683110115000000000 2023-05-03 10:35:15 \n",
- "2999996 8230799 2023-05-03 10:35:15 1683110130000000000 2023-05-03 10:35:30 \n",
- "2999997 8230799 2023-05-03 10:35:15 1683110130000000000 2023-05-03 10:35:30 \n",
- "2999998 8230800 2023-05-03 10:35:30 1683110145000000000 2023-05-03 10:35:45 \n",
- "2999999 8230800 2023-05-03 10:35:30 1683110145000000000 2023-05-03 10:35:45 \n",
+ " id from at to \\\n",
+ "999995 7857774 2023-02-01 17:06:00 1675271175000000000 2023-02-01 17:06:15 \n",
+ "999996 7857775 2023-02-01 17:06:15 1675271190000000000 2023-02-01 17:06:30 \n",
+ "999997 7857775 2023-02-01 17:06:15 1675271190000000000 2023-02-01 17:06:30 \n",
+ "999998 7857776 2023-02-01 17:06:30 1675271205000000000 2023-02-01 17:06:45 \n",
+ "999999 7857776 2023-02-01 17:06:30 1675271205000000000 2023-02-01 17:06:45 \n",
"\n",
- " open close min max volume \n",
- "2999995 1.103340 1.10330 1.103275 1.103340 61 \n",
- "2999996 1.103300 1.10341 1.103300 1.103410 44 \n",
- "2999997 1.103300 1.10341 1.103300 1.103410 44 \n",
- "2999998 1.103415 1.10351 1.103385 1.103515 51 \n",
- "2999999 1.103415 1.10351 1.103385 1.103515 51 "
+ " open close min max volume \n",
+ "999995 1.091725 1.091670 1.09166 1.09176 84 \n",
+ "999996 1.091680 1.091660 1.09165 1.09168 51 \n",
+ "999997 1.091680 1.091660 1.09165 1.09168 51 \n",
+ "999998 1.091660 1.091655 1.09164 1.09168 63 \n",
+ "999999 1.091660 1.091655 1.09164 1.09168 63 "
]
},
- "execution_count": 23,
+ "execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
@@ -537,7 +539,7 @@
},
{
"cell_type": "code",
- "execution_count": 26,
+ "execution_count": 12,
"id": "86794e47-611f-4ca8-a7e8-07e71afafe67",
"metadata": {
"tags": []
@@ -547,7 +549,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "10.64297915700081\n"
+ "5.7032926019999195\n"
]
}
],
@@ -557,7 +559,7 @@
},
{
"cell_type": "code",
- "execution_count": 27,
+ "execution_count": 13,
"id": "e7926062-8e84-4d3f-90a9-32807ce4f3d4",
"metadata": {
"tags": []
@@ -567,7 +569,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "6.190685558998666\n"
+ "6.242225094000105\n"
]
}
],
@@ -593,15 +595,14 @@
}
],
"source": [
- "%%time\n",
- "dfCh = cHouseQueryDf(dbname)"
+ "# %%time\n",
+ "# dfCh = cHouseQueryDf(dbname)"
]
},
{
"cell_type": "markdown",
"id": "1d389546-911f-43f7-aad1-49f7bcc83503",
"metadata": {
- "jp-MarkdownHeadingCollapsed": true,
"tags": []
},
"source": [
@@ -610,61 +611,325 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 54,
"id": "c3e7ebfd-76f1-4ac4-9833-312eb1a531af",
"metadata": {},
"outputs": [],
"source": [
- "client = influxdb_client.InfluxDBClient(\n",
- " url=InfluxDBUrl, token=InfluxDBKey, org=InfluxDBUser\n",
- ")"
+ "def influxdbConnect():\n",
+ "    \"\"\"Build an InfluxDBClient from the InfluxDB* connection settings.\"\"\"\n",
+ "    # The imports cell binds InfluxDBClient directly; the influxdb_client\n",
+ "    # module name itself is not imported, so it must not be referenced here.\n",
+ "    return InfluxDBClient(url=InfluxDBUrl, token=InfluxDBKey, org=InfluxDBUser)\n",
+ "\n",
+ "\n",
+ "def influxdbLoadCsv(csv=\"out.csv\", dictDates=[\"from\", \"to\"], index=\"from\"):\n",
+ "    \"\"\"Load csv into a DataFrame indexed by the index column.\"\"\"\n",
+ "    # Columns named in dictDates are parsed as dates; index_col=False keeps\n",
+ "    # pandas from using a CSV column as the index while reading.\n",
+ "    frame = pd.read_csv(csv, sep=\",\", index_col=False, parse_dates=dictDates)\n",
+ "    return frame.set_index(index)\n",
+ "\n",
+ "\n",
+ "def influxdbWriteCsv(dataFrame, bucket, measurement=\"id\", tag=\"volume\"):\n",
+ "    \"\"\"Write dataFrame to bucket as measurement, tagging by column tag; returns 0.\"\"\"\n",
+ "    # Both the client and its write API are context managers: leaving the\n",
+ "    # with-block closes the writer and then the client, so finalizers\n",
+ "    # (__del__) never have to be invoked by hand.\n",
+ "    with influxdbConnect() as client, client.write_api() as writer:\n",
+ "        writer.write(\n",
+ "            bucket=bucket,\n",
+ "            record=dataFrame,\n",
+ "            data_frame_measurement_name=measurement,\n",
+ "            data_frame_tag_columns=[tag],\n",
+ "        )\n",
+ "    return 0"
]
},
{
"cell_type": "code",
- "execution_count": null,
- "id": "cbf61f12-830b-4c57-804a-2257d8b3599a",
+ "execution_count": 55,
+ "id": "e05266b8-ff32-462c-b059-325a40a53d25",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "df = influxdbLoadCsv()  # DataFrame passed to influxdbWriteCsv in the write benchmark"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 51,
+ "id": "71300e86-2e39-4c71-8b0c-6d6e21728e48",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
- "# Read data from CSV without index and parse 'TimeStamp' as date.\n",
- "df = pd.read_csv(\"out.csv\", sep=\",\", index_col=False, parse_dates=[\"from\"])\n",
- "# Set 'TimeStamp' field as index of dataframe # test another indexs\n",
- "df.set_index(\"from\", inplace=True)"
+ "# write to InfluxDB and benchmark time\n",
+ "start = timeit.default_timer()\n",
+ "influxdbWriteCsv(df, InfluxDBBucket)\n",
+ "# dfIdw = cHouseQueryDf(dbname)\n",
+ "stop = timeit.default_timer()\n",
+ "influxdb_write_execution_time = stop - start"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 52,
+ "id": "c8515d8e-2f34-4a2f-b9be-3faefef2d0ca",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "89.17334011999992\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(influxdb_write_execution_time)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 60,
+ "id": "ecd217ab-0e16-40a6-9b92-9212b9bb20e9",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# Flux query: points of measurement \"id\" in bucket \"system\" from the given\n",
+ "# start time, pivoted so each field becomes a column. No _field filter on\n",
+ "# \"volume\": influxdbWriteCsv writes that column as a tag, not a field.\n",
+ "query = \"\"\"\n",
+ "from(bucket: \"system\")\n",
+ "|> range(start:2023-03-03T18:14:30Z, stop: now())\n",
+ "|> filter(fn: (r) => r._measurement == \"id\")\n",
+ "|> pivot(rowKey:[\"_time\"], columnKey: [\"_field\"], valueColumn: \"_value\")\"\"\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 58,
+ "id": "09028588-e450-491c-8b84-246807dad9c3",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# Timestamps kept for reference: 2021-01-01T00:00:00Z, 2023-03-03T18:14:30Z\n",
+ "\n",
+ "\n",
+ "def influxdRead(org=InfluxDBUser, query=query):\n",
+ "    \"\"\"Query InfluxDB as organization org; display the head and return the frame.\"\"\"\n",
+ "    with influxdbConnect() as client:\n",
+ "        InfluxDf = client.query_api().query_data_frame(org=org, query=query)\n",
+ "    display(InfluxDf.head())\n",
+ "    return InfluxDf"
]
},
{
"cell_type": "code",
"execution_count": null,
- "id": "54342a28-ba2b-4ade-a692-00566b53a639",
+ "id": "8f28e32f-59d3-427b-8fe9-3d8bdf0ab675",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
- "df.head()"
+ "dfIdb = influxdRead()\n",
+ "dfIdb.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
- "id": "f861fab2-f1b1-49dd-b758-12d10aef3462",
+ "id": "86a950d2-1c8e-46d5-aafe-444beb88bb88",
"metadata": {},
"outputs": [],
"source": [
- "%%time\n",
- "# gravando... demorou... mas deu certo\n",
- "with client.write_api() as writer:\n",
- " writer.write(\n",
- " bucket=InfluxDBBucket,\n",
- " record=df,\n",
- " data_frame_measurement_name=\"id\",\n",
- " data_frame_tag_columns=[\"volume\"],\n",
- " )"
+ "# read from InfluxDB (not ClickHouse) and benchmark time\n",
+ "start = timeit.default_timer()\n",
+ "dfIdr = influxdRead()\n",
+ "stop = timeit.default_timer()\n",
+ "influxdb_read_execution_time = stop - start"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "id": "54342a28-ba2b-4ade-a692-00566b53a639",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Unnamed: 0 | \n",
+ " id | \n",
+ " at | \n",
+ " to | \n",
+ " open | \n",
+ " close | \n",
+ " min | \n",
+ " max | \n",
+ " volume | \n",
+ "
\n",
+ " \n",
+ " | from | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 2023-03-03 18:13:30 | \n",
+ " 999995 | \n",
+ " 7984748 | \n",
+ " 1677867225000000000 | \n",
+ " 2023-03-03 18:13:45 | \n",
+ " 1.062695 | \n",
+ " 1.062635 | \n",
+ " 1.062630 | \n",
+ " 1.062700 | \n",
+ " 64 | \n",
+ "
\n",
+ " \n",
+ " | 2023-03-03 18:13:45 | \n",
+ " 999996 | \n",
+ " 7984749 | \n",
+ " 1677867240000000000 | \n",
+ " 2023-03-03 18:14:00 | \n",
+ " 1.062645 | \n",
+ " 1.062650 | \n",
+ " 1.062625 | \n",
+ " 1.062650 | \n",
+ " 43 | \n",
+ "
\n",
+ " \n",
+ " | 2023-03-03 18:14:00 | \n",
+ " 999997 | \n",
+ " 7984750 | \n",
+ " 1677867255000000000 | \n",
+ " 2023-03-03 18:14:15 | \n",
+ " 1.062640 | \n",
+ " 1.062625 | \n",
+ " 1.062620 | \n",
+ " 1.062665 | \n",
+ " 47 | \n",
+ "
\n",
+ " \n",
+ " | 2023-03-03 18:14:15 | \n",
+ " 999998 | \n",
+ " 7984751 | \n",
+ " 1677867270000000000 | \n",
+ " 2023-03-03 18:14:30 | \n",
+ " 1.062625 | \n",
+ " 1.062535 | \n",
+ " 1.062535 | \n",
+ " 1.062645 | \n",
+ " 43 | \n",
+ "
\n",
+ " \n",
+ " | 2023-03-03 18:14:30 | \n",
+ " 999999 | \n",
+ " 7984752 | \n",
+ " 1677867285000000000 | \n",
+ " 2023-03-03 18:14:45 | \n",
+ " 1.062535 | \n",
+ " 1.062520 | \n",
+ " 1.062520 | \n",
+ " 1.062580 | \n",
+ " 59 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Unnamed: 0 id at \\\n",
+ "from \n",
+ "2023-03-03 18:13:30 999995 7984748 1677867225000000000 \n",
+ "2023-03-03 18:13:45 999996 7984749 1677867240000000000 \n",
+ "2023-03-03 18:14:00 999997 7984750 1677867255000000000 \n",
+ "2023-03-03 18:14:15 999998 7984751 1677867270000000000 \n",
+ "2023-03-03 18:14:30 999999 7984752 1677867285000000000 \n",
+ "\n",
+ " to open close min \\\n",
+ "from \n",
+ "2023-03-03 18:13:30 2023-03-03 18:13:45 1.062695 1.062635 1.062630 \n",
+ "2023-03-03 18:13:45 2023-03-03 18:14:00 1.062645 1.062650 1.062625 \n",
+ "2023-03-03 18:14:00 2023-03-03 18:14:15 1.062640 1.062625 1.062620 \n",
+ "2023-03-03 18:14:15 2023-03-03 18:14:30 1.062625 1.062535 1.062535 \n",
+ "2023-03-03 18:14:30 2023-03-03 18:14:45 1.062535 1.062520 1.062520 \n",
+ "\n",
+ " max volume \n",
+ "from \n",
+ "2023-03-03 18:13:30 1.062700 64 \n",
+ "2023-03-03 18:13:45 1.062650 43 \n",
+ "2023-03-03 18:14:00 1.062665 47 \n",
+ "2023-03-03 18:14:15 1.062645 43 \n",
+ "2023-03-03 18:14:30 1.062580 59 "
+ ]
+ },
+ "execution_count": 19,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.tail()"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "id": "f861fab2-f1b1-49dd-b758-12d10aef3462",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "CPU times: user 25.8 s, sys: 408 ms, total: 26.2 s\n",
+ "Wall time: 1min 36s\n"
+ ]
+ }
+ ],
+ "source": []
+ },
{
"cell_type": "code",
"execution_count": null,