From 84a2bbd8c4aa4d1e63018194eff41f1f468d7173 Mon Sep 17 00:00:00 2001 From: flashlan Date: Fri, 16 Jun 2023 20:05:29 -0300 Subject: [PATCH] write influxdb read function --- compareDBs.ipynb | 473 ++++++++++++++++++++++++++++++++++++----------- 1 file changed, 369 insertions(+), 104 deletions(-) diff --git a/compareDBs.ipynb b/compareDBs.ipynb index ef54078..ecec002 100644 --- a/compareDBs.ipynb +++ b/compareDBs.ipynb @@ -47,7 +47,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 34, "id": "ab6c6c81-6ac1-4668-a79b-a9a0341fb35a", "metadata": { "tags": [] @@ -59,7 +59,7 @@ "False" ] }, - "execution_count": 1, + "execution_count": 34, "metadata": {}, "output_type": "execute_result" } @@ -71,12 +71,13 @@ "from datetime import datetime\n", "\n", "import duckdb\n", - "import influxdb_client\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas as pd\n", "from clickhouse_driver import Client\n", "from dotenv import load_dotenv\n", + "from influxdb_client import InfluxDBClient\n", + "from influxdb_client.client.write_api import SYNCHRONOUS\n", "from minio import Minio\n", "from pymongo import MongoClient\n", "from pytz import timezone\n", @@ -100,7 +101,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 42, "id": "968403e3-2e5e-4834-b969-be4600e2963a", "metadata": { "tags": [] @@ -135,7 +136,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "id": "3634a4ec-04c2-4f1e-8659-5d22eb17a254", "metadata": {}, "outputs": [ @@ -252,7 +253,7 @@ "999999 2023-03-03 18:14:45 1.062535 1.062520 1.062520 1.062580 59 " ] }, - "execution_count": 5, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -291,7 +292,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "id": "27de1ec8-4de1-440a-b555-b4a46c5ef7ce", "metadata": {}, "outputs": [], @@ -305,6 +306,7 @@ "cell_type": "markdown", "id": "4a8d5703-9bc9-4d38-83ff-457159304d58", 
"metadata": { + "jp-MarkdownHeadingCollapsed": true, "tags": [] }, "source": [ @@ -313,7 +315,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 8, "id": "c3202bbb-2655-45b2-b166-9f45a3ef854c", "metadata": { "tags": [] @@ -325,7 +327,7 @@ "'Database created'" ] }, - "execution_count": 20, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -379,7 +381,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 9, "id": "cc4865b3-a1bc-4a35-9624-15334754b3a1", "metadata": {}, "outputs": [], @@ -393,7 +395,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 10, "id": "1fac82c1-2d04-44ef-893a-dc13b755e6d8", "metadata": {}, "outputs": [], @@ -407,7 +409,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 11, "id": "597ae7bd-2eea-44d7-b379-f0eb7e745c15", "metadata": { "tags": [] @@ -447,86 +449,86 @@ " \n", " \n", " \n", - " 2999995\n", - " 8230798\n", - " 2023-05-03 10:35:00\n", - " 1683110115000000000\n", - " 2023-05-03 10:35:15\n", - " 1.103340\n", - " 1.10330\n", - " 1.103275\n", - " 1.103340\n", - " 61\n", + " 999995\n", + " 7857774\n", + " 2023-02-01 17:06:00\n", + " 1675271175000000000\n", + " 2023-02-01 17:06:15\n", + " 1.091725\n", + " 1.091670\n", + " 1.09166\n", + " 1.09176\n", + " 84\n", " \n", " \n", - " 2999996\n", - " 8230799\n", - " 2023-05-03 10:35:15\n", - " 1683110130000000000\n", - " 2023-05-03 10:35:30\n", - " 1.103300\n", - " 1.10341\n", - " 1.103300\n", - " 1.103410\n", - " 44\n", + " 999996\n", + " 7857775\n", + " 2023-02-01 17:06:15\n", + " 1675271190000000000\n", + " 2023-02-01 17:06:30\n", + " 1.091680\n", + " 1.091660\n", + " 1.09165\n", + " 1.09168\n", + " 51\n", " \n", " \n", - " 2999997\n", - " 8230799\n", - " 2023-05-03 10:35:15\n", - " 1683110130000000000\n", - " 2023-05-03 10:35:30\n", - " 1.103300\n", - " 1.10341\n", - " 1.103300\n", - " 1.103410\n", - " 44\n", + " 999997\n", + " 7857775\n", + " 2023-02-01 17:06:15\n", + 
" 1675271190000000000\n", + " 2023-02-01 17:06:30\n", + " 1.091680\n", + " 1.091660\n", + " 1.09165\n", + " 1.09168\n", + " 51\n", " \n", " \n", - " 2999998\n", - " 8230800\n", - " 2023-05-03 10:35:30\n", - " 1683110145000000000\n", - " 2023-05-03 10:35:45\n", - " 1.103415\n", - " 1.10351\n", - " 1.103385\n", - " 1.103515\n", - " 51\n", + " 999998\n", + " 7857776\n", + " 2023-02-01 17:06:30\n", + " 1675271205000000000\n", + " 2023-02-01 17:06:45\n", + " 1.091660\n", + " 1.091655\n", + " 1.09164\n", + " 1.09168\n", + " 63\n", " \n", " \n", - " 2999999\n", - " 8230800\n", - " 2023-05-03 10:35:30\n", - " 1683110145000000000\n", - " 2023-05-03 10:35:45\n", - " 1.103415\n", - " 1.10351\n", - " 1.103385\n", - " 1.103515\n", - " 51\n", + " 999999\n", + " 7857776\n", + " 2023-02-01 17:06:30\n", + " 1675271205000000000\n", + " 2023-02-01 17:06:45\n", + " 1.091660\n", + " 1.091655\n", + " 1.09164\n", + " 1.09168\n", + " 63\n", " \n", " \n", "\n", "" ], "text/plain": [ - " id from at to \\\n", - "2999995 8230798 2023-05-03 10:35:00 1683110115000000000 2023-05-03 10:35:15 \n", - "2999996 8230799 2023-05-03 10:35:15 1683110130000000000 2023-05-03 10:35:30 \n", - "2999997 8230799 2023-05-03 10:35:15 1683110130000000000 2023-05-03 10:35:30 \n", - "2999998 8230800 2023-05-03 10:35:30 1683110145000000000 2023-05-03 10:35:45 \n", - "2999999 8230800 2023-05-03 10:35:30 1683110145000000000 2023-05-03 10:35:45 \n", + " id from at to \\\n", + "999995 7857774 2023-02-01 17:06:00 1675271175000000000 2023-02-01 17:06:15 \n", + "999996 7857775 2023-02-01 17:06:15 1675271190000000000 2023-02-01 17:06:30 \n", + "999997 7857775 2023-02-01 17:06:15 1675271190000000000 2023-02-01 17:06:30 \n", + "999998 7857776 2023-02-01 17:06:30 1675271205000000000 2023-02-01 17:06:45 \n", + "999999 7857776 2023-02-01 17:06:30 1675271205000000000 2023-02-01 17:06:45 \n", "\n", - " open close min max volume \n", - "2999995 1.103340 1.10330 1.103275 1.103340 61 \n", - "2999996 1.103300 1.10341 1.103300 1.103410 
def influxdbConnect():
    """Create an InfluxDB client from the notebook's connection settings.

    Reads the notebook-level names ``InfluxDBUrl``, ``InfluxDBKey`` and
    ``InfluxDBUser`` and passes them as ``url``, ``token`` and ``org``.

    Returns:
        A new ``InfluxDBClient``. The caller owns it and should close it,
        for example by using it in a ``with`` statement.
    """
    # The notebook's import cell binds the class itself
    # (``from influxdb_client import InfluxDBClient``), not the package, so
    # ``influxdb_client.InfluxDBClient`` raises NameError on a fresh kernel.
    return InfluxDBClient(url=InfluxDBUrl, token=InfluxDBKey, org=InfluxDBUser)


def influxdbLoadCsv(csv="out.csv", dictDates=None, index="from"):
    """Load a CSV file into a DataFrame indexed by one of its columns.

    Args:
        csv: Path or file-like object handed to ``pandas.read_csv``.
        dictDates: Columns to parse as dates, forwarded unchanged as
            ``parse_dates``. ``None`` (the default) means ``["from", "to"]``.
        index: Name of the column that becomes the DataFrame index.

    Returns:
        The loaded DataFrame with ``index`` set as its index.
    """
    # A fresh list per call: a list literal in the signature would be one
    # object shared (and mutable) across every call.
    parse_dates = ["from", "to"] if dictDates is None else dictDates
    # index_col=False keeps pandas from promoting the first CSV column to the
    # index; the index is chosen explicitly below.
    frame = pd.read_csv(csv, sep=",", index_col=False, parse_dates=parse_dates)
    return frame.set_index(index)


def influxdbWriteCsv(dataFrame, bucket, measurement="id", tag="volume"):
    """Write a DataFrame to an InfluxDB bucket.

    Args:
        dataFrame: Frame to write, passed as ``record``.
        bucket: Destination bucket name.
        measurement: Measurement name used for every row.
        tag: Name of the single DataFrame column to store as a tag.

    Returns:
        ``0`` once the write call has been issued and both the writer and the
        client have been closed.
    """
    # Both objects are context managers. Leaving the block closes the writer
    # first and the client second, replacing the manual ``__del__()`` calls
    # that tore the client down while the writer was still open.
    # NOTE(review): ``write_api()`` is called without write options; pass
    # ``write_options=SYNCHRONOUS`` if write errors must surface here -- confirm.
    with influxdbConnect() as client, client.write_api() as writer:
        writer.write(
            bucket=bucket,
            record=dataFrame,
            data_frame_measurement_name=measurement,
            data_frame_tag_columns=[tag],
        )
    return 0


if __name__ == "__main__":  # true inside the notebook kernel; skipped on import
    # Bind the loaded frame to ``df``: the benchmark below writes ``df``, so
    # storing it under another name would send a stale frame to InfluxDB.
    df = influxdbLoadCsv()

    # Write the frame to InfluxDB and record the wall-clock time it takes.
    start = timeit.default_timer()
    influxdbWriteCsv(df, InfluxDBBucket)
    stop = timeit.default_timer()
    influxdb_write_execution_time = stop - start
print(influxdb_write_execution_time)

# Flux query used as the default for influxdRead().
# NOTE(review): the bucket name is hard-coded to "system" while the write path
# uses InfluxDBBucket -- confirm they are the same bucket.
# NOTE(review): influxdbWriteCsv stores "volume" as a tag column by default, so
# the `_field == "volume"` filter presumably matches nothing -- verify.
query = """
from(bucket: "system")
|> range(start:2023-03-03T18:14:30Z, stop: now())
|> filter(fn: (r) => r._measurement == "id")
|> filter(fn: (r) => r._field == "volume")
|> pivot(rowKey:["_time"], columnKey: ["_field"], valueColumn: "_value")"""


def influxdRead(org=InfluxDBUser, query=query):
    """Run a Flux query against InfluxDB and return the result.

    Args:
        org: Organisation to run the query in. Defaults to ``InfluxDBUser``,
            the same value ``influxdbConnect`` passes as ``org``; the server
            URL is not an organisation name.
        query: Flux query text. Defaults to the notebook-level ``query``.

    Returns:
        The object returned by ``query_data_frame``. Its first rows are also
        shown with ``display``.
    """
    # Use the client as a context manager so the connection is closed even if
    # the query raises.
    with influxdbConnect() as client:
        InfluxDf = client.query_api().query_data_frame(org=org, query=query)
    # NOTE(review): ``.head()`` assumes a single DataFrame came back -- confirm
    # against the client library's documented return type.
    display(InfluxDf.head())
    return InfluxDf


dfIdb = influxdRead()
dfIdb.head()
# Read the data back from InfluxDB and record the wall-clock time it takes.
# This must call the InfluxDB reader: timing cHouseQueryDf(dbname) here would
# store a ClickHouse measurement under the InfluxDB name.
start = timeit.default_timer()
dfIdr = influxdRead()
stop = timeit.default_timer()
influxdb_read_execution_time = stop - start
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unnamed: 0idattoopencloseminmaxvolume
from
2023-03-03 18:13:30999995798474816778672250000000002023-03-03 18:13:451.0626951.0626351.0626301.06270064
2023-03-03 18:13:45999996798474916778672400000000002023-03-03 18:14:001.0626451.0626501.0626251.06265043
2023-03-03 18:14:00999997798475016778672550000000002023-03-03 18:14:151.0626401.0626251.0626201.06266547
2023-03-03 18:14:15999998798475116778672700000000002023-03-03 18:14:301.0626251.0625351.0625351.06264543
2023-03-03 18:14:30999999798475216778672850000000002023-03-03 18:14:451.0625351.0625201.0625201.06258059
\n", + "
" + ], + "text/plain": [ + " Unnamed: 0 id at \\\n", + "from \n", + "2023-03-03 18:13:30 999995 7984748 1677867225000000000 \n", + "2023-03-03 18:13:45 999996 7984749 1677867240000000000 \n", + "2023-03-03 18:14:00 999997 7984750 1677867255000000000 \n", + "2023-03-03 18:14:15 999998 7984751 1677867270000000000 \n", + "2023-03-03 18:14:30 999999 7984752 1677867285000000000 \n", + "\n", + " to open close min \\\n", + "from \n", + "2023-03-03 18:13:30 2023-03-03 18:13:45 1.062695 1.062635 1.062630 \n", + "2023-03-03 18:13:45 2023-03-03 18:14:00 1.062645 1.062650 1.062625 \n", + "2023-03-03 18:14:00 2023-03-03 18:14:15 1.062640 1.062625 1.062620 \n", + "2023-03-03 18:14:15 2023-03-03 18:14:30 1.062625 1.062535 1.062535 \n", + "2023-03-03 18:14:30 2023-03-03 18:14:45 1.062535 1.062520 1.062520 \n", + "\n", + " max volume \n", + "from \n", + "2023-03-03 18:13:30 1.062700 64 \n", + "2023-03-03 18:13:45 1.062650 43 \n", + "2023-03-03 18:14:00 1.062665 47 \n", + "2023-03-03 18:14:15 1.062645 43 \n", + "2023-03-03 18:14:30 1.062580 59 " + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.tail()" ] }, + { + "cell_type": "code", + "execution_count": 20, + "id": "f861fab2-f1b1-49dd-b758-12d10aef3462", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 25.8 s, sys: 408 ms, total: 26.2 s\n", + "Wall time: 1min 36s\n" + ] + } + ], + "source": [] + }, { "cell_type": "code", "execution_count": null,