diff --git a/compareDBs.ipynb b/compareDBs.ipynb index 6d2d5a0..e1c6322 100644 --- a/compareDBs.ipynb +++ b/compareDBs.ipynb @@ -47,7 +47,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 11, "id": "ab6c6c81-6ac1-4668-a79b-a9a0341fb35a", "metadata": { "tags": [] @@ -59,7 +59,7 @@ "False" ] }, - "execution_count": 1, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -71,14 +71,17 @@ "from datetime import datetime\n", "\n", "import duckdb\n", + "\n", + "# from influxdb_client import InfluxDBClient\n", + "import influxdb_client\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas as pd\n", "import pdmongo as pdm\n", "from clickhouse_driver import Client\n", "from dotenv import load_dotenv\n", - "from influxdb_client import InfluxDBClient\n", - "from influxdb_client.client.write_api import SYNCHRONOUS\n", + "\n", + "# from influxdb_client.client.write_api import SYNCHRONOUS\n", "from minio import Minio\n", "from pymongo import MongoClient\n", "from pytz import timezone\n", @@ -139,10 +142,128 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "3634a4ec-04c2-4f1e-8659-5d22eb17a254", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idfromattoopencloseminmaxvolume
99999579847482023-03-03 18:13:3016778672250000000002023-03-03 18:13:451.0626951.0626351.0626301.06270064
99999679847492023-03-03 18:13:4516778672400000000002023-03-03 18:14:001.0626451.0626501.0626251.06265043
99999779847502023-03-03 18:14:0016778672550000000002023-03-03 18:14:151.0626401.0626251.0626201.06266547
99999879847512023-03-03 18:14:1516778672700000000002023-03-03 18:14:301.0626251.0625351.0625351.06264543
99999979847522023-03-03 18:14:3016778672850000000002023-03-03 18:14:451.0625351.0625201.0625201.06258059
\n", + "
" + ], + "text/plain": [ + " id from at \\\n", + "999995 7984748 2023-03-03 18:13:30 1677867225000000000 \n", + "999996 7984749 2023-03-03 18:13:45 1677867240000000000 \n", + "999997 7984750 2023-03-03 18:14:00 1677867255000000000 \n", + "999998 7984751 2023-03-03 18:14:15 1677867270000000000 \n", + "999999 7984752 2023-03-03 18:14:30 1677867285000000000 \n", + "\n", + " to open close min max volume \n", + "999995 2023-03-03 18:13:45 1.062695 1.062635 1.062630 1.062700 64 \n", + "999996 2023-03-03 18:14:00 1.062645 1.062650 1.062625 1.062650 43 \n", + "999997 2023-03-03 18:14:15 1.062640 1.062625 1.062620 1.062665 47 \n", + "999998 2023-03-03 18:14:30 1.062625 1.062535 1.062535 1.062645 43 \n", + "999999 2023-03-03 18:14:45 1.062535 1.062520 1.062520 1.062580 59 " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# %%time\n", "# Load Dataset\n", @@ -486,7 +607,6 @@ "cell_type": "markdown", "id": "1d389546-911f-43f7-aad1-49f7bcc83503", "metadata": { - "jp-MarkdownHeadingCollapsed": true, "tags": [] }, "source": [ @@ -495,7 +615,23 @@ }, { "cell_type": "code", - "execution_count": 122, + "execution_count": 33, + "id": "ecd217ab-0e16-40a6-9b92-9212b9bb20e9", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "query = \"\"\"\n", + "from(bucket: \"EURUSDtest\")\n", + "|> range(start:2023-03-03T18:14:30Z, stop: now())\n", + "|> filter(fn: (r) => r._measurement == \"id\")\n", + "|> pivot(rowKey:[\"_time\"], columnKey: [\"_field\"], valueColumn: \"_value\")\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 34, "id": "c3e7ebfd-76f1-4ac4-9833-312eb1a531af", "metadata": {}, "outputs": [], @@ -582,22 +718,6 @@ "print(influxdb_write_execution_time)" ] }, - { - "cell_type": "code", - "execution_count": 113, - "id": "ecd217ab-0e16-40a6-9b92-9212b9bb20e9", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "query = \"\"\"\n", - "from(bucket: \"EURUSDtest\")\n", - "|> range(start:2023-03-03T18:14:30Z, stop: now())\n", - "|> filter(fn: (r) => r._measurement == \"id\")\n", - "|> pivot(rowKey:[\"_time\"], columnKey: [\"_field\"], valueColumn: \"_value\")\"\"\"" - ] - }, { "cell_type": "code", "execution_count": 120, @@ -1507,7 +1627,6 @@ "cell_type": "markdown", "id": "97405e42-61dc-42c7-8220-237a312c0ec7", "metadata": { - "jp-MarkdownHeadingCollapsed": true, "tags": [] }, "source": [ @@ -1521,6 +1640,9 @@ "metadata": {}, "outputs": [], "source": [ + "# https://duckdb.org/2022/07/27/art-storage.html\n", + "\n", + "\n", "def duckdbConnect():\n", " cursor = duckdb.connect()\n", " return cursor\n", @@ -1763,12 +1885,13 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 25, "id": "bbd217e3-695f-4fa6-ae42-83db1dde8311", "metadata": {}, "outputs": [], "source": [ "# functions\n", + "# cd ~ && q/l64/q -p 5001\n", "\n", "\n", "def kdbConnect():\n", @@ -1793,52 +1916,60 @@ "\n", "def kdbRead():\n", " q = kdbConnect()\n", - " df2 = q.sendSync(\"tab2: get `:/home/sandman/q/tab1\")\n", - " df2 = q.sendSync(\"tab2\")\n", + " # df2 = q.sendSync(\"tab2: get `:/home/sandman/q/tab1\")\n", + " # df2 = q.sendSync(\"tab2\")\n", " df = pd.DataFrame(q(\"t\")) # , pandas=True))\n", - " df3 = q.sendSync(\"select from t\")\n", + " # df3 = q.sendSync(\"select from t\")\n", " # ver todos esses loads\n", - " q.close()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "dc239236-bb47-4bcb-8e50-ac900852cd47", - "metadata": {}, - "outputs": [], - "source": [ - "# load" + " q.close()\n", + " return 0" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 28, "id": "67f0c26e-44fb-40b0-a147-5d97bfbbded2", "metadata": {}, "outputs": [], "source": [ - "# write" + "# write\n", + "start = timeit.default_timer()\n", + "dfKdb = kdbWrite()\n", + "stop = timeit.default_timer()\n", + "kdb_write_execution_time = stop - start" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 29, "id": "dcb200be-ffc9-4bcc-8554-5740fb420ab5", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2.8739770100000896\n" + ] + } + ], "source": [ - "# print write time" + "# print write time\n", + "print(kdb_write_execution_time)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 30, "id": "d4ce0203-b0c7-440b-a3ca-d7b2a7682474", "metadata": {}, "outputs": [], "source": [ - "# read" + "# read\n", + "start = timeit.default_timer()\n", + "dfKdb = kdbRead()\n", + "stop = timeit.default_timer()\n", + "kdb_read_execution_time = stop - start" ] }, { @@ -1846,150 +1977,107 @@ "execution_count": null, "id": "1a16fd76-2158-40fe-9285-c53791f8ed51", "metadata": {}, - "outputs": [], - "source": [ - "# print read time" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "id": "8ff6c090-7e02-435a-a179-f2aab81da972", - "metadata": {}, - "outputs": [], - "source": [ - "# read csv\n", - "data = pd.read_csv(\"out.csv\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b4eb8ab9-81e8-4732-8cf7-51f0981d3d57", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# open connection\n", - "q = qconnection.QConnection(host=\"localhost\", port=5001)\n", - "q.open()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "97cb6b5b-65a5-46a0-a4ee-e5c535a716ab", - "metadata": {}, - "outputs": [], - "source": [ - "%%time\n", - "# send df to kbd+ in memory bank\n", - "q.sendSync(\"{t::x}\", data)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c2ed2d51-bc8e-4207-892a-35fc55d43570", - "metadata": {}, - "outputs": [], - "source": [ - "# write to on disk table\n", - "q.sendSync(\"`:/home/sandman/q/tab1 set t\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9c055a95-f73f-43a3-8fbd-61e42235117e", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "%%time\n", - "# read from on disk table\n", - "df2 = q.sendSync(\"tab2: get `:/home/sandman/q/tab1\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9760de38-9f04-4322-bfff-c7ee12d5dee5", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# print(df2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c06c9222-c69d-4872-9d21-052281a013e2", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "%%time\n", - "# load to variable df2\n", - "df2 = q.sendSync(\"tab2\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8815f01c-fd0a-4f94-ab7f-f8ede84ba4e7", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# df2(type)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e6ed3927-4395-45cd-9a28-88c5db01f2e5", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "%%time\n", - "# converto to dataframe\n", - "df = pd.DataFrame(q(\"t\")) # , pandas=True))\n", - "df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0fc7f16b-6c39-4ebe-88d2-ff857e30ab62", - "metadata": { - "tags": [] - }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "4.153738381999574\n" + ] + } + ], "source": [ - "%%time\n", - "# select\n", - "df3 = q.sendSync(\"select from t\")" + "# print read time\n", + "print(kdb_read_execution_time)" ] }, { "cell_type": "code", - "execution_count": null, - "id": "c88646ca-3d25-4a85-80b5-f9e559f568dd", + "execution_count": 46, + "id": "3a09558c-73e6-4324-9fc5-782fcd0d12e5", "metadata": { "tags": [] }, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Write TimeRead TimeTotal Time
Kdb+2.87 sec4.15 sec7.03 sec
r2fill150
r3fill140
\n", + "
" + ], + "text/plain": [ + " Write Time Read Time Total Time\n", + "Kdb+ 2.87 sec 4.15 sec 7.03 sec\n", + "r2 fill 15 0\n", + "r3 fill 14 0" + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "q.close()" + "s = \" sec\"\n", + "data = [\n", + " [\n", + " \"{:.2f}\".format(kdb_write_execution_time) + s,\n", + " \"{:.2f}\".format(kdb_read_execution_time) + s,\n", + " \"{:.2f}\".format(kdb_write_execution_time + kdb_read_execution_time) + s,\n", + " ],\n", + " [\"fill\", 15, 0],\n", + " [\"fill\", 14, 0],\n", + "]\n", + "\n", + "index_labels=['Kdb+','r2','r3']\n", + "# Create the pandas DataFrame\n", + "df = pd.DataFrame(data, columns=[\"Write Time\", \"Read Time\", \"Total Time\"],index=index_labels)\n", + "\n", + "# print dataframe.\n", + "df" ] }, {