Browse Source

end kdb functions, time speed tables init

master
flashlan 3 years ago
parent
commit
a2a3011f6b
  1. 452
      compareDBs.ipynb

452
compareDBs.ipynb

@ -47,7 +47,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 1, "execution_count": 11,
"id": "ab6c6c81-6ac1-4668-a79b-a9a0341fb35a", "id": "ab6c6c81-6ac1-4668-a79b-a9a0341fb35a",
"metadata": { "metadata": {
"tags": [] "tags": []
@ -59,7 +59,7 @@
"False" "False"
] ]
}, },
"execution_count": 1, "execution_count": 11,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -71,14 +71,17 @@
"from datetime import datetime\n", "from datetime import datetime\n",
"\n", "\n",
"import duckdb\n", "import duckdb\n",
"\n",
"# from influxdb_client import InfluxDBClient\n",
"import influxdb_client\n",
"import matplotlib.pyplot as plt\n", "import matplotlib.pyplot as plt\n",
"import numpy as np\n", "import numpy as np\n",
"import pandas as pd\n", "import pandas as pd\n",
"import pdmongo as pdm\n", "import pdmongo as pdm\n",
"from clickhouse_driver import Client\n", "from clickhouse_driver import Client\n",
"from dotenv import load_dotenv\n", "from dotenv import load_dotenv\n",
"from influxdb_client import InfluxDBClient\n", "\n",
"from influxdb_client.client.write_api import SYNCHRONOUS\n", "# from influxdb_client.client.write_api import SYNCHRONOUS\n",
"from minio import Minio\n", "from minio import Minio\n",
"from pymongo import MongoClient\n", "from pymongo import MongoClient\n",
"from pytz import timezone\n", "from pytz import timezone\n",
@ -139,10 +142,128 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 4,
"id": "3634a4ec-04c2-4f1e-8659-5d22eb17a254", "id": "3634a4ec-04c2-4f1e-8659-5d22eb17a254",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>from</th>\n",
" <th>at</th>\n",
" <th>to</th>\n",
" <th>open</th>\n",
" <th>close</th>\n",
" <th>min</th>\n",
" <th>max</th>\n",
" <th>volume</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>999995</th>\n",
" <td>7984748</td>\n",
" <td>2023-03-03 18:13:30</td>\n",
" <td>1677867225000000000</td>\n",
" <td>2023-03-03 18:13:45</td>\n",
" <td>1.062695</td>\n",
" <td>1.062635</td>\n",
" <td>1.062630</td>\n",
" <td>1.062700</td>\n",
" <td>64</td>\n",
" </tr>\n",
" <tr>\n",
" <th>999996</th>\n",
" <td>7984749</td>\n",
" <td>2023-03-03 18:13:45</td>\n",
" <td>1677867240000000000</td>\n",
" <td>2023-03-03 18:14:00</td>\n",
" <td>1.062645</td>\n",
" <td>1.062650</td>\n",
" <td>1.062625</td>\n",
" <td>1.062650</td>\n",
" <td>43</td>\n",
" </tr>\n",
" <tr>\n",
" <th>999997</th>\n",
" <td>7984750</td>\n",
" <td>2023-03-03 18:14:00</td>\n",
" <td>1677867255000000000</td>\n",
" <td>2023-03-03 18:14:15</td>\n",
" <td>1.062640</td>\n",
" <td>1.062625</td>\n",
" <td>1.062620</td>\n",
" <td>1.062665</td>\n",
" <td>47</td>\n",
" </tr>\n",
" <tr>\n",
" <th>999998</th>\n",
" <td>7984751</td>\n",
" <td>2023-03-03 18:14:15</td>\n",
" <td>1677867270000000000</td>\n",
" <td>2023-03-03 18:14:30</td>\n",
" <td>1.062625</td>\n",
" <td>1.062535</td>\n",
" <td>1.062535</td>\n",
" <td>1.062645</td>\n",
" <td>43</td>\n",
" </tr>\n",
" <tr>\n",
" <th>999999</th>\n",
" <td>7984752</td>\n",
" <td>2023-03-03 18:14:30</td>\n",
" <td>1677867285000000000</td>\n",
" <td>2023-03-03 18:14:45</td>\n",
" <td>1.062535</td>\n",
" <td>1.062520</td>\n",
" <td>1.062520</td>\n",
" <td>1.062580</td>\n",
" <td>59</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id from at \\\n",
"999995 7984748 2023-03-03 18:13:30 1677867225000000000 \n",
"999996 7984749 2023-03-03 18:13:45 1677867240000000000 \n",
"999997 7984750 2023-03-03 18:14:00 1677867255000000000 \n",
"999998 7984751 2023-03-03 18:14:15 1677867270000000000 \n",
"999999 7984752 2023-03-03 18:14:30 1677867285000000000 \n",
"\n",
" to open close min max volume \n",
"999995 2023-03-03 18:13:45 1.062695 1.062635 1.062630 1.062700 64 \n",
"999996 2023-03-03 18:14:00 1.062645 1.062650 1.062625 1.062650 43 \n",
"999997 2023-03-03 18:14:15 1.062640 1.062625 1.062620 1.062665 47 \n",
"999998 2023-03-03 18:14:30 1.062625 1.062535 1.062535 1.062645 43 \n",
"999999 2023-03-03 18:14:45 1.062535 1.062520 1.062520 1.062580 59 "
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"# %%time\n", "# %%time\n",
"# Load Dataset\n", "# Load Dataset\n",
@ -486,7 +607,6 @@
"cell_type": "markdown", "cell_type": "markdown",
"id": "1d389546-911f-43f7-aad1-49f7bcc83503", "id": "1d389546-911f-43f7-aad1-49f7bcc83503",
"metadata": { "metadata": {
"jp-MarkdownHeadingCollapsed": true,
"tags": [] "tags": []
}, },
"source": [ "source": [
@ -495,7 +615,23 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 122, "execution_count": 33,
"id": "ecd217ab-0e16-40a6-9b92-9212b9bb20e9",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"query = \"\"\"\n",
"from(bucket: \"EURUSDtest\")\n",
"|> range(start:2023-03-03T18:14:30Z, stop: now())\n",
"|> filter(fn: (r) => r._measurement == \"id\")\n",
"|> pivot(rowKey:[\"_time\"], columnKey: [\"_field\"], valueColumn: \"_value\")\"\"\""
]
},
{
"cell_type": "code",
"execution_count": 34,
"id": "c3e7ebfd-76f1-4ac4-9833-312eb1a531af", "id": "c3e7ebfd-76f1-4ac4-9833-312eb1a531af",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -582,22 +718,6 @@
"print(influxdb_write_execution_time)" "print(influxdb_write_execution_time)"
] ]
}, },
{
"cell_type": "code",
"execution_count": 113,
"id": "ecd217ab-0e16-40a6-9b92-9212b9bb20e9",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"query = \"\"\"\n",
"from(bucket: \"EURUSDtest\")\n",
"|> range(start:2023-03-03T18:14:30Z, stop: now())\n",
"|> filter(fn: (r) => r._measurement == \"id\")\n",
"|> pivot(rowKey:[\"_time\"], columnKey: [\"_field\"], valueColumn: \"_value\")\"\"\""
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 120, "execution_count": 120,
@ -1507,7 +1627,6 @@
"cell_type": "markdown", "cell_type": "markdown",
"id": "97405e42-61dc-42c7-8220-237a312c0ec7", "id": "97405e42-61dc-42c7-8220-237a312c0ec7",
"metadata": { "metadata": {
"jp-MarkdownHeadingCollapsed": true,
"tags": [] "tags": []
}, },
"source": [ "source": [
@ -1521,6 +1640,9 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"# https://duckdb.org/2022/07/27/art-storage.html\n",
"\n",
"\n",
"def duckdbConnect():\n", "def duckdbConnect():\n",
" cursor = duckdb.connect()\n", " cursor = duckdb.connect()\n",
" return cursor\n", " return cursor\n",
@ -1763,12 +1885,13 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 25,
"id": "bbd217e3-695f-4fa6-ae42-83db1dde8311", "id": "bbd217e3-695f-4fa6-ae42-83db1dde8311",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"# functions\n", "# functions\n",
"# cd ~ && q/l64/q -p 5001\n",
"\n", "\n",
"\n", "\n",
"def kdbConnect():\n", "def kdbConnect():\n",
@ -1793,52 +1916,60 @@
"\n", "\n",
"def kdbRead():\n", "def kdbRead():\n",
" q = kdbConnect()\n", " q = kdbConnect()\n",
" df2 = q.sendSync(\"tab2: get `:/home/sandman/q/tab1\")\n", " # df2 = q.sendSync(\"tab2: get `:/home/sandman/q/tab1\")\n",
" df2 = q.sendSync(\"tab2\")\n", " # df2 = q.sendSync(\"tab2\")\n",
" df = pd.DataFrame(q(\"t\")) # , pandas=True))\n", " df = pd.DataFrame(q(\"t\")) # , pandas=True))\n",
" df3 = q.sendSync(\"select from t\")\n", " # df3 = q.sendSync(\"select from t\")\n",
" # ver todos esses loads\n", " # ver todos esses loads\n",
" q.close()" " q.close()\n",
] " return 0"
},
{
"cell_type": "code",
"execution_count": null,
"id": "dc239236-bb47-4bcb-8e50-ac900852cd47",
"metadata": {},
"outputs": [],
"source": [
"# load"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 28,
"id": "67f0c26e-44fb-40b0-a147-5d97bfbbded2", "id": "67f0c26e-44fb-40b0-a147-5d97bfbbded2",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"# write" "# write\n",
"start = timeit.default_timer()\n",
"dfKdb = kdbWrite()\n",
"stop = timeit.default_timer()\n",
"kdb_write_execution_time = stop - start"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 29,
"id": "dcb200be-ffc9-4bcc-8554-5740fb420ab5", "id": "dcb200be-ffc9-4bcc-8554-5740fb420ab5",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2.8739770100000896\n"
]
}
],
"source": [ "source": [
"# print write time" "# print write time\n",
"print(kdb_write_execution_time)"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 30,
"id": "d4ce0203-b0c7-440b-a3ca-d7b2a7682474", "id": "d4ce0203-b0c7-440b-a3ca-d7b2a7682474",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"# read" "# read\n",
"start = timeit.default_timer()\n",
"dfKdb = kdbRead()\n",
"stop = timeit.default_timer()\n",
"kdb_read_execution_time = stop - start"
] ]
}, },
{ {
@ -1846,150 +1977,107 @@
"execution_count": null, "execution_count": null,
"id": "1a16fd76-2158-40fe-9285-c53791f8ed51", "id": "1a16fd76-2158-40fe-9285-c53791f8ed51",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [
"source": [ {
"# print read time" "name": "stdout",
] "output_type": "stream",
}, "text": [
{ "4.153738381999574\n"
"cell_type": "code", ]
"execution_count": 32, }
"id": "8ff6c090-7e02-435a-a179-f2aab81da972", ],
"metadata": {},
"outputs": [],
"source": [
"# read csv\n",
"data = pd.read_csv(\"out.csv\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b4eb8ab9-81e8-4732-8cf7-51f0981d3d57",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# open connection\n",
"q = qconnection.QConnection(host=\"localhost\", port=5001)\n",
"q.open()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "97cb6b5b-65a5-46a0-a4ee-e5c535a716ab",
"metadata": {},
"outputs": [],
"source": [
"%%time\n",
"# send df to kbd+ in memory bank\n",
"q.sendSync(\"{t::x}\", data)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c2ed2d51-bc8e-4207-892a-35fc55d43570",
"metadata": {},
"outputs": [],
"source": [
"# write to on disk table\n",
"q.sendSync(\"`:/home/sandman/q/tab1 set t\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9c055a95-f73f-43a3-8fbd-61e42235117e",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"%%time\n",
"# read from on disk table\n",
"df2 = q.sendSync(\"tab2: get `:/home/sandman/q/tab1\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9760de38-9f04-4322-bfff-c7ee12d5dee5",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# print(df2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c06c9222-c69d-4872-9d21-052281a013e2",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"%%time\n",
"# load to variable df2\n",
"df2 = q.sendSync(\"tab2\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8815f01c-fd0a-4f94-ab7f-f8ede84ba4e7",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# df2(type)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e6ed3927-4395-45cd-9a28-88c5db01f2e5",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"%%time\n",
"# converto to dataframe\n",
"df = pd.DataFrame(q(\"t\")) # , pandas=True))\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0fc7f16b-6c39-4ebe-88d2-ff857e30ab62",
"metadata": {
"tags": []
},
"outputs": [],
"source": [ "source": [
"%%time\n", "# print read time\n",
"# select\n", "print(kdb_read_execution_time)"
"df3 = q.sendSync(\"select from t\")"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 46,
"id": "c88646ca-3d25-4a85-80b5-f9e559f568dd", "id": "3a09558c-73e6-4324-9fc5-782fcd0d12e5",
"metadata": { "metadata": {
"tags": [] "tags": []
}, },
"outputs": [], "outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Write Time</th>\n",
" <th>Read Time</th>\n",
" <th>Total Time</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Kdb+</th>\n",
" <td>2.87 sec</td>\n",
" <td>4.15 sec</td>\n",
" <td>7.03 sec</td>\n",
" </tr>\n",
" <tr>\n",
" <th>r2</th>\n",
" <td>fill</td>\n",
" <td>15</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>r3</th>\n",
" <td>fill</td>\n",
" <td>14</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Write Time Read Time Total Time\n",
"Kdb+ 2.87 sec 4.15 sec 7.03 sec\n",
"r2 fill 15 0\n",
"r3 fill 14 0"
]
},
"execution_count": 46,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"q.close()" "s = \" sec\"\n",
"data = [\n",
" [\n",
" \"{:.2f}\".format(kdb_write_execution_time) + s,\n",
" \"{:.2f}\".format(kdb_read_execution_time) + s,\n",
" \"{:.2f}\".format(kdb_write_execution_time + kdb_read_execution_time) + s,\n",
" ],\n",
" [\"fill\", 15, 0],\n",
" [\"fill\", 14, 0],\n",
"]\n",
"\n",
"index_labels=['Kdb+','r2','r3']\n",
"# Create the pandas DataFrame\n",
"df = pd.DataFrame(data, columns=[\"Write Time\", \"Read Time\", \"Total Time\"],index=index_labels)\n",
"\n",
"# print dataframe.\n",
"df"
] ]
}, },
{ {

Loading…
Cancel
Save