Browse Source

end kdb functions, time speed tables init

master
flashlan 3 years ago
parent
commit
a2a3011f6b
  1. 452
      compareDBs.ipynb

452
compareDBs.ipynb

@ -47,7 +47,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 11,
"id": "ab6c6c81-6ac1-4668-a79b-a9a0341fb35a",
"metadata": {
"tags": []
@ -59,7 +59,7 @@
"False"
]
},
"execution_count": 1,
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
@ -71,14 +71,17 @@
"from datetime import datetime\n",
"\n",
"import duckdb\n",
"\n",
"# from influxdb_client import InfluxDBClient\n",
"import influxdb_client\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import pandas as pd\n",
"import pdmongo as pdm\n",
"from clickhouse_driver import Client\n",
"from dotenv import load_dotenv\n",
"from influxdb_client import InfluxDBClient\n",
"from influxdb_client.client.write_api import SYNCHRONOUS\n",
"\n",
"# from influxdb_client.client.write_api import SYNCHRONOUS\n",
"from minio import Minio\n",
"from pymongo import MongoClient\n",
"from pytz import timezone\n",
@ -139,10 +142,128 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 4,
"id": "3634a4ec-04c2-4f1e-8659-5d22eb17a254",
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>from</th>\n",
" <th>at</th>\n",
" <th>to</th>\n",
" <th>open</th>\n",
" <th>close</th>\n",
" <th>min</th>\n",
" <th>max</th>\n",
" <th>volume</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>999995</th>\n",
" <td>7984748</td>\n",
" <td>2023-03-03 18:13:30</td>\n",
" <td>1677867225000000000</td>\n",
" <td>2023-03-03 18:13:45</td>\n",
" <td>1.062695</td>\n",
" <td>1.062635</td>\n",
" <td>1.062630</td>\n",
" <td>1.062700</td>\n",
" <td>64</td>\n",
" </tr>\n",
" <tr>\n",
" <th>999996</th>\n",
" <td>7984749</td>\n",
" <td>2023-03-03 18:13:45</td>\n",
" <td>1677867240000000000</td>\n",
" <td>2023-03-03 18:14:00</td>\n",
" <td>1.062645</td>\n",
" <td>1.062650</td>\n",
" <td>1.062625</td>\n",
" <td>1.062650</td>\n",
" <td>43</td>\n",
" </tr>\n",
" <tr>\n",
" <th>999997</th>\n",
" <td>7984750</td>\n",
" <td>2023-03-03 18:14:00</td>\n",
" <td>1677867255000000000</td>\n",
" <td>2023-03-03 18:14:15</td>\n",
" <td>1.062640</td>\n",
" <td>1.062625</td>\n",
" <td>1.062620</td>\n",
" <td>1.062665</td>\n",
" <td>47</td>\n",
" </tr>\n",
" <tr>\n",
" <th>999998</th>\n",
" <td>7984751</td>\n",
" <td>2023-03-03 18:14:15</td>\n",
" <td>1677867270000000000</td>\n",
" <td>2023-03-03 18:14:30</td>\n",
" <td>1.062625</td>\n",
" <td>1.062535</td>\n",
" <td>1.062535</td>\n",
" <td>1.062645</td>\n",
" <td>43</td>\n",
" </tr>\n",
" <tr>\n",
" <th>999999</th>\n",
" <td>7984752</td>\n",
" <td>2023-03-03 18:14:30</td>\n",
" <td>1677867285000000000</td>\n",
" <td>2023-03-03 18:14:45</td>\n",
" <td>1.062535</td>\n",
" <td>1.062520</td>\n",
" <td>1.062520</td>\n",
" <td>1.062580</td>\n",
" <td>59</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id from at \\\n",
"999995 7984748 2023-03-03 18:13:30 1677867225000000000 \n",
"999996 7984749 2023-03-03 18:13:45 1677867240000000000 \n",
"999997 7984750 2023-03-03 18:14:00 1677867255000000000 \n",
"999998 7984751 2023-03-03 18:14:15 1677867270000000000 \n",
"999999 7984752 2023-03-03 18:14:30 1677867285000000000 \n",
"\n",
" to open close min max volume \n",
"999995 2023-03-03 18:13:45 1.062695 1.062635 1.062630 1.062700 64 \n",
"999996 2023-03-03 18:14:00 1.062645 1.062650 1.062625 1.062650 43 \n",
"999997 2023-03-03 18:14:15 1.062640 1.062625 1.062620 1.062665 47 \n",
"999998 2023-03-03 18:14:30 1.062625 1.062535 1.062535 1.062645 43 \n",
"999999 2023-03-03 18:14:45 1.062535 1.062520 1.062520 1.062580 59 "
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# %%time\n",
"# Load Dataset\n",
@ -486,7 +607,6 @@
"cell_type": "markdown",
"id": "1d389546-911f-43f7-aad1-49f7bcc83503",
"metadata": {
"jp-MarkdownHeadingCollapsed": true,
"tags": []
},
"source": [
@ -495,7 +615,23 @@
},
{
"cell_type": "code",
"execution_count": 122,
"execution_count": 33,
"id": "ecd217ab-0e16-40a6-9b92-9212b9bb20e9",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"query = \"\"\"\n",
"from(bucket: \"EURUSDtest\")\n",
"|> range(start:2023-03-03T18:14:30Z, stop: now())\n",
"|> filter(fn: (r) => r._measurement == \"id\")\n",
"|> pivot(rowKey:[\"_time\"], columnKey: [\"_field\"], valueColumn: \"_value\")\"\"\""
]
},
{
"cell_type": "code",
"execution_count": 34,
"id": "c3e7ebfd-76f1-4ac4-9833-312eb1a531af",
"metadata": {},
"outputs": [],
@ -582,22 +718,6 @@
"print(influxdb_write_execution_time)"
]
},
{
"cell_type": "code",
"execution_count": 113,
"id": "ecd217ab-0e16-40a6-9b92-9212b9bb20e9",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"query = \"\"\"\n",
"from(bucket: \"EURUSDtest\")\n",
"|> range(start:2023-03-03T18:14:30Z, stop: now())\n",
"|> filter(fn: (r) => r._measurement == \"id\")\n",
"|> pivot(rowKey:[\"_time\"], columnKey: [\"_field\"], valueColumn: \"_value\")\"\"\""
]
},
{
"cell_type": "code",
"execution_count": 120,
@ -1507,7 +1627,6 @@
"cell_type": "markdown",
"id": "97405e42-61dc-42c7-8220-237a312c0ec7",
"metadata": {
"jp-MarkdownHeadingCollapsed": true,
"tags": []
},
"source": [
@ -1521,6 +1640,9 @@
"metadata": {},
"outputs": [],
"source": [
"# https://duckdb.org/2022/07/27/art-storage.html\n",
"\n",
"\n",
"def duckdbConnect():\n",
" \"\"\"Open a DuckDB connection with duckdb.connect() (default arguments) and return it.\"\"\"\n",
" return duckdb.connect()\n",
@ -1763,12 +1885,13 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 25,
"id": "bbd217e3-695f-4fa6-ae42-83db1dde8311",
"metadata": {},
"outputs": [],
"source": [
"# functions\n",
"# cd ~ && q/l64/q -p 5001\n",
"\n",
"\n",
"def kdbConnect():\n",
@ -1793,52 +1916,60 @@
"\n",
"def kdbRead():\n",
" q = kdbConnect()\n",
" df2 = q.sendSync(\"tab2: get `:/home/sandman/q/tab1\")\n",
" df2 = q.sendSync(\"tab2\")\n",
" # df2 = q.sendSync(\"tab2: get `:/home/sandman/q/tab1\")\n",
" # df2 = q.sendSync(\"tab2\")\n",
" df = pd.DataFrame(q(\"t\")) # , pandas=True))\n",
" df3 = q.sendSync(\"select from t\")\n",
" # df3 = q.sendSync(\"select from t\")\n",
" # review all of these loads\n",
" q.close()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "dc239236-bb47-4bcb-8e50-ac900852cd47",
"metadata": {},
"outputs": [],
"source": [
"# load"
" q.close()\n",
" return 0"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 28,
"id": "67f0c26e-44fb-40b0-a147-5d97bfbbded2",
"metadata": {},
"outputs": [],
"source": [
"# write"
"# write\n",
"start = timeit.default_timer()\n",
"dfKdb = kdbWrite()\n",
"stop = timeit.default_timer()\n",
"kdb_write_execution_time = stop - start"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 29,
"id": "dcb200be-ffc9-4bcc-8554-5740fb420ab5",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2.8739770100000896\n"
]
}
],
"source": [
"# print write time"
"# print write time\n",
"print(kdb_write_execution_time)"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 30,
"id": "d4ce0203-b0c7-440b-a3ca-d7b2a7682474",
"metadata": {},
"outputs": [],
"source": [
"# read"
"# read\n",
"start = timeit.default_timer()\n",
"dfKdb = kdbRead()\n",
"stop = timeit.default_timer()\n",
"kdb_read_execution_time = stop - start"
]
},
{
@ -1846,150 +1977,107 @@
"execution_count": null,
"id": "1a16fd76-2158-40fe-9285-c53791f8ed51",
"metadata": {},
"outputs": [],
"source": [
"# print read time"
]
},
{
"cell_type": "code",
"execution_count": 32,
"id": "8ff6c090-7e02-435a-a179-f2aab81da972",
"metadata": {},
"outputs": [],
"source": [
"# read csv\n",
"data = pd.read_csv(\"out.csv\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b4eb8ab9-81e8-4732-8cf7-51f0981d3d57",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# open connection\n",
"q = qconnection.QConnection(host=\"localhost\", port=5001)\n",
"q.open()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "97cb6b5b-65a5-46a0-a4ee-e5c535a716ab",
"metadata": {},
"outputs": [],
"source": [
"%%time\n",
"# send df to the kdb+ in-memory store\n",
"q.sendSync(\"{t::x}\", data)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c2ed2d51-bc8e-4207-892a-35fc55d43570",
"metadata": {},
"outputs": [],
"source": [
"# write to on disk table\n",
"q.sendSync(\"`:/home/sandman/q/tab1 set t\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9c055a95-f73f-43a3-8fbd-61e42235117e",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"%%time\n",
"# read from on disk table\n",
"df2 = q.sendSync(\"tab2: get `:/home/sandman/q/tab1\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9760de38-9f04-4322-bfff-c7ee12d5dee5",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# print(df2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c06c9222-c69d-4872-9d21-052281a013e2",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"%%time\n",
"# load to variable df2\n",
"df2 = q.sendSync(\"tab2\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8815f01c-fd0a-4f94-ab7f-f8ede84ba4e7",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# df2(type)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e6ed3927-4395-45cd-9a28-88c5db01f2e5",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"%%time\n",
"# convert to dataframe\n",
"df = pd.DataFrame(q(\"t\")) # , pandas=True))\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0fc7f16b-6c39-4ebe-88d2-ff857e30ab62",
"metadata": {
"tags": []
},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"4.153738381999574\n"
]
}
],
"source": [
"%%time\n",
"# select\n",
"df3 = q.sendSync(\"select from t\")"
"# print read time\n",
"print(kdb_read_execution_time)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c88646ca-3d25-4a85-80b5-f9e559f568dd",
"execution_count": 46,
"id": "3a09558c-73e6-4324-9fc5-782fcd0d12e5",
"metadata": {
"tags": []
},
"outputs": [],
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Write Time</th>\n",
" <th>Read Time</th>\n",
" <th>Total Time</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Kdb+</th>\n",
" <td>2.87 sec</td>\n",
" <td>4.15 sec</td>\n",
" <td>7.03 sec</td>\n",
" </tr>\n",
" <tr>\n",
" <th>r2</th>\n",
" <td>fill</td>\n",
" <td>15</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>r3</th>\n",
" <td>fill</td>\n",
" <td>14</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Write Time Read Time Total Time\n",
"Kdb+ 2.87 sec 4.15 sec 7.03 sec\n",
"r2 fill 15 0\n",
"r3 fill 14 0"
]
},
"execution_count": 46,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"q.close()"
"# Timing summary table: the Kdb+ row holds the measured write / read / total times.\n",
"s = \" sec\"\n",
"index_labels = [\"Kdb+\", \"r2\", \"r3\"]\n",
"data = [\n",
"    [\n",
"        f\"{kdb_write_execution_time:.2f}\" + s,\n",
"        f\"{kdb_read_execution_time:.2f}\" + s,\n",
"        f\"{kdb_write_execution_time + kdb_read_execution_time:.2f}\" + s,\n",
"    ],\n",
"    # NOTE(review): r2 / r3 look like placeholder rows - confirm before relying on them\n",
"    [\"fill\", 15, 0],\n",
"    [\"fill\", 14, 0],\n",
"]\n",
"\n",
"# Assemble the labelled DataFrame\n",
"df = pd.DataFrame(\n",
"    data,\n",
"    index=index_labels,\n",
"    columns=[\"Write Time\", \"Read Time\", \"Total Time\"],\n",
")\n",
"\n",
"# Last expression: rendered as the cell output\n",
"df"
]
},
{

Loading…
Cancel
Save