Browse Source

mongodb and duckdb functions

master
flashlan 3 years ago
parent
commit
300c5acd0e
  1. 288
      compareDBs.ipynb

288
compareDBs.ipynb

@ -602,6 +602,7 @@
"cell_type": "markdown",
"id": "1d389546-911f-43f7-aad1-49f7bcc83503",
"metadata": {
"jp-MarkdownHeadingCollapsed": true,
"tags": []
},
"source": [
@ -610,7 +611,7 @@
},
{
"cell_type": "code",
"execution_count": 95,
"execution_count": 122,
"id": "c3e7ebfd-76f1-4ac4-9833-312eb1a531af",
"metadata": {},
"outputs": [],
@ -710,28 +711,195 @@
"from(bucket: \"EURUSDtest\")\n",
"|> range(start:2023-03-03T18:14:30Z, stop: now())\n",
"|> filter(fn: (r) => r._measurement == \"id\")\n",
"|> pivot(rowKey:[\"_time\"], columnKey: [\"_field\"], valueColumn: \"_value\")\"\"\"\n",
"# |> filter(fn: (r) => r._field == \"volume\")\n",
"# |> filter(fn: (r) => r.cpu == \"cpu-total\")"
"|> pivot(rowKey:[\"_time\"], columnKey: [\"_field\"], valueColumn: \"_value\")\"\"\""
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 120,
"id": "850c6921-5e1c-417a-bea6-ea18be642008",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>result</th>\n",
" <th>table</th>\n",
" <th>_start</th>\n",
" <th>_stop</th>\n",
" <th>_time</th>\n",
" <th>_measurement</th>\n",
" <th>volume</th>\n",
" <th>Unnamed: 0</th>\n",
" <th>at</th>\n",
" <th>close</th>\n",
" <th>id</th>\n",
" <th>max</th>\n",
" <th>min</th>\n",
" <th>open</th>\n",
" <th>to</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>_result</td>\n",
" <td>0</td>\n",
" <td>2023-03-03 18:14:30+00:00</td>\n",
" <td>2023-06-17 02:47:50.721233+00:00</td>\n",
" <td>2023-03-05 22:01:00+00:00</td>\n",
" <td>id</td>\n",
" <td>0</td>\n",
" <td>115589</td>\n",
" <td>1678053675000000000</td>\n",
" <td>1.063425</td>\n",
" <td>7985654</td>\n",
" <td>1.063425</td>\n",
" <td>1.063425</td>\n",
" <td>1.063425</td>\n",
" <td>2023-03-05 22:01:15</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>_result</td>\n",
" <td>0</td>\n",
" <td>2023-03-03 18:14:30+00:00</td>\n",
" <td>2023-06-17 02:47:50.721233+00:00</td>\n",
" <td>2023-03-05 23:58:30+00:00</td>\n",
" <td>id</td>\n",
" <td>0</td>\n",
" <td>116059</td>\n",
" <td>1678060725000000000</td>\n",
" <td>1.062595</td>\n",
" <td>7986124</td>\n",
" <td>1.062595</td>\n",
" <td>1.062595</td>\n",
" <td>1.062595</td>\n",
" <td>2023-03-05 23:58:45</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>_result</td>\n",
" <td>0</td>\n",
" <td>2023-03-03 18:14:30+00:00</td>\n",
" <td>2023-06-17 02:47:50.721233+00:00</td>\n",
" <td>2023-03-06 23:58:30+00:00</td>\n",
" <td>id</td>\n",
" <td>0</td>\n",
" <td>121819</td>\n",
" <td>1678147125000000000</td>\n",
" <td>1.068615</td>\n",
" <td>7991884</td>\n",
" <td>1.068615</td>\n",
" <td>1.068615</td>\n",
" <td>1.068615</td>\n",
" <td>2023-03-06 23:58:45</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>_result</td>\n",
" <td>0</td>\n",
" <td>2023-03-03 18:14:30+00:00</td>\n",
" <td>2023-06-17 02:47:50.721233+00:00</td>\n",
" <td>2023-03-06 23:59:30+00:00</td>\n",
" <td>id</td>\n",
" <td>0</td>\n",
" <td>121823</td>\n",
" <td>1678147185000000000</td>\n",
" <td>1.068605</td>\n",
" <td>7991888</td>\n",
" <td>1.068605</td>\n",
" <td>1.068605</td>\n",
" <td>1.068605</td>\n",
" <td>2023-03-06 23:59:45</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>_result</td>\n",
" <td>0</td>\n",
" <td>2023-03-03 18:14:30+00:00</td>\n",
" <td>2023-06-17 02:47:50.721233+00:00</td>\n",
" <td>2023-03-08 23:59:00+00:00</td>\n",
" <td>id</td>\n",
" <td>0</td>\n",
" <td>182493</td>\n",
" <td>1678319955000000000</td>\n",
" <td>1.054895</td>\n",
" <td>8003406</td>\n",
" <td>1.054895</td>\n",
" <td>1.054895</td>\n",
" <td>1.054895</td>\n",
" <td>2023-03-08 23:59:15</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" result table _start _stop \\\n",
"0 _result 0 2023-03-03 18:14:30+00:00 2023-06-17 02:47:50.721233+00:00 \n",
"1 _result 0 2023-03-03 18:14:30+00:00 2023-06-17 02:47:50.721233+00:00 \n",
"2 _result 0 2023-03-03 18:14:30+00:00 2023-06-17 02:47:50.721233+00:00 \n",
"3 _result 0 2023-03-03 18:14:30+00:00 2023-06-17 02:47:50.721233+00:00 \n",
"4 _result 0 2023-03-03 18:14:30+00:00 2023-06-17 02:47:50.721233+00:00 \n",
"\n",
" _time _measurement volume Unnamed: 0 \\\n",
"0 2023-03-05 22:01:00+00:00 id 0 115589 \n",
"1 2023-03-05 23:58:30+00:00 id 0 116059 \n",
"2 2023-03-06 23:58:30+00:00 id 0 121819 \n",
"3 2023-03-06 23:59:30+00:00 id 0 121823 \n",
"4 2023-03-08 23:59:00+00:00 id 0 182493 \n",
"\n",
" at close id max min open \\\n",
"0 1678053675000000000 1.063425 7985654 1.063425 1.063425 1.063425 \n",
"1 1678060725000000000 1.062595 7986124 1.062595 1.062595 1.062595 \n",
"2 1678147125000000000 1.068615 7991884 1.068615 1.068615 1.068615 \n",
"3 1678147185000000000 1.068605 7991888 1.068605 1.068605 1.068605 \n",
"4 1678319955000000000 1.054895 8003406 1.054895 1.054895 1.054895 \n",
"\n",
" to \n",
"0 2023-03-05 22:01:15 \n",
"1 2023-03-05 23:58:45 \n",
"2 2023-03-06 23:58:45 \n",
"3 2023-03-06 23:59:45 \n",
"4 2023-03-08 23:59:15 "
]
},
"metadata": {},
"outputs": [],
"output_type": "display_data"
}
],
"source": [
"# read from db and benchmark time\n",
"start = timeit.default_timer()\n",
"dfIdr = cHouseQueryDf(dbname)\n",
"influxdRead()\n",
"stop = timeit.default_timer()\n",
"influxdb_read_execution_time = stop - start"
]
},
{
"cell_type": "code",
"execution_count": 118,
"execution_count": 121,
"id": "3ee3c0dd-cb70-4124-a0fb-db8dd2c134c0",
"metadata": {
"tags": []
@ -741,7 +909,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"183.94615754100232\n"
"181.9998163249984\n"
]
}
],
@ -905,6 +1073,7 @@
"cell_type": "markdown",
"id": "b9ddfdc6-c899-4f6c-9b4e-8ec6ab6d7e05",
"metadata": {
"jp-MarkdownHeadingCollapsed": true,
"tags": []
},
"source": [
@ -1047,6 +1216,7 @@
"cell_type": "markdown",
"id": "f9e0393d-7d1d-406a-a068-9dbf4968e977",
"metadata": {
"jp-MarkdownHeadingCollapsed": true,
"tags": []
},
"source": [
@ -1322,63 +1492,84 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 123,
"id": "d104d9af-fa34-4261-8478-329a28ee4f2e",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# Load csv dataset\n",
"data = pd.read_csv(\"out.csv\")"
"def mongoLoadCsv(csvfile):\n",
"    \"\"\"Read the CSV file at `csvfile` into a pandas DataFrame and return it.\"\"\"\n",
"    # Honour the caller-supplied path instead of a hard-coded \"out.csv\".\n",
"    return pd.read_csv(csvfile)\n",
"\n",
"\n",
"def mongoConnect():\n",
"    \"\"\"Create and return a MongoClient for the EURUSDtest connection URI.\n",
"\n",
"    The URI is filled in from the notebook-level MongoUser, MongoKey and\n",
"    MongoUrl values, and the client is configured with authSource=\"admin\".\n",
"    \"\"\"\n",
"    uri_template = \"mongodb://{}:{}@{}/EURUSDtest?retryWrites=true&w=majority\"\n",
"    uri = uri_template.format(MongoUser, MongoKey, MongoUrl)\n",
"    return MongoClient(uri, authSource=\"admin\")\n",
"\n",
"\n",
"def mongoWriteDict():\n",
"    \"\"\"Insert every row of the notebook-level `data` DataFrame into MongoDB.\n",
"\n",
"    Rows are converted to one dict per record and written to the \"finance\"\n",
"    collection of the \"EUROUSDtest\" database.\n",
"\n",
"    Returns the list of record dicts that was passed to insert_many.\n",
"    \"\"\"\n",
"    client = mongoConnect()\n",
"    try:\n",
"        # NOTE(review): \"EUROUSDtest\" differs from the \"EURUSDtest\" used in the\n",
"        # connection URI; kept as-is because existing data may live under this name.\n",
"        collection = client[\"EUROUSDtest\"][\"finance\"]\n",
"        data_dict = data.to_dict(\"records\")\n",
"        # Insert before returning, so the write benchmark really includes the write.\n",
"        # insert_many rejects an empty list, so skip the call when there are no rows.\n",
"        if data_dict:\n",
"            collection.insert_many(data_dict)\n",
"        return data_dict\n",
"    finally:\n",
"        # Always release the connection, even if the insert fails.\n",
"        client.close()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0af8f72c-5b58-4dfc-af36-c5b4bc79f127",
"execution_count": 127,
"id": "739de6aa-313f-4ccd-96c8-fa22d0cc687e",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# Connect to MongoDB\n",
"client = MongoClient(\n",
" # \"mongodb://192.168.1.133:27017\"\n",
" \"mongodb://{}:{}@{}/EURUSDtest?retryWrites=true&w=majority\".format(\n",
" MongoUser, MongoKey, MongoUrl\n",
" ),\n",
" authSource=\"admin\",\n",
")"
"data = mongoLoadCsv(\"out.csv\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f1b20d15-f5af-463c-813f-ffae61119de1",
"execution_count": 128,
"id": "0af8f72c-5b58-4dfc-af36-c5b4bc79f127",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"db = client[\"EUROUSDtest\"]\n",
"collection = db[\"finance\"]\n",
"# data.reset_index(inplace=True)\n",
"data_dict = data.to_dict(\"records\")"
"start = timeit.default_timer()\n",
"dfCh = mongoWriteDict()\n",
"stop = timeit.default_timer()\n",
"mongo_write_execution_time = stop - start"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "70674d23-f375-4659-87ec-c745dec96d54",
"execution_count": 129,
"id": "0757f14c-4677-41d3-90d8-63b884e24e7e",
"metadata": {
"tags": []
},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"6.021722518999013\n"
]
}
],
"source": [
"%%time\n",
"# Insert collection\n",
"collection.insert_many(data_dict)"
"print(mongo_write_execution_time)"
]
},
{
@ -1397,7 +1588,6 @@
"cell_type": "markdown",
"id": "97405e42-61dc-42c7-8220-237a312c0ec7",
"metadata": {
"jp-MarkdownHeadingCollapsed": true,
"tags": []
},
"source": [
@ -1406,28 +1596,42 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 130,
"id": "bbcdb883-d6dc-46db-88db-4c90b84522ba",
"metadata": {},
"outputs": [],
"source": [
"cursor = duckdb.connect()\n",
"print(cursor.execute(\"SELECT 42\").fetchall())"
"def duckdbConnect():\n",
"    \"\"\"Open a DuckDB connection, print the rows of a `SELECT 42` smoke query, and return it.\"\"\"\n",
"    connection = duckdb.connect()\n",
"    smoke_rows = connection.execute(\"SELECT 42\").fetchall()\n",
"    print(smoke_rows)\n",
"    return connection"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 131,
"id": "35025a6e-9dc7-46cf-a792-76b3d84f1ac0",
"metadata": {
"tags": []
},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 3.66 ms, sys: 4 ms, total: 7.66 ms\n",
"Wall time: 7.27 ms\n"
]
}
],
"source": [
"%%time\n",
"conn = duckdb.connect()\n",
"data = pd.read_csv(\"out.csv\")\n",
"conn.register(\"EURUSDtest\", data)"
"\n",
"# NOTE: this cell only defines duckdbWrite, so a cell-level %%time measures the\n",
"# definition, not the load; time the call itself to benchmark the write.\n",
"def duckdbWrite(conn=None, csvfile=\"out.csv\"):\n",
"    \"\"\"Load `csvfile` with pandas and register it in DuckDB as \"EURUSDtest\".\n",
"\n",
"    conn: DuckDB connection to register on. When omitted, a notebook-level\n",
"        `conn` is used if one exists; otherwise a new connection is opened\n",
"        with duckdb.connect(), which is an in-memory database when called\n",
"        without arguments.\n",
"    csvfile: path of the CSV file to load (defaults to \"out.csv\").\n",
"\n",
"    Returns the connection the DataFrame was registered on, so callers can\n",
"    query \"EURUSDtest\" through it.\n",
"    \"\"\"\n",
"    if conn is None:\n",
"        # Keep working with an existing global `conn`, but do not require one.\n",
"        conn = globals().get(\"conn\")\n",
"        if conn is None:\n",
"            conn = duckdb.connect()\n",
"    data = pd.read_csv(csvfile)\n",
"    conn.register(\"EURUSDtest\", data)\n",
"    return conn"
]
},
{

Loading…
Cancel
Save