From 300c5acd0e4fd00771d48b440fe1e1fbd54d87a4 Mon Sep 17 00:00:00 2001 From: flashlan Date: Sat, 17 Jun 2023 00:25:04 -0300 Subject: [PATCH] mongodb and duckdb functions --- compareDBs.ipynb | 290 ++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 247 insertions(+), 43 deletions(-) diff --git a/compareDBs.ipynb b/compareDBs.ipynb index 8cb822a..fb3f8f3 100644 --- a/compareDBs.ipynb +++ b/compareDBs.ipynb @@ -602,6 +602,7 @@ "cell_type": "markdown", "id": "1d389546-911f-43f7-aad1-49f7bcc83503", "metadata": { + "jp-MarkdownHeadingCollapsed": true, "tags": [] }, "source": [ @@ -610,7 +611,7 @@ }, { "cell_type": "code", - "execution_count": 95, + "execution_count": 122, "id": "c3e7ebfd-76f1-4ac4-9833-312eb1a531af", "metadata": {}, "outputs": [], @@ -710,28 +711,195 @@ "from(bucket: \"EURUSDtest\")\n", "|> range(start:2023-03-03T18:14:30Z, stop: now())\n", "|> filter(fn: (r) => r._measurement == \"id\")\n", - "|> pivot(rowKey:[\"_time\"], columnKey: [\"_field\"], valueColumn: \"_value\")\"\"\"\n", - "# |> filter(fn: (r) => r._field == \"volume\")\n", - "# |> filter(fn: (r) => r.cpu == \"cpu-total\")" + "|> pivot(rowKey:[\"_time\"], columnKey: [\"_field\"], valueColumn: \"_value\")\"\"\"" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 120, "id": "850c6921-5e1c-417a-bea6-ea18be642008", - "metadata": {}, - "outputs": [], + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
resulttable_start_stop_time_measurementvolumeUnnamed: 0atcloseidmaxminopento
0_result02023-03-03 18:14:30+00:002023-06-17 02:47:50.721233+00:002023-03-05 22:01:00+00:00id011558916780536750000000001.06342579856541.0634251.0634251.0634252023-03-05 22:01:15
1_result02023-03-03 18:14:30+00:002023-06-17 02:47:50.721233+00:002023-03-05 23:58:30+00:00id011605916780607250000000001.06259579861241.0625951.0625951.0625952023-03-05 23:58:45
2_result02023-03-03 18:14:30+00:002023-06-17 02:47:50.721233+00:002023-03-06 23:58:30+00:00id012181916781471250000000001.06861579918841.0686151.0686151.0686152023-03-06 23:58:45
3_result02023-03-03 18:14:30+00:002023-06-17 02:47:50.721233+00:002023-03-06 23:59:30+00:00id012182316781471850000000001.06860579918881.0686051.0686051.0686052023-03-06 23:59:45
4_result02023-03-03 18:14:30+00:002023-06-17 02:47:50.721233+00:002023-03-08 23:59:00+00:00id018249316783199550000000001.05489580034061.0548951.0548951.0548952023-03-08 23:59:15
\n", + "
" + ], + "text/plain": [ + " result table _start _stop \\\n", + "0 _result 0 2023-03-03 18:14:30+00:00 2023-06-17 02:47:50.721233+00:00 \n", + "1 _result 0 2023-03-03 18:14:30+00:00 2023-06-17 02:47:50.721233+00:00 \n", + "2 _result 0 2023-03-03 18:14:30+00:00 2023-06-17 02:47:50.721233+00:00 \n", + "3 _result 0 2023-03-03 18:14:30+00:00 2023-06-17 02:47:50.721233+00:00 \n", + "4 _result 0 2023-03-03 18:14:30+00:00 2023-06-17 02:47:50.721233+00:00 \n", + "\n", + " _time _measurement volume Unnamed: 0 \\\n", + "0 2023-03-05 22:01:00+00:00 id 0 115589 \n", + "1 2023-03-05 23:58:30+00:00 id 0 116059 \n", + "2 2023-03-06 23:58:30+00:00 id 0 121819 \n", + "3 2023-03-06 23:59:30+00:00 id 0 121823 \n", + "4 2023-03-08 23:59:00+00:00 id 0 182493 \n", + "\n", + " at close id max min open \\\n", + "0 1678053675000000000 1.063425 7985654 1.063425 1.063425 1.063425 \n", + "1 1678060725000000000 1.062595 7986124 1.062595 1.062595 1.062595 \n", + "2 1678147125000000000 1.068615 7991884 1.068615 1.068615 1.068615 \n", + "3 1678147185000000000 1.068605 7991888 1.068605 1.068605 1.068605 \n", + "4 1678319955000000000 1.054895 8003406 1.054895 1.054895 1.054895 \n", + "\n", + " to \n", + "0 2023-03-05 22:01:15 \n", + "1 2023-03-05 23:58:45 \n", + "2 2023-03-06 23:58:45 \n", + "3 2023-03-06 23:59:45 \n", + "4 2023-03-08 23:59:15 " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "# read from db and benchmark time\n", "start = timeit.default_timer()\n", - "dfIdr = cHouseQueryDf(dbname)\n", + "influxdRead()\n", "stop = timeit.default_timer()\n", "influxdb_read_execution_time = stop - start" ] }, { "cell_type": "code", - "execution_count": 118, + "execution_count": 121, "id": "3ee3c0dd-cb70-4124-a0fb-db8dd2c134c0", "metadata": { "tags": [] @@ -741,7 +909,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "183.94615754100232\n" + "181.9998163249984\n" ] } ], @@ -905,6 +1073,7 @@ "cell_type": "markdown", "id": "b9ddfdc6-c899-4f6c-9b4e-8ec6ab6d7e05", "metadata": { + "jp-MarkdownHeadingCollapsed": true, "tags": [] }, "source": [ @@ -1047,6 +1216,7 @@ "cell_type": "markdown", "id": "f9e0393d-7d1d-406a-a068-9dbf4968e977", "metadata": { + "jp-MarkdownHeadingCollapsed": true, "tags": [] }, "source": [ @@ -1322,63 +1492,84 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 123, "id": "d104d9af-fa34-4261-8478-329a28ee4f2e", "metadata": { "tags": [] }, "outputs": [], "source": [ - "# Load csv dataset\n", - "data = pd.read_csv(\"out.csv\")" + "def mongoLoadCsv(csvfile):\n", + " data = pd.read_csv(\"out.csv\")\n", + " return data\n", + "\n", + "\n", + "def mongoConnect():\n", + " client = MongoClient(\n", + " \"mongodb://{}:{}@{}/EURUSDtest?retryWrites=true&w=majority\".format(\n", + " MongoUser, MongoKey, MongoUrl\n", + " ),\n", + " authSource=\"admin\",\n", + " )\n", + " return client\n", + "\n", + "\n", + "def mongoWriteDict():\n", + " client = mongoConnect()\n", + " db = client[\"EUROUSDtest\"]\n", + " collection = db[\"finance\"]\n", + " # data.reset_index(inplace=True)\n", + " data_dict = data.to_dict(\"records\")\n", + " return data_dict\n", + " collection.insert_many(data_dict)\n", + " return 0" ] }, { "cell_type": "code", - "execution_count": null, - "id": "0af8f72c-5b58-4dfc-af36-c5b4bc79f127", + "execution_count": 127, + "id": "739de6aa-313f-4ccd-96c8-fa22d0cc687e", "metadata": { "tags": [] }, "outputs": [], "source": [ - "# Connect to MongoDB\n", - "client = MongoClient(\n", - " # \"mongodb://192.168.1.133:27017\"\n", - " \"mongodb://{}:{}@{}/EURUSDtest?retryWrites=true&w=majority\".format(\n", - " MongoUser, MongoKey, MongoUrl\n", - " ),\n", - " authSource=\"admin\",\n", - ")" + "data = mongoLoadCsv(\"out.csv\")" ] }, { "cell_type": "code", - "execution_count": null, - "id": "f1b20d15-f5af-463c-813f-ffae61119de1", + "execution_count": 128, + "id": "0af8f72c-5b58-4dfc-af36-c5b4bc79f127", "metadata": { "tags": [] }, "outputs": [], "source": [ - "db = client[\"EUROUSDtest\"]\n", - "collection = db[\"finance\"]\n", - "# data.reset_index(inplace=True)\n", - "data_dict = data.to_dict(\"records\")" + "start = timeit.default_timer()\n", + "dfCh = mongoWriteDict()\n", + "stop = timeit.default_timer()\n", + "mongo_write_execution_time = stop - start" ] }, { "cell_type": "code", - "execution_count": null, - "id": "70674d23-f375-4659-87ec-c745dec96d54", + "execution_count": 129, + "id": "0757f14c-4677-41d3-90d8-63b884e24e7e", "metadata": { "tags": [] }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "6.021722518999013\n" + ] + } + ], "source": [ - "%%time\n", - "# Insert collection\n", - "collection.insert_many(data_dict)" + "print(mongo_write_execution_time)" ] }, { @@ -1397,7 +1588,6 @@ "cell_type": "markdown", "id": "97405e42-61dc-42c7-8220-237a312c0ec7", "metadata": { - "jp-MarkdownHeadingCollapsed": true, "tags": [] }, "source": [ @@ -1406,28 +1596,42 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 130, "id": "bbcdb883-d6dc-46db-88db-4c90b84522ba", "metadata": {}, "outputs": [], "source": [ - "cursor = duckdb.connect()\n", - "print(cursor.execute(\"SELECT 42\").fetchall())" + "def duckdbConnect():\n", + " cursor = duckdb.connect()\n", + " print(cursor.execute(\"SELECT 42\").fetchall())\n", + " return cursor" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 131, "id": "35025a6e-9dc7-46cf-a792-76b3d84f1ac0", "metadata": { "tags": [] }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 3.66 ms, sys: 4 ms, total: 7.66 ms\n", + "Wall time: 7.27 ms\n" + ] + } + ], "source": [ "%%time\n", "conn = duckdb.connect()\n", - "data = pd.read_csv(\"out.csv\")\n", - "conn.register(\"EURUSDtest\", data)" + "\n", + "# in memory???\n", + "def duckdbWrite():\n", + " data = pd.read_csv(\"out.csv\")\n", + " conn.register(\"EURUSDtest\", data)" ] }, {