From 0f0ee40e70431a95ef03829831f262c1daee3f27 Mon Sep 17 00:00:00 2001 From: flashlan Date: Wed, 21 Jun 2023 21:33:38 -0300 Subject: [PATCH] duckdb Functions --- compareDBs.ipynb | 286 ++++++++++++++++++++++------------------------- 1 file changed, 135 insertions(+), 151 deletions(-) diff --git a/compareDBs.ipynb b/compareDBs.ipynb index 28b8ab6..4af5b69 100644 --- a/compareDBs.ipynb +++ b/compareDBs.ipynb @@ -90,7 +90,7 @@ }, { "cell_type": "code", - "execution_count": 55, + "execution_count": 2, "id": "55c3cd57-0996-4723-beb5-8f3196c96009", "metadata": { "tags": [] @@ -104,7 +104,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 3, "id": "968403e3-2e5e-4834-b969-be4600e2963a", "metadata": { "tags": [] @@ -139,128 +139,10 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "3634a4ec-04c2-4f1e-8659-5d22eb17a254", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idfromattoopencloseminmaxvolume
99999579847482023-03-03 18:13:3016778672250000000002023-03-03 18:13:451.0626951.0626351.0626301.06270064
99999679847492023-03-03 18:13:4516778672400000000002023-03-03 18:14:001.0626451.0626501.0626251.06265043
99999779847502023-03-03 18:14:0016778672550000000002023-03-03 18:14:151.0626401.0626251.0626201.06266547
99999879847512023-03-03 18:14:1516778672700000000002023-03-03 18:14:301.0626251.0625351.0625351.06264543
99999979847522023-03-03 18:14:3016778672850000000002023-03-03 18:14:451.0625351.0625201.0625201.06258059
\n", - "
" - ], - "text/plain": [ - " id from at \\\n", - "999995 7984748 2023-03-03 18:13:30 1677867225000000000 \n", - "999996 7984749 2023-03-03 18:13:45 1677867240000000000 \n", - "999997 7984750 2023-03-03 18:14:00 1677867255000000000 \n", - "999998 7984751 2023-03-03 18:14:15 1677867270000000000 \n", - "999999 7984752 2023-03-03 18:14:30 1677867285000000000 \n", - "\n", - " to open close min max volume \n", - "999995 2023-03-03 18:13:45 1.062695 1.062635 1.062630 1.062700 64 \n", - "999996 2023-03-03 18:14:00 1.062645 1.062650 1.062625 1.062650 43 \n", - "999997 2023-03-03 18:14:15 1.062640 1.062625 1.062620 1.062665 47 \n", - "999998 2023-03-03 18:14:30 1.062625 1.062535 1.062535 1.062645 43 \n", - "999999 2023-03-03 18:14:45 1.062535 1.062520 1.062520 1.062580 59 " - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# %%time\n", "# Load Dataset\n", @@ -1486,6 +1368,7 @@ "cell_type": "markdown", "id": "50d1fc58-89a7-4507-aff0-6e943656cfe0", "metadata": { + "jp-MarkdownHeadingCollapsed": true, "tags": [] }, "source": [ @@ -1632,49 +1515,82 @@ }, { "cell_type": "code", - "execution_count": 80, + "execution_count": 61, "id": "bbcdb883-d6dc-46db-88db-4c90b84522ba", "metadata": {}, "outputs": [], "source": [ "def duckdbConnect():\n", " cursor = duckdb.connect()\n", - " print(cursor.execute(\"SELECT 42\").fetchall())\n", - " return cursor" + " return cursor\n", + "\n", + "\n", + "def duckdbLoadCsv(csvFile=\"out.csv\"):\n", + " data = pd.read_csv(\"out.csv\")\n", + " return data\n", + "\n", + "\n", + "# write\n", + "def duckdbWrite(file, dataframe, tableName):\n", + " conn = duckdbConnect()\n", + " conn = duckdb.connect(file)\n", + " # conn.execute(\"DROP TABLE EURUSDtest\")\n", + " # conn.sql('CREATE TABLE EURUSDtest(i INTEGER)')\n", + " conn.register(\"tempTable\", dataframe)\n", + " conn.execute(\"CREATE TABLE {} AS SELECT * FROM tempTable\".format(tableName))\n", + " conn.close()\n", + " return 0\n", + "\n", + "\n", + "def duckdbRead():\n", + " conn = duckdbConnect()\n", + " conn = duckdb.connect(file)\n", + " conn.execute(\"SELECT * FROM EURUSD\").fetchdf()\n", + " conn.close()\n", + " return 0" ] }, { "cell_type": "code", - "execution_count": 81, - "id": "35025a6e-9dc7-46cf-a792-76b3d84f1ac0", + "execution_count": 60, + "id": "1c787f48-5640-4eb5-9456-be8f0a8211eb", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "data = duckdbLoadCsv()" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "id": "f07f03d0-021e-4dc3-bfa8-efc029a9797a", "metadata": { "tags": [] }, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 13.6 ms, sys: 151 µs, total: 13.7 ms\n", - "Wall time: 12.7 ms\n" + "ename": "CatalogException", + "evalue": "Catalog Error: Table with name EURUSDtest does not exist!\nDid you mean \"temp.information_schema.tables\"?", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mCatalogException\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[68], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m conn \u001b[38;5;241m=\u001b[39m duckdbConnect()\n\u001b[0;32m----> 2\u001b[0m \u001b[43mconn\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexecute\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mDROP TABLE EURUSDtest\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n", + "\u001b[0;31mCatalogException\u001b[0m: Catalog Error: Table with name EURUSDtest does not exist!\nDid you mean \"temp.information_schema.tables\"?" ] } ], "source": [ - "%%time\n", - "conn = duckdb.connect()\n", - "\n", - "\n", - "# in memory???\n", - "def duckdbWrite():\n", - " data = pd.read_csv(\"out.csv\")\n", - " conn.register(\"EURUSDtest\", data)" + "conn = duckdbConnect()\n", + "conn.execute(\"DROP TABLE EURUSDtest\")" ] }, { "cell_type": "code", - "execution_count": 82, - "id": "c6abdaaa-3ac2-425b-9208-d6cb79afe966", + "execution_count": 71, + "id": "9eb19431-fbad-43b4-84f7-0043e65de162", "metadata": { "tags": [] }, @@ -1704,14 +1620,27 @@ " \n", " \n", " \n", + " \n", + " 0\n", + " EURUSD\n", + " \n", + " \n", + " 1\n", + " EURUSDtest\n", + " \n", + " \n", + " 2\n", + " test\n", + " \n", " \n", "\n", "" ], "text/plain": [ - "Empty DataFrame\n", - "Columns: [name]\n", - "Index: []" + " name\n", + "0 EURUSD\n", + "1 EURUSDtest\n", + "2 test" ] }, "metadata": {}, @@ -1719,13 +1648,15 @@ } ], "source": [ + "conn = duckdb.connect(\"file.db\")\n", + "# conn.sql(\"CREATE TABLE EURUSDtest(i INTEGER)\")\n", "display(conn.execute(\"SHOW TABLES\").df())" ] }, { "cell_type": "code", - "execution_count": 76, - "id": "2acce0f3-f0b2-47d0-8e0d-f9e9687efc18", + "execution_count": 63, + "id": "32037939-def0-48e7-9a47-46bcf1a5883c", "metadata": { "tags": [] }, @@ -1737,17 +1668,70 @@ "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mCatalogException\u001b[0m Traceback (most recent call last)", - "File \u001b[0;32m:1\u001b[0m\n", + "Cell \u001b[0;32mIn[63], line 4\u001b[0m\n\u001b[1;32m 2\u001b[0m conn \u001b[38;5;241m=\u001b[39m duckdb\u001b[38;5;241m.\u001b[39mconnect(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfile.db\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 3\u001b[0m conn \u001b[38;5;241m=\u001b[39m duckdbConnect()\n\u001b[0;32m----> 4\u001b[0m \u001b[43mconn\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexecute\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mSELECT * FROM EURUSDtest\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 5\u001b[0m conn\u001b[38;5;241m.\u001b[39mfetchall()\n", "\u001b[0;31mCatalogException\u001b[0m: Catalog Error: Table with name EURUSDtest does not exist!\nDid you mean \"temp.information_schema.tables\"?\nLINE 1: SELECT * FROM EURUSDtest\n ^" ] } ], "source": [ - "%%time\n", - "df = conn.execute(\"SELECT * FROM EURUSDtest\").df()\n", - "df" + "# conn = duckdbConnect()\n", + "conn.execute(\"SELECT * FROM EURUSDtest\")\n", + "conn.fetchall()" ] }, + { + "cell_type": "code", + "execution_count": 62, + "id": "c6f53d67-684b-4b34-a573-472986ee3e47", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "ename": "CatalogException", + "evalue": "Catalog Error: Table with name \"EURUSDtest\" already exists!", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mCatalogException\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[62], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m start \u001b[38;5;241m=\u001b[39m timeit\u001b[38;5;241m.\u001b[39mdefault_timer()\n\u001b[0;32m----> 2\u001b[0m dfDuckdb \u001b[38;5;241m=\u001b[39m \u001b[43mduckdbWrite\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mfile.db\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdbname\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3\u001b[0m stop \u001b[38;5;241m=\u001b[39m timeit\u001b[38;5;241m.\u001b[39mdefault_timer()\n\u001b[1;32m 4\u001b[0m duckdb_write_execution_time \u001b[38;5;241m=\u001b[39m stop \u001b[38;5;241m-\u001b[39m start\n", + "Cell \u001b[0;32mIn[61], line 18\u001b[0m, in \u001b[0;36mduckdbWrite\u001b[0;34m(file, dataframe, tableName)\u001b[0m\n\u001b[1;32m 15\u001b[0m \u001b[38;5;66;03m# conn.execute(\"DROP TABLE EURUSDtest\")\u001b[39;00m\n\u001b[1;32m 16\u001b[0m \u001b[38;5;66;03m# conn.sql('CREATE TABLE EURUSDtest(i INTEGER)')\u001b[39;00m\n\u001b[1;32m 17\u001b[0m conn\u001b[38;5;241m.\u001b[39mregister(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtempTable\u001b[39m\u001b[38;5;124m\"\u001b[39m, dataframe)\n\u001b[0;32m---> 18\u001b[0m \u001b[43mconn\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexecute\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mCREATE TABLE \u001b[39;49m\u001b[38;5;132;43;01m{}\u001b[39;49;00m\u001b[38;5;124;43m AS SELECT * FROM tempTable\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mformat\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtableName\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 19\u001b[0m conn\u001b[38;5;241m.\u001b[39mclose()\n\u001b[1;32m 20\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;241m0\u001b[39m\n", + "\u001b[0;31mCatalogException\u001b[0m: Catalog Error: Table with name \"EURUSDtest\" already exists!" + ] + } + ], + "source": [ + "start = timeit.default_timer()\n", + "dfDuckdb = duckdbWrite(\"file.db\", data, dbname)\n", + "stop = timeit.default_timer()\n", + "duckdb_write_execution_time = stop - start" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f630fc1a-0d52-4e3a-9dfe-1ec60d188033", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "start = timeit.default_timer()\n", + "dfDuckdb = duckdbRead()\n", + "stop = timeit.default_timer()\n", + "duckdb_read_execution_time = stop - start" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c6abdaaa-3ac2-425b-9208-d6cb79afe966", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [] + }, { "cell_type": "markdown", "id": "4409cc89-ed14-4313-ac89-65b826038533",