From 86404adc46ae967f7e65d0c4c8d69e730d32db36 Mon Sep 17 00:00:00 2001 From: flashlan Date: Wed, 21 Jun 2023 23:28:22 -0300 Subject: [PATCH] finish dusckdb Functions code --- compareDBs.ipynb | 132 +++++++++++++++++++++++++---------------------- 1 file changed, 69 insertions(+), 63 deletions(-) diff --git a/compareDBs.ipynb b/compareDBs.ipynb index 4af5b69..6a00a33 100644 --- a/compareDBs.ipynb +++ b/compareDBs.ipynb @@ -1368,7 +1368,6 @@ "cell_type": "markdown", "id": "50d1fc58-89a7-4507-aff0-6e943656cfe0", "metadata": { - "jp-MarkdownHeadingCollapsed": true, "tags": [] }, "source": [ @@ -1515,7 +1514,7 @@ }, { "cell_type": "code", - "execution_count": 61, + "execution_count": 85, "id": "bbcdb883-d6dc-46db-88db-4c90b84522ba", "metadata": {}, "outputs": [], @@ -1542,17 +1541,17 @@ " return 0\n", "\n", "\n", - "def duckdbRead():\n", + "def duckdbRead(dbfile):\n", " conn = duckdbConnect()\n", - " conn = duckdb.connect(file)\n", - " conn.execute(\"SELECT * FROM EURUSD\").fetchdf()\n", + " conn = duckdb.connect(dbfile)\n", + " conn.execute(\"SELECT * FROM {}\".format(dbname)).fetchdf()\n", " conn.close()\n", " return 0" ] }, { "cell_type": "code", - "execution_count": 60, + "execution_count": 73, "id": "1c787f48-5640-4eb5-9456-be8f0a8211eb", "metadata": { "tags": [] @@ -1564,32 +1563,43 @@ }, { "cell_type": "code", - "execution_count": 68, + "execution_count": null, "id": "f07f03d0-021e-4dc3-bfa8-efc029a9797a", "metadata": { "tags": [] }, + "outputs": [], + "source": [ + "conn = duckdbConnect()\n", + "conn.execute(\"DROP TABLE EURUSD\")" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "id": "45620be8-34c6-4a3a-89f7-09337f665c90", + "metadata": { + "tags": [] + }, "outputs": [ { - "ename": "CatalogException", - "evalue": "Catalog Error: Table with name EURUSDtest does not exist!\nDid you mean \"temp.information_schema.tables\"?", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mCatalogException\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[68], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m conn \u001b[38;5;241m=\u001b[39m duckdbConnect()\n\u001b[0;32m----> 2\u001b[0m \u001b[43mconn\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexecute\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mDROP TABLE EURUSDtest\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n", - "\u001b[0;31mCatalogException\u001b[0m: Catalog Error: Table with name EURUSDtest does not exist!\nDid you mean \"temp.information_schema.tables\"?" - ] + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 74, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "conn = duckdbConnect()\n", - "conn.execute(\"DROP TABLE EURUSDtest\")" + "conn.register(\"EURUSDtest\", data)" ] }, { "cell_type": "code", - "execution_count": 71, + "execution_count": 84, "id": "9eb19431-fbad-43b4-84f7-0043e65de162", "metadata": { "tags": [] @@ -1622,25 +1632,15 @@ " \n", " \n", " 0\n", - " EURUSD\n", - " \n", - " \n", - " 1\n", " EURUSDtest\n", " \n", - " \n", - " 2\n", - " test\n", - " \n", " \n", "\n", "" ], "text/plain": [ " name\n", - "0 EURUSD\n", - "1 EURUSDtest\n", - "2 test" + "0 EURUSDtest" ] }, "metadata": {}, @@ -1649,30 +1649,19 @@ ], "source": [ "conn = duckdb.connect(\"file.db\")\n", + "# conn.execute(\"DROP TABLE EURUSDtest\")\n", "# conn.sql(\"CREATE TABLE EURUSDtest(i INTEGER)\")\n", "display(conn.execute(\"SHOW TABLES\").df())" ] }, { "cell_type": "code", - "execution_count": 63, + "execution_count": null, "id": "32037939-def0-48e7-9a47-46bcf1a5883c", "metadata": { "tags": [] }, - "outputs": [ - { - "ename": "CatalogException", - "evalue": "Catalog Error: Table with name EURUSDtest does not exist!\nDid you mean \"temp.information_schema.tables\"?\nLINE 1: SELECT * FROM EURUSDtest\n ^", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mCatalogException\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[63], line 4\u001b[0m\n\u001b[1;32m 2\u001b[0m conn \u001b[38;5;241m=\u001b[39m duckdb\u001b[38;5;241m.\u001b[39mconnect(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfile.db\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 3\u001b[0m conn \u001b[38;5;241m=\u001b[39m duckdbConnect()\n\u001b[0;32m----> 4\u001b[0m \u001b[43mconn\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexecute\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mSELECT * FROM EURUSDtest\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 5\u001b[0m conn\u001b[38;5;241m.\u001b[39mfetchall()\n", - "\u001b[0;31mCatalogException\u001b[0m: Catalog Error: Table with name EURUSDtest does not exist!\nDid you mean \"temp.information_schema.tables\"?\nLINE 1: SELECT * FROM EURUSDtest\n ^" - ] - } - ], + "outputs": [], "source": [ "# conn = duckdbConnect()\n", "conn.execute(\"SELECT * FROM EURUSDtest\")\n", @@ -1681,35 +1670,42 @@ }, { "cell_type": "code", - "execution_count": 62, + "execution_count": 82, "id": "c6f53d67-684b-4b34-a573-472986ee3e47", "metadata": { "tags": [] }, + "outputs": [], + "source": [ + "start = timeit.default_timer()\n", + "dfDuckdb = duckdbWrite(\"file.db\", data, dbname)\n", + "stop = timeit.default_timer()\n", + "duckdb_write_execution_time = stop - start" + ] + }, + { + "cell_type": "code", + "execution_count": 87, + "id": "102f363a-b35d-433c-8752-7acc85c27bdc", + "metadata": { + "tags": [] + }, "outputs": [ { - "ename": "CatalogException", - "evalue": "Catalog Error: Table with name \"EURUSDtest\" already exists!", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mCatalogException\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[62], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m start \u001b[38;5;241m=\u001b[39m timeit\u001b[38;5;241m.\u001b[39mdefault_timer()\n\u001b[0;32m----> 2\u001b[0m dfDuckdb \u001b[38;5;241m=\u001b[39m \u001b[43mduckdbWrite\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mfile.db\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdbname\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3\u001b[0m stop \u001b[38;5;241m=\u001b[39m timeit\u001b[38;5;241m.\u001b[39mdefault_timer()\n\u001b[1;32m 4\u001b[0m duckdb_write_execution_time \u001b[38;5;241m=\u001b[39m stop \u001b[38;5;241m-\u001b[39m start\n", - "Cell \u001b[0;32mIn[61], line 18\u001b[0m, in \u001b[0;36mduckdbWrite\u001b[0;34m(file, dataframe, tableName)\u001b[0m\n\u001b[1;32m 15\u001b[0m \u001b[38;5;66;03m# conn.execute(\"DROP TABLE EURUSDtest\")\u001b[39;00m\n\u001b[1;32m 16\u001b[0m \u001b[38;5;66;03m# conn.sql('CREATE TABLE EURUSDtest(i INTEGER)')\u001b[39;00m\n\u001b[1;32m 17\u001b[0m conn\u001b[38;5;241m.\u001b[39mregister(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtempTable\u001b[39m\u001b[38;5;124m\"\u001b[39m, dataframe)\n\u001b[0;32m---> 18\u001b[0m \u001b[43mconn\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexecute\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mCREATE TABLE \u001b[39;49m\u001b[38;5;132;43;01m{}\u001b[39;49;00m\u001b[38;5;124;43m AS SELECT * FROM tempTable\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mformat\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtableName\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 19\u001b[0m conn\u001b[38;5;241m.\u001b[39mclose()\n\u001b[1;32m 20\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;241m0\u001b[39m\n", - "\u001b[0;31mCatalogException\u001b[0m: Catalog Error: Table with name \"EURUSDtest\" already exists!" + "name": "stdout", + "output_type": "stream", + "text": [ + "1.7639581979965442\n" ] } ], "source": [ - "start = timeit.default_timer()\n", - "dfDuckdb = duckdbWrite(\"file.db\", data, dbname)\n", - "stop = timeit.default_timer()\n", - "duckdb_write_execution_time = stop - start" + "print(duckdb_write_execution_time)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 86, "id": "f630fc1a-0d52-4e3a-9dfe-1ec60d188033", "metadata": { "tags": [] @@ -1717,20 +1713,30 @@ "outputs": [], "source": [ "start = timeit.default_timer()\n", - "dfDuckdb = duckdbRead()\n", + "dfDuckdb = duckdbRead(\"file.db\")\n", "stop = timeit.default_timer()\n", "duckdb_read_execution_time = stop - start" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 88, "id": "c6abdaaa-3ac2-425b-9208-d6cb79afe966", "metadata": { "tags": [] }, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.5125257010004134\n" + ] + } + ], + "source": [ + "print(duckdb_read_execution_time)" + ] }, { "cell_type": "markdown",