@ -90,7 +90,7 @@
},
{
"cell_type": "code",
"execution_count": 55 ,
"execution_count": 2 ,
"id": "55c3cd57-0996-4723-beb5-8f3196c96009",
"metadata": {
"tags": []
@ -104,7 +104,7 @@
},
{
"cell_type": "code",
"execution_count": 6 ,
"execution_count": 3 ,
"id": "968403e3-2e5e-4834-b969-be4600e2963a",
"metadata": {
"tags": []
@ -139,128 +139,10 @@
},
{
"cell_type": "code",
"execution_count": 2 ,
"execution_count": null ,
"id": "3634a4ec-04c2-4f1e-8659-5d22eb17a254",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>from</th>\n",
" <th>at</th>\n",
" <th>to</th>\n",
" <th>open</th>\n",
" <th>close</th>\n",
" <th>min</th>\n",
" <th>max</th>\n",
" <th>volume</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>999995</th>\n",
" <td>7984748</td>\n",
" <td>2023-03-03 18:13:30</td>\n",
" <td>1677867225000000000</td>\n",
" <td>2023-03-03 18:13:45</td>\n",
" <td>1.062695</td>\n",
" <td>1.062635</td>\n",
" <td>1.062630</td>\n",
" <td>1.062700</td>\n",
" <td>64</td>\n",
" </tr>\n",
" <tr>\n",
" <th>999996</th>\n",
" <td>7984749</td>\n",
" <td>2023-03-03 18:13:45</td>\n",
" <td>1677867240000000000</td>\n",
" <td>2023-03-03 18:14:00</td>\n",
" <td>1.062645</td>\n",
" <td>1.062650</td>\n",
" <td>1.062625</td>\n",
" <td>1.062650</td>\n",
" <td>43</td>\n",
" </tr>\n",
" <tr>\n",
" <th>999997</th>\n",
" <td>7984750</td>\n",
" <td>2023-03-03 18:14:00</td>\n",
" <td>1677867255000000000</td>\n",
" <td>2023-03-03 18:14:15</td>\n",
" <td>1.062640</td>\n",
" <td>1.062625</td>\n",
" <td>1.062620</td>\n",
" <td>1.062665</td>\n",
" <td>47</td>\n",
" </tr>\n",
" <tr>\n",
" <th>999998</th>\n",
" <td>7984751</td>\n",
" <td>2023-03-03 18:14:15</td>\n",
" <td>1677867270000000000</td>\n",
" <td>2023-03-03 18:14:30</td>\n",
" <td>1.062625</td>\n",
" <td>1.062535</td>\n",
" <td>1.062535</td>\n",
" <td>1.062645</td>\n",
" <td>43</td>\n",
" </tr>\n",
" <tr>\n",
" <th>999999</th>\n",
" <td>7984752</td>\n",
" <td>2023-03-03 18:14:30</td>\n",
" <td>1677867285000000000</td>\n",
" <td>2023-03-03 18:14:45</td>\n",
" <td>1.062535</td>\n",
" <td>1.062520</td>\n",
" <td>1.062520</td>\n",
" <td>1.062580</td>\n",
" <td>59</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id from at \\\n",
"999995 7984748 2023-03-03 18:13:30 1677867225000000000 \n",
"999996 7984749 2023-03-03 18:13:45 1677867240000000000 \n",
"999997 7984750 2023-03-03 18:14:00 1677867255000000000 \n",
"999998 7984751 2023-03-03 18:14:15 1677867270000000000 \n",
"999999 7984752 2023-03-03 18:14:30 1677867285000000000 \n",
"\n",
" to open close min max volume \n",
"999995 2023-03-03 18:13:45 1.062695 1.062635 1.062630 1.062700 64 \n",
"999996 2023-03-03 18:14:00 1.062645 1.062650 1.062625 1.062650 43 \n",
"999997 2023-03-03 18:14:15 1.062640 1.062625 1.062620 1.062665 47 \n",
"999998 2023-03-03 18:14:30 1.062625 1.062535 1.062535 1.062645 43 \n",
"999999 2023-03-03 18:14:45 1.062535 1.062520 1.062520 1.062580 59 "
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"# %%time\n",
"# Load Dataset\n",
@ -1486,6 +1368,7 @@
"cell_type": "markdown",
"id": "50d1fc58-89a7-4507-aff0-6e943656cfe0",
"metadata": {
"jp-MarkdownHeadingCollapsed": true,
"tags": []
},
"source": [
@ -1632,49 +1515,82 @@
},
{
"cell_type": "code",
"execution_count": 80 ,
"execution_count": 61 ,
"id": "bbcdb883-d6dc-46db-88db-4c90b84522ba",
"metadata": {},
"outputs": [],
"source": [
"def duckdbConnect():\n",
"    \"\"\"Open a DuckDB connection with default arguments and smoke-test it.\n",
"\n",
"    ``duckdb.connect()`` is called without a database path, so the\n",
"    connection uses DuckDB's default (in-memory) database. ``SELECT 42`` is\n",
"    executed on it and the fetched rows are printed, which means every call\n",
"    writes one line to stdout.\n",
"\n",
"    Returns:\n",
"        The connection object returned by ``duckdb.connect()``.\n",
"    \"\"\"\n",
"    cursor = duckdb.connect()\n",
"    # Sanity check that the connection can run a query.\n",
"    print(cursor.execute(\"SELECT 42\").fetchall())\n",
"    return cursor\n",
"\n",
"\n",
"def duckdbLoadCsv(csvFile=\"out.csv\"):\n",
"    \"\"\"Load a CSV file into a pandas DataFrame.\n",
"\n",
"    Args:\n",
"        csvFile: Path of the CSV file to read. Defaults to \"out.csv\".\n",
"\n",
"    Returns:\n",
"        The DataFrame produced by ``pd.read_csv``.\n",
"    \"\"\"\n",
"    # Honour the argument; the path must not be hard-coded here.\n",
"    return pd.read_csv(csvFile)\n",
"\n",
"\n",
"# write\n",
"def duckdbWrite(file, dataframe, tableName):\n",
"    \"\"\"Store a DataFrame as a table in a DuckDB database file.\n",
"\n",
"    The DataFrame is registered on the connection under the name\n",
"    ``tempTable`` and copied into ``tableName`` with a single\n",
"    ``CREATE OR REPLACE TABLE ... AS SELECT``. An existing table with the\n",
"    same name is replaced, so the call can be repeated without first\n",
"    dropping the table.\n",
"\n",
"    Args:\n",
"        file: Database path passed to ``duckdb.connect``.\n",
"        dataframe: Object registered as ``tempTable`` (the source rows).\n",
"        tableName: Name of the table to create. It is interpolated into the\n",
"            SQL text unquoted, so only pass trusted identifiers.\n",
"\n",
"    Returns:\n",
"        0 once the table has been written.\n",
"    \"\"\"\n",
"    conn = duckdb.connect(file)\n",
"    try:\n",
"        conn.register(\"tempTable\", dataframe)\n",
"        conn.execute(\n",
"            \"CREATE OR REPLACE TABLE {} AS SELECT * FROM tempTable\".format(tableName)\n",
"        )\n",
"    finally:\n",
"        # Release the database file even when the statement fails.\n",
"        conn.close()\n",
"    return 0\n",
"\n",
"\n",
"def duckdbRead(file=\"file.db\", tableName=\"EURUSD\"):\n",
"    \"\"\"Read every row of a table from a DuckDB database file.\n",
"\n",
"    The query result is materialised with ``fetchdf()`` so the read is\n",
"    actually performed, then discarded.\n",
"\n",
"    Args:\n",
"        file: Database path passed to ``duckdb.connect``. Defaults to\n",
"            \"file.db\".\n",
"        tableName: Table to select from. Defaults to \"EURUSD\". It is\n",
"            interpolated into the SQL text unquoted, so only pass trusted\n",
"            identifiers.\n",
"\n",
"    Returns:\n",
"        0 once the read has completed.\n",
"        NOTE(review): the fetched DataFrame is not returned; confirm whether\n",
"        callers expect it before changing the return value.\n",
"    \"\"\"\n",
"    conn = duckdb.connect(file)\n",
"    try:\n",
"        conn.execute(\"SELECT * FROM {}\".format(tableName)).fetchdf()\n",
"    finally:\n",
"        # Release the database file even when the query fails.\n",
"        conn.close()\n",
"    return 0"
]
},
{
"cell_type": "code",
"execution_count": 81,
"id": "35025a6e-9dc7-46cf-a792-76b3d84f1ac0",
"execution_count": 60,
"id": "1c787f48-5640-4eb5-9456-be8f0a8211eb",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"data = duckdbLoadCsv()"
]
},
{
"cell_type": "code",
"execution_count": 68,
"id": "f07f03d0-021e-4dc3-bfa8-efc029a9797a",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 13.6 ms, sys: 151 µs, total: 13.7 ms\n",
"Wall time: 12.7 ms\n"
"ename": "CatalogException",
"evalue": "Catalog Error: Table with name EURUSDtest does not exist!\nDid you mean \"temp.information_schema.tables\"?",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mCatalogException\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[68], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m conn \u001b[38;5;241m=\u001b[39m duckdbConnect()\n\u001b[0;32m----> 2\u001b[0m \u001b[43mconn\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexecute\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mDROP TABLE EURUSDtest\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n",
"\u001b[0;31mCatalogException\u001b[0m: Catalog Error: Table with name EURUSDtest does not exist!\nDid you mean \"temp.information_schema.tables\"?"
]
}
],
"source": [
"%%time\n",
"conn = duckdb.connect()\n",
"\n",
"\n",
"# in memory???\n",
"def duckdbWrite():\n",
" data = pd.read_csv(\"out.csv\")\n",
" conn.register(\"EURUSDtest\", data)"
"# NOTE(review): duckdbConnect() opens a fresh default connection, so this\n",
"# presumably cannot see tables stored in a database file - confirm intent.\n",
"conn = duckdbConnect()\n",
"# IF EXISTS keeps the cell re-runnable when the table is absent.\n",
"conn.execute(\"DROP TABLE IF EXISTS EURUSDtest\")"
]
},
{
"cell_type": "code",
"execution_count": 82 ,
"id": "c6abdaaa-3ac2-425b-9208-d6cb79afe966 ",
"execution_count": 71,
"id": "9eb19431-fbad-43b4-84f7-0043e65de162",
"metadata": {
"tags": []
},
@ -1704,14 +1620,27 @@
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>EURUSD</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>EURUSDtest</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>test</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: [name]\n",
"Index: []"
" name\n",
"0 EURUSD\n",
"1 EURUSDtest\n",
"2 test"
]
},
"metadata": {},
@ -1719,13 +1648,15 @@
}
],
"source": [
"conn = duckdb.connect(\"file.db\")\n",
"# conn.sql(\"CREATE TABLE EURUSDtest(i INTEGER)\")\n",
"display(conn.execute(\"SHOW TABLES\").df())"
]
},
{
"cell_type": "code",
"execution_count": 7 6,
"id": "2acce0f3-f0b2-47d0-8e0d-f9e9687efc18 ",
"execution_count": 63 ,
"id": "32037939-def0-48e7-9a47-46bcf1a5883c ",
"metadata": {
"tags": []
},
@ -1737,17 +1668,70 @@
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mCatalogException\u001b[0m Traceback (most recent call last)",
"File \u001b[0;32m<timed exec>:1\u001b[0m \n",
"Cell \u001b[0;32mIn[63], line 4\u001b[0m\n\u001b[1;32m 2\u001b[0m conn \u001b[38;5;241m=\u001b[39m duckdb\u001b[38;5;241m.\u001b[39mconnect(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfile.db\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 3\u001b[0m conn \u001b[38;5;241m=\u001b[39m duckdbConnect()\n\u001b[0;32m----> 4\u001b[0m \u001b[43mconn\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexecute\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mSELECT * FROM EURUSDtest\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 5\u001b[0m conn\u001b[38;5;241m.\u001b[39mfetchall() \n",
"\u001b[0;31mCatalogException\u001b[0m: Catalog Error: Table with name EURUSDtest does not exist!\nDid you mean \"temp.information_schema.tables\"?\nLINE 1: SELECT * FROM EURUSDtest\n ^"
]
}
],
"source": [
"%%time \n",
"df = conn.execute(\"SELECT * FROM EURUSDtest\").df( )\n",
"df "
"# conn = duckdbConnect() \n",
"conn.execute(\"SELECT * FROM EURUSDtest\")\n",
"conn.fetchall() "
]
},
{
"cell_type": "code",
"execution_count": 62,
"id": "c6f53d67-684b-4b34-a573-472986ee3e47",
"metadata": {
"tags": []
},
"outputs": [
{
"ename": "CatalogException",
"evalue": "Catalog Error: Table with name \"EURUSDtest\" already exists!",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mCatalogException\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[62], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m start \u001b[38;5;241m=\u001b[39m timeit\u001b[38;5;241m.\u001b[39mdefault_timer()\n\u001b[0;32m----> 2\u001b[0m dfDuckdb \u001b[38;5;241m=\u001b[39m \u001b[43mduckdbWrite\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mfile.db\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdbname\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3\u001b[0m stop \u001b[38;5;241m=\u001b[39m timeit\u001b[38;5;241m.\u001b[39mdefault_timer()\n\u001b[1;32m 4\u001b[0m duckdb_write_execution_time \u001b[38;5;241m=\u001b[39m stop \u001b[38;5;241m-\u001b[39m start\n",
"Cell \u001b[0;32mIn[61], line 18\u001b[0m, in \u001b[0;36mduckdbWrite\u001b[0;34m(file, dataframe, tableName)\u001b[0m\n\u001b[1;32m 15\u001b[0m \u001b[38;5;66;03m# conn.execute(\"DROP TABLE EURUSDtest\")\u001b[39;00m\n\u001b[1;32m 16\u001b[0m \u001b[38;5;66;03m# conn.sql('CREATE TABLE EURUSDtest(i INTEGER)')\u001b[39;00m\n\u001b[1;32m 17\u001b[0m conn\u001b[38;5;241m.\u001b[39mregister(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtempTable\u001b[39m\u001b[38;5;124m\"\u001b[39m, dataframe)\n\u001b[0;32m---> 18\u001b[0m \u001b[43mconn\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexecute\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mCREATE TABLE \u001b[39;49m\u001b[38;5;132;43;01m{}\u001b[39;49;00m\u001b[38;5;124;43m AS SELECT * FROM tempTable\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mformat\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtableName\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 19\u001b[0m conn\u001b[38;5;241m.\u001b[39mclose()\n\u001b[1;32m 20\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;241m0\u001b[39m\n",
"\u001b[0;31mCatalogException\u001b[0m: Catalog Error: Table with name \"EURUSDtest\" already exists!"
]
}
],
"source": [
"start = timeit.default_timer()\n",
"dfDuckdb = duckdbWrite(\"file.db\", data, dbname)\n",
"stop = timeit.default_timer()\n",
"duckdb_write_execution_time = stop - start"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f630fc1a-0d52-4e3a-9dfe-1ec60d188033",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"start = timeit.default_timer()\n",
"dfDuckdb = duckdbRead()\n",
"stop = timeit.default_timer()\n",
"duckdb_read_execution_time = stop - start"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c6abdaaa-3ac2-425b-9208-d6cb79afe966",
"metadata": {
"tags": []
},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"id": "4409cc89-ed14-4313-ac89-65b826038533",