diff --git a/compareDBs.ipynb b/compareDBs.ipynb index fb3f8f3..28b8ab6 100644 --- a/compareDBs.ipynb +++ b/compareDBs.ipynb @@ -47,7 +47,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 1, "id": "ab6c6c81-6ac1-4668-a79b-a9a0341fb35a", "metadata": { "tags": [] @@ -59,7 +59,7 @@ "False" ] }, - "execution_count": 34, + "execution_count": 1, "metadata": {}, "output_type": "execute_result" } @@ -74,11 +74,13 @@ "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas as pd\n", + "import pdmongo as pdm\n", "from clickhouse_driver import Client\n", "from dotenv import load_dotenv\n", "from influxdb_client import InfluxDBClient\n", "from influxdb_client.client.write_api import SYNCHRONOUS\n", "from minio import Minio\n", + "from monary import Monary\n", "from pymongo import MongoClient\n", "from pytz import timezone\n", "from sqlalchemy import create_engine\n", @@ -88,7 +90,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 55, "id": "55c3cd57-0996-4723-beb5-8f3196c96009", "metadata": { "tags": [] @@ -96,12 +98,13 @@ "outputs": [], "source": [ "# Variables\n", - "dbname = \"EURUSDtest\"" + "dbname = \"EURUSDtest\"\n", + "collection = \"finance\"" ] }, { "cell_type": "code", - "execution_count": 94, + "execution_count": 6, "id": "968403e3-2e5e-4834-b969-be4600e2963a", "metadata": { "tags": [] @@ -136,7 +139,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 2, "id": "3634a4ec-04c2-4f1e-8659-5d22eb17a254", "metadata": {}, "outputs": [ @@ -253,7 +256,7 @@ "999999 2023-03-03 18:14:45 1.062535 1.062520 1.062520 1.062580 59 " ] }, - "execution_count": 4, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } @@ -306,6 +309,7 @@ "cell_type": "markdown", "id": "4a8d5703-9bc9-4d38-83ff-457159304d58", "metadata": { + "jp-MarkdownHeadingCollapsed": true, "tags": [] }, "source": [ @@ -368,9 +372,7 @@ "\n", "def cHouseQueryDf(databaseName):\n", " client = cHouseConnect()\n", - " dfQuery = client.query_dataframe(\n", - " \"SELECT * FROM default.{}\".format(databaseName)\n", - " ) # LIMIT 10000\n", + " dfQuery = client.query_dataframe(\"SELECT * FROM default.{}\".format(databaseName))\n", " client.disconnect()\n", " return dfQuery\n", "\n", @@ -1492,13 +1494,16 @@ }, { "cell_type": "code", - "execution_count": 123, + "execution_count": 71, "id": "d104d9af-fa34-4261-8478-329a28ee4f2e", "metadata": { "tags": [] }, "outputs": [], "source": [ + "port = \"27017\"\n", + "\n", + "\n", "def mongoLoadCsv(csvfile):\n", " data = pd.read_csv(\"out.csv\")\n", " return data\n", @@ -1514,32 +1519,40 @@ " return client\n", "\n", "\n", - "def mongoWriteDict():\n", + "def mongoWriteDict(dados, dbs, collection):\n", " client = mongoConnect()\n", - " db = client[\"EUROUSDtest\"]\n", - " collection = db[\"finance\"]\n", + " db = client[dbs]\n", + " collection = db[collection]\n", " # data.reset_index(inplace=True)\n", - " data_dict = data.to_dict(\"records\")\n", - " return data_dict\n", + " data_dict = dados.to_dict(\"records\")\n", + " # return data_dict\n", " collection.insert_many(data_dict)\n", - " return 0" + " return 0\n", + "\n", + "\n", + "def mongoRead():\n", + " df = pdm.read_mongo(\n", + " collection, [], \"mongodb://192.168.1.162:27017/{}\".format(dbname)\n", + " )\n", + " return df" ] }, { "cell_type": "code", - "execution_count": 127, + "execution_count": null, "id": "739de6aa-313f-4ccd-96c8-fa22d0cc687e", "metadata": { "tags": [] }, "outputs": [], "source": [ - "data = mongoLoadCsv(\"out.csv\")" + "data = mongoLoadCsv(\"out.csv\")\n", + "data.head()" ] }, { "cell_type": "code", - "execution_count": 128, + "execution_count": 51, "id": "0af8f72c-5b58-4dfc-af36-c5b4bc79f127", "metadata": { "tags": [] @@ -1547,14 +1560,14 @@ "outputs": [], "source": [ "start = timeit.default_timer()\n", - "dfCh = mongoWriteDict()\n", + "dfCh = mongoWriteDict(data, dbname, \"finance\")\n", "stop = timeit.default_timer()\n", "mongo_write_execution_time = stop - start" ] }, { "cell_type": "code", - "execution_count": 129, + "execution_count": 52, "id": "0757f14c-4677-41d3-90d8-63b884e24e7e", "metadata": { "tags": [] @@ -1564,7 +1577,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "6.021722518999013\n" + "46.76343438199547\n" ] } ], @@ -1574,14 +1587,37 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "81a4a33d-5914-45d8-af4e-2b0aabd2ac38", + "execution_count": 72, + "id": "e7922312-74cb-4df3-8dea-e5ee0d99fab7", "metadata": { "tags": [] }, "outputs": [], "source": [ - "# read" + "start = timeit.default_timer()\n", + "dfMongo = mongoRead()\n", + "stop = timeit.default_timer()\n", + "mongo_read_execution_time = stop - start" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "id": "93fb22ea-b283-4447-b774-fe755a782223", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "56.66832709600567\n" + ] + } + ], + "source": [ + "print(mongo_read_execution_time)" ] }, { @@ -1596,7 +1632,7 @@ }, { "cell_type": "code", - "execution_count": 130, + "execution_count": 80, "id": "bbcdb883-d6dc-46db-88db-4c90b84522ba", "metadata": {}, "outputs": [], @@ -1609,7 +1645,7 @@ }, { "cell_type": "code", - "execution_count": 131, + "execution_count": 81, "id": "35025a6e-9dc7-46cf-a792-76b3d84f1ac0", "metadata": { "tags": [] @@ -1619,8 +1655,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 3.66 ms, sys: 4 ms, total: 7.66 ms\n", - "Wall time: 7.27 ms\n" + "CPU times: user 13.6 ms, sys: 151 µs, total: 13.7 ms\n", + "Wall time: 12.7 ms\n" ] } ], @@ -1628,6 +1664,7 @@ "%%time\n", "conn = duckdb.connect()\n", "\n", + "\n", "# in memory???\n", "def duckdbWrite():\n", " data = pd.read_csv(\"out.csv\")\n", @@ -1636,24 +1673,75 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 82, "id": "c6abdaaa-3ac2-425b-9208-d6cb79afe966", "metadata": { "tags": [] }, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
| \n", + " | name | \n", + "
|---|