Browse Source

mongo read function

master
flashlan 3 years ago
parent
commit
b16963d18c
  1. 156
      compareDBs.ipynb

156
compareDBs.ipynb

@ -47,7 +47,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 34, "execution_count": 1,
"id": "ab6c6c81-6ac1-4668-a79b-a9a0341fb35a", "id": "ab6c6c81-6ac1-4668-a79b-a9a0341fb35a",
"metadata": { "metadata": {
"tags": [] "tags": []
@ -59,7 +59,7 @@
"False" "False"
] ]
}, },
"execution_count": 34, "execution_count": 1,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -74,11 +74,13 @@
"import matplotlib.pyplot as plt\n", "import matplotlib.pyplot as plt\n",
"import numpy as np\n", "import numpy as np\n",
"import pandas as pd\n", "import pandas as pd\n",
"import pdmongo as pdm\n",
"from clickhouse_driver import Client\n", "from clickhouse_driver import Client\n",
"from dotenv import load_dotenv\n", "from dotenv import load_dotenv\n",
"from influxdb_client import InfluxDBClient\n", "from influxdb_client import InfluxDBClient\n",
"from influxdb_client.client.write_api import SYNCHRONOUS\n", "from influxdb_client.client.write_api import SYNCHRONOUS\n",
"from minio import Minio\n", "from minio import Minio\n",
"from monary import Monary\n",
"from pymongo import MongoClient\n", "from pymongo import MongoClient\n",
"from pytz import timezone\n", "from pytz import timezone\n",
"from sqlalchemy import create_engine\n", "from sqlalchemy import create_engine\n",
@ -88,7 +90,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 2, "execution_count": 55,
"id": "55c3cd57-0996-4723-beb5-8f3196c96009", "id": "55c3cd57-0996-4723-beb5-8f3196c96009",
"metadata": { "metadata": {
"tags": [] "tags": []
@ -96,12 +98,13 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"# Variables\n", "# Variables\n",
"dbname = \"EURUSDtest\"" "dbname = \"EURUSDtest\"\n",
"collection = \"finance\""
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 94, "execution_count": 6,
"id": "968403e3-2e5e-4834-b969-be4600e2963a", "id": "968403e3-2e5e-4834-b969-be4600e2963a",
"metadata": { "metadata": {
"tags": [] "tags": []
@ -136,7 +139,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 4, "execution_count": 2,
"id": "3634a4ec-04c2-4f1e-8659-5d22eb17a254", "id": "3634a4ec-04c2-4f1e-8659-5d22eb17a254",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@ -253,7 +256,7 @@
"999999 2023-03-03 18:14:45 1.062535 1.062520 1.062520 1.062580 59 " "999999 2023-03-03 18:14:45 1.062535 1.062520 1.062520 1.062580 59 "
] ]
}, },
"execution_count": 4, "execution_count": 2,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -306,6 +309,7 @@
"cell_type": "markdown", "cell_type": "markdown",
"id": "4a8d5703-9bc9-4d38-83ff-457159304d58", "id": "4a8d5703-9bc9-4d38-83ff-457159304d58",
"metadata": { "metadata": {
"jp-MarkdownHeadingCollapsed": true,
"tags": [] "tags": []
}, },
"source": [ "source": [
@ -368,9 +372,7 @@
"\n", "\n",
"def cHouseQueryDf(databaseName):\n", "def cHouseQueryDf(databaseName):\n",
" client = cHouseConnect()\n", " client = cHouseConnect()\n",
" dfQuery = client.query_dataframe(\n", " dfQuery = client.query_dataframe(\"SELECT * FROM default.{}\".format(databaseName))\n",
" \"SELECT * FROM default.{}\".format(databaseName)\n",
" ) # LIMIT 10000\n",
" client.disconnect()\n", " client.disconnect()\n",
" return dfQuery\n", " return dfQuery\n",
"\n", "\n",
@ -1492,13 +1494,16 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 123, "execution_count": 71,
"id": "d104d9af-fa34-4261-8478-329a28ee4f2e", "id": "d104d9af-fa34-4261-8478-329a28ee4f2e",
"metadata": { "metadata": {
"tags": [] "tags": []
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"port = \"27017\"\n",
"\n",
"\n",
"def mongoLoadCsv(csvfile):\n", "def mongoLoadCsv(csvfile):\n",
" data = pd.read_csv(\"out.csv\")\n", " data = pd.read_csv(\"out.csv\")\n",
" return data\n", " return data\n",
@ -1514,32 +1519,40 @@
" return client\n", " return client\n",
"\n", "\n",
"\n", "\n",
"def mongoWriteDict():\n", "def mongoWriteDict(dados, dbs, collection):\n",
" client = mongoConnect()\n", " client = mongoConnect()\n",
" db = client[\"EUROUSDtest\"]\n", " db = client[dbs]\n",
" collection = db[\"finance\"]\n", " collection = db[collection]\n",
" # data.reset_index(inplace=True)\n", " # data.reset_index(inplace=True)\n",
" data_dict = data.to_dict(\"records\")\n", " data_dict = dados.to_dict(\"records\")\n",
" return data_dict\n", " # return data_dict\n",
" collection.insert_many(data_dict)\n", " collection.insert_many(data_dict)\n",
" return 0" " return 0\n",
"\n",
"\n",
"def mongoRead():\n",
" df = pdm.read_mongo(\n",
" collection, [], \"mongodb://192.168.1.162:27017/{}\".format(dbname)\n",
" )\n",
" return df"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 127, "execution_count": null,
"id": "739de6aa-313f-4ccd-96c8-fa22d0cc687e", "id": "739de6aa-313f-4ccd-96c8-fa22d0cc687e",
"metadata": { "metadata": {
"tags": [] "tags": []
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"data = mongoLoadCsv(\"out.csv\")" "data = mongoLoadCsv(\"out.csv\")\n",
"data.head()"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 128, "execution_count": 51,
"id": "0af8f72c-5b58-4dfc-af36-c5b4bc79f127", "id": "0af8f72c-5b58-4dfc-af36-c5b4bc79f127",
"metadata": { "metadata": {
"tags": [] "tags": []
@ -1547,14 +1560,14 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"start = timeit.default_timer()\n", "start = timeit.default_timer()\n",
"dfCh = mongoWriteDict()\n", "dfCh = mongoWriteDict(data, dbname, \"finance\")\n",
"stop = timeit.default_timer()\n", "stop = timeit.default_timer()\n",
"mongo_write_execution_time = stop - start" "mongo_write_execution_time = stop - start"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 129, "execution_count": 52,
"id": "0757f14c-4677-41d3-90d8-63b884e24e7e", "id": "0757f14c-4677-41d3-90d8-63b884e24e7e",
"metadata": { "metadata": {
"tags": [] "tags": []
@ -1564,7 +1577,7 @@
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"6.021722518999013\n" "46.76343438199547\n"
] ]
} }
], ],
@ -1574,14 +1587,37 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 72,
"id": "81a4a33d-5914-45d8-af4e-2b0aabd2ac38", "id": "e7922312-74cb-4df3-8dea-e5ee0d99fab7",
"metadata": { "metadata": {
"tags": [] "tags": []
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"# read" "start = timeit.default_timer()\n",
"dfMongo = mongoRead()\n",
"stop = timeit.default_timer()\n",
"mongo_read_execution_time = stop - start"
]
},
{
"cell_type": "code",
"execution_count": 73,
"id": "93fb22ea-b283-4447-b774-fe755a782223",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"56.66832709600567\n"
]
}
],
"source": [
"print(mongo_read_execution_time)"
] ]
}, },
{ {
@ -1596,7 +1632,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 130, "execution_count": 80,
"id": "bbcdb883-d6dc-46db-88db-4c90b84522ba", "id": "bbcdb883-d6dc-46db-88db-4c90b84522ba",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -1609,7 +1645,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 131, "execution_count": 81,
"id": "35025a6e-9dc7-46cf-a792-76b3d84f1ac0", "id": "35025a6e-9dc7-46cf-a792-76b3d84f1ac0",
"metadata": { "metadata": {
"tags": [] "tags": []
@ -1619,8 +1655,8 @@
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"CPU times: user 3.66 ms, sys: 4 ms, total: 7.66 ms\n", "CPU times: user 13.6 ms, sys: 151 µs, total: 13.7 ms\n",
"Wall time: 7.27 ms\n" "Wall time: 12.7 ms\n"
] ]
} }
], ],
@ -1628,6 +1664,7 @@
"%%time\n", "%%time\n",
"conn = duckdb.connect()\n", "conn = duckdb.connect()\n",
"\n", "\n",
"\n",
"# in memory???\n", "# in memory???\n",
"def duckdbWrite():\n", "def duckdbWrite():\n",
" data = pd.read_csv(\"out.csv\")\n", " data = pd.read_csv(\"out.csv\")\n",
@ -1636,24 +1673,75 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 82,
"id": "c6abdaaa-3ac2-425b-9208-d6cb79afe966", "id": "c6abdaaa-3ac2-425b-9208-d6cb79afe966",
"metadata": { "metadata": {
"tags": [] "tags": []
}, },
"outputs": [], "outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: [name]\n",
"Index: []"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [ "source": [
"display(conn.execute(\"SHOW TABLES\").df())" "display(conn.execute(\"SHOW TABLES\").df())"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 76,
"id": "2acce0f3-f0b2-47d0-8e0d-f9e9687efc18", "id": "2acce0f3-f0b2-47d0-8e0d-f9e9687efc18",
"metadata": { "metadata": {
"tags": [] "tags": []
}, },
"outputs": [], "outputs": [
{
"ename": "CatalogException",
"evalue": "Catalog Error: Table with name EURUSDtest does not exist!\nDid you mean \"temp.information_schema.tables\"?\nLINE 1: SELECT * FROM EURUSDtest\n ^",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mCatalogException\u001b[0m Traceback (most recent call last)",
"File \u001b[0;32m<timed exec>:1\u001b[0m\n",
"\u001b[0;31mCatalogException\u001b[0m: Catalog Error: Table with name EURUSDtest does not exist!\nDid you mean \"temp.information_schema.tables\"?\nLINE 1: SELECT * FROM EURUSDtest\n ^"
]
}
],
"source": [ "source": [
"%%time\n", "%%time\n",
"df = conn.execute(\"SELECT * FROM EURUSDtest\").df()\n", "df = conn.execute(\"SELECT * FROM EURUSDtest\").df()\n",

Loading…
Cancel
Save