@ -47,7 +47,7 @@
},
{
"cell_type": "code",
"execution_count": 34 ,
"execution_count": 1 ,
"id": "ab6c6c81-6ac1-4668-a79b-a9a0341fb35a",
"metadata": {
"tags": []
@ -59,7 +59,7 @@
"False"
]
},
"execution_count": 34 ,
"execution_count": 1 ,
"metadata": {},
"output_type": "execute_result"
}
@ -74,11 +74,13 @@
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import pandas as pd\n",
"import pdmongo as pdm\n",
"from clickhouse_driver import Client\n",
"from dotenv import load_dotenv\n",
"from influxdb_client import InfluxDBClient\n",
"from influxdb_client.client.write_api import SYNCHRONOUS\n",
"from minio import Minio\n",
"from monary import Monary\n",
"from pymongo import MongoClient\n",
"from pytz import timezone\n",
"from sqlalchemy import create_engine\n",
@ -88,7 +90,7 @@
},
{
"cell_type": "code",
"execution_count": 2 ,
"execution_count": 55 ,
"id": "55c3cd57-0996-4723-beb5-8f3196c96009",
"metadata": {
"tags": []
@ -96,12 +98,13 @@
"outputs": [],
"source": [
"# Variables\n",
"dbname = \"EURUSDtest\""
"dbname = \"EURUSDtest\"\n",
"collection = \"finance\""
]
},
{
"cell_type": "code",
"execution_count": 94 ,
"execution_count": 6 ,
"id": "968403e3-2e5e-4834-b969-be4600e2963a",
"metadata": {
"tags": []
@ -136,7 +139,7 @@
},
{
"cell_type": "code",
"execution_count": 4 ,
"execution_count": 2 ,
"id": "3634a4ec-04c2-4f1e-8659-5d22eb17a254",
"metadata": {},
"outputs": [
@ -253,7 +256,7 @@
"999999 2023-03-03 18:14:45 1.062535 1.062520 1.062520 1.062580 59 "
]
},
"execution_count": 4 ,
"execution_count": 2 ,
"metadata": {},
"output_type": "execute_result"
}
@ -306,6 +309,7 @@
"cell_type": "markdown",
"id": "4a8d5703-9bc9-4d38-83ff-457159304d58",
"metadata": {
"jp-MarkdownHeadingCollapsed": true,
"tags": []
},
"source": [
@ -368,9 +372,7 @@
"\n",
"def cHouseQueryDf(databaseName):\n",
"    \"\"\"Return the full contents of ``default.<databaseName>`` as a DataFrame.\n",
"\n",
"    Args:\n",
"        databaseName: Table name inside the ``default`` database. It is\n",
"            interpolated into the SQL text unescaped, so only pass trusted,\n",
"            hard-coded names.\n",
"\n",
"    Returns:\n",
"        Whatever ``client.query_dataframe`` yields for the ``SELECT *`` query.\n",
"    \"\"\"\n",
"    client = cHouseConnect()\n",
"    try:\n",
"        # Run the query exactly once; the finally clause releases the\n",
"        # connection even when the query raises.\n",
"        return client.query_dataframe(\n",
"            \"SELECT * FROM default.{}\".format(databaseName)\n",
"        )\n",
"    finally:\n",
"        client.disconnect()\n",
"\n",
@ -1492,13 +1494,16 @@
},
{
"cell_type": "code",
"execution_count": 123 ,
"execution_count": 7 1,
"id": "d104d9af-fa34-4261-8478-329a28ee4f2e",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"port = \"27017\"\n",
"\n",
"\n",
"def mongoLoadCsv(csvfile):\n",
" data = pd.read_csv(\"out.csv\")\n",
" return data\n",
@ -1514,32 +1519,40 @@
" return client\n",
"\n",
"\n",
"def mongoWriteDict(dados, dbs, collection):\n",
"    \"\"\"Insert every row of ``dados`` into a MongoDB collection.\n",
"\n",
"    Args:\n",
"        dados: Object exposing ``to_dict(\"records\")`` (the notebook passes a\n",
"            pandas DataFrame); each record becomes one inserted document.\n",
"        dbs: Name of the target database.\n",
"        collection: Name of the target collection inside ``dbs``.\n",
"\n",
"    Returns:\n",
"        Always ``0``.\n",
"    \"\"\"\n",
"    client = mongoConnect()\n",
"    try:\n",
"        records = dados.to_dict(\"records\")\n",
"        # insert_many rejects an empty document list, so skip the call when\n",
"        # there is nothing to write.\n",
"        if records:\n",
"            client[dbs][collection].insert_many(records)\n",
"    finally:\n",
"        # NOTE(review): assumes mongoConnect() returns a client owned by this\n",
"        # call, mirroring the disconnect done in cHouseQueryDf - confirm.\n",
"        client.close()\n",
"    return 0\n",
"\n",
"\n",
"def mongoRead(collection_name=None, db_name=None):\n",
"    \"\"\"Load a MongoDB collection into a DataFrame via ``pdm.read_mongo``.\n",
"\n",
"    Args:\n",
"        collection_name: Collection to read. Defaults to the notebook-level\n",
"            ``collection`` variable.\n",
"        db_name: Database to read from. Defaults to the notebook-level\n",
"            ``dbname`` variable.\n",
"\n",
"    Returns:\n",
"        The DataFrame returned by ``pdm.read_mongo`` when called with an empty\n",
"        query list (``[]``).\n",
"    \"\"\"\n",
"    # Fall back to the notebook globals so existing ``mongoRead()`` calls keep\n",
"    # working unchanged.\n",
"    if collection_name is None:\n",
"        collection_name = collection\n",
"    if db_name is None:\n",
"        db_name = dbname\n",
"    uri = \"mongodb://192.168.1.162:27017/{}\".format(db_name)\n",
"    return pdm.read_mongo(collection_name, [], uri)"
]
},
{
"cell_type": "code",
"execution_count": 127,
"execution_count": null ,
"id": "739de6aa-313f-4ccd-96c8-fa22d0cc687e",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"data = mongoLoadCsv(\"out.csv\")"
"data = mongoLoadCsv(\"out.csv\")\n",
"data.head()"
]
},
{
"cell_type": "code",
"execution_count": 128 ,
"execution_count": 5 1,
"id": "0af8f72c-5b58-4dfc-af36-c5b4bc79f127",
"metadata": {
"tags": []
@ -1547,14 +1560,14 @@
"outputs": [],
"source": [
"start = timeit.default_timer()\n",
"dfCh = mongoWriteDict()\n",
"dfCh = mongoWriteDict(data, dbname, \"finance\" )\n",
"stop = timeit.default_timer()\n",
"mongo_write_execution_time = stop - start"
]
},
{
"cell_type": "code",
"execution_count": 129 ,
"execution_count": 52 ,
"id": "0757f14c-4677-41d3-90d8-63b884e24e7e",
"metadata": {
"tags": []
@ -1564,7 +1577,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"6.021722518999013 \n"
"46.76343438199547 \n"
]
}
],
@ -1574,14 +1587,37 @@
},
{
"cell_type": "code",
"execution_count": null ,
"id": "81a4a33d-5914-45d8-af4e-2b0aabd2ac38 ",
"execution_count": 72 ,
"id": "e7922312-74cb-4df3-8dea-e5ee0d99fab7 ",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# read"
"start = timeit.default_timer()\n",
"dfMongo = mongoRead()\n",
"stop = timeit.default_timer()\n",
"mongo_read_execution_time = stop - start"
]
},
{
"cell_type": "code",
"execution_count": 73,
"id": "93fb22ea-b283-4447-b774-fe755a782223",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"56.66832709600567\n"
]
}
],
"source": [
"print(mongo_read_execution_time)"
]
},
{
@ -1596,7 +1632,7 @@
},
{
"cell_type": "code",
"execution_count": 13 0,
"execution_count": 8 0,
"id": "bbcdb883-d6dc-46db-88db-4c90b84522ba",
"metadata": {},
"outputs": [],
@ -1609,7 +1645,7 @@
},
{
"cell_type": "code",
"execution_count": 13 1,
"execution_count": 8 1,
"id": "35025a6e-9dc7-46cf-a792-76b3d84f1ac0",
"metadata": {
"tags": []
@ -1619,8 +1655,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 3.66 ms, sys: 4 ms, total: 7.66 ms\n",
"Wall time: 7.2 7 ms\n"
"CPU times: user 13.6 ms, sys: 151 µs, total: 13.7 ms\n",
"Wall time: 12. 7 ms\n"
]
}
],
@ -1628,6 +1664,7 @@
"%%time\n",
"conn = duckdb.connect()\n",
"\n",
"\n",
"# in memory???\n",
"def duckdbWrite():\n",
" data = pd.read_csv(\"out.csv\")\n",
@ -1636,24 +1673,75 @@
},
{
"cell_type": "code",
"execution_count": null ,
"execution_count": 82 ,
"id": "c6abdaaa-3ac2-425b-9208-d6cb79afe966",
"metadata": {
"tags": []
},
"outputs": [],
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: [name]\n",
"Index: []"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"display(conn.execute(\"SHOW TABLES\").df())"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 76 ,
"id": "2acce0f3-f0b2-47d0-8e0d-f9e9687efc18",
"metadata": {
"tags": []
},
"outputs": [],
"outputs": [
{
"ename": "CatalogException",
"evalue": "Catalog Error: Table with name EURUSDtest does not exist!\nDid you mean \"temp.information_schema.tables\"?\nLINE 1: SELECT * FROM EURUSDtest\n ^",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mCatalogException\u001b[0m Traceback (most recent call last)",
"File \u001b[0;32m<timed exec>:1\u001b[0m\n",
"\u001b[0;31mCatalogException\u001b[0m: Catalog Error: Table with name EURUSDtest does not exist!\nDid you mean \"temp.information_schema.tables\"?\nLINE 1: SELECT * FROM EURUSDtest\n ^"
]
}
],
"source": [
"%%time\n",
"df = conn.execute(\"SELECT * FROM EURUSDtest\").df()\n",