diff --git a/compareDBs.ipynb b/compareDBs.ipynb
index 7082ef4..64419cd 100644
--- a/compareDBs.ipynb
+++ b/compareDBs.ipynb
@@ -47,7 +47,7 @@
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": 1,
"id": "ab6c6c81-6ac1-4668-a79b-a9a0341fb35a",
"metadata": {
"tags": []
@@ -59,17 +59,14 @@
"False"
]
},
- "execution_count": 12,
+ "execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import configparser\n",
- "\n",
- "# import pymongo\n",
"import io\n",
- "import time\n",
"import timeit\n",
"from datetime import datetime\n",
"\n",
@@ -138,7 +135,7 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": 5,
"id": "3634a4ec-04c2-4f1e-8659-5d22eb17a254",
"metadata": {},
"outputs": [
@@ -176,86 +173,86 @@
" \n",
"
\n",
" \n",
- " | 0 | \n",
- " 7730801 | \n",
- " 2023-01-02 15:58:45 | \n",
- " 1672675140000000000 | \n",
- " 2023-01-02 15:59:00 | \n",
- " 1.065995 | \n",
- " 1.066035 | \n",
- " 1.065930 | \n",
- " 1.066070 | \n",
- " 57 | \n",
+ " 999995 | \n",
+ " 7984748 | \n",
+ " 2023-03-03 18:13:30 | \n",
+ " 1677867225000000000 | \n",
+ " 2023-03-03 18:13:45 | \n",
+ " 1.062695 | \n",
+ " 1.062635 | \n",
+ " 1.062630 | \n",
+ " 1.062700 | \n",
+ " 64 | \n",
"
\n",
" \n",
- " | 1 | \n",
- " 7730802 | \n",
- " 2023-01-02 15:59:00 | \n",
- " 1672675155000000000 | \n",
- " 2023-01-02 15:59:15 | \n",
- " 1.066055 | \n",
- " 1.066085 | \n",
- " 1.066005 | \n",
- " 1.066115 | \n",
- " 52 | \n",
+ " 999996 | \n",
+ " 7984749 | \n",
+ " 2023-03-03 18:13:45 | \n",
+ " 1677867240000000000 | \n",
+ " 2023-03-03 18:14:00 | \n",
+ " 1.062645 | \n",
+ " 1.062650 | \n",
+ " 1.062625 | \n",
+ " 1.062650 | \n",
+ " 43 | \n",
"
\n",
" \n",
- " | 2 | \n",
- " 7730803 | \n",
- " 2023-01-02 15:59:15 | \n",
- " 1672675170000000000 | \n",
- " 2023-01-02 15:59:30 | \n",
- " 1.066080 | \n",
- " 1.066025 | \n",
- " 1.066025 | \n",
- " 1.066110 | \n",
- " 57 | \n",
+ " 999997 | \n",
+ " 7984750 | \n",
+ " 2023-03-03 18:14:00 | \n",
+ " 1677867255000000000 | \n",
+ " 2023-03-03 18:14:15 | \n",
+ " 1.062640 | \n",
+ " 1.062625 | \n",
+ " 1.062620 | \n",
+ " 1.062665 | \n",
+ " 47 | \n",
"
\n",
" \n",
- " | 3 | \n",
- " 7730804 | \n",
- " 2023-01-02 15:59:30 | \n",
- " 1672675185000000000 | \n",
- " 2023-01-02 15:59:45 | \n",
- " 1.065980 | \n",
- " 1.065985 | \n",
- " 1.065885 | \n",
- " 1.066045 | \n",
- " 64 | \n",
+ " 999998 | \n",
+ " 7984751 | \n",
+ " 2023-03-03 18:14:15 | \n",
+ " 1677867270000000000 | \n",
+ " 2023-03-03 18:14:30 | \n",
+ " 1.062625 | \n",
+ " 1.062535 | \n",
+ " 1.062535 | \n",
+ " 1.062645 | \n",
+ " 43 | \n",
"
\n",
" \n",
- " | 4 | \n",
- " 7730805 | \n",
- " 2023-01-02 15:59:45 | \n",
- " 1672675200000000000 | \n",
- " 2023-01-02 16:00:00 | \n",
- " 1.065975 | \n",
- " 1.066055 | \n",
- " 1.065830 | \n",
- " 1.066055 | \n",
- " 50 | \n",
+ " 999999 | \n",
+ " 7984752 | \n",
+ " 2023-03-03 18:14:30 | \n",
+ " 1677867285000000000 | \n",
+ " 2023-03-03 18:14:45 | \n",
+ " 1.062535 | \n",
+ " 1.062520 | \n",
+ " 1.062520 | \n",
+ " 1.062580 | \n",
+ " 59 | \n",
"
\n",
" \n",
"\n",
""
],
"text/plain": [
- " id from at to \\\n",
- "0 7730801 2023-01-02 15:58:45 1672675140000000000 2023-01-02 15:59:00 \n",
- "1 7730802 2023-01-02 15:59:00 1672675155000000000 2023-01-02 15:59:15 \n",
- "2 7730803 2023-01-02 15:59:15 1672675170000000000 2023-01-02 15:59:30 \n",
- "3 7730804 2023-01-02 15:59:30 1672675185000000000 2023-01-02 15:59:45 \n",
- "4 7730805 2023-01-02 15:59:45 1672675200000000000 2023-01-02 16:00:00 \n",
+ " id from at \\\n",
+ "999995 7984748 2023-03-03 18:13:30 1677867225000000000 \n",
+ "999996 7984749 2023-03-03 18:13:45 1677867240000000000 \n",
+ "999997 7984750 2023-03-03 18:14:00 1677867255000000000 \n",
+ "999998 7984751 2023-03-03 18:14:15 1677867270000000000 \n",
+ "999999 7984752 2023-03-03 18:14:30 1677867285000000000 \n",
"\n",
- " open close min max volume \n",
- "0 1.065995 1.066035 1.065930 1.066070 57 \n",
- "1 1.066055 1.066085 1.066005 1.066115 52 \n",
- "2 1.066080 1.066025 1.066025 1.066110 57 \n",
- "3 1.065980 1.065985 1.065885 1.066045 64 \n",
- "4 1.065975 1.066055 1.065830 1.066055 50 "
+ " to open close min max volume \n",
+ "999995 2023-03-03 18:13:45 1.062695 1.062635 1.062630 1.062700 64 \n",
+ "999996 2023-03-03 18:14:00 1.062645 1.062650 1.062625 1.062650 43 \n",
+ "999997 2023-03-03 18:14:15 1.062640 1.062625 1.062620 1.062665 47 \n",
+ "999998 2023-03-03 18:14:30 1.062625 1.062535 1.062535 1.062645 43 \n",
+ "999999 2023-03-03 18:14:45 1.062535 1.062520 1.062520 1.062580 59 "
]
},
- "execution_count": 4,
+ "execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
@@ -264,7 +261,7 @@
"# %%time\n",
"# Load Dataset\n",
"df = pd.read_csv(\"out.csv\", index_col=0)\n",
- "df.head()"
+ "df.tail()"
]
},
{
@@ -294,7 +291,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 6,
"id": "27de1ec8-4de1-440a-b555-b4a46c5ef7ce",
"metadata": {},
"outputs": [],
@@ -308,6 +305,7 @@
"cell_type": "markdown",
"id": "4a8d5703-9bc9-4d38-83ff-457159304d58",
"metadata": {
+ "jp-MarkdownHeadingCollapsed": true,
"tags": []
},
"source": [
@@ -316,7 +314,7 @@
},
{
"cell_type": "code",
- "execution_count": 22,
+ "execution_count": 9,
"id": "c3202bbb-2655-45b2-b166-9f45a3ef854c",
"metadata": {
"tags": []
@@ -328,7 +326,7 @@
"'Database created'"
]
},
- "execution_count": 22,
+ "execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
@@ -697,6 +695,7 @@
"cell_type": "markdown",
"id": "b9ddfdc6-c899-4f6c-9b4e-8ec6ab6d7e05",
"metadata": {
+ "jp-MarkdownHeadingCollapsed": true,
"tags": []
},
"source": [
@@ -735,14 +734,6 @@
"# testar função"
]
},
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "e173a45b-60a1-4c33-946e-ccf98bf8e97f",
- "metadata": {},
- "outputs": [],
- "source": []
- },
{
"cell_type": "code",
"execution_count": 18,
@@ -811,65 +802,6 @@
"psql_read_execution_time = stop - start"
]
},
- {
- "cell_type": "code",
- "execution_count": 10,
- "id": "a7883c4d-4609-4380-8a45-246b7ca2f9c5",
- "metadata": {
- "tags": []
- },
- "outputs": [
- {
- "ename": "NameError",
- "evalue": "name 'engine' is not defined",
- "output_type": "error",
- "traceback": [
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
- "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
- "File \u001b[0;32m:2\u001b[0m\n",
- "\u001b[0;31mNameError\u001b[0m: name 'engine' is not defined"
- ]
- }
- ],
- "source": [
- "# %%time\n",
- "# # Write\n",
- "# conn = engine.raw_connection()\n",
- "# cur = conn.cursor()\n",
- "# output = io.StringIO()\n",
- "# df.to_csv(output, sep=\"\\t\", header=False, index=False)\n",
- "# output.seek(0)\n",
- "# contents = output.getvalue()\n",
- "\n",
- "# cur.copy_from(output, \"comparedbs\") # , null=\"\") # null values become ''\n",
- "# conn.commit()\n",
- "# cur.close()\n",
- "# conn.close()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "73de4294-1284-49b0-b31e-45db6e835877",
- "metadata": {},
- "outputs": [],
- "source": []
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "e37a93e1-fc0e-4d27-9e16-dca6c8aea324",
- "metadata": {},
- "outputs": [],
- "source": [
- "start = time.time()\n",
- "# %%time\n",
- "# Read\n",
- "df = pd.read_sql_query('select * from \"comparedbs\"', con=engine)\n",
- "end = time.time()\n",
- "postgresql_read_time = exec_time(start, end)"
- ]
- },
{
"cell_type": "code",
"execution_count": null,
@@ -891,7 +823,7 @@
},
"outputs": [],
"source": [
- "df.head()"
+ "# df.head()"
]
},
{
@@ -906,8 +838,8 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "id": "60a990e2-4607-4654-84ec-17d4985adae2",
+ "execution_count": 10,
+ "id": "7c7022bf-9c3b-400a-9045-b089483f05ad",
"metadata": {
"tags": []
},
@@ -915,32 +847,63 @@
"source": [
"# fazer sem funçao para ver se melhora\n",
"# verifique se esta no ssd os arquivos da pasta git\n",
- "def main():\n",
+ "def s3Connect():\n",
" client = Minio(\n",
" S3MinioUrl,\n",
" secure=False,\n",
" region=S3MinioRegion,\n",
- " access_key=\"MatMPA7NyHltz7DQ\",\n",
- " secret_key=\"SO1IG5iBPSjNPZanYUaHCLcoSbjphLCP\",\n",
+ " access_key=S3MinioUser,\n",
+ " secret_key=S3MinioKey,\n",
" )\n",
+ " return client\n",
+ "\n",
"\n",
- " # Make bucket if not exist.\n",
- " found = client.bucket_exists(\"data\")\n",
+ "def s3CreateBucket(bucketName=\"data\"):\n",
+ " client = s3Connect()\n",
+ " found = client.bucket_exists(bucketName)\n",
" if not found:\n",
- " client.make_bucket(\"data\")\n",
+ " return client.make_bucket(bucketName)\n",
" else:\n",
- " print(\"Bucket 'data' already exists\")\n",
+ " return \"Bucket '{}' already exists\".format(bucketName)\n",
+ "\n",
"\n",
- " # Upload\n",
+ "def s3uploadCsv():\n",
+ " client = s3Connect()\n",
" client.fput_object(\n",
" \"data\",\n",
" \"data.parquet\",\n",
" \"data/data.parquet\",\n",
" )\n",
- " # print(\n",
- " # \"'data/data.parquet' is successfully uploaded as \"\n",
- " # \"object 'data.parquet' to bucket 'data'.\"\n",
- " # )"
+ " return (\n",
+ " \"'data/data.parquet' is successfully uploaded as \"\n",
+ " \"object 'data.parquet' to bucket 'data'.\"\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "cd7fe012-9eee-4f91-8c07-8e0148633766",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "def main():\n",
+ " # Insert to db and benchmark time\n",
+ " df.to_parquet(\"data/data.parquet\")\n",
+ " s3CreateBucket()\n",
+ " start = timeit.default_timer()\n",
+ " s3uploadCsv()\n",
+ " stop = timeit.default_timer()\n",
+ " s3_write_execution_time = stop - start\n",
+ "\n",
+ "\n",
+ "if __name__ == \"__main__\":\n",
+ " try:\n",
+ " main()\n",
+ " except S3Error as exc:\n",
+ " print(\"error occurred.\", exc)"
]
},
{
@@ -1136,8 +1099,6 @@
"metadata": {},
"outputs": [],
"source": [
- "import numpy as np\n",
- "\n",
"np.bool = np.bool_\n",
"from qpython import qconnection"
]